/*
 * Linux driver for VMware's vmxnet3 ethernet NIC.
 *
 * Copyright (C) 2008-2009, VMware, Inc. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; version 2 of the License and no later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT. See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * The full GNU General Public License is included in this distribution in
 * the file called "COPYING".
 *
 * Maintained by: Shreyas Bhatewara <pv-drivers@vmware.com>
 *
 */

#include <linux/module.h>
#include <net/ip6_checksum.h>

#include "vmxnet3_int.h"

char vmxnet3_driver_name[] = "vmxnet3";
#define VMXNET3_DRIVER_DESC "VMware vmxnet3 virtual NIC driver"

/*
 * PCI Device ID Table
 * Last entry must be all 0s
 */
static DEFINE_PCI_DEVICE_TABLE(vmxnet3_pciid_table) = {
        {PCI_VDEVICE(VMWARE, PCI_DEVICE_ID_VMWARE_VMXNET3)},
        {0}
};

MODULE_DEVICE_TABLE(pci, vmxnet3_pciid_table);

static int enable_mq = 1;

static void
vmxnet3_write_mac_addr(struct vmxnet3_adapter *adapter, u8 *mac);

/*
 *    Enable/Disable the given intr. Each intr owns one IMR register in
 *    BAR0, spaced 8 bytes apart; writing 0 unmasks the intr, writing 1
 *    masks it.
 */
static void
vmxnet3_enable_intr(struct vmxnet3_adapter *adapter, unsigned intr_idx)
{
        VMXNET3_WRITE_BAR0_REG(adapter, VMXNET3_REG_IMR + intr_idx * 8, 0);
}


static void
vmxnet3_disable_intr(struct vmxnet3_adapter *adapter, unsigned intr_idx)
{
        VMXNET3_WRITE_BAR0_REG(adapter, VMXNET3_REG_IMR + intr_idx * 8, 1);
}


/*
 *    Enable/Disable all intrs used by the device
 */
static void
vmxnet3_enable_all_intrs(struct vmxnet3_adapter *adapter)
{
        int i;

        for (i = 0; i < adapter->intr.num_intrs; i++)
                vmxnet3_enable_intr(adapter, i);
        adapter->shared->devRead.intrConf.intrCtrl &=
                                        cpu_to_le32(~VMXNET3_IC_DISABLE_ALL);
}


static void
vmxnet3_disable_all_intrs(struct vmxnet3_adapter *adapter)
{
        int i;

        adapter->shared->devRead.intrConf.intrCtrl |=
                                        cpu_to_le32(VMXNET3_IC_DISABLE_ALL);
        for (i = 0; i < adapter->intr.num_intrs; i++)
                vmxnet3_disable_intr(adapter, i);
}


static void
vmxnet3_ack_events(struct vmxnet3_adapter *adapter, u32 events)
{
        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_ECR, events);
}


static bool
vmxnet3_tq_stopped(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
{
        return tq->stopped;
}


static void
vmxnet3_tq_start(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
{
        tq->stopped = false;
        netif_start_subqueue(adapter->netdev, tq - adapter->tx_queue);
}


static void
vmxnet3_tq_wake(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
{
        tq->stopped = false;
        netif_wake_subqueue(adapter->netdev, (tq - adapter->tx_queue));
}


static void
vmxnet3_tq_stop(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
{
        tq->stopped = true;
        tq->num_stop++;
        netif_stop_subqueue(adapter->netdev, (tq - adapter->tx_queue));
}


/*
 * Check the link state. This may start or stop the tx queue.
 */
static void
vmxnet3_check_link(struct vmxnet3_adapter *adapter, bool affectTxQueue)
{
        u32 ret;
        int i;
        unsigned long flags;

        spin_lock_irqsave(&adapter->cmd_lock, flags);
        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_GET_LINK);
        ret = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD);
        spin_unlock_irqrestore(&adapter->cmd_lock, flags);

        adapter->link_speed = ret >> 16;
        if (ret & 1) { /* Link is up. */
                netdev_info(adapter->netdev, "NIC Link is Up %d Mbps\n",
                            adapter->link_speed);
                netif_carrier_on(adapter->netdev);

                if (affectTxQueue) {
                        for (i = 0; i < adapter->num_tx_queues; i++)
                                vmxnet3_tq_start(&adapter->tx_queue[i],
                                                 adapter);
                }
        } else {
                netdev_info(adapter->netdev, "NIC Link is Down\n");
                netif_carrier_off(adapter->netdev);

                if (affectTxQueue) {
                        for (i = 0; i < adapter->num_tx_queues; i++)
                                vmxnet3_tq_stop(&adapter->tx_queue[i], adapter);
                }
        }
}

static void
vmxnet3_process_events(struct vmxnet3_adapter *adapter)
{
        int i;
        unsigned long flags;
        u32 events = le32_to_cpu(adapter->shared->ecr);
        if (!events)
                return;

        vmxnet3_ack_events(adapter, events);

        /* Check if link state has changed */
        if (events & VMXNET3_ECR_LINK)
                vmxnet3_check_link(adapter, true);

        /* Check if there is an error on xmit/recv queues */
        if (events & (VMXNET3_ECR_TQERR | VMXNET3_ECR_RQERR)) {
                spin_lock_irqsave(&adapter->cmd_lock, flags);
                VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
                                       VMXNET3_CMD_GET_QUEUE_STATUS);
                spin_unlock_irqrestore(&adapter->cmd_lock, flags);

                for (i = 0; i < adapter->num_tx_queues; i++)
                        if (adapter->tqd_start[i].status.stopped)
                                dev_err(&adapter->netdev->dev,
                                        "%s: tq[%d] error 0x%x\n",
                                        adapter->netdev->name, i, le32_to_cpu(
                                        adapter->tqd_start[i].status.error));
                for (i = 0; i < adapter->num_rx_queues; i++)
                        if (adapter->rqd_start[i].status.stopped)
                                dev_err(&adapter->netdev->dev,
                                        "%s: rq[%d] error 0x%x\n",
                                        adapter->netdev->name, i,
                                        adapter->rqd_start[i].status.error);

                schedule_work(&adapter->work);
        }
}

#ifdef __BIG_ENDIAN_BITFIELD
/*
 * The device expects the bitfields in shared structures to be written in
 * little endian. When the CPU is big endian, the following routines are
 * used to correctly read from and write to the shared ABI.
 * The general technique used here is: double-word bitfields are defined
 * in the opposite order for big endian architectures. Before the driver
 * reads them, the complete double word is translated using le32_to_cpu.
 * Similarly, after the driver writes into the bitfields, cpu_to_le32 is
 * used to translate the double words into the required format.
 * To avoid touching bits in the shared structures more than once,
 * temporary descriptors are used. These are passed as srcDesc to the
 * functions below.
 */
static void vmxnet3_RxDescToCPU(const struct Vmxnet3_RxDesc *srcDesc,
                                struct Vmxnet3_RxDesc *dstDesc)
{
        u32 *src = (u32 *)srcDesc + 2;
        u32 *dst = (u32 *)dstDesc + 2;
        dstDesc->addr = le64_to_cpu(srcDesc->addr);
        *dst = le32_to_cpu(*src);
        dstDesc->ext1 = le32_to_cpu(srcDesc->ext1);
}

static void vmxnet3_TxDescToLe(const struct Vmxnet3_TxDesc *srcDesc,
                               struct Vmxnet3_TxDesc *dstDesc)
{
        int i;
        u32 *src = (u32 *)(srcDesc + 1);
        u32 *dst = (u32 *)(dstDesc + 1);

        /* Working backwards so that the gen bit is set at the end. */
        for (i = 2; i > 0; i--) {
                src--;
                dst--;
                *dst = cpu_to_le32(*src);
        }
}


static void vmxnet3_RxCompToCPU(const struct Vmxnet3_RxCompDesc *srcDesc,
                                struct Vmxnet3_RxCompDesc *dstDesc)
{
        int i = 0;
        u32 *src = (u32 *)srcDesc;
        u32 *dst = (u32 *)dstDesc;
        for (i = 0; i < sizeof(struct Vmxnet3_RxCompDesc) / sizeof(u32); i++) {
                *dst = le32_to_cpu(*src);
                src++;
                dst++;
        }
}


/* Used to read bitfield values from double words. */
static u32 get_bitfield32(const __le32 *bitfield, u32 pos, u32 size)
{
        u32 temp = le32_to_cpu(*bitfield);
        u32 mask = ((1 << size) - 1) << pos;
        temp &= mask;
        temp >>= pos;
        return temp;
}



#endif  /* __BIG_ENDIAN_BITFIELD */

#ifdef __BIG_ENDIAN_BITFIELD

#   define VMXNET3_TXDESC_GET_GEN(txdesc) get_bitfield32(((const __le32 *) \
                        txdesc) + VMXNET3_TXD_GEN_DWORD_SHIFT, \
                        VMXNET3_TXD_GEN_SHIFT, VMXNET3_TXD_GEN_SIZE)
#   define VMXNET3_TXDESC_GET_EOP(txdesc) get_bitfield32(((const __le32 *) \
                        txdesc) + VMXNET3_TXD_EOP_DWORD_SHIFT, \
                        VMXNET3_TXD_EOP_SHIFT, VMXNET3_TXD_EOP_SIZE)
#   define VMXNET3_TCD_GET_GEN(tcd) get_bitfield32(((const __le32 *)tcd) + \
                        VMXNET3_TCD_GEN_DWORD_SHIFT, VMXNET3_TCD_GEN_SHIFT, \
                        VMXNET3_TCD_GEN_SIZE)
#   define VMXNET3_TCD_GET_TXIDX(tcd) get_bitfield32((const __le32 *)tcd, \
                        VMXNET3_TCD_TXIDX_SHIFT, VMXNET3_TCD_TXIDX_SIZE)
#   define vmxnet3_getRxComp(dstrcd, rcd, tmp) do { \
                        (dstrcd) = (tmp); \
                        vmxnet3_RxCompToCPU((rcd), (tmp)); \
                } while (0)
#   define vmxnet3_getRxDesc(dstrxd, rxd, tmp) do { \
                        (dstrxd) = (tmp); \
                        vmxnet3_RxDescToCPU((rxd), (tmp)); \
                } while (0)

#else

#   define VMXNET3_TXDESC_GET_GEN(txdesc) ((txdesc)->gen)
#   define VMXNET3_TXDESC_GET_EOP(txdesc) ((txdesc)->eop)
#   define VMXNET3_TCD_GET_GEN(tcd) ((tcd)->gen)
#   define VMXNET3_TCD_GET_TXIDX(tcd) ((tcd)->txdIdx)
#   define vmxnet3_getRxComp(dstrcd, rcd, tmp) (dstrcd) = (rcd)
#   define vmxnet3_getRxDesc(dstrxd, rxd, tmp) (dstrxd) = (rxd)

#endif /* __BIG_ENDIAN_BITFIELD  */


static void
vmxnet3_unmap_tx_buf(struct vmxnet3_tx_buf_info *tbi,
                     struct pci_dev *pdev)
{
        if (tbi->map_type == VMXNET3_MAP_SINGLE)
                pci_unmap_single(pdev, tbi->dma_addr, tbi->len,
                                 PCI_DMA_TODEVICE);
        else if (tbi->map_type == VMXNET3_MAP_PAGE)
                pci_unmap_page(pdev, tbi->dma_addr, tbi->len,
                               PCI_DMA_TODEVICE);
        else
                BUG_ON(tbi->map_type != VMXNET3_MAP_NONE);

        tbi->map_type = VMXNET3_MAP_NONE; /* to help debugging */
}


static int
vmxnet3_unmap_pkt(u32 eop_idx, struct vmxnet3_tx_queue *tq,
                  struct pci_dev *pdev, struct vmxnet3_adapter *adapter)
{
        struct sk_buff *skb;
        int entries = 0;

        /* no out of order completion */
        BUG_ON(tq->buf_info[eop_idx].sop_idx != tq->tx_ring.next2comp);
        BUG_ON(VMXNET3_TXDESC_GET_EOP(&(tq->tx_ring.base[eop_idx].txd)) != 1);

        skb = tq->buf_info[eop_idx].skb;
        BUG_ON(skb == NULL);
        tq->buf_info[eop_idx].skb = NULL;

        VMXNET3_INC_RING_IDX_ONLY(eop_idx, tq->tx_ring.size);

        while (tq->tx_ring.next2comp != eop_idx) {
                vmxnet3_unmap_tx_buf(tq->buf_info + tq->tx_ring.next2comp,
                                     pdev);

                /* update next2comp w/o tx_lock. Since we are marking more,
                 * not fewer, tx ring entries as available, the worst case is
                 * that the tx routine incorrectly re-queues a pkt due to
                 * insufficient tx ring entries.
                 */
                vmxnet3_cmd_ring_adv_next2comp(&tq->tx_ring);
                entries++;
        }

        dev_kfree_skb_any(skb);
        return entries;
}


static int
vmxnet3_tq_tx_complete(struct vmxnet3_tx_queue *tq,
                        struct vmxnet3_adapter *adapter)
{
        int completed = 0;
        union Vmxnet3_GenericDesc *gdesc;

        gdesc = tq->comp_ring.base + tq->comp_ring.next2proc;
        while (VMXNET3_TCD_GET_GEN(&gdesc->tcd) == tq->comp_ring.gen) {
                completed += vmxnet3_unmap_pkt(VMXNET3_TCD_GET_TXIDX(
                                               &gdesc->tcd), tq, adapter->pdev,
                                               adapter);

                vmxnet3_comp_ring_adv_next2proc(&tq->comp_ring);
                gdesc = tq->comp_ring.base + tq->comp_ring.next2proc;
        }

        if (completed) {
                spin_lock(&tq->tx_lock);
                if (unlikely(vmxnet3_tq_stopped(tq, adapter) &&
                             vmxnet3_cmd_ring_desc_avail(&tq->tx_ring) >
                             VMXNET3_WAKE_QUEUE_THRESHOLD(tq) &&
                             netif_carrier_ok(adapter->netdev))) {
                        vmxnet3_tq_wake(tq, adapter);
                }
                spin_unlock(&tq->tx_lock);
        }
        return completed;
}


static void
vmxnet3_tq_cleanup(struct vmxnet3_tx_queue *tq,
                   struct vmxnet3_adapter *adapter)
{
        int i;

        while (tq->tx_ring.next2comp != tq->tx_ring.next2fill) {
                struct vmxnet3_tx_buf_info *tbi;

                tbi = tq->buf_info + tq->tx_ring.next2comp;

                vmxnet3_unmap_tx_buf(tbi, adapter->pdev);
                if (tbi->skb) {
                        dev_kfree_skb_any(tbi->skb);
                        tbi->skb = NULL;
                }
                vmxnet3_cmd_ring_adv_next2comp(&tq->tx_ring);
        }

        /* sanity check, verify all buffers are indeed unmapped and freed */
        for (i = 0; i < tq->tx_ring.size; i++) {
                BUG_ON(tq->buf_info[i].skb != NULL ||
                       tq->buf_info[i].map_type != VMXNET3_MAP_NONE);
        }

        tq->tx_ring.gen = VMXNET3_INIT_GEN;
        tq->tx_ring.next2fill = tq->tx_ring.next2comp = 0;

        tq->comp_ring.gen = VMXNET3_INIT_GEN;
        tq->comp_ring.next2proc = 0;
}


static void
vmxnet3_tq_destroy(struct vmxnet3_tx_queue *tq,
                   struct vmxnet3_adapter *adapter)
{
        if (tq->tx_ring.base) {
                pci_free_consistent(adapter->pdev, tq->tx_ring.size *
                                    sizeof(struct Vmxnet3_TxDesc),
                                    tq->tx_ring.base, tq->tx_ring.basePA);
                tq->tx_ring.base = NULL;
        }
        if (tq->data_ring.base) {
                pci_free_consistent(adapter->pdev, tq->data_ring.size *
                                    sizeof(struct Vmxnet3_TxDataDesc),
                                    tq->data_ring.base, tq->data_ring.basePA);
                tq->data_ring.base = NULL;
        }
        if (tq->comp_ring.base) {
                pci_free_consistent(adapter->pdev, tq->comp_ring.size *
                                    sizeof(struct Vmxnet3_TxCompDesc),
                                    tq->comp_ring.base, tq->comp_ring.basePA);
                tq->comp_ring.base = NULL;
        }
        kfree(tq->buf_info);
        tq->buf_info = NULL;
}


/* Destroy all tx queues */
void
vmxnet3_tq_destroy_all(struct vmxnet3_adapter *adapter)
{
        int i;

        for (i = 0; i < adapter->num_tx_queues; i++)
                vmxnet3_tq_destroy(&adapter->tx_queue[i], adapter);
}


static void
vmxnet3_tq_init(struct vmxnet3_tx_queue *tq,
                struct vmxnet3_adapter *adapter)
{
        int i;

        /* reset the tx ring contents to 0 and reset the tx ring states */
        memset(tq->tx_ring.base, 0, tq->tx_ring.size *
               sizeof(struct Vmxnet3_TxDesc));
        tq->tx_ring.next2fill = tq->tx_ring.next2comp = 0;
        tq->tx_ring.gen = VMXNET3_INIT_GEN;

        memset(tq->data_ring.base, 0, tq->data_ring.size *
               sizeof(struct Vmxnet3_TxDataDesc));

        /* reset the tx comp ring contents to 0 and reset comp ring states */
        memset(tq->comp_ring.base, 0, tq->comp_ring.size *
               sizeof(struct Vmxnet3_TxCompDesc));
        tq->comp_ring.next2proc = 0;
        tq->comp_ring.gen = VMXNET3_INIT_GEN;

        /* reset the bookkeeping data */
        memset(tq->buf_info, 0, sizeof(tq->buf_info[0]) * tq->tx_ring.size);
        for (i = 0; i < tq->tx_ring.size; i++)
                tq->buf_info[i].map_type = VMXNET3_MAP_NONE;

        /* stats are not reset */
}


static int
vmxnet3_tq_create(struct vmxnet3_tx_queue *tq,
                  struct vmxnet3_adapter *adapter)
{
        BUG_ON(tq->tx_ring.base || tq->data_ring.base ||
               tq->comp_ring.base || tq->buf_info);

        tq->tx_ring.base = pci_alloc_consistent(adapter->pdev, tq->tx_ring.size
                           * sizeof(struct Vmxnet3_TxDesc),
                           &tq->tx_ring.basePA);
        if (!tq->tx_ring.base) {
                netdev_err(adapter->netdev, "failed to allocate tx ring\n");
                goto err;
        }

        tq->data_ring.base = pci_alloc_consistent(adapter->pdev,
                             tq->data_ring.size *
                             sizeof(struct Vmxnet3_TxDataDesc),
                             &tq->data_ring.basePA);
        if (!tq->data_ring.base) {
                netdev_err(adapter->netdev, "failed to allocate data ring\n");
                goto err;
        }

        tq->comp_ring.base = pci_alloc_consistent(adapter->pdev,
                             tq->comp_ring.size *
                             sizeof(struct Vmxnet3_TxCompDesc),
                             &tq->comp_ring.basePA);
        if (!tq->comp_ring.base) {
                netdev_err(adapter->netdev, "failed to allocate tx comp ring\n");
                goto err;
        }

        tq->buf_info = kcalloc(tq->tx_ring.size, sizeof(tq->buf_info[0]),
                               GFP_KERNEL);
        if (!tq->buf_info)
                goto err;

        return 0;

err:
        vmxnet3_tq_destroy(tq, adapter);
        return -ENOMEM;
}

static void
vmxnet3_tq_cleanup_all(struct vmxnet3_adapter *adapter)
{
        int i;

        for (i = 0; i < adapter->num_tx_queues; i++)
                vmxnet3_tq_cleanup(&adapter->tx_queue[i], adapter);
}

/*
 *    Starting from ring->next2fill, allocate rx buffers for the given ring
 *    of the rx queue and update the rx desc. Stop after @num_to_alloc
 *    buffers are allocated or an allocation fails.
 */

static int
vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx,
                        int num_to_alloc, struct vmxnet3_adapter *adapter)
{
        int num_allocated = 0;
        struct vmxnet3_rx_buf_info *rbi_base = rq->buf_info[ring_idx];
        struct vmxnet3_cmd_ring *ring = &rq->rx_ring[ring_idx];
        u32 val;

        while (num_allocated <= num_to_alloc) {
                struct vmxnet3_rx_buf_info *rbi;
                union Vmxnet3_GenericDesc *gd;

                rbi = rbi_base + ring->next2fill;
                gd = ring->base + ring->next2fill;

                if (rbi->buf_type == VMXNET3_RX_BUF_SKB) {
                        if (rbi->skb == NULL) {
                                rbi->skb = __netdev_alloc_skb_ip_align(adapter->netdev,
                                                                       rbi->len,
                                                                       GFP_KERNEL);
                                if (unlikely(rbi->skb == NULL)) {
                                        rq->stats.rx_buf_alloc_failure++;
                                        break;
                                }

                                rbi->dma_addr = pci_map_single(adapter->pdev,
                                                rbi->skb->data, rbi->len,
                                                PCI_DMA_FROMDEVICE);
                        } else {
                                /* rx buffer skipped by the device */
                        }
                        val = VMXNET3_RXD_BTYPE_HEAD << VMXNET3_RXD_BTYPE_SHIFT;
                } else {
                        BUG_ON(rbi->buf_type != VMXNET3_RX_BUF_PAGE ||
                               rbi->len  != PAGE_SIZE);

                        if (rbi->page == NULL) {
                                rbi->page = alloc_page(GFP_ATOMIC);
                                if (unlikely(rbi->page == NULL)) {
                                        rq->stats.rx_buf_alloc_failure++;
                                        break;
                                }
                                rbi->dma_addr = pci_map_page(adapter->pdev,
                                                rbi->page, 0, PAGE_SIZE,
                                                PCI_DMA_FROMDEVICE);
                        } else {
                                /* rx buffers skipped by the device */
                        }
                        val = VMXNET3_RXD_BTYPE_BODY << VMXNET3_RXD_BTYPE_SHIFT;
                }

                BUG_ON(rbi->dma_addr == 0);
                gd->rxd.addr = cpu_to_le64(rbi->dma_addr);
                gd->dword[2] = cpu_to_le32((!ring->gen << VMXNET3_RXD_GEN_SHIFT)
                                           | val | rbi->len);

                /* Fill the last buffer but don't mark it ready, or else the
                 * device will think that the queue is full */
                if (num_allocated == num_to_alloc)
                        break;

                gd->dword[2] |= cpu_to_le32(ring->gen << VMXNET3_RXD_GEN_SHIFT);
                num_allocated++;
                vmxnet3_cmd_ring_adv_next2fill(ring);
        }

        netdev_dbg(adapter->netdev,
                "alloc_rx_buf: %d allocated, next2fill %u, next2comp %u\n",
                num_allocated, ring->next2fill, ring->next2comp);

        /* so that the device can distinguish a full ring and an empty ring */
        BUG_ON(num_allocated != 0 && ring->next2fill == ring->next2comp);

        return num_allocated;
}


static void
vmxnet3_append_frag(struct sk_buff *skb, struct Vmxnet3_RxCompDesc *rcd,
                    struct vmxnet3_rx_buf_info *rbi)
{
        struct skb_frag_struct *frag = skb_shinfo(skb)->frags +
                skb_shinfo(skb)->nr_frags;

        BUG_ON(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS);

        __skb_frag_set_page(frag, rbi->page);
        frag->page_offset = 0;
        skb_frag_size_set(frag, rcd->len);
        skb->data_len += rcd->len;
        skb->truesize += PAGE_SIZE;
        skb_shinfo(skb)->nr_frags++;
}


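/*
 * Fill tx descriptors with the buffer addresses and lengths for @skb.
 * The SOP descriptor is written with the previous generation bit, so the
 * device ignores the packet until vmxnet3_tq_xmit() flips that bit once
 * all of the descriptors are in place.
 */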
static void
vmxnet3_map_pkt(struct sk_buff *skb, struct vmxnet3_tx_ctx *ctx,
                struct vmxnet3_tx_queue *tq, struct pci_dev *pdev,
                struct vmxnet3_adapter *adapter)
{
        u32 dw2, len;
        unsigned long buf_offset;
        int i;
        union Vmxnet3_GenericDesc *gdesc;
        struct vmxnet3_tx_buf_info *tbi = NULL;

        BUG_ON(ctx->copy_size > skb_headlen(skb));

        /* use the previous gen bit for the SOP desc */
        dw2 = (tq->tx_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;

        ctx->sop_txd = tq->tx_ring.base + tq->tx_ring.next2fill;
        gdesc = ctx->sop_txd; /* both loops below can be skipped */

        /* no need to map the buffer if headers are copied */
        if (ctx->copy_size) {
                ctx->sop_txd->txd.addr = cpu_to_le64(tq->data_ring.basePA +
                                        tq->tx_ring.next2fill *
                                        sizeof(struct Vmxnet3_TxDataDesc));
                ctx->sop_txd->dword[2] = cpu_to_le32(dw2 | ctx->copy_size);
                ctx->sop_txd->dword[3] = 0;

                tbi = tq->buf_info + tq->tx_ring.next2fill;
                tbi->map_type = VMXNET3_MAP_NONE;

                netdev_dbg(adapter->netdev,
                        "txd[%u]: 0x%Lx 0x%x 0x%x\n",
                        tq->tx_ring.next2fill,
                        le64_to_cpu(ctx->sop_txd->txd.addr),
                        ctx->sop_txd->dword[2], ctx->sop_txd->dword[3]);
                vmxnet3_cmd_ring_adv_next2fill(&tq->tx_ring);

                /* use the right gen for non-SOP desc */
                dw2 = tq->tx_ring.gen << VMXNET3_TXD_GEN_SHIFT;
        }

        /* linear part can use multiple tx desc if it's big */
        len = skb_headlen(skb) - ctx->copy_size;
        buf_offset = ctx->copy_size;
        while (len) {
                u32 buf_size;

                if (len < VMXNET3_MAX_TX_BUF_SIZE) {
                        buf_size = len;
                        dw2 |= len;
                } else {
                        buf_size = VMXNET3_MAX_TX_BUF_SIZE;
                        /* spec says that for TxDesc.len, 0 == 2^14 */
                }

                tbi = tq->buf_info + tq->tx_ring.next2fill;
                tbi->map_type = VMXNET3_MAP_SINGLE;
                tbi->dma_addr = pci_map_single(adapter->pdev,
                                skb->data + buf_offset, buf_size,
                                PCI_DMA_TODEVICE);

                tbi->len = buf_size;

                gdesc = tq->tx_ring.base + tq->tx_ring.next2fill;
                BUG_ON(gdesc->txd.gen == tq->tx_ring.gen);

                gdesc->txd.addr = cpu_to_le64(tbi->dma_addr);
                gdesc->dword[2] = cpu_to_le32(dw2);
                gdesc->dword[3] = 0;

                netdev_dbg(adapter->netdev,
                        "txd[%u]: 0x%Lx 0x%x 0x%x\n",
                        tq->tx_ring.next2fill, le64_to_cpu(gdesc->txd.addr),
                        le32_to_cpu(gdesc->dword[2]), gdesc->dword[3]);
                vmxnet3_cmd_ring_adv_next2fill(&tq->tx_ring);
                dw2 = tq->tx_ring.gen << VMXNET3_TXD_GEN_SHIFT;

                len -= buf_size;
                buf_offset += buf_size;
        }

        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
                const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
                u32 buf_size;

                buf_offset = 0;
                len = skb_frag_size(frag);
                while (len) {
                        tbi = tq->buf_info + tq->tx_ring.next2fill;
                        if (len < VMXNET3_MAX_TX_BUF_SIZE) {
                                buf_size = len;
                                dw2 |= len;
                        } else {
                                buf_size = VMXNET3_MAX_TX_BUF_SIZE;
                                /* spec says that for TxDesc.len, 0 == 2^14 */
                        }
                        tbi->map_type = VMXNET3_MAP_PAGE;
                        tbi->dma_addr = skb_frag_dma_map(&adapter->pdev->dev, frag,
                                                         buf_offset, buf_size,
                                                         DMA_TO_DEVICE);

                        tbi->len = buf_size;

                        gdesc = tq->tx_ring.base + tq->tx_ring.next2fill;
                        BUG_ON(gdesc->txd.gen == tq->tx_ring.gen);

                        gdesc->txd.addr = cpu_to_le64(tbi->dma_addr);
                        gdesc->dword[2] = cpu_to_le32(dw2);
                        gdesc->dword[3] = 0;

                        netdev_dbg(adapter->netdev,
                                "txd[%u]: 0x%llx %u %u\n",
                                tq->tx_ring.next2fill, le64_to_cpu(gdesc->txd.addr),
                                le32_to_cpu(gdesc->dword[2]), gdesc->dword[3]);
                        vmxnet3_cmd_ring_adv_next2fill(&tq->tx_ring);
                        dw2 = tq->tx_ring.gen << VMXNET3_TXD_GEN_SHIFT;

                        len -= buf_size;
                        buf_offset += buf_size;
                }
        }

        ctx->eop_txd = gdesc;

        /* set the last buf_info for the pkt */
        tbi->skb = skb;
        tbi->sop_idx = ctx->sop_txd - tq->tx_ring.base;
}


/* Init all tx queues */
static void
vmxnet3_tq_init_all(struct vmxnet3_adapter *adapter)
{
        int i;

        for (i = 0; i < adapter->num_tx_queues; i++)
                vmxnet3_tq_init(&adapter->tx_queue[i], adapter);
}


/*
 *    Parse and copy relevant protocol headers:
 *      For a tso pkt, relevant headers are L2/3/4 including options
 *      For a pkt requesting csum offloading, they are L2/3 and may include L4
 *      if it's a TCP/UDP pkt
 *
 * Returns:
 *    -1:  error occurred during parsing
 *     0:  protocol headers parsed, but too big to be copied
 *     1:  protocol headers parsed and copied
 *
 * Other effects:
 *    1. related *ctx fields are updated.
 *    2. ctx->copy_size is # of bytes copied
 *    3. the portion copied is guaranteed to be in the linear part
 *
 */
static int
vmxnet3_parse_and_copy_hdr(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
                           struct vmxnet3_tx_ctx *ctx,
                           struct vmxnet3_adapter *adapter)
{
        struct Vmxnet3_TxDataDesc *tdd;

        if (ctx->mss) { /* TSO */
                ctx->eth_ip_hdr_size = skb_transport_offset(skb);
                ctx->l4_hdr_size = tcp_hdrlen(skb);
                ctx->copy_size = ctx->eth_ip_hdr_size + ctx->l4_hdr_size;
        } else {
                if (skb->ip_summed == CHECKSUM_PARTIAL) {
                        ctx->eth_ip_hdr_size = skb_checksum_start_offset(skb);

                        if (ctx->ipv4) {
                                const struct iphdr *iph = ip_hdr(skb);

                                if (iph->protocol == IPPROTO_TCP)
                                        ctx->l4_hdr_size = tcp_hdrlen(skb);
                                else if (iph->protocol == IPPROTO_UDP)
                                        ctx->l4_hdr_size = sizeof(struct udphdr);
                                else
                                        ctx->l4_hdr_size = 0;
                        } else {
                                /* for simplicity, don't copy L4 headers */
                                ctx->l4_hdr_size = 0;
                        }
                        ctx->copy_size = min(ctx->eth_ip_hdr_size +
                                         ctx->l4_hdr_size, skb->len);
                } else {
                        ctx->eth_ip_hdr_size = 0;
                        ctx->l4_hdr_size = 0;
                        /* copy as much as allowed */
                        ctx->copy_size = min((unsigned int)VMXNET3_HDR_COPY_SIZE,
                                             skb_headlen(skb));
                }

                /* make sure headers are accessible directly */
                if (unlikely(!pskb_may_pull(skb, ctx->copy_size)))
                        goto err;
        }

        if (unlikely(ctx->copy_size > VMXNET3_HDR_COPY_SIZE)) {
                tq->stats.oversized_hdr++;
                ctx->copy_size = 0;
                return 0;
        }

        tdd = tq->data_ring.base + tq->tx_ring.next2fill;

        memcpy(tdd->data, skb->data, ctx->copy_size);
        netdev_dbg(adapter->netdev,
                "copy %u bytes to dataRing[%u]\n",
                ctx->copy_size, tq->tx_ring.next2fill);
        return 1;

err:
        return -1;
}


static void
vmxnet3_prepare_tso(struct sk_buff *skb,
                    struct vmxnet3_tx_ctx *ctx)
{
        struct tcphdr *tcph = tcp_hdr(skb);

        if (ctx->ipv4) {
                struct iphdr *iph = ip_hdr(skb);

                iph->check = 0;
                tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, 0,
                                                 IPPROTO_TCP, 0);
        } else {
                struct ipv6hdr *iph = ipv6_hdr(skb);

                tcph->check = ~csum_ipv6_magic(&iph->saddr, &iph->daddr, 0,
                                               IPPROTO_TCP, 0);
        }
}

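/* Estimate the number of tx descriptors needed: one per
 * VMXNET3_MAX_TX_BUF_SIZE chunk of the linear part and of each frag, plus
 * one extra for the SOP descriptor used when headers are copied into the
 * data ring by vmxnet3_parse_and_copy_hdr().
 */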
static int txd_estimate(const struct sk_buff *skb)
{
        int count = VMXNET3_TXD_NEEDED(skb_headlen(skb)) + 1;
        int i;

        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
                const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];

                count += VMXNET3_TXD_NEEDED(skb_frag_size(frag));
        }
        return count;
}

/*
 * Transmits a pkt through a given tq
 * Returns:
 *    NETDEV_TX_OK:      descriptors are set up successfully
 *    NETDEV_TX_OK:      error occurred, the pkt is dropped
 *    NETDEV_TX_BUSY:    tx ring is full, queue is stopped
 *
 * Side-effects:
 *    1. tx ring may be changed
 *    2. tq stats may be updated accordingly
 *    3. shared->txNumDeferred may be updated
 */

static int
vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
                struct vmxnet3_adapter *adapter, struct net_device *netdev)
{
        int ret;
        u32 count;
        unsigned long flags;
        struct vmxnet3_tx_ctx ctx;
        union Vmxnet3_GenericDesc *gdesc;
#ifdef __BIG_ENDIAN_BITFIELD
        /* Use temporary descriptor to avoid touching bits multiple times */
        union Vmxnet3_GenericDesc tempTxDesc;
#endif

        count = txd_estimate(skb);

        ctx.ipv4 = (vlan_get_protocol(skb) == cpu_to_be16(ETH_P_IP));

        ctx.mss = skb_shinfo(skb)->gso_size;
        if (ctx.mss) {
                if (skb_header_cloned(skb)) {
                        if (unlikely(pskb_expand_head(skb, 0, 0,
                                                      GFP_ATOMIC) != 0)) {
                                tq->stats.drop_tso++;
                                goto drop_pkt;
                        }
                        tq->stats.copy_skb_header++;
                }
                vmxnet3_prepare_tso(skb, &ctx);
        } else {
                if (unlikely(count > VMXNET3_MAX_TXD_PER_PKT)) {

                        /* non-tso pkts must not use more than
                         * VMXNET3_MAX_TXD_PER_PKT entries
                         */
                        if (skb_linearize(skb) != 0) {
                                tq->stats.drop_too_many_frags++;
                                goto drop_pkt;
                        }
                        tq->stats.linearized++;

                        /* recalculate the # of descriptors to use */
                        count = VMXNET3_TXD_NEEDED(skb_headlen(skb)) + 1;
                }
        }

        spin_lock_irqsave(&tq->tx_lock, flags);

        if (count > vmxnet3_cmd_ring_desc_avail(&tq->tx_ring)) {
                tq->stats.tx_ring_full++;
                netdev_dbg(adapter->netdev,
                        "tx queue stopped on %s, next2comp %u"
                        " next2fill %u\n", adapter->netdev->name,
                        tq->tx_ring.next2comp, tq->tx_ring.next2fill);

                vmxnet3_tq_stop(tq, adapter);
                spin_unlock_irqrestore(&tq->tx_lock, flags);
                return NETDEV_TX_BUSY;
        }


        ret = vmxnet3_parse_and_copy_hdr(skb, tq, &ctx, adapter);
        if (ret >= 0) {
                BUG_ON(ret <= 0 && ctx.copy_size != 0);
                /* hdrs parsed, check against other limits */
                if (ctx.mss) {
                        if (unlikely(ctx.eth_ip_hdr_size + ctx.l4_hdr_size >
                                     VMXNET3_MAX_TX_BUF_SIZE)) {
                                goto hdr_too_big;
                        }
                } else {
                        if (skb->ip_summed == CHECKSUM_PARTIAL) {
                                if (unlikely(ctx.eth_ip_hdr_size +
                                             skb->csum_offset >
                                             VMXNET3_MAX_CSUM_OFFSET)) {
                                        goto hdr_too_big;
                                }
                        }
                }
        } else {
                tq->stats.drop_hdr_inspect_err++;
                goto unlock_drop_pkt;
        }

        /* fill tx descs related to addr & len */
        vmxnet3_map_pkt(skb, &ctx, tq, adapter->pdev, adapter);

        /* setup the EOP desc */
        ctx.eop_txd->dword[3] = cpu_to_le32(VMXNET3_TXD_CQ | VMXNET3_TXD_EOP);

        /* setup the SOP desc */
#ifdef __BIG_ENDIAN_BITFIELD
        gdesc = &tempTxDesc;
        gdesc->dword[2] = ctx.sop_txd->dword[2];
        gdesc->dword[3] = ctx.sop_txd->dword[3];
#else
        gdesc = ctx.sop_txd;
#endif
        if (ctx.mss) {
                gdesc->txd.hlen = ctx.eth_ip_hdr_size + ctx.l4_hdr_size;
                gdesc->txd.om = VMXNET3_OM_TSO;
                gdesc->txd.msscof = ctx.mss;
                le32_add_cpu(&tq->shared->txNumDeferred, (skb->len -
                             gdesc->txd.hlen + ctx.mss - 1) / ctx.mss);
        } else {
                if (skb->ip_summed == CHECKSUM_PARTIAL) {
                        gdesc->txd.hlen = ctx.eth_ip_hdr_size;
                        gdesc->txd.om = VMXNET3_OM_CSUM;
                        gdesc->txd.msscof = ctx.eth_ip_hdr_size +
                                            skb->csum_offset;
                } else {
                        gdesc->txd.om = 0;
                        gdesc->txd.msscof = 0;
                }
                le32_add_cpu(&tq->shared->txNumDeferred, 1);
        }

        if (vlan_tx_tag_present(skb)) {
                gdesc->txd.ti = 1;
                gdesc->txd.tci = vlan_tx_tag_get(skb);
        }

        /* finally flips the GEN bit of the SOP desc. */
        gdesc->dword[2] = cpu_to_le32(le32_to_cpu(gdesc->dword[2]) ^
                                                  VMXNET3_TXD_GEN);
#ifdef __BIG_ENDIAN_BITFIELD
        /* Finished updating in bitfields of Tx Desc, so write them in original
         * place.
         */
        vmxnet3_TxDescToLe((struct Vmxnet3_TxDesc *)gdesc,
                           (struct Vmxnet3_TxDesc *)ctx.sop_txd);
        gdesc = ctx.sop_txd;
#endif
        netdev_dbg(adapter->netdev,
                "txd[%u]: SOP 0x%Lx 0x%x 0x%x\n",
                (u32)(ctx.sop_txd -
                tq->tx_ring.base), le64_to_cpu(gdesc->txd.addr),
                le32_to_cpu(gdesc->dword[2]), le32_to_cpu(gdesc->dword[3]));

        spin_unlock_irqrestore(&tq->tx_lock, flags);

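        /* Write the TXPROD doorbell only after txThreshold packets have
         * been deferred; this batches the (costly) device register writes
         * across several transmitted packets.
         */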
        if (le32_to_cpu(tq->shared->txNumDeferred) >=
                                        le32_to_cpu(tq->shared->txThreshold)) {
                tq->shared->txNumDeferred = 0;
                VMXNET3_WRITE_BAR0_REG(adapter,
                                       VMXNET3_REG_TXPROD + tq->qid * 8,
                                       tq->tx_ring.next2fill);
        }

        return NETDEV_TX_OK;

hdr_too_big:
        tq->stats.drop_oversized_hdr++;
unlock_drop_pkt:
        spin_unlock_irqrestore(&tq->tx_lock, flags);
drop_pkt:
        tq->stats.drop_total++;
        dev_kfree_skb(skb);
        return NETDEV_TX_OK;
}


static netdev_tx_t
vmxnet3_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
{
        struct vmxnet3_adapter *adapter = netdev_priv(netdev);

        BUG_ON(skb->queue_mapping > adapter->num_tx_queues);
        return vmxnet3_tq_xmit(skb,
                               &adapter->tx_queue[skb->queue_mapping],
                               adapter, netdev);
}


static void
vmxnet3_rx_csum(struct vmxnet3_adapter *adapter,
                struct sk_buff *skb,
                union Vmxnet3_GenericDesc *gdesc)
{
        if (!gdesc->rcd.cnc && adapter->netdev->features & NETIF_F_RXCSUM) {
                /* typical case: TCP/UDP over IP and both csums are correct */
                if ((le32_to_cpu(gdesc->dword[3]) & VMXNET3_RCD_CSUM_OK) ==
                                                        VMXNET3_RCD_CSUM_OK) {
                        skb->ip_summed = CHECKSUM_UNNECESSARY;
                        BUG_ON(!(gdesc->rcd.tcp || gdesc->rcd.udp));
                        BUG_ON(!(gdesc->rcd.v4  || gdesc->rcd.v6));
                        BUG_ON(gdesc->rcd.frg);
                } else {
                        if (gdesc->rcd.csum) {
                                skb->csum = htons(gdesc->rcd.csum);
                                skb->ip_summed = CHECKSUM_PARTIAL;
                        } else {
                                skb_checksum_none_assert(skb);
                        }
                }
        } else {
                skb_checksum_none_assert(skb);
        }
}


static void
vmxnet3_rx_error(struct vmxnet3_rx_queue *rq, struct Vmxnet3_RxCompDesc *rcd,
                 struct vmxnet3_rx_ctx *ctx,  struct vmxnet3_adapter *adapter)
{
        rq->stats.drop_err++;
        if (!rcd->fcs)
                rq->stats.drop_fcs++;

        rq->stats.drop_total++;

        /*
         * We do not unmap and chain the rx buffer to the skb.
         * We basically pretend this buffer is not used and will be recycled
         * by vmxnet3_rq_alloc_rx_buf()
         */

        /*
         * ctx->skb may be NULL if this is the first and the only one
         * desc for the pkt
         */
        if (ctx->skb)
                dev_kfree_skb_irq(ctx->skb);

        ctx->skb = NULL;
}


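/*
 * Process up to @quota completed rx descriptors. Completions for the
 * first (head, skb) command ring arrive with rqID == rq->qid, those for
 * the second (body, page) ring with rqID == rq->qid2; each consumed
 * buffer is replaced immediately so the rings stay filled.
 */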
static int
vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
                       struct vmxnet3_adapter *adapter, int quota)
{
        static const u32 rxprod_reg[2] = {
                VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2
        };
        u32 num_rxd = 0;
        bool skip_page_frags = false;
        struct Vmxnet3_RxCompDesc *rcd;
        struct vmxnet3_rx_ctx *ctx = &rq->rx_ctx;
#ifdef __BIG_ENDIAN_BITFIELD
        struct Vmxnet3_RxDesc rxCmdDesc;
        struct Vmxnet3_RxCompDesc rxComp;
#endif
        vmxnet3_getRxComp(rcd, &rq->comp_ring.base[rq->comp_ring.next2proc].rcd,
                          &rxComp);
        while (rcd->gen == rq->comp_ring.gen) {
                struct vmxnet3_rx_buf_info *rbi;
                struct sk_buff *skb, *new_skb = NULL;
                struct page *new_page = NULL;
                int num_to_alloc;
                struct Vmxnet3_RxDesc *rxd;
                u32 idx, ring_idx;
                struct vmxnet3_cmd_ring *ring = NULL;
                if (num_rxd >= quota) {
                        /* we may stop even before we see the EOP desc of
                         * the current pkt
                         */
                        break;
                }
                num_rxd++;
                BUG_ON(rcd->rqID != rq->qid && rcd->rqID != rq->qid2);
                idx = rcd->rxdIdx;
                ring_idx = rcd->rqID < adapter->num_rx_queues ? 0 : 1;
                ring = rq->rx_ring + ring_idx;
                vmxnet3_getRxDesc(rxd, &rq->rx_ring[ring_idx].base[idx].rxd,
                                  &rxCmdDesc);
                rbi = rq->buf_info[ring_idx] + idx;

                BUG_ON(rxd->addr != rbi->dma_addr ||
                       rxd->len != rbi->len);

                if (unlikely(rcd->eop && rcd->err)) {
                        vmxnet3_rx_error(rq, rcd, ctx, adapter);
                        goto rcd_done;
                }

                if (rcd->sop) { /* first buf of the pkt */
                        BUG_ON(rxd->btype != VMXNET3_RXD_BTYPE_HEAD ||
                               rcd->rqID != rq->qid);

                        BUG_ON(rbi->buf_type != VMXNET3_RX_BUF_SKB);
                        BUG_ON(ctx->skb != NULL || rbi->skb == NULL);

                        if (unlikely(rcd->len == 0)) {
                                /* Pretend the rx buffer is skipped. */
                                BUG_ON(!(rcd->sop && rcd->eop));
                                netdev_dbg(adapter->netdev,
                                        "rxRing[%u][%u] 0 length\n",
                                        ring_idx, idx);
                                goto rcd_done;
                        }

                        skip_page_frags = false;
                        ctx->skb = rbi->skb;
                        new_skb = netdev_alloc_skb_ip_align(adapter->netdev,
                                                            rbi->len);
                        if (new_skb == NULL) {
                                /* Skb allocation failed, do not hand over
                                 * this skb to the stack. Reuse it. Drop the
                                 * existing pkt.
                                 */
1217                                 rq->stats.rx_buf_alloc_failure++;
1218                                 ctx->skb = NULL;
1219                                 rq->stats.drop_total++;
1220                                 skip_page_frags = true;
1221                                 goto rcd_done;
1222                         }
1223
1224                         pci_unmap_single(adapter->pdev, rbi->dma_addr, rbi->len,
1225                                          PCI_DMA_FROMDEVICE);
1226
1227 #ifdef VMXNET3_RSS
1228                         if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE &&
1229                             (adapter->netdev->features & NETIF_F_RXHASH))
1230                                 ctx->skb->rxhash = le32_to_cpu(rcd->rssHash);
1231 #endif
1232                         skb_put(ctx->skb, rcd->len);
1233
1234                         /* Immediate refill */
1235                         rbi->skb = new_skb;
1236                         rbi->dma_addr = pci_map_single(adapter->pdev,
1237                                                        rbi->skb->data, rbi->len,
1238                                                        PCI_DMA_FROMDEVICE);
1239                         rxd->addr = cpu_to_le64(rbi->dma_addr);
1240                         rxd->len = rbi->len;
1241
1242                 } else {
1243                         BUG_ON(ctx->skb == NULL && !skip_page_frags);
1244
1245                         /* non SOP buffer must be type 1 in most cases */
1246                         BUG_ON(rbi->buf_type != VMXNET3_RX_BUF_PAGE);
1247                         BUG_ON(rxd->btype != VMXNET3_RXD_BTYPE_BODY);
1248
1249                         /* If an sop buffer was dropped, skip all
1250                          * following non-sop fragments. They will be reused.
1251                          */
1252                         if (skip_page_frags)
1253                                 goto rcd_done;
1254
1255                         new_page = alloc_page(GFP_ATOMIC);
1256                         if (unlikely(new_page == NULL)) {
1257                                 /* Replacement page frag could not be allocated.
1258                                  * Reuse this page. Drop the pkt and free the
1259                                  * skb which contained this page as a frag. Skip
1260                                  * processing all the following non-sop frags.
1261                                  */
1262                                 rq->stats.rx_buf_alloc_failure++;
1263                                 dev_kfree_skb(ctx->skb);
1264                                 ctx->skb = NULL;
1265                                 skip_page_frags = true;
1266                                 goto rcd_done;
1267                         }
1268
1269                         if (rcd->len) {
1270                                 pci_unmap_page(adapter->pdev,
1271                                                rbi->dma_addr, rbi->len,
1272                                                PCI_DMA_FROMDEVICE);
1273
1274                                 vmxnet3_append_frag(ctx->skb, rcd, rbi);
1275                         }
1276
1277                         /* Immediate refill */
1278                         rbi->page = new_page;
1279                         rbi->dma_addr = pci_map_page(adapter->pdev, rbi->page,
1280                                                      0, PAGE_SIZE,
1281                                                      PCI_DMA_FROMDEVICE);
1282                         rxd->addr = cpu_to_le64(rbi->dma_addr);
1283                         rxd->len = rbi->len;
1284                 }
1285
1286
1287                 skb = ctx->skb;
1288                 if (rcd->eop) {
1289                         skb->len += skb->data_len;
1290
1291                         vmxnet3_rx_csum(adapter, skb,
1292                                         (union Vmxnet3_GenericDesc *)rcd);
1293                         skb->protocol = eth_type_trans(skb, adapter->netdev);
1294
1295                         if (unlikely(rcd->ts))
1296                                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rcd->tci);
1297
1298                         if (adapter->netdev->features & NETIF_F_LRO)
1299                                 netif_receive_skb(skb);
1300                         else
1301                                 napi_gro_receive(&rq->napi, skb);
1302
1303                         ctx->skb = NULL;
1304                 }
1305
1306 rcd_done:
1307                 /* device may have skipped some rx descs */
1308                 ring->next2comp = idx;
1309                 num_to_alloc = vmxnet3_cmd_ring_desc_avail(ring);
1310                 ring = rq->rx_ring + ring_idx;
1311                 while (num_to_alloc) {
1312                         vmxnet3_getRxDesc(rxd, &ring->base[ring->next2fill].rxd,
1313                                           &rxCmdDesc);
1314                         BUG_ON(!rxd->addr);
1315
1316                         /* Recv desc is ready to be used by the device */
1317                         rxd->gen = ring->gen;
1318                         vmxnet3_cmd_ring_adv_next2fill(ring);
1319                         num_to_alloc--;
1320                 }
1321
1322                 /* if needed, update the register */
1323                 if (unlikely(rq->shared->updateRxProd)) {
1324                         VMXNET3_WRITE_BAR0_REG(adapter,
1325                                                rxprod_reg[ring_idx] + rq->qid * 8,
1326                                                ring->next2fill);
1327                 }
1328
1329                 vmxnet3_comp_ring_adv_next2proc(&rq->comp_ring);
1330                 vmxnet3_getRxComp(rcd,
1331                                   &rq->comp_ring.base[rq->comp_ring.next2proc].rcd, &rxComp);
1332         }
1333
1334         return num_rxd;
1335 }
1336
1337
1338 static void
1339 vmxnet3_rq_cleanup(struct vmxnet3_rx_queue *rq,
1340                    struct vmxnet3_adapter *adapter)
1341 {
1342         u32 i, ring_idx;
1343         struct Vmxnet3_RxDesc *rxd;
1344
1345         for (ring_idx = 0; ring_idx < 2; ring_idx++) {
1346                 for (i = 0; i < rq->rx_ring[ring_idx].size; i++) {
1347 #ifdef __BIG_ENDIAN_BITFIELD
1348                         struct Vmxnet3_RxDesc rxDesc;
1349 #endif
1350                         vmxnet3_getRxDesc(rxd,
1351                                 &rq->rx_ring[ring_idx].base[i].rxd, &rxDesc);
1352
1353                         if (rxd->btype == VMXNET3_RXD_BTYPE_HEAD &&
1354                                         rq->buf_info[ring_idx][i].skb) {
1355                                 pci_unmap_single(adapter->pdev, rxd->addr,
1356                                                  rxd->len, PCI_DMA_FROMDEVICE);
1357                                 dev_kfree_skb(rq->buf_info[ring_idx][i].skb);
1358                                 rq->buf_info[ring_idx][i].skb = NULL;
1359                         } else if (rxd->btype == VMXNET3_RXD_BTYPE_BODY &&
1360                                         rq->buf_info[ring_idx][i].page) {
1361                                 pci_unmap_page(adapter->pdev, rxd->addr,
1362                                                rxd->len, PCI_DMA_FROMDEVICE);
1363                                 put_page(rq->buf_info[ring_idx][i].page);
1364                                 rq->buf_info[ring_idx][i].page = NULL;
1365                         }
1366                 }
1367
1368                 rq->rx_ring[ring_idx].gen = VMXNET3_INIT_GEN;
1369                 rq->rx_ring[ring_idx].next2fill =
1370                                         rq->rx_ring[ring_idx].next2comp = 0;
1371         }
1372
1373         rq->comp_ring.gen = VMXNET3_INIT_GEN;
1374         rq->comp_ring.next2proc = 0;
1375 }
1376
1377
1378 static void
1379 vmxnet3_rq_cleanup_all(struct vmxnet3_adapter *adapter)
1380 {
1381         int i;
1382
1383         for (i = 0; i < adapter->num_rx_queues; i++)
1384                 vmxnet3_rq_cleanup(&adapter->rx_queue[i], adapter);
1385 }
1386
1387
1388 static void vmxnet3_rq_destroy(struct vmxnet3_rx_queue *rq,
1389                                struct vmxnet3_adapter *adapter)
1390 {
1391         int i;
1392         int j;
1393
1394         /* all rx buffers must have already been freed */
1395         for (i = 0; i < 2; i++) {
1396                 if (rq->buf_info[i]) {
1397                         for (j = 0; j < rq->rx_ring[i].size; j++)
1398                                 BUG_ON(rq->buf_info[i][j].page != NULL);
1399                 }
1400         }
1401
1402
1403         kfree(rq->buf_info[0]);
1404
1405         for (i = 0; i < 2; i++) {
1406                 if (rq->rx_ring[i].base) {
1407                         pci_free_consistent(adapter->pdev, rq->rx_ring[i].size
1408                                             * sizeof(struct Vmxnet3_RxDesc),
1409                                             rq->rx_ring[i].base,
1410                                             rq->rx_ring[i].basePA);
1411                         rq->rx_ring[i].base = NULL;
1412                 }
1413                 rq->buf_info[i] = NULL;
1414         }
1415
1416         if (rq->comp_ring.base) {
1417                 pci_free_consistent(adapter->pdev, rq->comp_ring.size *
1418                                     sizeof(struct Vmxnet3_RxCompDesc),
1419                                     rq->comp_ring.base, rq->comp_ring.basePA);
1420                 rq->comp_ring.base = NULL;
1421         }
1422 }
1423
1424
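/*
 * Lay out buf_info for ring 0 so that every rx_buf_per_pkt-th slot is
 * an skb (head) buffer and the slots in between are page (body)
 * buffers; ring 1 holds page buffers only. Both rings are then zeroed
 * and pre-populated with freshly allocated buffers.
 */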
1425 static int
1426 vmxnet3_rq_init(struct vmxnet3_rx_queue *rq,
1427                 struct vmxnet3_adapter  *adapter)
1428 {
1429         int i;
1430
1431         /* initialize buf_info */
1432         for (i = 0; i < rq->rx_ring[0].size; i++) {
1433
1434                 /* 1st buf for a pkt is skbuff */
1435                 if (i % adapter->rx_buf_per_pkt == 0) {
1436                         rq->buf_info[0][i].buf_type = VMXNET3_RX_BUF_SKB;
1437                         rq->buf_info[0][i].len = adapter->skb_buf_size;
1438                 } else { /* subsequent bufs for a pkt are frags */
1439                         rq->buf_info[0][i].buf_type = VMXNET3_RX_BUF_PAGE;
1440                         rq->buf_info[0][i].len = PAGE_SIZE;
1441                 }
1442         }
1443         for (i = 0; i < rq->rx_ring[1].size; i++) {
1444                 rq->buf_info[1][i].buf_type = VMXNET3_RX_BUF_PAGE;
1445                 rq->buf_info[1][i].len = PAGE_SIZE;
1446         }
1447
1448         /* reset internal state and allocate buffers for both rings */
1449         for (i = 0; i < 2; i++) {
1450                 rq->rx_ring[i].next2fill = rq->rx_ring[i].next2comp = 0;
1451
1452                 memset(rq->rx_ring[i].base, 0, rq->rx_ring[i].size *
1453                        sizeof(struct Vmxnet3_RxDesc));
1454                 rq->rx_ring[i].gen = VMXNET3_INIT_GEN;
1455         }
1456         if (vmxnet3_rq_alloc_rx_buf(rq, 0, rq->rx_ring[0].size - 1,
1457                                     adapter) == 0) {
1458                 /* need at least 1 rx buffer for the 1st ring */
1459                 return -ENOMEM;
1460         }
1461         vmxnet3_rq_alloc_rx_buf(rq, 1, rq->rx_ring[1].size - 1, adapter);
1462
1463         /* reset the comp ring */
1464         rq->comp_ring.next2proc = 0;
1465         memset(rq->comp_ring.base, 0, rq->comp_ring.size *
1466                sizeof(struct Vmxnet3_RxCompDesc));
1467         rq->comp_ring.gen = VMXNET3_INIT_GEN;
1468
1469         /* reset rxctx */
1470         rq->rx_ctx.skb = NULL;
1471
1472         /* stats are not reset */
1473         return 0;
1474 }
1475
1476
1477 static int
1478 vmxnet3_rq_init_all(struct vmxnet3_adapter *adapter)
1479 {
1480         int i, err = 0;
1481
1482         for (i = 0; i < adapter->num_rx_queues; i++) {
1483                 err = vmxnet3_rq_init(&adapter->rx_queue[i], adapter);
1484                 if (unlikely(err)) {
1485                         dev_err(&adapter->netdev->dev, "%s: failed to "
1486                                 "initialize rx queue %i\n",
1487                                 adapter->netdev->name, i);
1488                         break;
1489                 }
1490         }
1491         return err;
1493 }
1494
1495
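/*
 * Allocate the DMA-coherent descriptor rings (two command rings plus
 * one completion ring) and a single buf_info array that backs both
 * command rings -- buf_info[1] simply points past ring 0's entries.
 */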
1496 static int
1497 vmxnet3_rq_create(struct vmxnet3_rx_queue *rq, struct vmxnet3_adapter *adapter)
1498 {
1499         int i;
1500         size_t sz;
1501         struct vmxnet3_rx_buf_info *bi;
1502
1503         for (i = 0; i < 2; i++) {
1504
1505                 sz = rq->rx_ring[i].size * sizeof(struct Vmxnet3_RxDesc);
1506                 rq->rx_ring[i].base = pci_alloc_consistent(adapter->pdev, sz,
1507                                                         &rq->rx_ring[i].basePA);
1508                 if (!rq->rx_ring[i].base) {
1509                         netdev_err(adapter->netdev,
1510                                    "failed to allocate rx ring %d\n", i);
1511                         goto err;
1512                 }
1513         }
1514
1515         sz = rq->comp_ring.size * sizeof(struct Vmxnet3_RxCompDesc);
1516         rq->comp_ring.base = pci_alloc_consistent(adapter->pdev, sz,
1517                                                   &rq->comp_ring.basePA);
1518         if (!rq->comp_ring.base) {
1519                 netdev_err(adapter->netdev, "failed to allocate rx comp ring\n");
1520                 goto err;
1521         }
1522
1523         sz = sizeof(struct vmxnet3_rx_buf_info) * (rq->rx_ring[0].size +
1524                                                    rq->rx_ring[1].size);
1525         bi = kzalloc(sz, GFP_KERNEL);
1526         if (!bi)
1527                 goto err;
1528
1529         rq->buf_info[0] = bi;
1530         rq->buf_info[1] = bi + rq->rx_ring[0].size;
1531
1532         return 0;
1533
1534 err:
1535         vmxnet3_rq_destroy(rq, adapter);
1536         return -ENOMEM;
1537 }
1538
1539
1540 static int
1541 vmxnet3_rq_create_all(struct vmxnet3_adapter *adapter)
1542 {
1543         int i, err = 0;
1544
1545         for (i = 0; i < adapter->num_rx_queues; i++) {
1546                 err = vmxnet3_rq_create(&adapter->rx_queue[i], adapter);
1547                 if (unlikely(err)) {
1548                         dev_err(&adapter->netdev->dev,
1549                                 "%s: failed to create rx queue %i\n",
1550                                 adapter->netdev->name, i);
1551                         goto err_out;
1552                 }
1553         }
1554         return err;
1555 err_out:
1556         vmxnet3_rq_destroy_all(adapter);
1557         return err;
1559 }
1560
1561 /* Multiple queue aware polling function for tx and rx */
1562
1563 static int
1564 vmxnet3_do_poll(struct vmxnet3_adapter *adapter, int budget)
1565 {
1566         int rcd_done = 0, i;
1567         if (unlikely(adapter->shared->ecr))
1568                 vmxnet3_process_events(adapter);
1569         for (i = 0; i < adapter->num_tx_queues; i++)
1570                 vmxnet3_tq_tx_complete(&adapter->tx_queue[i], adapter);
1571
1572         for (i = 0; i < adapter->num_rx_queues; i++)
1573                 rcd_done += vmxnet3_rq_rx_complete(&adapter->rx_queue[i],
1574                                                    adapter, budget);
1575         return rcd_done;
1576 }
1577
1578
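/*
 * NAPI poll callback for the case where one interrupt services all
 * queues: reap tx completions and rx descriptors on every queue via
 * vmxnet3_do_poll(). Per the NAPI contract, finishing under budget
 * completes polling, so interrupts are re-enabled.
 */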
1579 static int
1580 vmxnet3_poll(struct napi_struct *napi, int budget)
1581 {
1582         struct vmxnet3_rx_queue *rx_queue = container_of(napi,
1583                                           struct vmxnet3_rx_queue, napi);
1584         int rxd_done;
1585
1586         rxd_done = vmxnet3_do_poll(rx_queue->adapter, budget);
1587
1588         if (rxd_done < budget) {
1589                 napi_complete(napi);
1590                 vmxnet3_enable_all_intrs(rx_queue->adapter);
1591         }
1592         return rxd_done;
1593 }
1594
1595 /*
1596  * NAPI polling function for MSI-X mode with multiple Rx queues
1597  * Returns the # of NAPI credits consumed (# of rx descriptors processed)
1598  */
1599
1600 static int
1601 vmxnet3_poll_rx_only(struct napi_struct *napi, int budget)
1602 {
1603         struct vmxnet3_rx_queue *rq = container_of(napi,
1604                                                 struct vmxnet3_rx_queue, napi);
1605         struct vmxnet3_adapter *adapter = rq->adapter;
1606         int rxd_done;
1607
1608         /* When sharing interrupt with corresponding tx queue, process
1609          * tx completions in that queue as well
1610          */
1611         if (adapter->share_intr == VMXNET3_INTR_BUDDYSHARE) {
1612                 struct vmxnet3_tx_queue *tq =
1613                                 &adapter->tx_queue[rq - adapter->rx_queue];
1614                 vmxnet3_tq_tx_complete(tq, adapter);
1615         }
1616
1617         rxd_done = vmxnet3_rq_rx_complete(rq, adapter, budget);
1618
1619         if (rxd_done < budget) {
1620                 napi_complete(napi);
1621                 vmxnet3_enable_intr(adapter, rq->comp_ring.intr_idx);
1622         }
1623         return rxd_done;
1624 }
1625
1626
1627 #ifdef CONFIG_PCI_MSI
1628
1629 /*
1630  * Handle completion interrupts on tx queues
1631  * Returns whether or not the intr is handled
1632  */
1633
1634 static irqreturn_t
1635 vmxnet3_msix_tx(int irq, void *data)
1636 {
1637         struct vmxnet3_tx_queue *tq = data;
1638         struct vmxnet3_adapter *adapter = tq->adapter;
1639
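        /* In VMXNET3_IMM_ACTIVE mask mode, keep the vector masked while
         * tx completions are reaped; it is unmasked again below.
         */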
1640         if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1641                 vmxnet3_disable_intr(adapter, tq->comp_ring.intr_idx);
1642
1643         /* Handle the case where only one irq is allocated for all tx queues */
1644         if (adapter->share_intr == VMXNET3_INTR_TXSHARE) {
1645                 int i;
1646                 for (i = 0; i < adapter->num_tx_queues; i++) {
1647                         struct vmxnet3_tx_queue *txq = &adapter->tx_queue[i];
1648                         vmxnet3_tq_tx_complete(txq, adapter);
1649                 }
1650         } else {
1651                 vmxnet3_tq_tx_complete(tq, adapter);
1652         }
1653         vmxnet3_enable_intr(adapter, tq->comp_ring.intr_idx);
1654
1655         return IRQ_HANDLED;
1656 }
1657
1658
1659 /*
1660  * Handle completion interrupts on rx queues. Returns whether or not the
1661  * intr is handled
1662  */
1663
1664 static irqreturn_t
1665 vmxnet3_msix_rx(int irq, void *data)
1666 {
1667         struct vmxnet3_rx_queue *rq = data;
1668         struct vmxnet3_adapter *adapter = rq->adapter;
1669
1670         /* disable intr if needed */
1671         if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1672                 vmxnet3_disable_intr(adapter, rq->comp_ring.intr_idx);
1673         napi_schedule(&rq->napi);
1674
1675         return IRQ_HANDLED;
1676 }
1677
1678 /*
1679  * vmxnet3 msix event intr handler. Returns whether or not the intr
1680  * is handled.
1681  */
1690
1691 static irqreturn_t
1692 vmxnet3_msix_event(int irq, void *data)
1693 {
1694         struct net_device *dev = data;
1695         struct vmxnet3_adapter *adapter = netdev_priv(dev);
1696
1697         /* disable intr if needed */
1698         if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1699                 vmxnet3_disable_intr(adapter, adapter->intr.event_intr_idx);
1700
1701         if (adapter->shared->ecr)
1702                 vmxnet3_process_events(adapter);
1703
1704         vmxnet3_enable_intr(adapter, adapter->intr.event_intr_idx);
1705
1706         return IRQ_HANDLED;
1707 }
1708
1709 #endif /* CONFIG_PCI_MSI  */
1710
1711
1712 /* Interrupt handler for vmxnet3 (INTx and MSI) */
1713 static irqreturn_t
1714 vmxnet3_intr(int irq, void *dev_id)
1715 {
1716         struct net_device *dev = dev_id;
1717         struct vmxnet3_adapter *adapter = netdev_priv(dev);
1718
1719         if (adapter->intr.type == VMXNET3_IT_INTX) {
1720                 u32 icr = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_ICR);
1721                 if (unlikely(icr == 0))
1722                         /* not ours */
1723                         return IRQ_NONE;
1724         }
1725
1726
1727         /* disable intr if needed */
1728         if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1729                 vmxnet3_disable_all_intrs(adapter);
1730
1731         napi_schedule(&adapter->rx_queue[0].napi);
1732
1733         return IRQ_HANDLED;
1734 }
1735
1736 #ifdef CONFIG_NET_POLL_CONTROLLER
1737
1738 /* netpoll callback. */
1739 static void
1740 vmxnet3_netpoll(struct net_device *netdev)
1741 {
1742         struct vmxnet3_adapter *adapter = netdev_priv(netdev);
1743
1744         switch (adapter->intr.type) {
1745 #ifdef CONFIG_PCI_MSI
1746         case VMXNET3_IT_MSIX: {
1747                 int i;
1748                 for (i = 0; i < adapter->num_rx_queues; i++)
1749                         vmxnet3_msix_rx(0, &adapter->rx_queue[i]);
1750                 break;
1751         }
1752 #endif
1753         case VMXNET3_IT_MSI:
1754         default:
1755                 vmxnet3_intr(0, adapter->netdev);
1756                 break;
1757         }
1759 }
1760 #endif  /* CONFIG_NET_POLL_CONTROLLER */
1761
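/*
 * Request one irq per tx queue and one per rx queue, subject to the
 * share_intr policy: TXSHARE gives all tx queues a single vector,
 * BUDDYSHARE lets each rx vector also reap its buddy tx queue. A
 * dedicated event vector comes last. Without MSI-X this falls back to
 * a single MSI or shared INTx irq.
 */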
1762 static int
1763 vmxnet3_request_irqs(struct vmxnet3_adapter *adapter)
1764 {
1765         struct vmxnet3_intr *intr = &adapter->intr;
1766         int err = 0, i;
1767         int vector = 0;
1768
1769 #ifdef CONFIG_PCI_MSI
1770         if (adapter->intr.type == VMXNET3_IT_MSIX) {
1771                 for (i = 0; i < adapter->num_tx_queues; i++) {
1772                         if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE) {
1773                                 sprintf(adapter->tx_queue[i].name, "%s-tx-%d",
1774                                         adapter->netdev->name, vector);
1775                                 err = request_irq(
1776                                               intr->msix_entries[vector].vector,
1777                                               vmxnet3_msix_tx, 0,
1778                                               adapter->tx_queue[i].name,
1779                                               &adapter->tx_queue[i]);
1780                         } else {
1781                                 sprintf(adapter->tx_queue[i].name, "%s-rxtx-%d",
1782                                         adapter->netdev->name, vector);
1783                         }
1784                         if (err) {
1785                                 dev_err(&adapter->netdev->dev,
1786                                         "Failed to request irq for MSIX, %s, "
1787                                         "error %d\n",
1788                                         adapter->tx_queue[i].name, err);
1789                                 return err;
1790                         }
1791
1792                         /* Handle the case where only 1 MSIx was allocated for
1793                          * all tx queues */
1794                         if (adapter->share_intr == VMXNET3_INTR_TXSHARE) {
1795                                 for (; i < adapter->num_tx_queues; i++)
1796                                         adapter->tx_queue[i].comp_ring.intr_idx
1797                                                                 = vector;
1798                                 vector++;
1799                                 break;
1800                         } else {
1801                                 adapter->tx_queue[i].comp_ring.intr_idx
1802                                                                 = vector++;
1803                         }
1804                 }
1805                 if (adapter->share_intr == VMXNET3_INTR_BUDDYSHARE)
1806                         vector = 0;
1807
1808                 for (i = 0; i < adapter->num_rx_queues; i++) {
1809                         if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE)
1810                                 sprintf(adapter->rx_queue[i].name, "%s-rx-%d",
1811                                         adapter->netdev->name, vector);
1812                         else
1813                                 sprintf(adapter->rx_queue[i].name, "%s-rxtx-%d",
1814                                         adapter->netdev->name, vector);
1815                         err = request_irq(intr->msix_entries[vector].vector,
1816                                           vmxnet3_msix_rx, 0,
1817                                           adapter->rx_queue[i].name,
1818                                           &(adapter->rx_queue[i]));
1819                         if (err) {
1820                                 netdev_err(adapter->netdev,
1821                                            "Failed to request irq for MSIX, "
1822                                            "%s, error %d\n",
1823                                            adapter->rx_queue[i].name, err);
1824                                 return err;
1825                         }
1826
1827                         adapter->rx_queue[i].comp_ring.intr_idx = vector++;
1828                 }
1829
1830                 sprintf(intr->event_msi_vector_name, "%s-event-%d",
1831                         adapter->netdev->name, vector);
1832                 err = request_irq(intr->msix_entries[vector].vector,
1833                                   vmxnet3_msix_event, 0,
1834                                   intr->event_msi_vector_name, adapter->netdev);
1835                 intr->event_intr_idx = vector;
1836
1837         } else if (intr->type == VMXNET3_IT_MSI) {
1838                 adapter->num_rx_queues = 1;
1839                 err = request_irq(adapter->pdev->irq, vmxnet3_intr, 0,
1840                                   adapter->netdev->name, adapter->netdev);
1841         } else {
1842 #endif
1843                 adapter->num_rx_queues = 1;
1844                 err = request_irq(adapter->pdev->irq, vmxnet3_intr,
1845                                   IRQF_SHARED, adapter->netdev->name,
1846                                   adapter->netdev);
1847 #ifdef CONFIG_PCI_MSI
1848         }
1849 #endif
1850         intr->num_intrs = vector + 1;
1851         if (err) {
1852                 netdev_err(adapter->netdev,
1853                            "Failed to request irq (intr type:%d), error %d\n",
1854                            intr->type, err);
1855         } else {
1856                 /* Number of rx queues will not change after this */
1857                 for (i = 0; i < adapter->num_rx_queues; i++) {
1858                         struct vmxnet3_rx_queue *rq = &adapter->rx_queue[i];
1859                         rq->qid = i;
1860                         rq->qid2 = i + adapter->num_rx_queues;
1861                 }
1862
1865                 /* init our intr settings */
1866                 for (i = 0; i < intr->num_intrs; i++)
1867                         intr->mod_levels[i] = UPT1_IML_ADAPTIVE;
1868                 if (adapter->intr.type != VMXNET3_IT_MSIX) {
1869                         adapter->intr.event_intr_idx = 0;
1870                         for (i = 0; i < adapter->num_tx_queues; i++)
1871                                 adapter->tx_queue[i].comp_ring.intr_idx = 0;
1872                         adapter->rx_queue[0].comp_ring.intr_idx = 0;
1873                 }
1874
1875                 netdev_info(adapter->netdev,
1876                             "intr type %u, mode %u, %u vectors allocated\n",
1877                             intr->type, intr->mask_mode, intr->num_intrs);
1878         }
1879
1880         return err;
1881 }
1882
1883
1884 static void
1885 vmxnet3_free_irqs(struct vmxnet3_adapter *adapter)
1886 {
1887         struct vmxnet3_intr *intr = &adapter->intr;
1888         BUG_ON(intr->type == VMXNET3_IT_AUTO || intr->num_intrs <= 0);
1889
1890         switch (intr->type) {
1891 #ifdef CONFIG_PCI_MSI
1892         case VMXNET3_IT_MSIX:
1893         {
1894                 int i, vector = 0;
1895
1896                 if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE) {
1897                         for (i = 0; i < adapter->num_tx_queues; i++) {
1898                                 free_irq(intr->msix_entries[vector++].vector,
1899                                          &(adapter->tx_queue[i]));
1900                                 if (adapter->share_intr == VMXNET3_INTR_TXSHARE)
1901                                         break;
1902                         }
1903                 }
1904
1905                 for (i = 0; i < adapter->num_rx_queues; i++) {
1906                         free_irq(intr->msix_entries[vector++].vector,
1907                                  &(adapter->rx_queue[i]));
1908                 }
1909
1910                 free_irq(intr->msix_entries[vector].vector,
1911                          adapter->netdev);
1912                 BUG_ON(vector >= intr->num_intrs);
1913                 break;
1914         }
1915 #endif
1916         case VMXNET3_IT_MSI:
1917                 free_irq(adapter->pdev->irq, adapter->netdev);
1918                 break;
1919         case VMXNET3_IT_INTX:
1920                 free_irq(adapter->pdev->irq, adapter->netdev);
1921                 break;
1922         default:
1923                 BUG();
1924         }
1925 }
1926
1927
1928 static void
1929 vmxnet3_restore_vlan(struct vmxnet3_adapter *adapter)
1930 {
1931         u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
1932         u16 vid;
1933
1934         /* allow untagged pkts */
1935         VMXNET3_SET_VFTABLE_ENTRY(vfTable, 0);
1936
1937         for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
1938                 VMXNET3_SET_VFTABLE_ENTRY(vfTable, vid);
1939 }
1940
1941
1942 static int
1943 vmxnet3_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1944 {
1945         struct vmxnet3_adapter *adapter = netdev_priv(netdev);
1946
1947         if (!(netdev->flags & IFF_PROMISC)) {
1948                 u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
1949                 unsigned long flags;
1950
1951                 VMXNET3_SET_VFTABLE_ENTRY(vfTable, vid);
1952                 spin_lock_irqsave(&adapter->cmd_lock, flags);
1953                 VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
1954                                        VMXNET3_CMD_UPDATE_VLAN_FILTERS);
1955                 spin_unlock_irqrestore(&adapter->cmd_lock, flags);
1956         }
1957
1958         set_bit(vid, adapter->active_vlans);
1959
1960         return 0;
1961 }
1962
1963
1964 static int
1965 vmxnet3_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid)
1966 {
1967         struct vmxnet3_adapter *adapter = netdev_priv(netdev);
1968
1969         if (!(netdev->flags & IFF_PROMISC)) {
1970                 u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
1971                 unsigned long flags;
1972
1973                 VMXNET3_CLEAR_VFTABLE_ENTRY(vfTable, vid);
1974                 spin_lock_irqsave(&adapter->cmd_lock, flags);
1975                 VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
1976                                        VMXNET3_CMD_UPDATE_VLAN_FILTERS);
1977                 spin_unlock_irqrestore(&adapter->cmd_lock, flags);
1978         }
1979
1980         clear_bit(vid, adapter->active_vlans);
1981
1982         return 0;
1983 }
1984
1985
1986 static u8 *
1987 vmxnet3_copy_mc(struct net_device *netdev)
1988 {
1989         u8 *buf = NULL;
1990         u32 sz = netdev_mc_count(netdev) * ETH_ALEN;
1991
1992         /* struct Vmxnet3_RxFilterConf.mfTableLen is u16. */
1993         if (sz <= 0xffff) {
1994                 /* We may be called with BH disabled */
1995                 buf = kmalloc(sz, GFP_ATOMIC);
1996                 if (buf) {
1997                         struct netdev_hw_addr *ha;
1998                         int i = 0;
1999
2000                         netdev_for_each_mc_addr(ha, netdev)
2001                                 memcpy(buf + i++ * ETH_ALEN, ha->addr,
2002                                        ETH_ALEN);
2003                 }
2004         }
2005         return buf;
2006 }
2007
2008
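/*
 * Recompute the device rx mode from the netdev flags: promiscuous mode
 * clears the VLAN filter table, IFF_BROADCAST and IFF_ALLMULTI map
 * directly to RXM bits, and a non-empty multicast list is copied into
 * a table the device reads by physical address. The CMD writes below
 * make the device pick up the new filter configuration.
 */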
2009 static void
2010 vmxnet3_set_mc(struct net_device *netdev)
2011 {
2012         struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2013         unsigned long flags;
2014         struct Vmxnet3_RxFilterConf *rxConf =
2015                                         &adapter->shared->devRead.rxFilterConf;
2016         u8 *new_table = NULL;
2017         u32 new_mode = VMXNET3_RXM_UCAST;
2018
2019         if (netdev->flags & IFF_PROMISC) {
2020                 u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
2021                 memset(vfTable, 0, VMXNET3_VFT_SIZE * sizeof(*vfTable));
2022
2023                 new_mode |= VMXNET3_RXM_PROMISC;
2024         } else {
2025                 vmxnet3_restore_vlan(adapter);
2026         }
2027
2028         if (netdev->flags & IFF_BROADCAST)
2029                 new_mode |= VMXNET3_RXM_BCAST;
2030
2031         if (netdev->flags & IFF_ALLMULTI)
2032                 new_mode |= VMXNET3_RXM_ALL_MULTI;
2033         else
2034                 if (!netdev_mc_empty(netdev)) {
2035                         new_table = vmxnet3_copy_mc(netdev);
2036                         if (new_table) {
2037                                 new_mode |= VMXNET3_RXM_MCAST;
2038                                 rxConf->mfTableLen = cpu_to_le16(
2039                                         netdev_mc_count(netdev) * ETH_ALEN);
2040                                 rxConf->mfTablePA = cpu_to_le64(virt_to_phys(
2041                                                     new_table));
2042                         } else {
2043                                 netdev_info(netdev, "failed to copy mcast list"
2044                                             ", setting ALL_MULTI\n");
2045                                 new_mode |= VMXNET3_RXM_ALL_MULTI;
2046                         }
2047                 }
2048
2049
2050         if (!(new_mode & VMXNET3_RXM_MCAST)) {
2051                 rxConf->mfTableLen = 0;
2052                 rxConf->mfTablePA = 0;
2053         }
2054
2055         spin_lock_irqsave(&adapter->cmd_lock, flags);
2056         if (new_mode != rxConf->rxMode) {
2057                 rxConf->rxMode = cpu_to_le32(new_mode);
2058                 VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2059                                        VMXNET3_CMD_UPDATE_RX_MODE);
2060                 VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2061                                        VMXNET3_CMD_UPDATE_VLAN_FILTERS);
2062         }
2063
2064         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2065                                VMXNET3_CMD_UPDATE_MAC_FILTERS);
2066         spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2067
2068         kfree(new_table);
2069 }
2070
2071 void
2072 vmxnet3_rq_destroy_all(struct vmxnet3_adapter *adapter)
2073 {
2074         int i;
2075
2076         for (i = 0; i < adapter->num_rx_queues; i++)
2077                 vmxnet3_rq_destroy(&adapter->rx_queue[i], adapter);
2078 }
2079
2080
2081 /*
2082  *   Set up driver_shared based on settings in adapter.
2083  */
2084
2085 static void
2086 vmxnet3_setup_driver_shared(struct vmxnet3_adapter *adapter)
2087 {
2088         struct Vmxnet3_DriverShared *shared = adapter->shared;
2089         struct Vmxnet3_DSDevRead *devRead = &shared->devRead;
2090         struct Vmxnet3_TxQueueConf *tqc;
2091         struct Vmxnet3_RxQueueConf *rqc;
2092         int i;
2093
2094         memset(shared, 0, sizeof(*shared));
2095
2096         /* driver settings */
2097         shared->magic = cpu_to_le32(VMXNET3_REV1_MAGIC);
2098         devRead->misc.driverInfo.version = cpu_to_le32(
2099                                                 VMXNET3_DRIVER_VERSION_NUM);
2100         devRead->misc.driverInfo.gos.gosBits = (sizeof(void *) == 4 ?
2101                                 VMXNET3_GOS_BITS_32 : VMXNET3_GOS_BITS_64);
2102         devRead->misc.driverInfo.gos.gosType = VMXNET3_GOS_TYPE_LINUX;
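        /* gos is a packed 32-bit bitfield, so byte-swap it to little
         * endian as one word; on big-endian hosts the individual fields
         * would otherwise land in the wrong bits.
         */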
2103         *((u32 *)&devRead->misc.driverInfo.gos) = cpu_to_le32(
2104                                 *((u32 *)&devRead->misc.driverInfo.gos));
2105         devRead->misc.driverInfo.vmxnet3RevSpt = cpu_to_le32(1);
2106         devRead->misc.driverInfo.uptVerSpt = cpu_to_le32(1);
2107
2108         devRead->misc.ddPA = cpu_to_le64(virt_to_phys(adapter));
2109         devRead->misc.ddLen = cpu_to_le32(sizeof(struct vmxnet3_adapter));
2110
2111         /* set up feature flags */
2112         if (adapter->netdev->features & NETIF_F_RXCSUM)
2113                 devRead->misc.uptFeatures |= UPT1_F_RXCSUM;
2114
2115         if (adapter->netdev->features & NETIF_F_LRO) {
2116                 devRead->misc.uptFeatures |= UPT1_F_LRO;
2117                 devRead->misc.maxNumRxSG = cpu_to_le16(1 + MAX_SKB_FRAGS);
2118         }
2119         if (adapter->netdev->features & NETIF_F_HW_VLAN_CTAG_RX)
2120                 devRead->misc.uptFeatures |= UPT1_F_RXVLAN;
2121
2122         devRead->misc.mtu = cpu_to_le32(adapter->netdev->mtu);
2123         devRead->misc.queueDescPA = cpu_to_le64(adapter->queue_desc_pa);
2124         devRead->misc.queueDescLen = cpu_to_le32(
2125                 adapter->num_tx_queues * sizeof(struct Vmxnet3_TxQueueDesc) +
2126                 adapter->num_rx_queues * sizeof(struct Vmxnet3_RxQueueDesc));
2127
2128         /* tx queue settings */
2129         devRead->misc.numTxQueues =  adapter->num_tx_queues;
2130         for (i = 0; i < adapter->num_tx_queues; i++) {
2131                 struct vmxnet3_tx_queue *tq = &adapter->tx_queue[i];
2132                 BUG_ON(adapter->tx_queue[i].tx_ring.base == NULL);
2133                 tqc = &adapter->tqd_start[i].conf;
2134                 tqc->txRingBasePA   = cpu_to_le64(tq->tx_ring.basePA);
2135                 tqc->dataRingBasePA = cpu_to_le64(tq->data_ring.basePA);
2136                 tqc->compRingBasePA = cpu_to_le64(tq->comp_ring.basePA);
2137                 tqc->ddPA           = cpu_to_le64(virt_to_phys(tq->buf_info));
2138                 tqc->txRingSize     = cpu_to_le32(tq->tx_ring.size);
2139                 tqc->dataRingSize   = cpu_to_le32(tq->data_ring.size);
2140                 tqc->compRingSize   = cpu_to_le32(tq->comp_ring.size);
2141                 tqc->ddLen          = cpu_to_le32(
2142                                         sizeof(struct vmxnet3_tx_buf_info) *
2143                                         tqc->txRingSize);
2144                 tqc->intrIdx        = tq->comp_ring.intr_idx;
2145         }
2146
2147         /* rx queue settings */
2148         devRead->misc.numRxQueues = adapter->num_rx_queues;
2149         for (i = 0; i < adapter->num_rx_queues; i++) {
2150                 struct vmxnet3_rx_queue *rq = &adapter->rx_queue[i];
2151                 rqc = &adapter->rqd_start[i].conf;
2152                 rqc->rxRingBasePA[0] = cpu_to_le64(rq->rx_ring[0].basePA);
2153                 rqc->rxRingBasePA[1] = cpu_to_le64(rq->rx_ring[1].basePA);
2154                 rqc->compRingBasePA  = cpu_to_le64(rq->comp_ring.basePA);
2155                 rqc->ddPA            = cpu_to_le64(virt_to_phys(
2156                                                         rq->buf_info));
2157                 rqc->rxRingSize[0]   = cpu_to_le32(rq->rx_ring[0].size);
2158                 rqc->rxRingSize[1]   = cpu_to_le32(rq->rx_ring[1].size);
2159                 rqc->compRingSize    = cpu_to_le32(rq->comp_ring.size);
2160                 rqc->ddLen           = cpu_to_le32(
2161                                         sizeof(struct vmxnet3_rx_buf_info) *
2162                                         (rqc->rxRingSize[0] +
2163                                          rqc->rxRingSize[1]));
2164                 rqc->intrIdx         = rq->comp_ring.intr_idx;
2165         }
2166
2167 #ifdef VMXNET3_RSS
2168         memset(adapter->rss_conf, 0, sizeof(*adapter->rss_conf));
2169
2170         if (adapter->rss) {
2171                 struct UPT1_RSSConf *rssConf = adapter->rss_conf;
2172                 static const uint8_t rss_key[UPT1_RSS_MAX_KEY_SIZE] = {
2173                         0x3b, 0x56, 0xd1, 0x56, 0x13, 0x4a, 0xe7, 0xac,
2174                         0xe8, 0x79, 0x09, 0x75, 0xe8, 0x65, 0x79, 0x28,
2175                         0x35, 0x12, 0xb9, 0x56, 0x7c, 0x76, 0x4b, 0x70,
2176                         0xd8, 0x56, 0xa3, 0x18, 0x9b, 0x0a, 0xee, 0xf3,
2177                         0x96, 0xa6, 0x9f, 0x8f, 0x9e, 0x8c, 0x90, 0xc9,
2178                 };
2179
2180                 devRead->misc.uptFeatures |= UPT1_F_RSS;
2181                 devRead->misc.numRxQueues = adapter->num_rx_queues;
2182                 rssConf->hashType = UPT1_RSS_HASH_TYPE_TCP_IPV4 |
2183                                     UPT1_RSS_HASH_TYPE_IPV4 |
2184                                     UPT1_RSS_HASH_TYPE_TCP_IPV6 |
2185                                     UPT1_RSS_HASH_TYPE_IPV6;
2186                 rssConf->hashFunc = UPT1_RSS_HASH_FUNC_TOEPLITZ;
2187                 rssConf->hashKeySize = UPT1_RSS_MAX_KEY_SIZE;
2188                 rssConf->indTableSize = VMXNET3_RSS_IND_TABLE_SIZE;
2189                 memcpy(rssConf->hashKey, rss_key, sizeof(rss_key));
2190
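                /* default indirection table: spread the entries
                 * round-robin across the active rx queues */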
2191                 for (i = 0; i < rssConf->indTableSize; i++)
2192                         rssConf->indTable[i] = ethtool_rxfh_indir_default(
2193                                 i, adapter->num_rx_queues);
2194
2195                 devRead->rssConfDesc.confVer = 1;
2196                 devRead->rssConfDesc.confLen = sizeof(*rssConf);
2197                 devRead->rssConfDesc.confPA  = virt_to_phys(rssConf);
2198         }
2199
2200 #endif /* VMXNET3_RSS */
2201
2202         /* intr settings */
2203         devRead->intrConf.autoMask = adapter->intr.mask_mode ==
2204                                      VMXNET3_IMM_AUTO;
2205         devRead->intrConf.numIntrs = adapter->intr.num_intrs;
2206         for (i = 0; i < adapter->intr.num_intrs; i++)
2207                 devRead->intrConf.modLevels[i] = adapter->intr.mod_levels[i];
2208
2209         devRead->intrConf.eventIntrIdx = adapter->intr.event_intr_idx;
2210         devRead->intrConf.intrCtrl |= cpu_to_le32(VMXNET3_IC_DISABLE_ALL);
2211
2212         /* rx filter settings */
2213         devRead->rxFilterConf.rxMode = 0;
2214         vmxnet3_restore_vlan(adapter);
2215         vmxnet3_write_mac_addr(adapter, adapter->netdev->dev_addr);
2216
2217         /* the rest are already zeroed */
2218 }
2219
2220
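/*
 * Bring the device up: initialize the queues, request irqs, publish
 * the shared area via DSAL/DSAH, issue ACTIVATE_DEV, prime the rx
 * producer registers, restore the rx filters, and finally enable NAPI
 * and interrupts.
 */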
2221 int
2222 vmxnet3_activate_dev(struct vmxnet3_adapter *adapter)
2223 {
2224         int err, i;
2225         u32 ret;
2226         unsigned long flags;
2227
2228         netdev_dbg(adapter->netdev, "%s: skb_buf_size %d, rx_buf_per_pkt %d,"
2229                 " ring sizes %u %u %u\n", adapter->netdev->name,
2230                 adapter->skb_buf_size, adapter->rx_buf_per_pkt,
2231                 adapter->tx_queue[0].tx_ring.size,
2232                 adapter->rx_queue[0].rx_ring[0].size,
2233                 adapter->rx_queue[0].rx_ring[1].size);
2234
2235         vmxnet3_tq_init_all(adapter);
2236         err = vmxnet3_rq_init_all(adapter);
2237         if (err) {
2238                 netdev_err(adapter->netdev,
2239                            "Failed to init rx queue: error %d\n", err);
2240                 goto rq_err;
2241         }
2242
2243         err = vmxnet3_request_irqs(adapter);
2244         if (err) {
2245                 netdev_err(adapter->netdev,
2246                            "Failed to set up irqs: error %d\n", err);
2247                 goto irq_err;
2248         }
2249
2250         vmxnet3_setup_driver_shared(adapter);
2251
2252         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAL, VMXNET3_GET_ADDR_LO(
2253                                adapter->shared_pa));
2254         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAH, VMXNET3_GET_ADDR_HI(
2255                                adapter->shared_pa));
2256         spin_lock_irqsave(&adapter->cmd_lock, flags);
2257         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2258                                VMXNET3_CMD_ACTIVATE_DEV);
2259         ret = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD);
2260         spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2261
2262         if (ret != 0) {
2263                 netdev_err(adapter->netdev,
2264                            "Failed to activate dev: error %u\n", ret);
2265                 err = -EINVAL;
2266                 goto activate_err;
2267         }
2268
2269         for (i = 0; i < adapter->num_rx_queues; i++) {
2270                 VMXNET3_WRITE_BAR0_REG(adapter,
2271                                 VMXNET3_REG_RXPROD + i * VMXNET3_REG_ALIGN,
2272                                 adapter->rx_queue[i].rx_ring[0].next2fill);
2273                 VMXNET3_WRITE_BAR0_REG(adapter, (VMXNET3_REG_RXPROD2 +
2274                                 (i * VMXNET3_REG_ALIGN)),
2275                                 adapter->rx_queue[i].rx_ring[1].next2fill);
2276         }
2277
2278         /* Apply the rx filter settings last. */
2279         vmxnet3_set_mc(adapter->netdev);
2280
2281         /*
2282          * Check link state when first activating device. It will start the
2283          * tx queue if the link is up.
2284          */
2285         vmxnet3_check_link(adapter, true);
2286         for (i = 0; i < adapter->num_rx_queues; i++)
2287                 napi_enable(&adapter->rx_queue[i].napi);
2288         vmxnet3_enable_all_intrs(adapter);
2289         clear_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state);
2290         return 0;
2291
2292 activate_err:
2293         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAL, 0);
2294         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAH, 0);
2295         vmxnet3_free_irqs(adapter);
2296 irq_err:
2297 rq_err:
2298         /* free up buffers we allocated */
2299         vmxnet3_rq_cleanup_all(adapter);
2300         return err;
2301 }
2302
2303
2304 void
2305 vmxnet3_reset_dev(struct vmxnet3_adapter *adapter)
2306 {
2307         unsigned long flags;
2308         spin_lock_irqsave(&adapter->cmd_lock, flags);
2309         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_RESET_DEV);
2310         spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2311 }
2312
2313
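/*
 * Opposite of vmxnet3_activate_dev: tell the device to quiesce, stop
 * NAPI and the tx queues, drop the carrier, and release buffers and
 * irqs. The QUIESCED state bit makes this idempotent.
 */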
2314 int
2315 vmxnet3_quiesce_dev(struct vmxnet3_adapter *adapter)
2316 {
2317         int i;
2318         unsigned long flags;
2319         if (test_and_set_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state))
2320                 return 0;
2321
2322
2323         spin_lock_irqsave(&adapter->cmd_lock, flags);
2324         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2325                                VMXNET3_CMD_QUIESCE_DEV);
2326         spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2327         vmxnet3_disable_all_intrs(adapter);
2328
2329         for (i = 0; i < adapter->num_rx_queues; i++)
2330                 napi_disable(&adapter->rx_queue[i].napi);
2331         netif_tx_disable(adapter->netdev);
2332         adapter->link_speed = 0;
2333         netif_carrier_off(adapter->netdev);
2334
2335         vmxnet3_tq_cleanup_all(adapter);
2336         vmxnet3_rq_cleanup_all(adapter);
2337         vmxnet3_free_irqs(adapter);
2338         return 0;
2339 }
2340
2341
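/* MACL holds the first four bytes of the address, MACH the last two */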
2342 static void
2343 vmxnet3_write_mac_addr(struct vmxnet3_adapter *adapter, u8 *mac)
2344 {
2345         u32 tmp;
2346
2347         tmp = *(u32 *)mac;
2348         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_MACL, tmp);
2349
2350         tmp = (mac[5] << 8) | mac[4];
2351         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_MACH, tmp);
2352 }
2353
2354
2355 static int
2356 vmxnet3_set_mac_addr(struct net_device *netdev, void *p)
2357 {
2358         struct sockaddr *addr = p;
2359         struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2360
2361         memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
2362         vmxnet3_write_mac_addr(adapter, addr->sa_data);
2363
2364         return 0;
2365 }
2366
2367
2368 /* ==================== initialization and cleanup routines ============ */
2369
2370 static int
2371 vmxnet3_alloc_pci_resources(struct vmxnet3_adapter *adapter, bool *dma64)
2372 {
2373         int err;
2374         unsigned long mmio_start, mmio_len;
2375         struct pci_dev *pdev = adapter->pdev;
2376
2377         err = pci_enable_device(pdev);
2378         if (err) {
2379                 dev_err(&pdev->dev, "Failed to enable adapter: error %d\n", err);
2380                 return err;
2381         }
2382
2383         if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) {
2384                 if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)) != 0) {
2385                         dev_err(&pdev->dev,
2386                                 "pci_set_consistent_dma_mask failed\n");
2387                         err = -EIO;
2388                         goto err_set_mask;
2389                 }
2390                 *dma64 = true;
2391         } else {
2392                 if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0) {
2393                         dev_err(&pdev->dev,
2394                                 "pci_set_dma_mask failed\n");
2395                         err = -EIO;
2396                         goto err_set_mask;
2397                 }
2398                 *dma64 = false;
2399         }
2400
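        /* reserve BARs 0 and 1 only: (1 << 2) - 1 == 0x3 */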
2401         err = pci_request_selected_regions(pdev, (1 << 2) - 1,
2402                                            vmxnet3_driver_name);
2403         if (err) {
2404                 dev_err(&pdev->dev,
2405                         "Failed to request region for adapter: error %d\n", err);
2406                 goto err_set_mask;
2407         }
2408
2409         pci_set_master(pdev);
2410
2411         mmio_start = pci_resource_start(pdev, 0);
2412         mmio_len = pci_resource_len(pdev, 0);
2413         adapter->hw_addr0 = ioremap(mmio_start, mmio_len);
2414         if (!adapter->hw_addr0) {
2415                 dev_err(&pdev->dev, "Failed to map bar0\n");
2416                 err = -EIO;
2417                 goto err_ioremap;
2418         }
2419
2420         mmio_start = pci_resource_start(pdev, 1);
2421         mmio_len = pci_resource_len(pdev, 1);
2422         adapter->hw_addr1 = ioremap(mmio_start, mmio_len);
2423         if (!adapter->hw_addr1) {
2424                 dev_err(&pdev->dev, "Failed to map bar1\n");
2425                 err = -EIO;
2426                 goto err_bar1;
2427         }
2428         return 0;
2429
2430 err_bar1:
2431         iounmap(adapter->hw_addr0);
2432 err_ioremap:
2433         pci_release_selected_regions(pdev, (1 << 2) - 1);
2434 err_set_mask:
2435         pci_disable_device(pdev);
2436         return err;
2437 }
2438
2439
2440 static void
2441 vmxnet3_free_pci_resources(struct vmxnet3_adapter *adapter)
2442 {
2443         BUG_ON(!adapter->pdev);
2444
2445         iounmap(adapter->hw_addr0);
2446         iounmap(adapter->hw_addr1);
2447         pci_release_selected_regions(adapter->pdev, (1 << 2) - 1);
2448         pci_disable_device(adapter->pdev);
2449 }
2450
2451
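/*
 * Size the rx buffers from the MTU: if a frame fits in one skb buffer
 * (at most VMXNET3_MAX_SKB_BUF_SIZE), a packet needs a single buffer;
 * otherwise the remainder spills into page-sized body buffers. Ring 0
 * is then rounded up so its size is a multiple of rx_buf_per_pkt *
 * VMXNET3_RING_SIZE_ALIGN.
 */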
2452 static void
2453 vmxnet3_adjust_rx_ring_size(struct vmxnet3_adapter *adapter)
2454 {
2455         size_t sz, i, ring0_size, ring1_size, comp_size;
2456         struct vmxnet3_rx_queue *rq = &adapter->rx_queue[0];
2457
2458
2459         if (adapter->netdev->mtu <= VMXNET3_MAX_SKB_BUF_SIZE -
2460                                     VMXNET3_MAX_ETH_HDR_SIZE) {
2461                 adapter->skb_buf_size = adapter->netdev->mtu +
2462                                         VMXNET3_MAX_ETH_HDR_SIZE;
2463                 if (adapter->skb_buf_size < VMXNET3_MIN_T0_BUF_SIZE)
2464                         adapter->skb_buf_size = VMXNET3_MIN_T0_BUF_SIZE;
2465
2466                 adapter->rx_buf_per_pkt = 1;
2467         } else {
2468                 adapter->skb_buf_size = VMXNET3_MAX_SKB_BUF_SIZE;
2469                 sz = adapter->netdev->mtu - VMXNET3_MAX_SKB_BUF_SIZE +
2470                                             VMXNET3_MAX_ETH_HDR_SIZE;
2471                 adapter->rx_buf_per_pkt = 1 + (sz + PAGE_SIZE - 1) / PAGE_SIZE;
2472         }
2473
2474         /*
2475          * for simplicity, force the ring0 size to be a multiple of
2476          * rx_buf_per_pkt * VMXNET3_RING_SIZE_ALIGN
2477          */
2478         sz = adapter->rx_buf_per_pkt * VMXNET3_RING_SIZE_ALIGN;
2479         ring0_size = adapter->rx_queue[0].rx_ring[0].size;
2480         ring0_size = (ring0_size + sz - 1) / sz * sz;
2481         ring0_size = min_t(u32, ring0_size, VMXNET3_RX_RING_MAX_SIZE /
2482                            sz * sz);
2483         ring1_size = adapter->rx_queue[0].rx_ring[1].size;
2484         comp_size = ring0_size + ring1_size;
2485
2486         for (i = 0; i < adapter->num_rx_queues; i++) {
2487                 rq = &adapter->rx_queue[i];
2488                 rq->rx_ring[0].size = ring0_size;
2489                 rq->rx_ring[1].size = ring1_size;
2490                 rq->comp_ring.size = comp_size;
2491         }
2492 }
2493
2494
2495 int
2496 vmxnet3_create_queues(struct vmxnet3_adapter *adapter, u32 tx_ring_size,
2497                       u32 rx_ring_size, u32 rx_ring2_size)
2498 {
2499         int err = 0, i;
2500
2501         for (i = 0; i < adapter->num_tx_queues; i++) {
2502                 struct vmxnet3_tx_queue *tq = &adapter->tx_queue[i];
2503                 tq->tx_ring.size   = tx_ring_size;
2504                 tq->data_ring.size = tx_ring_size;
2505                 tq->comp_ring.size = tx_ring_size;
2506                 tq->shared = &adapter->tqd_start[i].ctrl;
2507                 tq->stopped = true;
2508                 tq->adapter = adapter;
2509                 tq->qid = i;
2510                 err = vmxnet3_tq_create(tq, adapter);
2511                 /*
2512                  * Too late to change num_tx_queues. We cannot make do with
2513                  * fewer queues than we asked for
2514                  */
2515                 if (err)
2516                         goto queue_err;
2517         }
2518
2519         adapter->rx_queue[0].rx_ring[0].size = rx_ring_size;
2520         adapter->rx_queue[0].rx_ring[1].size = rx_ring2_size;
2521         vmxnet3_adjust_rx_ring_size(adapter);
2522         for (i = 0; i < adapter->num_rx_queues; i++) {
2523                 struct vmxnet3_rx_queue *rq = &adapter->rx_queue[i];
2524                 /* qid and qid2 for rx queues will be assigned later when num
2525                  * of rx queues is finalized after allocating intrs */
2526                 rq->shared = &adapter->rqd_start[i].ctrl;
2527                 rq->adapter = adapter;
2528                 err = vmxnet3_rq_create(rq, adapter);
2529                 if (err) {
2530                         if (i == 0) {
2531                                 netdev_err(adapter->netdev,
2532                                            "Could not allocate any rx queues. "
2533                                            "Aborting.\n");
2534                                 goto queue_err;
2535                         } else {
2536                                 netdev_info(adapter->netdev,
2537                                             "Number of rx queues changed "
2538                                             "to: %d.\n", i);
2539                                 adapter->num_rx_queues = i;
2540                                 err = 0;
2541                                 break;
2542                         }
2543                 }
2544         }
2545         return err;
2546 queue_err:
2547         vmxnet3_tq_destroy_all(adapter);
2548         return err;
2549 }
2550
2551 static int
2552 vmxnet3_open(struct net_device *netdev)
2553 {
2554         struct vmxnet3_adapter *adapter;
2555         int err, i;
2556
2557         adapter = netdev_priv(netdev);
2558
2559         for (i = 0; i < adapter->num_tx_queues; i++)
2560                 spin_lock_init(&adapter->tx_queue[i].tx_lock);
2561
2562         err = vmxnet3_create_queues(adapter, VMXNET3_DEF_TX_RING_SIZE,
2563                                     VMXNET3_DEF_RX_RING_SIZE,
2564                                     VMXNET3_DEF_RX_RING_SIZE);
2565         if (err)
2566                 goto queue_err;
2567
2568         err = vmxnet3_activate_dev(adapter);
2569         if (err)
2570                 goto activate_err;
2571
2572         return 0;
2573
2574 activate_err:
2575         vmxnet3_rq_destroy_all(adapter);
2576         vmxnet3_tq_destroy_all(adapter);
2577 queue_err:
2578         return err;
2579 }
2580
2581
2582 static int
2583 vmxnet3_close(struct net_device *netdev)
2584 {
2585         struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2586
2587         /*
2588          * Reset_work may be in the middle of resetting the device, wait for its
2589          * completion.
2590          */
2591         while (test_and_set_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state))
2592                 msleep(1);
2593
2594         vmxnet3_quiesce_dev(adapter);
2595
2596         vmxnet3_rq_destroy_all(adapter);
2597         vmxnet3_tq_destroy_all(adapter);
2598
2599         clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state);
2600
2601
2602         return 0;
2603 }
2604
2605
2606 void
2607 vmxnet3_force_close(struct vmxnet3_adapter *adapter)
2608 {
2609         int i;
2610
2611         /*
2612          * we must clear VMXNET3_STATE_BIT_RESETTING, otherwise
2613          * vmxnet3_close() will deadlock.
2614          */
2615         BUG_ON(test_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state));
2616
2617         /* we need to enable NAPI, otherwise dev_close will deadlock */
2618         for (i = 0; i < adapter->num_rx_queues; i++)
2619                 napi_enable(&adapter->rx_queue[i].napi);
2620         dev_close(adapter->netdev);
2621 }
2622
2623
2624 static int
2625 vmxnet3_change_mtu(struct net_device *netdev, int new_mtu)
2626 {
2627         struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2628         int err = 0;
2629
2630         if (new_mtu < VMXNET3_MIN_MTU || new_mtu > VMXNET3_MAX_MTU)
2631                 return -EINVAL;
2632
2633         netdev->mtu = new_mtu;
2634
2635         /*
2636          * Reset_work may be in the middle of resetting the device, wait for its
2637          * completion.
2638          */
2639         while (test_and_set_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state))
2640                 msleep(1);
2641
2642         if (netif_running(netdev)) {
2643                 vmxnet3_quiesce_dev(adapter);
2644                 vmxnet3_reset_dev(adapter);
2645
2646                 /* we need to re-create the rx queue based on the new mtu */
2647                 vmxnet3_rq_destroy_all(adapter);
2648                 vmxnet3_adjust_rx_ring_size(adapter);
2649                 err = vmxnet3_rq_create_all(adapter);
2650                 if (err) {
2651                         netdev_err(netdev,
2652                                    "failed to re-create rx queues, "
2653                                    "error %d. Closing it.\n", err);
2654                         goto out;
2655                 }
2656
2657                 err = vmxnet3_activate_dev(adapter);
2658                 if (err) {
2659                         netdev_err(netdev,
2660                                    "failed to re-activate, error %d. "
2661                                    "Closing it\n", err);
2662                         goto out;
2663                 }
2664         }
2665
2666 out:
2667         clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state);
2668         if (err)
2669                 vmxnet3_force_close(adapter);
2670
2671         return err;
2672 }
2673
2674
2675 static void
2676 vmxnet3_declare_features(struct vmxnet3_adapter *adapter, bool dma64)
2677 {
2678         struct net_device *netdev = adapter->netdev;
2679
2680         netdev->hw_features = NETIF_F_SG | NETIF_F_RXCSUM |
2681                 NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_CTAG_TX |
2682                 NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_TSO | NETIF_F_TSO6 |
2683                 NETIF_F_LRO;
2684         if (dma64)
2685                 netdev->hw_features |= NETIF_F_HIGHDMA;
2686         netdev->vlan_features = netdev->hw_features &
2687                                 ~(NETIF_F_HW_VLAN_CTAG_TX |
2688                                   NETIF_F_HW_VLAN_CTAG_RX);
2689         netdev->features = netdev->hw_features | NETIF_F_HW_VLAN_CTAG_FILTER;
2690 }
2691
2692
2693 static void
2694 vmxnet3_read_mac_addr(struct vmxnet3_adapter *adapter, u8 *mac)
2695 {
2696         u32 tmp;
2697
2698         tmp = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_MACL);
2699         *(u32 *)mac = tmp;
2700
2701         tmp = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_MACH);
2702         mac[4] = tmp & 0xff;
2703         mac[5] = (tmp >> 8) & 0xff;
2704 }
2705
2706 #ifdef CONFIG_PCI_MSI
2707
2708 /*
2709  * Enable MSI-X vectors.
2710  * Returns:
2711  *      0 on successful enabling of required vectors,
2712  *      VMXNET3_LINUX_MIN_MSIX_VECT when only the minimum number of
2713  *       required vectors could be enabled,
2714  *      otherwise the number of vectors that can be enabled (smaller
2715  *       than VMXNET3_LINUX_MIN_MSIX_VECT).
2716  */
2717
2718 static int
2719 vmxnet3_acquire_msix_vectors(struct vmxnet3_adapter *adapter,
2720                              int vectors)
2721 {
2722         int err = 0, vector_threshold;
2723         vector_threshold = VMXNET3_LINUX_MIN_MSIX_VECT;
2724
2725         while (vectors >= vector_threshold) {
2726                 err = pci_enable_msix(adapter->pdev, adapter->intr.msix_entries,
2727                                       vectors);
2728                 if (!err) {
2729                         adapter->intr.num_intrs = vectors;
2730                         return 0;
2731                 } else if (err < 0) {
2732                         dev_err(&adapter->netdev->dev,
2733                                    "Failed to enable MSI-X, error: %d\n", err);
2734                         vectors = 0;
2735                 } else if (err < vector_threshold) {
2736                         break;
2737                 } else {
2738                         /* If we fail to enable the required number of
2739                          * MSI-X vectors, try the minimum number required.
2740                          */
2741                         dev_err(&adapter->netdev->dev,
2742                                 "Failed to enable %d MSI-X, trying %d instead\n",
2743                                     vectors, vector_threshold);
2744                         vectors = vector_threshold;
2745                 }
2746         }
2747
2748         dev_info(&adapter->pdev->dev,
2749                  "Number of MSI-X interrupts which can be allocated "
2750                  "is lower than the minimum required.\n");
2751         return err;
2752 }
2753
2754
2755 #endif /* CONFIG_PCI_MSI */
2756
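/*
 * Query the device for its preferred interrupt type (low two bits of
 * the GET_CONF_INTR result) and mask mode (the next two bits), then
 * try MSI-X, MSI, and INTx in that order, shrinking the number of rx
 * queues where a mode cannot support more than one.
 */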
2757 static void
2758 vmxnet3_alloc_intr_resources(struct vmxnet3_adapter *adapter)
2759 {
2760         u32 cfg;
2761         unsigned long flags;
2762
2763         /* intr settings */
2764         spin_lock_irqsave(&adapter->cmd_lock, flags);
2765         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2766                                VMXNET3_CMD_GET_CONF_INTR);
2767         cfg = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD);
2768         spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2769         adapter->intr.type = cfg & 0x3;
2770         adapter->intr.mask_mode = (cfg >> 2) & 0x3;
2771
2772         if (adapter->intr.type == VMXNET3_IT_AUTO) {
2773                 adapter->intr.type = VMXNET3_IT_MSIX;
2774         }
2775
2776 #ifdef CONFIG_PCI_MSI
2777         if (adapter->intr.type == VMXNET3_IT_MSIX) {
2778                 int vector, err = 0;
2779
2780                 adapter->intr.num_intrs = (adapter->share_intr ==
2781                                            VMXNET3_INTR_TXSHARE) ? 1 :
2782                                            adapter->num_tx_queues;
2783                 adapter->intr.num_intrs += (adapter->share_intr ==
2784                                            VMXNET3_INTR_BUDDYSHARE) ? 0 :
2785                                            adapter->num_rx_queues;
2786                 adapter->intr.num_intrs += 1;           /* for link event */
2787
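                /*
                 * Vector budget: one per tx queue (or one shared), one per
                 * rx queue unless rx queues buddy with tx queues, plus one
                 * for link events; e.g. 4 tx + 4 rx queues with no sharing
                 * ask for 4 + 4 + 1 = 9 vectors.  The request is clamped
                 * to at least VMXNET3_LINUX_MIN_MSIX_VECT below.
                 */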
2788                 adapter->intr.num_intrs = (adapter->intr.num_intrs >
2789                                            VMXNET3_LINUX_MIN_MSIX_VECT
2790                                            ? adapter->intr.num_intrs :
2791                                            VMXNET3_LINUX_MIN_MSIX_VECT);
2792
2793                 for (vector = 0; vector < adapter->intr.num_intrs; vector++)
2794                         adapter->intr.msix_entries[vector].entry = vector;
2795
2796                 err = vmxnet3_acquire_msix_vectors(adapter,
2797                                                    adapter->intr.num_intrs);
2798                 /* If we cannot allocate one MSI-X vector per queue,
2799                  * limit the number of rx queues to 1.
2800                  */
2801                 if (err == VMXNET3_LINUX_MIN_MSIX_VECT) {
2802                         if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE
2803                             || adapter->num_rx_queues != 1) {
2804                                 adapter->share_intr = VMXNET3_INTR_TXSHARE;
2805                                 netdev_err(adapter->netdev,
2806                                            "Limiting #rx queues to 1\n");
2807                                 adapter->num_rx_queues = 1;
2808                                 adapter->intr.num_intrs =
2809                                                 VMXNET3_LINUX_MIN_MSIX_VECT;
2810                         }
2811                         return;
2812                 }
2813                 if (!err)
2814                         return;
2815
2816                 /* If we cannot allocate MSI-X vectors, use only one rx queue */
2817                 dev_info(&adapter->pdev->dev,
2818                          "Failed to enable MSI-X, error %d. "
2819                          "Limiting #rx queues to 1, try MSI.\n", err);
2820
2821                 adapter->intr.type = VMXNET3_IT_MSI;
2822         }
2823
2824         if (adapter->intr.type == VMXNET3_IT_MSI) {
2825                 int err;
2826                 err = pci_enable_msi(adapter->pdev);
2827                 if (!err) {
2828                         adapter->num_rx_queues = 1;
2829                         adapter->intr.num_intrs = 1;
2830                         return;
2831                 }
2832         }
2833 #endif /* CONFIG_PCI_MSI */
2834
2835         adapter->num_rx_queues = 1;
2836         dev_info(&adapter->netdev->dev,
2837                  "Using INTx interrupt, #Rx queues: 1.\n");
2838         adapter->intr.type = VMXNET3_IT_INTX;
2839
2840         /* INTx related setting */
2841         adapter->intr.num_intrs = 1;
2842 }
2843
2844
2845 static void
2846 vmxnet3_free_intr_resources(struct vmxnet3_adapter *adapter)
2847 {
2848         if (adapter->intr.type == VMXNET3_IT_MSIX)
2849                 pci_disable_msix(adapter->pdev);
2850         else if (adapter->intr.type == VMXNET3_IT_MSI)
2851                 pci_disable_msi(adapter->pdev);
2852         else
2853                 BUG_ON(adapter->intr.type != VMXNET3_IT_INTX);
2854 }
2855
2856
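/*
 * Called by the networking core when a tx queue looks stuck.  This runs
 * in atomic context, so the actual reset, which needs rtnl_lock, is
 * deferred to process context via adapter->work (vmxnet3_reset_work).
 */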
2857 static void
2858 vmxnet3_tx_timeout(struct net_device *netdev)
2859 {
2860         struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2861         adapter->tx_timeout_count++;
2862
2863         netdev_err(adapter->netdev, "tx hang\n");
2864         schedule_work(&adapter->work);
2865         netif_wake_queue(adapter->netdev);
2866 }
2867
2868
2869 static void
2870 vmxnet3_reset_work(struct work_struct *data)
2871 {
2872         struct vmxnet3_adapter *adapter;
2873
2874         adapter = container_of(data, struct vmxnet3_adapter, work);
2875
2876         /* if another thread is resetting the device, no need to proceed */
2877         if (test_and_set_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state))
2878                 return;
2879
2880         /* if the device is closed, we must leave it alone */
2881         rtnl_lock();
2882         if (netif_running(adapter->netdev)) {
2883                 netdev_notice(adapter->netdev, "resetting\n");
2884                 vmxnet3_quiesce_dev(adapter);
2885                 vmxnet3_reset_dev(adapter);
2886                 vmxnet3_activate_dev(adapter);
2887         } else {
2888                 netdev_info(adapter->netdev, "already closed\n");
2889         }
2890         rtnl_unlock();
2891
2892         clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state);
2893 }
2894
2895
2896 static int
2897 vmxnet3_probe_device(struct pci_dev *pdev,
2898                      const struct pci_device_id *id)
2899 {
2900         static const struct net_device_ops vmxnet3_netdev_ops = {
2901                 .ndo_open = vmxnet3_open,
2902                 .ndo_stop = vmxnet3_close,
2903                 .ndo_start_xmit = vmxnet3_xmit_frame,
2904                 .ndo_set_mac_address = vmxnet3_set_mac_addr,
2905                 .ndo_change_mtu = vmxnet3_change_mtu,
2906                 .ndo_set_features = vmxnet3_set_features,
2907                 .ndo_get_stats64 = vmxnet3_get_stats64,
2908                 .ndo_tx_timeout = vmxnet3_tx_timeout,
2909                 .ndo_set_rx_mode = vmxnet3_set_mc,
2910                 .ndo_vlan_rx_add_vid = vmxnet3_vlan_rx_add_vid,
2911                 .ndo_vlan_rx_kill_vid = vmxnet3_vlan_rx_kill_vid,
2912 #ifdef CONFIG_NET_POLL_CONTROLLER
2913                 .ndo_poll_controller = vmxnet3_netpoll,
2914 #endif
2915         };
2916         int err;
2917         bool dma64 = false; /* initialized to silence a spurious gcc warning */
2918         u32 ver;
2919         struct net_device *netdev;
2920         struct vmxnet3_adapter *adapter;
2921         u8 mac[ETH_ALEN];
2922         int size;
2923         int num_tx_queues;
2924         int num_rx_queues;
2925
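        /*
         * Choose queue counts: with multi-queue enabled, up to one queue
         * per online CPU (capped by the device maximums), rounded down to
         * a power of two (e.g. 6 online CPUs yield 4 queues); otherwise a
         * single queue of each kind.
         */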
2926         if (!pci_msi_enabled())
2927                 enable_mq = 0;
2928
2929 #ifdef VMXNET3_RSS
2930         if (enable_mq)
2931                 num_rx_queues = min(VMXNET3_DEVICE_MAX_RX_QUEUES,
2932                                     (int)num_online_cpus());
2933         else
2934 #endif
2935                 num_rx_queues = 1;
2936         num_rx_queues = rounddown_pow_of_two(num_rx_queues);
2937
2938         if (enable_mq)
2939                 num_tx_queues = min(VMXNET3_DEVICE_MAX_TX_QUEUES,
2940                                     (int)num_online_cpus());
2941         else
2942                 num_tx_queues = 1;
2943
2944         num_tx_queues = rounddown_pow_of_two(num_tx_queues);
2945         netdev = alloc_etherdev_mq(sizeof(struct vmxnet3_adapter),
2946                                    max(num_tx_queues, num_rx_queues));
2947         dev_info(&pdev->dev,
2948                  "# of Tx queues : %d, # of Rx queues : %d\n",
2949                  num_tx_queues, num_rx_queues);
2950
2951         if (!netdev)
2952                 return -ENOMEM;
2953
2954         pci_set_drvdata(pdev, netdev);
2955         adapter = netdev_priv(netdev);
2956         adapter->netdev = netdev;
2957         adapter->pdev = pdev;
2958
2959         spin_lock_init(&adapter->cmd_lock);
2960         adapter->shared = pci_alloc_consistent(adapter->pdev,
2961                                                sizeof(struct Vmxnet3_DriverShared),
2962                                                &adapter->shared_pa);
2963         if (!adapter->shared) {
2964                 dev_err(&pdev->dev, "Failed to allocate memory\n");
2965                 err = -ENOMEM;
2966                 goto err_alloc_shared;
2967         }
2968
2969         adapter->num_rx_queues = num_rx_queues;
2970         adapter->num_tx_queues = num_tx_queues;
2971         adapter->rx_buf_per_pkt = 1;
2972
2973         size = sizeof(struct Vmxnet3_TxQueueDesc) * adapter->num_tx_queues;
2974         size += sizeof(struct Vmxnet3_RxQueueDesc) * adapter->num_rx_queues;
2975         adapter->tqd_start = pci_alloc_consistent(adapter->pdev, size,
2976                                                   &adapter->queue_desc_pa);
2977
2978         if (!adapter->tqd_start) {
2979                 dev_err(&pdev->dev, "Failed to allocate memory\n");
2980                 err = -ENOMEM;
2981                 goto err_alloc_queue_desc;
2982         }
2983         adapter->rqd_start = (struct Vmxnet3_RxQueueDesc *)(adapter->tqd_start +
2984                                                             adapter->num_tx_queues);
2985
2986         adapter->pm_conf = kmalloc(sizeof(struct Vmxnet3_PMConf), GFP_KERNEL);
2987         if (adapter->pm_conf == NULL) {
2988                 err = -ENOMEM;
2989                 goto err_alloc_pm;
2990         }
2991
2992 #ifdef VMXNET3_RSS
2993
2994         adapter->rss_conf = kmalloc(sizeof(struct UPT1_RSSConf), GFP_KERNEL);
2995         if (adapter->rss_conf == NULL) {
2996                 err = -ENOMEM;
2997                 goto err_alloc_rss;
2998         }
2999 #endif /* VMXNET3_RSS */
3000
3001         err = vmxnet3_alloc_pci_resources(adapter, &dma64);
3002         if (err < 0)
3003                 goto err_alloc_pci;
3004
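        /*
         * Version handshake: VRRS/UVRS appear to advertise the revisions
         * the device supports as a bitmap (bit 0 = version 1); writing the
         * chosen version back selects it.
         */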
3005         ver = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_VRRS);
3006         if (ver & 1) {
3007                 VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_VRRS, 1);
3008         } else {
3009                 dev_err(&pdev->dev,
3010                         "Incompatible h/w version (0x%x) for adapter\n", ver);
3011                 err = -EBUSY;
3012                 goto err_ver;
3013         }
3014
3015         ver = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_UVRS);
3016         if (ver & 1) {
3017                 VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_UVRS, 1);
3018         } else {
3019                 dev_err(&pdev->dev,
3020                         "Incompatible upt version (0x%x) for adapter\n", ver);
3021                 err = -EBUSY;
3022                 goto err_ver;
3023         }
3024
3025         SET_NETDEV_DEV(netdev, &pdev->dev);
3026         vmxnet3_declare_features(adapter, dma64);
3027
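        /*
         * With matching tx/rx queue counts each tx queue can "buddy" with
         * an rx queue and share its interrupt vector; otherwise every
         * queue needs a vector of its own.
         */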
3028         if (adapter->num_tx_queues == adapter->num_rx_queues)
3029                 adapter->share_intr = VMXNET3_INTR_BUDDYSHARE;
3030         else
3031                 adapter->share_intr = VMXNET3_INTR_DONTSHARE;
3032
3033         vmxnet3_alloc_intr_resources(adapter);
3034
3035 #ifdef VMXNET3_RSS
3036         if (adapter->num_rx_queues > 1 &&
3037             adapter->intr.type == VMXNET3_IT_MSIX) {
3038                 adapter->rss = true;
3039                 netdev->hw_features |= NETIF_F_RXHASH;
3040                 netdev->features |= NETIF_F_RXHASH;
3041                 dev_dbg(&pdev->dev, "RSS is enabled.\n");
3042         } else {
3043                 adapter->rss = false;
3044         }
3045 #endif
3046
3047         vmxnet3_read_mac_addr(adapter, mac);
3048         memcpy(netdev->dev_addr, mac, netdev->addr_len);
3049
3050         netdev->netdev_ops = &vmxnet3_netdev_ops;
3051         vmxnet3_set_ethtool_ops(netdev);
3052         netdev->watchdog_timeo = 5 * HZ;
3053
3054         INIT_WORK(&adapter->work, vmxnet3_reset_work);
3055         set_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state);
3056
3057         if (adapter->intr.type == VMXNET3_IT_MSIX) {
3058                 int i;
3059                 for (i = 0; i < adapter->num_rx_queues; i++) {
3060                         netif_napi_add(adapter->netdev,
3061                                        &adapter->rx_queue[i].napi,
3062                                        vmxnet3_poll_rx_only, 64);
3063                 }
3064         } else {
3065                 netif_napi_add(adapter->netdev, &adapter->rx_queue[0].napi,
3066                                vmxnet3_poll, 64);
3067         }
3068
3069         netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
3070         netif_set_real_num_rx_queues(adapter->netdev, adapter->num_rx_queues);
3071
3072         netif_carrier_off(netdev);
3073         err = register_netdev(netdev);
3074
3075         if (err) {
3076                 dev_err(&pdev->dev, "Failed to register adapter\n");
3077                 goto err_register;
3078         }
3079
3080         vmxnet3_check_link(adapter, false);
3081         return 0;
3082
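/*
 * Error unwind: the labels below run in reverse order of setup, each
 * freeing only what was successfully allocated before the failure.
 */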
3083 err_register:
3084         vmxnet3_free_intr_resources(adapter);
3085 err_ver:
3086         vmxnet3_free_pci_resources(adapter);
3087 err_alloc_pci:
3088 #ifdef VMXNET3_RSS
3089         kfree(adapter->rss_conf);
3090 err_alloc_rss:
3091 #endif
3092         kfree(adapter->pm_conf);
3093 err_alloc_pm:
3094         pci_free_consistent(adapter->pdev, size, adapter->tqd_start,
3095                             adapter->queue_desc_pa);
3096 err_alloc_queue_desc:
3097         pci_free_consistent(adapter->pdev, sizeof(struct Vmxnet3_DriverShared),
3098                             adapter->shared, adapter->shared_pa);
3099 err_alloc_shared:
3100         pci_set_drvdata(pdev, NULL);
3101         free_netdev(netdev);
3102         return err;
3103 }
3104
3105
3106 static void
3107 vmxnet3_remove_device(struct pci_dev *pdev)
3108 {
3109         struct net_device *netdev = pci_get_drvdata(pdev);
3110         struct vmxnet3_adapter *adapter = netdev_priv(netdev);
3111         int size = 0;
3112         int num_rx_queues;
3113
3114 #ifdef VMXNET3_RSS
3115         if (enable_mq)
3116                 num_rx_queues = min(VMXNET3_DEVICE_MAX_RX_QUEUES,
3117                                     (int)num_online_cpus());
3118         else
3119 #endif
3120                 num_rx_queues = 1;
3121         num_rx_queues = rounddown_pow_of_two(num_rx_queues);
3122
3123         cancel_work_sync(&adapter->work);
3124
3125         unregister_netdev(netdev);
3126
3127         vmxnet3_free_intr_resources(adapter);
3128         vmxnet3_free_pci_resources(adapter);
3129 #ifdef VMXNET3_RSS
3130         kfree(adapter->rss_conf);
3131 #endif
3132         kfree(adapter->pm_conf);
3133
3134         size = sizeof(struct Vmxnet3_TxQueueDesc) * adapter->num_tx_queues;
3135         size += sizeof(struct Vmxnet3_RxQueueDesc) * num_rx_queues;
3136         pci_free_consistent(adapter->pdev, size, adapter->tqd_start,
3137                             adapter->queue_desc_pa);
3138         pci_free_consistent(adapter->pdev, sizeof(struct Vmxnet3_DriverShared),
3139                             adapter->shared, adapter->shared_pa);
3140         free_netdev(netdev);
3141 }
3142
3143
3144 #ifdef CONFIG_PM
3145
3146 static int
3147 vmxnet3_suspend(struct device *device)
3148 {
3149         struct pci_dev *pdev = to_pci_dev(device);
3150         struct net_device *netdev = pci_get_drvdata(pdev);
3151         struct vmxnet3_adapter *adapter = netdev_priv(netdev);
3152         struct Vmxnet3_PMConf *pmConf;
3153         struct ethhdr *ehdr;
3154         struct arphdr *ahdr;
3155         u8 *arpreq;
3156         struct in_device *in_dev;
3157         struct in_ifaddr *ifa;
3158         unsigned long flags;
3159         int i = 0;
3160
3161         if (!netif_running(netdev))
3162                 return 0;
3163
3164         for (i = 0; i < adapter->num_rx_queues; i++)
3165                 napi_disable(&adapter->rx_queue[i].napi);
3166
3167         vmxnet3_disable_all_intrs(adapter);
3168         vmxnet3_free_irqs(adapter);
3169         vmxnet3_free_intr_resources(adapter);
3170
3171         netif_device_detach(netdev);
3172         netif_tx_stop_all_queues(netdev);
3173
3174         /* Create wake-up filters. */
3175         pmConf = adapter->pm_conf;
3176         memset(pmConf, 0, sizeof(*pmConf));
3177
3178         if (adapter->wol & WAKE_UCAST) {
3179                 pmConf->filters[i].patternSize = ETH_ALEN;
3180                 pmConf->filters[i].maskSize = 1;
3181                 memcpy(pmConf->filters[i].pattern, netdev->dev_addr, ETH_ALEN);
3182                 pmConf->filters[i].mask[0] = 0x3F; /* one mask bit per byte: the 6-byte MAC */
3183
3184                 pmConf->wakeUpEvents |= VMXNET3_PM_WAKEUP_FILTER;
3185                 i++;
3186         }
3187
3188         if (adapter->wol & WAKE_ARP) {
3189                 in_dev = in_dev_get(netdev);
3190                 if (!in_dev)
3191                         goto skip_arp;
3192
3193                 ifa = (struct in_ifaddr *)in_dev->ifa_list;
3194                 if (!ifa) {
3195                         in_dev_put(in_dev); /* drop the ref from in_dev_get() */
3196                         goto skip_arp;
3197                 }
3196
3197                 pmConf->filters[i].patternSize = ETH_HLEN + /* Ethernet header */
3198                         sizeof(struct arphdr) +         /* ARP header */
3199                         2 * ETH_ALEN +          /* 2 Ethernet addresses */
3200                         2 * sizeof(u32);        /* 2 IPv4 addresses */
3201                 pmConf->filters[i].maskSize =
3202                         (pmConf->filters[i].patternSize - 1) / 8 + 1;
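                /*
                 * Here patternSize is 14 + 8 + 12 + 8 = 42 bytes, so
                 * maskSize rounds up to 6 bytes (one mask bit per pattern
                 * byte).
                 */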
3203
3204                 /* ETH_P_ARP in Ethernet header. */
3205                 ehdr = (struct ethhdr *)pmConf->filters[i].pattern;
3206                 ehdr->h_proto = htons(ETH_P_ARP);
3207
3208                 /* ARPOP_REQUEST in ARP header. */
3209                 ahdr = (struct arphdr *)&pmConf->filters[i].pattern[ETH_HLEN];
3210                 ahdr->ar_op = htons(ARPOP_REQUEST);
3211                 arpreq = (u8 *)(ahdr + 1);
3212
3213                 /* The unicast IPv4 address in the 'tip' field. */
3214                 arpreq += 2 * ETH_ALEN + sizeof(u32);
3215                 *(u32 *)arpreq = ifa->ifa_address;
3216
3217                 /* The mask for the relevant bits. */
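                /* Each mask bit evidently selects one pattern byte (bit b
                 * of mask[k] covers byte 8 * k + b): 0x30 in mask[1]
                 * covers bytes 12-13 (the EtherType), 0x30 in mask[2]
                 * covers bytes 20-21 (ar_op), and mask[4]/mask[5] cover
                 * bytes 38-41, the target IP written above.
                 */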
3218                 pmConf->filters[i].mask[0] = 0x00;
3219                 pmConf->filters[i].mask[1] = 0x30; /* ETH_P_ARP */
3220                 pmConf->filters[i].mask[2] = 0x30; /* ARPOP_REQUEST */
3221                 pmConf->filters[i].mask[3] = 0x00;
3222                 pmConf->filters[i].mask[4] = 0xC0; /* IPv4 TIP */
3223                 pmConf->filters[i].mask[5] = 0x03; /* IPv4 TIP */
3224                 in_dev_put(in_dev);
3225
3226                 pmConf->wakeUpEvents |= VMXNET3_PM_WAKEUP_FILTER;
3227                 i++;
3228         }
3229
3230 skip_arp:
3231         if (adapter->wol & WAKE_MAGIC)
3232                 pmConf->wakeUpEvents |= VMXNET3_PM_WAKEUP_MAGIC;
3233
3234         pmConf->numFilters = i;
3235
3236         adapter->shared->devRead.pmConfDesc.confVer = cpu_to_le32(1);
3237         adapter->shared->devRead.pmConfDesc.confLen = cpu_to_le32(sizeof(
3238                                                                   *pmConf));
3239         adapter->shared->devRead.pmConfDesc.confPA = cpu_to_le64(virt_to_phys(
3240                                                                  pmConf));
3241
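        /*
         * The shared-memory descriptor now points at pmConf; the
         * UPDATE_PMCFG command below makes the device re-read it.
         */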
3242         spin_lock_irqsave(&adapter->cmd_lock, flags);
3243         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
3244                                VMXNET3_CMD_UPDATE_PMCFG);
3245         spin_unlock_irqrestore(&adapter->cmd_lock, flags);
3246
3247         pci_save_state(pdev);
3248         pci_enable_wake(pdev, pci_choose_state(pdev, PMSG_SUSPEND),
3249                         adapter->wol);
3250         pci_disable_device(pdev);
3251         pci_set_power_state(pdev, pci_choose_state(pdev, PMSG_SUSPEND));
3252
3253         return 0;
3254 }
3255
3256
3257 static int
3258 vmxnet3_resume(struct device *device)
3259 {
3260         int err, i = 0;
3261         unsigned long flags;
3262         struct pci_dev *pdev = to_pci_dev(device);
3263         struct net_device *netdev = pci_get_drvdata(pdev);
3264         struct vmxnet3_adapter *adapter = netdev_priv(netdev);
3265         struct Vmxnet3_PMConf *pmConf;
3266
3267         if (!netif_running(netdev))
3268                 return 0;
3269
3270         /* Destroy wake-up filters. */
3271         pmConf = adapter->pm_conf;
3272         memset(pmConf, 0, sizeof(*pmConf));
3273
3274         adapter->shared->devRead.pmConfDesc.confVer = cpu_to_le32(1);
3275         adapter->shared->devRead.pmConfDesc.confLen = cpu_to_le32(sizeof(
3276                                                                   *pmConf));
3277         adapter->shared->devRead.pmConfDesc.confPA = cpu_to_le64(virt_to_phys(
3278                                                                  pmConf));
3279
3280         netif_device_attach(netdev);
3281         pci_set_power_state(pdev, PCI_D0);
3282         pci_restore_state(pdev);
3283         err = pci_enable_device_mem(pdev);
3284         if (err != 0)
3285                 return err;
3286
3287         pci_enable_wake(pdev, PCI_D0, 0);
3288
3289         spin_lock_irqsave(&adapter->cmd_lock, flags);
3290         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
3291                                VMXNET3_CMD_UPDATE_PMCFG);
3292         spin_unlock_irqrestore(&adapter->cmd_lock, flags);
3293         vmxnet3_alloc_intr_resources(adapter);
3294         vmxnet3_request_irqs(adapter);
3295         for (i = 0; i < adapter->num_rx_queues; i++)
3296                 napi_enable(&adapter->rx_queue[i].napi);
3297         vmxnet3_enable_all_intrs(adapter);
3298
3299         return 0;
3300 }
3301
3302 static const struct dev_pm_ops vmxnet3_pm_ops = {
3303         .suspend = vmxnet3_suspend,
3304         .resume = vmxnet3_resume,
3305 };
3306 #endif
3307
3308 static struct pci_driver vmxnet3_driver = {
3309         .name           = vmxnet3_driver_name,
3310         .id_table       = vmxnet3_pciid_table,
3311         .probe          = vmxnet3_probe_device,
3312         .remove         = vmxnet3_remove_device,
3313 #ifdef CONFIG_PM
3314         .driver.pm      = &vmxnet3_pm_ops,
3315 #endif
3316 };
3317
3318
3319 static int __init
3320 vmxnet3_init_module(void)
3321 {
3322         pr_info("%s - version %s\n", VMXNET3_DRIVER_DESC,
3323                 VMXNET3_DRIVER_VERSION_REPORT);
3324         return pci_register_driver(&vmxnet3_driver);
3325 }
3326
3327 module_init(vmxnet3_init_module);
3328
3329
3330 static void
3331 vmxnet3_exit_module(void)
3332 {
3333         pci_unregister_driver(&vmxnet3_driver);
3334 }
3335
3336 module_exit(vmxnet3_exit_module);
3337
3338 MODULE_AUTHOR("VMware, Inc.");
3339 MODULE_DESCRIPTION(VMXNET3_DRIVER_DESC);
3340 MODULE_LICENSE("GPL v2");
3341 MODULE_VERSION(VMXNET3_DRIVER_VERSION_STRING);