1 /*
2  * Linux driver for VMware's vmxnet3 ethernet NIC.
3  *
4  * Copyright (C) 2008-2009, VMware, Inc. All Rights Reserved.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU General Public License as published by the
8  * Free Software Foundation; version 2 of the License and no later version.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
13  * NON INFRINGEMENT. See the GNU General Public License for more
14  * details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19  *
20  * The full GNU General Public License is included in this distribution in
21  * the file called "COPYING".
22  *
23  * Maintained by: Shreyas Bhatewara <pv-drivers@vmware.com>
24  *
25  */
26
27 #include <linux/module.h>
28 #include <net/ip6_checksum.h>
29
30 #include "vmxnet3_int.h"
31
32 char vmxnet3_driver_name[] = "vmxnet3";
33 #define VMXNET3_DRIVER_DESC "VMware vmxnet3 virtual NIC driver"
34
35 /*
36  * PCI Device ID Table
37  * Last entry must be all 0s
38  */
39 static DEFINE_PCI_DEVICE_TABLE(vmxnet3_pciid_table) = {
40         {PCI_VDEVICE(VMWARE, PCI_DEVICE_ID_VMWARE_VMXNET3)},
41         {0}
42 };
43
44 MODULE_DEVICE_TABLE(pci, vmxnet3_pciid_table);
45
46 static int enable_mq = 1;
47
48 static void
49 vmxnet3_write_mac_addr(struct vmxnet3_adapter *adapter, u8 *mac);
50
51 /*
52  *    Enable/Disable the given intr
53  */
54 static void
55 vmxnet3_enable_intr(struct vmxnet3_adapter *adapter, unsigned intr_idx)
56 {
57         VMXNET3_WRITE_BAR0_REG(adapter, VMXNET3_REG_IMR + intr_idx * 8, 0);
58 }
59
60
61 static void
62 vmxnet3_disable_intr(struct vmxnet3_adapter *adapter, unsigned intr_idx)
63 {
64         VMXNET3_WRITE_BAR0_REG(adapter, VMXNET3_REG_IMR + intr_idx * 8, 1);
65 }
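/*
 * Each interrupt vector has its own IMR register in BAR0, spaced 8 bytes
 * apart: writing 0 unmasks the vector, writing 1 masks it. For example,
 * vector 2 is controlled at offset VMXNET3_REG_IMR + 2 * 8.
 */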
66
67
68 /*
69  *    Enable/Disable all intrs used by the device
70  */
71 static void
72 vmxnet3_enable_all_intrs(struct vmxnet3_adapter *adapter)
73 {
74         int i;
75
76         for (i = 0; i < adapter->intr.num_intrs; i++)
77                 vmxnet3_enable_intr(adapter, i);
78         adapter->shared->devRead.intrConf.intrCtrl &=
79                                         cpu_to_le32(~VMXNET3_IC_DISABLE_ALL);
80 }
81
82
83 static void
84 vmxnet3_disable_all_intrs(struct vmxnet3_adapter *adapter)
85 {
86         int i;
87
88         adapter->shared->devRead.intrConf.intrCtrl |=
89                                         cpu_to_le32(VMXNET3_IC_DISABLE_ALL);
90         for (i = 0; i < adapter->intr.num_intrs; i++)
91                 vmxnet3_disable_intr(adapter, i);
92 }
93
94
95 static void
96 vmxnet3_ack_events(struct vmxnet3_adapter *adapter, u32 events)
97 {
98         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_ECR, events);
99 }
100
101
102 static bool
103 vmxnet3_tq_stopped(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
104 {
105         return tq->stopped;
106 }
107
108
109 static void
110 vmxnet3_tq_start(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
111 {
112         tq->stopped = false;
113         netif_start_subqueue(adapter->netdev, tq - adapter->tx_queue);
114 }
115
116
117 static void
118 vmxnet3_tq_wake(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
119 {
120         tq->stopped = false;
121         netif_wake_subqueue(adapter->netdev, (tq - adapter->tx_queue));
122 }
123
124
125 static void
126 vmxnet3_tq_stop(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
127 {
128         tq->stopped = true;
129         tq->num_stop++;
130         netif_stop_subqueue(adapter->netdev, (tq - adapter->tx_queue));
131 }
132
133
134 /*
135  * Check the link state. This may start or stop the tx queue.
136  */
137 static void
138 vmxnet3_check_link(struct vmxnet3_adapter *adapter, bool affectTxQueue)
139 {
140         u32 ret;
141         int i;
142         unsigned long flags;
143
144         spin_lock_irqsave(&adapter->cmd_lock, flags);
145         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_GET_LINK);
146         ret = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD);
147         spin_unlock_irqrestore(&adapter->cmd_lock, flags);
148
149         adapter->link_speed = ret >> 16;
150         if (ret & 1) { /* Link is up. */
151                 netdev_info(adapter->netdev, "NIC Link is Up %d Mbps\n",
152                             adapter->link_speed);
153                 netif_carrier_on(adapter->netdev);
154
155                 if (affectTxQueue) {
156                         for (i = 0; i < adapter->num_tx_queues; i++)
157                                 vmxnet3_tq_start(&adapter->tx_queue[i],
158                                                  adapter);
159                 }
160         } else {
161                 netdev_info(adapter->netdev, "NIC Link is Down\n");
162                 netif_carrier_off(adapter->netdev);
163
164                 if (affectTxQueue) {
165                         for (i = 0; i < adapter->num_tx_queues; i++)
166                                 vmxnet3_tq_stop(&adapter->tx_queue[i], adapter);
167                 }
168         }
169 }
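/*
 * Illustrative example of the GET_LINK result decoded above: a value of
 * 0x27100001 has bit 0 set (link up) and 0x2710 (10000) in the upper 16
 * bits, i.e. a 10000 Mbps link.
 */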
170
171 static void
172 vmxnet3_process_events(struct vmxnet3_adapter *adapter)
173 {
174         int i;
175         unsigned long flags;
176         u32 events = le32_to_cpu(adapter->shared->ecr);
177         if (!events)
178                 return;
179
180         vmxnet3_ack_events(adapter, events);
181
182         /* Check if link state has changed */
183         if (events & VMXNET3_ECR_LINK)
184                 vmxnet3_check_link(adapter, true);
185
186         /* Check if there is an error on xmit/recv queues */
187         if (events & (VMXNET3_ECR_TQERR | VMXNET3_ECR_RQERR)) {
188                 spin_lock_irqsave(&adapter->cmd_lock, flags);
189                 VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
190                                        VMXNET3_CMD_GET_QUEUE_STATUS);
191                 spin_unlock_irqrestore(&adapter->cmd_lock, flags);
192
193                 for (i = 0; i < adapter->num_tx_queues; i++)
194                         if (adapter->tqd_start[i].status.stopped)
195                                 dev_err(&adapter->netdev->dev,
196                                         "%s: tq[%d] error 0x%x\n",
197                                         adapter->netdev->name, i, le32_to_cpu(
198                                         adapter->tqd_start[i].status.error));
199                 for (i = 0; i < adapter->num_rx_queues; i++)
200                         if (adapter->rqd_start[i].status.stopped)
201                                 dev_err(&adapter->netdev->dev,
202                                         "%s: rq[%d] error 0x%x\n",
203                                         adapter->netdev->name, i,
204                                         adapter->rqd_start[i].status.error);
205
206                 schedule_work(&adapter->work);
207         }
208 }
209
210 #ifdef __BIG_ENDIAN_BITFIELD
211 /*
212  * The device expects the bitfields in shared structures to be written in
213  * little-endian order. When the CPU is big endian, the following routines
214  * are used to read from and write to the ABI correctly.
215  * The general technique used here is: double-word bitfields are defined in
216  * the opposite order for big-endian architectures. Before the driver reads
217  * them, the complete double word is translated using le32_to_cpu. Similarly,
218  * after the driver writes into the bitfields, cpu_to_le32 translates the
219  * double words back into the required format.
220  * To avoid touching bits in a shared structure more than once, temporary
221  * descriptors are used. These are passed as srcDesc to the following functions.
222  */
223 static void vmxnet3_RxDescToCPU(const struct Vmxnet3_RxDesc *srcDesc,
224                                 struct Vmxnet3_RxDesc *dstDesc)
225 {
226         u32 *src = (u32 *)srcDesc + 2;
227         u32 *dst = (u32 *)dstDesc + 2;
228         dstDesc->addr = le64_to_cpu(srcDesc->addr);
229         *dst = le32_to_cpu(*src);
230         dstDesc->ext1 = le32_to_cpu(srcDesc->ext1);
231 }
232
233 static void vmxnet3_TxDescToLe(const struct Vmxnet3_TxDesc *srcDesc,
234                                struct Vmxnet3_TxDesc *dstDesc)
235 {
236         int i;
237         u32 *src = (u32 *)(srcDesc + 1);
238         u32 *dst = (u32 *)(dstDesc + 1);
239
240         /* Working backwards so that the gen bit is set at the end. */
241         for (i = 2; i > 0; i--) {
242                 src--;
243                 dst--;
244                 *dst = cpu_to_le32(*src);
245         }
246 }
247
248
249 static void vmxnet3_RxCompToCPU(const struct Vmxnet3_RxCompDesc *srcDesc,
250                                 struct Vmxnet3_RxCompDesc *dstDesc)
251 {
252         int i = 0;
253         u32 *src = (u32 *)srcDesc;
254         u32 *dst = (u32 *)dstDesc;
255         for (i = 0; i < sizeof(struct Vmxnet3_RxCompDesc) / sizeof(u32); i++) {
256                 *dst = le32_to_cpu(*src);
257                 src++;
258                 dst++;
259         }
260 }
261
262
263 /* Used to read bitfield values from double words. */
264 static u32 get_bitfield32(const __le32 *bitfield, u32 pos, u32 size)
265 {
266         u32 temp = le32_to_cpu(*bitfield);
267         u32 mask = ((1 << size) - 1) << pos;
268         temp &= mask;
269         temp >>= pos;
270         return temp;
271 }
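/*
 * Illustrative sketch only (the bit position below is hypothetical, not one
 * of the real VMXNET3_*_SHIFT values): extracting a 1-bit field that lives
 * at bit 31 of a descriptor double word:
 *
 *	__le32 dword = cpu_to_le32(0x80000000);
 *	u32 gen = get_bitfield32(&dword, 31, 1);	gen is now 1
 *
 * The VMXNET3_TXDESC_GET_*() and VMXNET3_TCD_GET_*() wrappers below use
 * get_bitfield32() in exactly this way.
 */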
272
273
274
275 #endif  /* __BIG_ENDIAN_BITFIELD */
276
277 #ifdef __BIG_ENDIAN_BITFIELD
278
279 #   define VMXNET3_TXDESC_GET_GEN(txdesc) get_bitfield32(((const __le32 *) \
280                         txdesc) + VMXNET3_TXD_GEN_DWORD_SHIFT, \
281                         VMXNET3_TXD_GEN_SHIFT, VMXNET3_TXD_GEN_SIZE)
282 #   define VMXNET3_TXDESC_GET_EOP(txdesc) get_bitfield32(((const __le32 *) \
283                         txdesc) + VMXNET3_TXD_EOP_DWORD_SHIFT, \
284                         VMXNET3_TXD_EOP_SHIFT, VMXNET3_TXD_EOP_SIZE)
285 #   define VMXNET3_TCD_GET_GEN(tcd) get_bitfield32(((const __le32 *)tcd) + \
286                         VMXNET3_TCD_GEN_DWORD_SHIFT, VMXNET3_TCD_GEN_SHIFT, \
287                         VMXNET3_TCD_GEN_SIZE)
288 #   define VMXNET3_TCD_GET_TXIDX(tcd) get_bitfield32((const __le32 *)tcd, \
289                         VMXNET3_TCD_TXIDX_SHIFT, VMXNET3_TCD_TXIDX_SIZE)
290 #   define vmxnet3_getRxComp(dstrcd, rcd, tmp) do { \
291                         (dstrcd) = (tmp); \
292                         vmxnet3_RxCompToCPU((rcd), (tmp)); \
293                 } while (0)
294 #   define vmxnet3_getRxDesc(dstrxd, rxd, tmp) do { \
295                         (dstrxd) = (tmp); \
296                         vmxnet3_RxDescToCPU((rxd), (tmp)); \
297                 } while (0)
298
299 #else
300
301 #   define VMXNET3_TXDESC_GET_GEN(txdesc) ((txdesc)->gen)
302 #   define VMXNET3_TXDESC_GET_EOP(txdesc) ((txdesc)->eop)
303 #   define VMXNET3_TCD_GET_GEN(tcd) ((tcd)->gen)
304 #   define VMXNET3_TCD_GET_TXIDX(tcd) ((tcd)->txdIdx)
305 #   define vmxnet3_getRxComp(dstrcd, rcd, tmp) (dstrcd) = (rcd)
306 #   define vmxnet3_getRxDesc(dstrxd, rxd, tmp) (dstrxd) = (rxd)
307
308 #endif /* __BIG_ENDIAN_BITFIELD  */
309
310
311 static void
312 vmxnet3_unmap_tx_buf(struct vmxnet3_tx_buf_info *tbi,
313                      struct pci_dev *pdev)
314 {
315         if (tbi->map_type == VMXNET3_MAP_SINGLE)
316                 dma_unmap_single(&pdev->dev, tbi->dma_addr, tbi->len,
317                                  PCI_DMA_TODEVICE);
318         else if (tbi->map_type == VMXNET3_MAP_PAGE)
319                 dma_unmap_page(&pdev->dev, tbi->dma_addr, tbi->len,
320                                PCI_DMA_TODEVICE);
321         else
322                 BUG_ON(tbi->map_type != VMXNET3_MAP_NONE);
323
324         tbi->map_type = VMXNET3_MAP_NONE; /* to help debugging */
325 }
326
327
328 static int
329 vmxnet3_unmap_pkt(u32 eop_idx, struct vmxnet3_tx_queue *tq,
330                   struct pci_dev *pdev, struct vmxnet3_adapter *adapter)
331 {
332         struct sk_buff *skb;
333         int entries = 0;
334
335         /* no out of order completion */
336         BUG_ON(tq->buf_info[eop_idx].sop_idx != tq->tx_ring.next2comp);
337         BUG_ON(VMXNET3_TXDESC_GET_EOP(&(tq->tx_ring.base[eop_idx].txd)) != 1);
338
339         skb = tq->buf_info[eop_idx].skb;
340         BUG_ON(skb == NULL);
341         tq->buf_info[eop_idx].skb = NULL;
342
343         VMXNET3_INC_RING_IDX_ONLY(eop_idx, tq->tx_ring.size);
344
345         while (tq->tx_ring.next2comp != eop_idx) {
346                 vmxnet3_unmap_tx_buf(tq->buf_info + tq->tx_ring.next2comp,
347                                      pdev);
348
349                 /* update next2comp w/o tx_lock. Since we only ever mark
350                  * more tx ring entries available (never fewer), the worst
351                  * case is that the tx routine incorrectly re-queues a pkt
352                  * due to insufficient tx ring entries.
353                  */
354                 vmxnet3_cmd_ring_adv_next2comp(&tq->tx_ring);
355                 entries++;
356         }
357
358         dev_kfree_skb_any(skb);
359         return entries;
360 }
361
362
363 static int
364 vmxnet3_tq_tx_complete(struct vmxnet3_tx_queue *tq,
365                         struct vmxnet3_adapter *adapter)
366 {
367         int completed = 0;
368         union Vmxnet3_GenericDesc *gdesc;
369
370         gdesc = tq->comp_ring.base + tq->comp_ring.next2proc;
371         while (VMXNET3_TCD_GET_GEN(&gdesc->tcd) == tq->comp_ring.gen) {
372                 completed += vmxnet3_unmap_pkt(VMXNET3_TCD_GET_TXIDX(
373                                                &gdesc->tcd), tq, adapter->pdev,
374                                                adapter);
375
376                 vmxnet3_comp_ring_adv_next2proc(&tq->comp_ring);
377                 gdesc = tq->comp_ring.base + tq->comp_ring.next2proc;
378         }
379
380         if (completed) {
381                 spin_lock(&tq->tx_lock);
382                 if (unlikely(vmxnet3_tq_stopped(tq, adapter) &&
383                              vmxnet3_cmd_ring_desc_avail(&tq->tx_ring) >
384                              VMXNET3_WAKE_QUEUE_THRESHOLD(tq) &&
385                              netif_carrier_ok(adapter->netdev))) {
386                         vmxnet3_tq_wake(tq, adapter);
387                 }
388                 spin_unlock(&tq->tx_lock);
389         }
390         return completed;
391 }
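/*
 * Sketch of the generation-bit handshake driving the loop above (descriptive
 * only): the device writes every new Tx completion descriptor with the
 * ring's current generation value, so a descriptor whose gen bit differs
 * from tq->comp_ring.gen has not been (re)written yet and the loop stops.
 * vmxnet3_comp_ring_adv_next2proc() (in vmxnet3_int.h) flips comp_ring.gen
 * whenever next2proc wraps back to 0, which keeps descriptors left over from
 * the previous pass around the ring from matching.
 */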
392
393
394 static void
395 vmxnet3_tq_cleanup(struct vmxnet3_tx_queue *tq,
396                    struct vmxnet3_adapter *adapter)
397 {
398         int i;
399
400         while (tq->tx_ring.next2comp != tq->tx_ring.next2fill) {
401                 struct vmxnet3_tx_buf_info *tbi;
402
403                 tbi = tq->buf_info + tq->tx_ring.next2comp;
404
405                 vmxnet3_unmap_tx_buf(tbi, adapter->pdev);
406                 if (tbi->skb) {
407                         dev_kfree_skb_any(tbi->skb);
408                         tbi->skb = NULL;
409                 }
410                 vmxnet3_cmd_ring_adv_next2comp(&tq->tx_ring);
411         }
412
413         /* sanity check, verify all buffers are indeed unmapped and freed */
414         for (i = 0; i < tq->tx_ring.size; i++) {
415                 BUG_ON(tq->buf_info[i].skb != NULL ||
416                        tq->buf_info[i].map_type != VMXNET3_MAP_NONE);
417         }
418
419         tq->tx_ring.gen = VMXNET3_INIT_GEN;
420         tq->tx_ring.next2fill = tq->tx_ring.next2comp = 0;
421
422         tq->comp_ring.gen = VMXNET3_INIT_GEN;
423         tq->comp_ring.next2proc = 0;
424 }
425
426
427 static void
428 vmxnet3_tq_destroy(struct vmxnet3_tx_queue *tq,
429                    struct vmxnet3_adapter *adapter)
430 {
431         if (tq->tx_ring.base) {
432                 dma_free_coherent(&adapter->pdev->dev, tq->tx_ring.size *
433                                   sizeof(struct Vmxnet3_TxDesc),
434                                   tq->tx_ring.base, tq->tx_ring.basePA);
435                 tq->tx_ring.base = NULL;
436         }
437         if (tq->data_ring.base) {
438                 dma_free_coherent(&adapter->pdev->dev, tq->data_ring.size *
439                                   sizeof(struct Vmxnet3_TxDataDesc),
440                                   tq->data_ring.base, tq->data_ring.basePA);
441                 tq->data_ring.base = NULL;
442         }
443         if (tq->comp_ring.base) {
444                 dma_free_coherent(&adapter->pdev->dev, tq->comp_ring.size *
445                                   sizeof(struct Vmxnet3_TxCompDesc),
446                                   tq->comp_ring.base, tq->comp_ring.basePA);
447                 tq->comp_ring.base = NULL;
448         }
449         if (tq->buf_info) {
450                 dma_free_coherent(&adapter->pdev->dev,
451                                   tq->tx_ring.size * sizeof(tq->buf_info[0]),
452                                   tq->buf_info, tq->buf_info_pa);
453                 tq->buf_info = NULL;
454         }
455 }
456
457
458 /* Destroy all tx queues */
459 void
460 vmxnet3_tq_destroy_all(struct vmxnet3_adapter *adapter)
461 {
462         int i;
463
464         for (i = 0; i < adapter->num_tx_queues; i++)
465                 vmxnet3_tq_destroy(&adapter->tx_queue[i], adapter);
466 }
467
468
469 static void
470 vmxnet3_tq_init(struct vmxnet3_tx_queue *tq,
471                 struct vmxnet3_adapter *adapter)
472 {
473         int i;
474
475         /* reset the tx ring contents to 0 and reset the tx ring states */
476         memset(tq->tx_ring.base, 0, tq->tx_ring.size *
477                sizeof(struct Vmxnet3_TxDesc));
478         tq->tx_ring.next2fill = tq->tx_ring.next2comp = 0;
479         tq->tx_ring.gen = VMXNET3_INIT_GEN;
480
481         memset(tq->data_ring.base, 0, tq->data_ring.size *
482                sizeof(struct Vmxnet3_TxDataDesc));
483
484         /* reset the tx comp ring contents to 0 and reset comp ring states */
485         memset(tq->comp_ring.base, 0, tq->comp_ring.size *
486                sizeof(struct Vmxnet3_TxCompDesc));
487         tq->comp_ring.next2proc = 0;
488         tq->comp_ring.gen = VMXNET3_INIT_GEN;
489
490         /* reset the bookkeeping data */
491         memset(tq->buf_info, 0, sizeof(tq->buf_info[0]) * tq->tx_ring.size);
492         for (i = 0; i < tq->tx_ring.size; i++)
493                 tq->buf_info[i].map_type = VMXNET3_MAP_NONE;
494
495         /* stats are not reset */
496 }
497
498
499 static int
500 vmxnet3_tq_create(struct vmxnet3_tx_queue *tq,
501                   struct vmxnet3_adapter *adapter)
502 {
503         size_t sz;
504
505         BUG_ON(tq->tx_ring.base || tq->data_ring.base ||
506                tq->comp_ring.base || tq->buf_info);
507
508         tq->tx_ring.base = dma_alloc_coherent(&adapter->pdev->dev,
509                         tq->tx_ring.size * sizeof(struct Vmxnet3_TxDesc),
510                         &tq->tx_ring.basePA, GFP_KERNEL);
511         if (!tq->tx_ring.base) {
512                 netdev_err(adapter->netdev, "failed to allocate tx ring\n");
513                 goto err;
514         }
515
516         tq->data_ring.base = dma_alloc_coherent(&adapter->pdev->dev,
517                         tq->data_ring.size * sizeof(struct Vmxnet3_TxDataDesc),
518                         &tq->data_ring.basePA, GFP_KERNEL);
519         if (!tq->data_ring.base) {
520                 netdev_err(adapter->netdev, "failed to allocate data ring\n");
521                 goto err;
522         }
523
524         tq->comp_ring.base = dma_alloc_coherent(&adapter->pdev->dev,
525                         tq->comp_ring.size * sizeof(struct Vmxnet3_TxCompDesc),
526                         &tq->comp_ring.basePA, GFP_KERNEL);
527         if (!tq->comp_ring.base) {
528                 netdev_err(adapter->netdev, "failed to allocate tx comp ring\n");
529                 goto err;
530         }
531
532         sz = tq->tx_ring.size * sizeof(tq->buf_info[0]);
533         tq->buf_info = dma_zalloc_coherent(&adapter->pdev->dev, sz,
534                                            &tq->buf_info_pa, GFP_KERNEL);
535         if (!tq->buf_info)
536                 goto err;
537
538         return 0;
539
540 err:
541         vmxnet3_tq_destroy(tq, adapter);
542         return -ENOMEM;
543 }
544
545 static void
546 vmxnet3_tq_cleanup_all(struct vmxnet3_adapter *adapter)
547 {
548         int i;
549
550         for (i = 0; i < adapter->num_tx_queues; i++)
551                 vmxnet3_tq_cleanup(&adapter->tx_queue[i], adapter);
552 }
553
554 /*
555  *    Starting from ring->next2fill, allocate rx buffers for the given ring
556  *    of the rx queue and update the rx descriptors. Stop after @num_to_alloc
557  *    buffers are allocated or an allocation fails.
558  */
559
560 static int
561 vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx,
562                         int num_to_alloc, struct vmxnet3_adapter *adapter)
563 {
564         int num_allocated = 0;
565         struct vmxnet3_rx_buf_info *rbi_base = rq->buf_info[ring_idx];
566         struct vmxnet3_cmd_ring *ring = &rq->rx_ring[ring_idx];
567         u32 val;
568
569         while (num_allocated <= num_to_alloc) {
570                 struct vmxnet3_rx_buf_info *rbi;
571                 union Vmxnet3_GenericDesc *gd;
572
573                 rbi = rbi_base + ring->next2fill;
574                 gd = ring->base + ring->next2fill;
575
576                 if (rbi->buf_type == VMXNET3_RX_BUF_SKB) {
577                         if (rbi->skb == NULL) {
578                                 rbi->skb = __netdev_alloc_skb_ip_align(adapter->netdev,
579                                                                        rbi->len,
580                                                                        GFP_KERNEL);
581                                 if (unlikely(rbi->skb == NULL)) {
582                                         rq->stats.rx_buf_alloc_failure++;
583                                         break;
584                                 }
585
586                                 rbi->dma_addr = dma_map_single(
587                                                 &adapter->pdev->dev,
588                                                 rbi->skb->data, rbi->len,
589                                                 PCI_DMA_FROMDEVICE);
590                         } else {
591                                 /* rx buffer skipped by the device */
592                         }
593                         val = VMXNET3_RXD_BTYPE_HEAD << VMXNET3_RXD_BTYPE_SHIFT;
594                 } else {
595                         BUG_ON(rbi->buf_type != VMXNET3_RX_BUF_PAGE ||
596                                rbi->len  != PAGE_SIZE);
597
598                         if (rbi->page == NULL) {
599                                 rbi->page = alloc_page(GFP_ATOMIC);
600                                 if (unlikely(rbi->page == NULL)) {
601                                         rq->stats.rx_buf_alloc_failure++;
602                                         break;
603                                 }
604                                 rbi->dma_addr = dma_map_page(
605                                                 &adapter->pdev->dev,
606                                                 rbi->page, 0, PAGE_SIZE,
607                                                 PCI_DMA_FROMDEVICE);
608                         } else {
609                                 /* rx buffers skipped by the device */
610                         }
611                         val = VMXNET3_RXD_BTYPE_BODY << VMXNET3_RXD_BTYPE_SHIFT;
612                 }
613
614                 BUG_ON(rbi->dma_addr == 0);
615                 gd->rxd.addr = cpu_to_le64(rbi->dma_addr);
616                 gd->dword[2] = cpu_to_le32((!ring->gen << VMXNET3_RXD_GEN_SHIFT)
617                                            | val | rbi->len);
618
619                 /* Fill the last buffer but don't mark it ready, or else the
620                  * device will think that the queue is full */
621                 if (num_allocated == num_to_alloc)
622                         break;
623
624                 gd->dword[2] |= cpu_to_le32(ring->gen << VMXNET3_RXD_GEN_SHIFT);
625                 num_allocated++;
626                 vmxnet3_cmd_ring_adv_next2fill(ring);
627         }
628
629         netdev_dbg(adapter->netdev,
630                 "alloc_rx_buf: %d allocated, next2fill %u, next2comp %u\n",
631                 num_allocated, ring->next2fill, ring->next2comp);
632
633         /* so that the device can distinguish a full ring from an empty ring */
634         BUG_ON(num_allocated != 0 && ring->next2fill == ring->next2comp);
635
636         return num_allocated;
637 }
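/*
 * Descriptive note: because the last refilled buffer is never marked ready
 * (see the "Fill the last buffer" comment above), a ring of size N holds at
 * most N - 1 buffers that the device may use. That is what lets
 * next2fill == next2comp unambiguously mean "empty" rather than "full",
 * which the BUG_ON above asserts.
 */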
638
639
640 static void
641 vmxnet3_append_frag(struct sk_buff *skb, struct Vmxnet3_RxCompDesc *rcd,
642                     struct vmxnet3_rx_buf_info *rbi)
643 {
644         struct skb_frag_struct *frag = skb_shinfo(skb)->frags +
645                 skb_shinfo(skb)->nr_frags;
646
647         BUG_ON(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS);
648
649         __skb_frag_set_page(frag, rbi->page);
650         frag->page_offset = 0;
651         skb_frag_size_set(frag, rcd->len);
652         skb->data_len += rcd->len;
653         skb->truesize += PAGE_SIZE;
654         skb_shinfo(skb)->nr_frags++;
655 }
656
657
658 static void
659 vmxnet3_map_pkt(struct sk_buff *skb, struct vmxnet3_tx_ctx *ctx,
660                 struct vmxnet3_tx_queue *tq, struct pci_dev *pdev,
661                 struct vmxnet3_adapter *adapter)
662 {
663         u32 dw2, len;
664         unsigned long buf_offset;
665         int i;
666         union Vmxnet3_GenericDesc *gdesc;
667         struct vmxnet3_tx_buf_info *tbi = NULL;
668
669         BUG_ON(ctx->copy_size > skb_headlen(skb));
670
671         /* use the previous gen bit for the SOP desc */
672         dw2 = (tq->tx_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
673
674         ctx->sop_txd = tq->tx_ring.base + tq->tx_ring.next2fill;
675         gdesc = ctx->sop_txd; /* both loops below can be skipped */
676
677         /* no need to map the buffer if headers are copied */
678         if (ctx->copy_size) {
679                 ctx->sop_txd->txd.addr = cpu_to_le64(tq->data_ring.basePA +
680                                         tq->tx_ring.next2fill *
681                                         sizeof(struct Vmxnet3_TxDataDesc));
682                 ctx->sop_txd->dword[2] = cpu_to_le32(dw2 | ctx->copy_size);
683                 ctx->sop_txd->dword[3] = 0;
684
685                 tbi = tq->buf_info + tq->tx_ring.next2fill;
686                 tbi->map_type = VMXNET3_MAP_NONE;
687
688                 netdev_dbg(adapter->netdev,
689                         "txd[%u]: 0x%Lx 0x%x 0x%x\n",
690                         tq->tx_ring.next2fill,
691                         le64_to_cpu(ctx->sop_txd->txd.addr),
692                         ctx->sop_txd->dword[2], ctx->sop_txd->dword[3]);
693                 vmxnet3_cmd_ring_adv_next2fill(&tq->tx_ring);
694
695                 /* use the right gen for non-SOP desc */
696                 dw2 = tq->tx_ring.gen << VMXNET3_TXD_GEN_SHIFT;
697         }
698
699         /* linear part can use multiple tx desc if it's big */
700         len = skb_headlen(skb) - ctx->copy_size;
701         buf_offset = ctx->copy_size;
702         while (len) {
703                 u32 buf_size;
704
705                 if (len < VMXNET3_MAX_TX_BUF_SIZE) {
706                         buf_size = len;
707                         dw2 |= len;
708                 } else {
709                         buf_size = VMXNET3_MAX_TX_BUF_SIZE;
710                         /* spec says that for TxDesc.len, 0 == 2^14 */
711                 }
712
713                 tbi = tq->buf_info + tq->tx_ring.next2fill;
714                 tbi->map_type = VMXNET3_MAP_SINGLE;
715                 tbi->dma_addr = dma_map_single(&adapter->pdev->dev,
716                                 skb->data + buf_offset, buf_size,
717                                 PCI_DMA_TODEVICE);
718
719                 tbi->len = buf_size;
720
721                 gdesc = tq->tx_ring.base + tq->tx_ring.next2fill;
722                 BUG_ON(gdesc->txd.gen == tq->tx_ring.gen);
723
724                 gdesc->txd.addr = cpu_to_le64(tbi->dma_addr);
725                 gdesc->dword[2] = cpu_to_le32(dw2);
726                 gdesc->dword[3] = 0;
727
728                 netdev_dbg(adapter->netdev,
729                         "txd[%u]: 0x%Lx 0x%x 0x%x\n",
730                         tq->tx_ring.next2fill, le64_to_cpu(gdesc->txd.addr),
731                         le32_to_cpu(gdesc->dword[2]), gdesc->dword[3]);
732                 vmxnet3_cmd_ring_adv_next2fill(&tq->tx_ring);
733                 dw2 = tq->tx_ring.gen << VMXNET3_TXD_GEN_SHIFT;
734
735                 len -= buf_size;
736                 buf_offset += buf_size;
737         }
738
739         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
740                 const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
741                 u32 buf_size;
742
743                 buf_offset = 0;
744                 len = skb_frag_size(frag);
745                 while (len) {
746                         tbi = tq->buf_info + tq->tx_ring.next2fill;
747                         if (len < VMXNET3_MAX_TX_BUF_SIZE) {
748                                 buf_size = len;
749                                 dw2 |= len;
750                         } else {
751                                 buf_size = VMXNET3_MAX_TX_BUF_SIZE;
752                                 /* spec says that for TxDesc.len, 0 == 2^14 */
753                         }
754                         tbi->map_type = VMXNET3_MAP_PAGE;
755                         tbi->dma_addr = skb_frag_dma_map(&adapter->pdev->dev, frag,
756                                                          buf_offset, buf_size,
757                                                          DMA_TO_DEVICE);
758
759                         tbi->len = buf_size;
760
761                         gdesc = tq->tx_ring.base + tq->tx_ring.next2fill;
762                         BUG_ON(gdesc->txd.gen == tq->tx_ring.gen);
763
764                         gdesc->txd.addr = cpu_to_le64(tbi->dma_addr);
765                         gdesc->dword[2] = cpu_to_le32(dw2);
766                         gdesc->dword[3] = 0;
767
768                         netdev_dbg(adapter->netdev,
769                                 "txd[%u]: 0x%llx %u %u\n",
770                                 tq->tx_ring.next2fill, le64_to_cpu(gdesc->txd.addr),
771                                 le32_to_cpu(gdesc->dword[2]), gdesc->dword[3]);
772                         vmxnet3_cmd_ring_adv_next2fill(&tq->tx_ring);
773                         dw2 = tq->tx_ring.gen << VMXNET3_TXD_GEN_SHIFT;
774
775                         len -= buf_size;
776                         buf_offset += buf_size;
777                 }
778         }
779
780         ctx->eop_txd = gdesc;
781
782         /* set the last buf_info for the pkt */
783         tbi->skb = skb;
784         tbi->sop_idx = ctx->sop_txd - tq->tx_ring.base;
785 }
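/*
 * Illustrative note on the dword[2] packing done above (not driver code):
 * each Tx descriptor's dword[2] carries the generation bit at
 * VMXNET3_TXD_GEN_SHIFT together with the buffer length in its low bits,
 * where a length of 0 encodes the maximum of 2^14 bytes; that is why
 * "dw2 |= len" is skipped when buf_size == VMXNET3_MAX_TX_BUF_SIZE, per the
 * "spec says" comments in the loops above.
 */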
786
787
788 /* Init all tx queues */
789 static void
790 vmxnet3_tq_init_all(struct vmxnet3_adapter *adapter)
791 {
792         int i;
793
794         for (i = 0; i < adapter->num_tx_queues; i++)
795                 vmxnet3_tq_init(&adapter->tx_queue[i], adapter);
796 }
797
798
799 /*
800  *    Parse and copy the relevant protocol headers:
801  *      For a TSO pkt, the relevant headers are L2/3/4 including options
802  *      For a pkt requesting csum offloading, they are L2/3 and may include L4
803  *      if it's a TCP/UDP pkt
804  *
805  * Returns:
806  *    -1:  an error occurred during parsing
807  *     0:  protocol headers parsed, but too big to be copied
808  *     1:  protocol headers parsed and copied
809  *
810  * Other effects:
811  *    1. related *ctx fields are updated.
812  *    2. ctx->copy_size is # of bytes copied
813  *    3. the portion copied is guaranteed to be in the linear part
814  *
815  */
816 static int
817 vmxnet3_parse_and_copy_hdr(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
818                            struct vmxnet3_tx_ctx *ctx,
819                            struct vmxnet3_adapter *adapter)
820 {
821         struct Vmxnet3_TxDataDesc *tdd;
822
823         if (ctx->mss) { /* TSO */
824                 ctx->eth_ip_hdr_size = skb_transport_offset(skb);
825                 ctx->l4_hdr_size = tcp_hdrlen(skb);
826                 ctx->copy_size = ctx->eth_ip_hdr_size + ctx->l4_hdr_size;
827         } else {
828                 if (skb->ip_summed == CHECKSUM_PARTIAL) {
829                         ctx->eth_ip_hdr_size = skb_checksum_start_offset(skb);
830
831                         if (ctx->ipv4) {
832                                 const struct iphdr *iph = ip_hdr(skb);
833
834                                 if (iph->protocol == IPPROTO_TCP)
835                                         ctx->l4_hdr_size = tcp_hdrlen(skb);
836                                 else if (iph->protocol == IPPROTO_UDP)
837                                         ctx->l4_hdr_size = sizeof(struct udphdr);
838                                 else
839                                         ctx->l4_hdr_size = 0;
840                         } else {
841                                 /* for simplicity, don't copy L4 headers */
842                                 ctx->l4_hdr_size = 0;
843                         }
844                         ctx->copy_size = min(ctx->eth_ip_hdr_size +
845                                          ctx->l4_hdr_size, skb->len);
846                 } else {
847                         ctx->eth_ip_hdr_size = 0;
848                         ctx->l4_hdr_size = 0;
849                         /* copy as much as allowed */
850                         ctx->copy_size = min((unsigned int)VMXNET3_HDR_COPY_SIZE,
851                                              skb_headlen(skb));
852                 }
853
854                 /* make sure headers are accessible directly */
855                 if (unlikely(!pskb_may_pull(skb, ctx->copy_size)))
856                         goto err;
857         }
858
859         if (unlikely(ctx->copy_size > VMXNET3_HDR_COPY_SIZE)) {
860                 tq->stats.oversized_hdr++;
861                 ctx->copy_size = 0;
862                 return 0;
863         }
864
865         tdd = tq->data_ring.base + tq->tx_ring.next2fill;
866
867         memcpy(tdd->data, skb->data, ctx->copy_size);
868         netdev_dbg(adapter->netdev,
869                 "copy %u bytes to dataRing[%u]\n",
870                 ctx->copy_size, tq->tx_ring.next2fill);
871         return 1;
872
873 err:
874         return -1;
875 }
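/*
 * Worked example (illustrative; assumes an untagged Ethernet frame with no
 * IP or TCP options): for a TSO TCP/IPv4 skb, eth_ip_hdr_size =
 * skb_transport_offset() = 14 + 20 = 34 and l4_hdr_size = tcp_hdrlen() = 20,
 * so copy_size = 54 bytes are copied into the Tx data ring entry.
 */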
876
877
878 static void
879 vmxnet3_prepare_tso(struct sk_buff *skb,
880                     struct vmxnet3_tx_ctx *ctx)
881 {
882         struct tcphdr *tcph = tcp_hdr(skb);
883
884         if (ctx->ipv4) {
885                 struct iphdr *iph = ip_hdr(skb);
886
887                 iph->check = 0;
888                 tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, 0,
889                                                  IPPROTO_TCP, 0);
890         } else {
891                 struct ipv6hdr *iph = ipv6_hdr(skb);
892
893                 tcph->check = ~csum_ipv6_magic(&iph->saddr, &iph->daddr, 0,
894                                                IPPROTO_TCP, 0);
895         }
896 }
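/*
 * Descriptive note: for TSO the TCP checksum field is seeded with a
 * pseudo-header checksum computed over a zero length (csum_tcpudp_magic /
 * csum_ipv6_magic with len == 0), and the IPv4 header checksum is cleared,
 * so the device can fill in the final checksums for each segment it emits.
 */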
897
898 static int txd_estimate(const struct sk_buff *skb)
899 {
900         int count = VMXNET3_TXD_NEEDED(skb_headlen(skb)) + 1;
901         int i;
902
903         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
904                 const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
905
906                 count += VMXNET3_TXD_NEEDED(skb_frag_size(frag));
907         }
908         return count;
909 }
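/*
 * Worked example (illustrative; assumes VMXNET3_MAX_TX_BUF_SIZE is 16 KB and
 * VMXNET3_TXD_NEEDED(x) evaluates to ceil(x / VMXNET3_MAX_TX_BUF_SIZE)): an
 * skb with a 2 KB linear area and one 20 KB frag is estimated as
 *	1 (linear) + 1 (the "+ 1" above, reserved for the header-copy SOP desc)
 *	+ 2 (frag) = 4 descriptors.
 */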
910
911 /*
912  * Transmits a pkt through a given tq
913  * Returns:
914  *    NETDEV_TX_OK:      descriptors are set up successfully
915  *    NETDEV_TX_OK:      an error occurred and the pkt was dropped
916  *    NETDEV_TX_BUSY:    tx ring is full, queue is stopped
917  *
918  * Side-effects:
919  *    1. tx ring may be changed
920  *    2. tq stats may be updated accordingly
921  *    3. shared->txNumDeferred may be updated
922  */
923
924 static int
925 vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
926                 struct vmxnet3_adapter *adapter, struct net_device *netdev)
927 {
928         int ret;
929         u32 count;
930         unsigned long flags;
931         struct vmxnet3_tx_ctx ctx;
932         union Vmxnet3_GenericDesc *gdesc;
933 #ifdef __BIG_ENDIAN_BITFIELD
934         /* Use temporary descriptor to avoid touching bits multiple times */
935         union Vmxnet3_GenericDesc tempTxDesc;
936 #endif
937
938         count = txd_estimate(skb);
939
940         ctx.ipv4 = (vlan_get_protocol(skb) == cpu_to_be16(ETH_P_IP));
941
942         ctx.mss = skb_shinfo(skb)->gso_size;
943         if (ctx.mss) {
944                 if (skb_header_cloned(skb)) {
945                         if (unlikely(pskb_expand_head(skb, 0, 0,
946                                                       GFP_ATOMIC) != 0)) {
947                                 tq->stats.drop_tso++;
948                                 goto drop_pkt;
949                         }
950                         tq->stats.copy_skb_header++;
951                 }
952                 vmxnet3_prepare_tso(skb, &ctx);
953         } else {
954                 if (unlikely(count > VMXNET3_MAX_TXD_PER_PKT)) {
955
956                         /* non-tso pkts must not use more than
957                          * VMXNET3_MAX_TXD_PER_PKT entries
958                          */
959                         if (skb_linearize(skb) != 0) {
960                                 tq->stats.drop_too_many_frags++;
961                                 goto drop_pkt;
962                         }
963                         tq->stats.linearized++;
964
965                         /* recalculate the # of descriptors to use */
966                         count = VMXNET3_TXD_NEEDED(skb_headlen(skb)) + 1;
967                 }
968         }
969
970         spin_lock_irqsave(&tq->tx_lock, flags);
971
972         if (count > vmxnet3_cmd_ring_desc_avail(&tq->tx_ring)) {
973                 tq->stats.tx_ring_full++;
974                 netdev_dbg(adapter->netdev,
975                         "tx queue stopped on %s, next2comp %u"
976                         " next2fill %u\n", adapter->netdev->name,
977                         tq->tx_ring.next2comp, tq->tx_ring.next2fill);
978
979                 vmxnet3_tq_stop(tq, adapter);
980                 spin_unlock_irqrestore(&tq->tx_lock, flags);
981                 return NETDEV_TX_BUSY;
982         }
983
984
985         ret = vmxnet3_parse_and_copy_hdr(skb, tq, &ctx, adapter);
986         if (ret >= 0) {
987                 BUG_ON(ret <= 0 && ctx.copy_size != 0);
988                 /* hdrs parsed, check against other limits */
989                 if (ctx.mss) {
990                         if (unlikely(ctx.eth_ip_hdr_size + ctx.l4_hdr_size >
991                                      VMXNET3_MAX_TX_BUF_SIZE)) {
992                                 goto hdr_too_big;
993                         }
994                 } else {
995                         if (skb->ip_summed == CHECKSUM_PARTIAL) {
996                                 if (unlikely(ctx.eth_ip_hdr_size +
997                                              skb->csum_offset >
998                                              VMXNET3_MAX_CSUM_OFFSET)) {
999                                         goto hdr_too_big;
1000                                 }
1001                         }
1002                 }
1003         } else {
1004                 tq->stats.drop_hdr_inspect_err++;
1005                 goto unlock_drop_pkt;
1006         }
1007
1008         /* fill tx descs related to addr & len */
1009         vmxnet3_map_pkt(skb, &ctx, tq, adapter->pdev, adapter);
1010
1011         /* setup the EOP desc */
1012         ctx.eop_txd->dword[3] = cpu_to_le32(VMXNET3_TXD_CQ | VMXNET3_TXD_EOP);
1013
1014         /* setup the SOP desc */
1015 #ifdef __BIG_ENDIAN_BITFIELD
1016         gdesc = &tempTxDesc;
1017         gdesc->dword[2] = ctx.sop_txd->dword[2];
1018         gdesc->dword[3] = ctx.sop_txd->dword[3];
1019 #else
1020         gdesc = ctx.sop_txd;
1021 #endif
1022         if (ctx.mss) {
1023                 gdesc->txd.hlen = ctx.eth_ip_hdr_size + ctx.l4_hdr_size;
1024                 gdesc->txd.om = VMXNET3_OM_TSO;
1025                 gdesc->txd.msscof = ctx.mss;
1026                 le32_add_cpu(&tq->shared->txNumDeferred, (skb->len -
1027                              gdesc->txd.hlen + ctx.mss - 1) / ctx.mss);
1028         } else {
1029                 if (skb->ip_summed == CHECKSUM_PARTIAL) {
1030                         gdesc->txd.hlen = ctx.eth_ip_hdr_size;
1031                         gdesc->txd.om = VMXNET3_OM_CSUM;
1032                         gdesc->txd.msscof = ctx.eth_ip_hdr_size +
1033                                             skb->csum_offset;
1034                 } else {
1035                         gdesc->txd.om = 0;
1036                         gdesc->txd.msscof = 0;
1037                 }
1038                 le32_add_cpu(&tq->shared->txNumDeferred, 1);
1039         }
1040
1041         if (vlan_tx_tag_present(skb)) {
1042                 gdesc->txd.ti = 1;
1043                 gdesc->txd.tci = vlan_tx_tag_get(skb);
1044         }
1045
1046         /* finally flips the GEN bit of the SOP desc. */
1047         gdesc->dword[2] = cpu_to_le32(le32_to_cpu(gdesc->dword[2]) ^
1048                                                   VMXNET3_TXD_GEN);
1049 #ifdef __BIG_ENDIAN_BITFIELD
1050         /* Finished updating in bitfields of Tx Desc, so write them in original
1051          * place.
1052          */
1053         vmxnet3_TxDescToLe((struct Vmxnet3_TxDesc *)gdesc,
1054                            (struct Vmxnet3_TxDesc *)ctx.sop_txd);
1055         gdesc = ctx.sop_txd;
1056 #endif
1057         netdev_dbg(adapter->netdev,
1058                 "txd[%u]: SOP 0x%Lx 0x%x 0x%x\n",
1059                 (u32)(ctx.sop_txd -
1060                 tq->tx_ring.base), le64_to_cpu(gdesc->txd.addr),
1061                 le32_to_cpu(gdesc->dword[2]), le32_to_cpu(gdesc->dword[3]));
1062
1063         spin_unlock_irqrestore(&tq->tx_lock, flags);
1064
1065         if (le32_to_cpu(tq->shared->txNumDeferred) >=
1066                                         le32_to_cpu(tq->shared->txThreshold)) {
1067                 tq->shared->txNumDeferred = 0;
1068                 VMXNET3_WRITE_BAR0_REG(adapter,
1069                                        VMXNET3_REG_TXPROD + tq->qid * 8,
1070                                        tq->tx_ring.next2fill);
1071         }
1072
1073         return NETDEV_TX_OK;
1074
1075 hdr_too_big:
1076         tq->stats.drop_oversized_hdr++;
1077 unlock_drop_pkt:
1078         spin_unlock_irqrestore(&tq->tx_lock, flags);
1079 drop_pkt:
1080         tq->stats.drop_total++;
1081         dev_kfree_skb(skb);
1082         return NETDEV_TX_OK;
1083 }
1084
1085
1086 static netdev_tx_t
1087 vmxnet3_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
1088 {
1089         struct vmxnet3_adapter *adapter = netdev_priv(netdev);
1090
1091         BUG_ON(skb->queue_mapping > adapter->num_tx_queues);
1092         return vmxnet3_tq_xmit(skb,
1093                                &adapter->tx_queue[skb->queue_mapping],
1094                                adapter, netdev);
1095 }
1096
1097
1098 static void
1099 vmxnet3_rx_csum(struct vmxnet3_adapter *adapter,
1100                 struct sk_buff *skb,
1101                 union Vmxnet3_GenericDesc *gdesc)
1102 {
1103         if (!gdesc->rcd.cnc && adapter->netdev->features & NETIF_F_RXCSUM) {
1104                 /* typical case: TCP/UDP over IP and both csums are correct */
1105                 if ((le32_to_cpu(gdesc->dword[3]) & VMXNET3_RCD_CSUM_OK) ==
1106                                                         VMXNET3_RCD_CSUM_OK) {
1107                         skb->ip_summed = CHECKSUM_UNNECESSARY;
1108                         BUG_ON(!(gdesc->rcd.tcp || gdesc->rcd.udp));
1109                         BUG_ON(!(gdesc->rcd.v4  || gdesc->rcd.v6));
1110                         BUG_ON(gdesc->rcd.frg);
1111                 } else {
1112                         if (gdesc->rcd.csum) {
1113                                 skb->csum = htons(gdesc->rcd.csum);
1114                                 skb->ip_summed = CHECKSUM_PARTIAL;
1115                         } else {
1116                                 skb_checksum_none_assert(skb);
1117                         }
1118                 }
1119         } else {
1120                 skb_checksum_none_assert(skb);
1121         }
1122 }
1123
1124
1125 static void
1126 vmxnet3_rx_error(struct vmxnet3_rx_queue *rq, struct Vmxnet3_RxCompDesc *rcd,
1127                  struct vmxnet3_rx_ctx *ctx,  struct vmxnet3_adapter *adapter)
1128 {
1129         rq->stats.drop_err++;
1130         if (!rcd->fcs)
1131                 rq->stats.drop_fcs++;
1132
1133         rq->stats.drop_total++;
1134
1135         /*
1136          * We do not unmap and chain the rx buffer to the skb.
1137          * We basically pretend this buffer is not used and will be recycled
1138          * by vmxnet3_rq_alloc_rx_buf()
1139          */
1140
1141         /*
1142          * ctx->skb may be NULL if this is the first and the only one
1143          * desc for the pkt
1144          */
1145         if (ctx->skb)
1146                 dev_kfree_skb_irq(ctx->skb);
1147
1148         ctx->skb = NULL;
1149 }
1150
1151
1152 static int
1153 vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
1154                        struct vmxnet3_adapter *adapter, int quota)
1155 {
1156         static const u32 rxprod_reg[2] = {
1157                 VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2
1158         };
1159         u32 num_rxd = 0;
1160         bool skip_page_frags = false;
1161         struct Vmxnet3_RxCompDesc *rcd;
1162         struct vmxnet3_rx_ctx *ctx = &rq->rx_ctx;
1163 #ifdef __BIG_ENDIAN_BITFIELD
1164         struct Vmxnet3_RxDesc rxCmdDesc;
1165         struct Vmxnet3_RxCompDesc rxComp;
1166 #endif
1167         vmxnet3_getRxComp(rcd, &rq->comp_ring.base[rq->comp_ring.next2proc].rcd,
1168                           &rxComp);
1169         while (rcd->gen == rq->comp_ring.gen) {
1170                 struct vmxnet3_rx_buf_info *rbi;
1171                 struct sk_buff *skb, *new_skb = NULL;
1172                 struct page *new_page = NULL;
1173                 int num_to_alloc;
1174                 struct Vmxnet3_RxDesc *rxd;
1175                 u32 idx, ring_idx;
1176                 struct vmxnet3_cmd_ring *ring = NULL;
1177                 if (num_rxd >= quota) {
1178                         /* we may stop even before we see the EOP desc of
1179                          * the current pkt
1180                          */
1181                         break;
1182                 }
1183                 num_rxd++;
1184                 BUG_ON(rcd->rqID != rq->qid && rcd->rqID != rq->qid2);
1185                 idx = rcd->rxdIdx;
1186                 ring_idx = rcd->rqID < adapter->num_rx_queues ? 0 : 1;
1187                 ring = rq->rx_ring + ring_idx;
1188                 vmxnet3_getRxDesc(rxd, &rq->rx_ring[ring_idx].base[idx].rxd,
1189                                   &rxCmdDesc);
1190                 rbi = rq->buf_info[ring_idx] + idx;
1191
1192                 BUG_ON(rxd->addr != rbi->dma_addr ||
1193                        rxd->len != rbi->len);
1194
1195                 if (unlikely(rcd->eop && rcd->err)) {
1196                         vmxnet3_rx_error(rq, rcd, ctx, adapter);
1197                         goto rcd_done;
1198                 }
1199
1200                 if (rcd->sop) { /* first buf of the pkt */
1201                         BUG_ON(rxd->btype != VMXNET3_RXD_BTYPE_HEAD ||
1202                                rcd->rqID != rq->qid);
1203
1204                         BUG_ON(rbi->buf_type != VMXNET3_RX_BUF_SKB);
1205                         BUG_ON(ctx->skb != NULL || rbi->skb == NULL);
1206
1207                         if (unlikely(rcd->len == 0)) {
1208                                 /* Pretend the rx buffer is skipped. */
1209                                 BUG_ON(!(rcd->sop && rcd->eop));
1210                                 netdev_dbg(adapter->netdev,
1211                                         "rxRing[%u][%u] 0 length\n",
1212                                         ring_idx, idx);
1213                                 goto rcd_done;
1214                         }
1215
1216                         skip_page_frags = false;
1217                         ctx->skb = rbi->skb;
1218                         new_skb = netdev_alloc_skb_ip_align(adapter->netdev,
1219                                                             rbi->len);
1220                         if (new_skb == NULL) {
1221                                 /* Skb allocation failed, do not hand this
1222                                  * skb over to the stack. Reuse it. Drop the existing pkt
1223                                  */
1224                                 rq->stats.rx_buf_alloc_failure++;
1225                                 ctx->skb = NULL;
1226                                 rq->stats.drop_total++;
1227                                 skip_page_frags = true;
1228                                 goto rcd_done;
1229                         }
1230
1231                         dma_unmap_single(&adapter->pdev->dev, rbi->dma_addr,
1232                                          rbi->len,
1233                                          PCI_DMA_FROMDEVICE);
1234
1235 #ifdef VMXNET3_RSS
1236                         if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE &&
1237                             (adapter->netdev->features & NETIF_F_RXHASH))
1238                                 ctx->skb->rxhash = le32_to_cpu(rcd->rssHash);
1239 #endif
1240                         skb_put(ctx->skb, rcd->len);
1241
1242                         /* Immediate refill */
1243                         rbi->skb = new_skb;
1244                         rbi->dma_addr = dma_map_single(&adapter->pdev->dev,
1245                                                        rbi->skb->data, rbi->len,
1246                                                        PCI_DMA_FROMDEVICE);
1247                         rxd->addr = cpu_to_le64(rbi->dma_addr);
1248                         rxd->len = rbi->len;
1249
1250                 } else {
1251                         BUG_ON(ctx->skb == NULL && !skip_page_frags);
1252
1253                         /* non SOP buffer must be type 1 in most cases */
1254                         BUG_ON(rbi->buf_type != VMXNET3_RX_BUF_PAGE);
1255                         BUG_ON(rxd->btype != VMXNET3_RXD_BTYPE_BODY);
1256
1257                         /* If an sop buffer was dropped, skip all
1258                          * following non-sop fragments. They will be reused.
1259                          */
1260                         if (skip_page_frags)
1261                                 goto rcd_done;
1262
1263                         new_page = alloc_page(GFP_ATOMIC);
1264                         if (unlikely(new_page == NULL)) {
1265                                 /* Replacement page frag could not be allocated.
1266                                  * Reuse this page. Drop the pkt and free the
1267                                  * skb which contained this page as a frag. Skip
1268                                  * processing all the following non-sop frags.
1269                                  */
1270                                 rq->stats.rx_buf_alloc_failure++;
1271                                 dev_kfree_skb(ctx->skb);
1272                                 ctx->skb = NULL;
1273                                 skip_page_frags = true;
1274                                 goto rcd_done;
1275                         }
1276
1277                         if (rcd->len) {
1278                                 dma_unmap_page(&adapter->pdev->dev,
1279                                                rbi->dma_addr, rbi->len,
1280                                                PCI_DMA_FROMDEVICE);
1281
1282                                 vmxnet3_append_frag(ctx->skb, rcd, rbi);
1283                         }
1284
1285                         /* Immediate refill */
1286                         rbi->page = new_page;
1287                         rbi->dma_addr = dma_map_page(&adapter->pdev->dev,
1288                                                      rbi->page,
1289                                                      0, PAGE_SIZE,
1290                                                      PCI_DMA_FROMDEVICE);
1291                         rxd->addr = cpu_to_le64(rbi->dma_addr);
1292                         rxd->len = rbi->len;
1293                 }
1294
1295
1296                 skb = ctx->skb;
1297                 if (rcd->eop) {
1298                         skb->len += skb->data_len;
1299
1300                         vmxnet3_rx_csum(adapter, skb,
1301                                         (union Vmxnet3_GenericDesc *)rcd);
1302                         skb->protocol = eth_type_trans(skb, adapter->netdev);
1303
1304                         if (unlikely(rcd->ts))
1305                                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rcd->tci);
1306
1307                         if (adapter->netdev->features & NETIF_F_LRO)
1308                                 netif_receive_skb(skb);
1309                         else
1310                                 napi_gro_receive(&rq->napi, skb);
1311
1312                         ctx->skb = NULL;
1313                 }
1314
1315 rcd_done:
1316                 /* device may have skipped some rx descs */
1317                 ring->next2comp = idx;
1318                 num_to_alloc = vmxnet3_cmd_ring_desc_avail(ring);
1319                 ring = rq->rx_ring + ring_idx;
1320                 while (num_to_alloc) {
1321                         vmxnet3_getRxDesc(rxd, &ring->base[ring->next2fill].rxd,
1322                                           &rxCmdDesc);
1323                         BUG_ON(!rxd->addr);
1324
1325                         /* Recv desc is ready to be used by the device */
1326                         rxd->gen = ring->gen;
1327                         vmxnet3_cmd_ring_adv_next2fill(ring);
1328                         num_to_alloc--;
1329                 }
1330
1331                 /* if needed, update the register */
1332                 if (unlikely(rq->shared->updateRxProd)) {
1333                         VMXNET3_WRITE_BAR0_REG(adapter,
1334                                                rxprod_reg[ring_idx] + rq->qid * 8,
1335                                                ring->next2fill);
1336                 }
1337
1338                 vmxnet3_comp_ring_adv_next2proc(&rq->comp_ring);
1339                 vmxnet3_getRxComp(rcd,
1340                                   &rq->comp_ring.base[rq->comp_ring.next2proc].rcd, &rxComp);
1341         }
1342
1343         return num_rxd;
1344 }
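/*
 * Descriptive note on the rx path above: each rx queue owns two command
 * rings, and the completion's rqID selects which one the descriptor index
 * refers to (ring_idx is 0 for rqID == rq->qid and 1 for rqID == rq->qid2).
 * SOP completions always arrive on ring 0 and must describe a
 * VMXNET3_RXD_BTYPE_HEAD buffer backed by an skb; non-SOP completions must
 * describe VMXNET3_RXD_BTYPE_BODY buffers backed by pages, which are chained
 * onto the skb as frags by vmxnet3_append_frag().
 */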
1345
1346
1347 static void
1348 vmxnet3_rq_cleanup(struct vmxnet3_rx_queue *rq,
1349                    struct vmxnet3_adapter *adapter)
1350 {
1351         u32 i, ring_idx;
1352         struct Vmxnet3_RxDesc *rxd;
1353
1354         for (ring_idx = 0; ring_idx < 2; ring_idx++) {
1355                 for (i = 0; i < rq->rx_ring[ring_idx].size; i++) {
1356 #ifdef __BIG_ENDIAN_BITFIELD
1357                         struct Vmxnet3_RxDesc rxDesc;
1358 #endif
1359                         vmxnet3_getRxDesc(rxd,
1360                                 &rq->rx_ring[ring_idx].base[i].rxd, &rxDesc);
1361
1362                         if (rxd->btype == VMXNET3_RXD_BTYPE_HEAD &&
1363                                         rq->buf_info[ring_idx][i].skb) {
1364                                 dma_unmap_single(&adapter->pdev->dev, rxd->addr,
1365                                                  rxd->len, PCI_DMA_FROMDEVICE);
1366                                 dev_kfree_skb(rq->buf_info[ring_idx][i].skb);
1367                                 rq->buf_info[ring_idx][i].skb = NULL;
1368                         } else if (rxd->btype == VMXNET3_RXD_BTYPE_BODY &&
1369                                         rq->buf_info[ring_idx][i].page) {
1370                                 dma_unmap_page(&adapter->pdev->dev, rxd->addr,
1371                                                rxd->len, PCI_DMA_FROMDEVICE);
1372                                 put_page(rq->buf_info[ring_idx][i].page);
1373                                 rq->buf_info[ring_idx][i].page = NULL;
1374                         }
1375                 }
1376
1377                 rq->rx_ring[ring_idx].gen = VMXNET3_INIT_GEN;
1378                 rq->rx_ring[ring_idx].next2fill =
1379                                         rq->rx_ring[ring_idx].next2comp = 0;
1380         }
1381
1382         rq->comp_ring.gen = VMXNET3_INIT_GEN;
1383         rq->comp_ring.next2proc = 0;
1384 }
1385
1386
1387 static void
1388 vmxnet3_rq_cleanup_all(struct vmxnet3_adapter *adapter)
1389 {
1390         int i;
1391
1392         for (i = 0; i < adapter->num_rx_queues; i++)
1393                 vmxnet3_rq_cleanup(&adapter->rx_queue[i], adapter);
1394 }
1395
1396
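/*
 * Free the DMA-coherent memory backing both rx descriptor rings, the
 * completion ring and the buf_info array of a single rx queue.  The
 * rx buffers themselves must already have been released by
 * vmxnet3_rq_cleanup(), which the BUG_ON below verifies.
 */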
1397 static void vmxnet3_rq_destroy(struct vmxnet3_rx_queue *rq,
1398                                struct vmxnet3_adapter *adapter)
1399 {
1400         int i;
1401         int j;
1402
1403         /* all rx buffers must have already been freed */
1404         for (i = 0; i < 2; i++) {
1405                 if (rq->buf_info[i]) {
1406                         for (j = 0; j < rq->rx_ring[i].size; j++)
1407                                 BUG_ON(rq->buf_info[i][j].page != NULL);
1408                 }
1409         }
1410
1411
1412         for (i = 0; i < 2; i++) {
1413                 if (rq->rx_ring[i].base) {
1414                         dma_free_coherent(&adapter->pdev->dev,
1415                                           rq->rx_ring[i].size
1416                                           * sizeof(struct Vmxnet3_RxDesc),
1417                                           rq->rx_ring[i].base,
1418                                           rq->rx_ring[i].basePA);
1419                         rq->rx_ring[i].base = NULL;
1420                 }
1421                 rq->buf_info[i] = NULL;
1422         }
1423
1424         if (rq->comp_ring.base) {
1425                 dma_free_coherent(&adapter->pdev->dev, rq->comp_ring.size
1426                                   * sizeof(struct Vmxnet3_RxCompDesc),
1427                                   rq->comp_ring.base, rq->comp_ring.basePA);
1428                 rq->comp_ring.base = NULL;
1429         }
1430
1431         if (rq->buf_info[0]) {
1432                 size_t sz = sizeof(struct vmxnet3_rx_buf_info) *
1433                         (rq->rx_ring[0].size + rq->rx_ring[1].size);
1434                 dma_free_coherent(&adapter->pdev->dev, sz, rq->buf_info[0],
1435                                   rq->buf_info_pa);
1436         }
1437 }
1438
1439
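/*
 * Prepare an rx queue for use: set the buffer type and length of every
 * buf_info entry, zero both descriptor rings and the completion ring,
 * reset the ring state, and pre-populate the rings with rx buffers.
 * Fails with -ENOMEM if not even one buffer could be allocated for the
 * first ring.
 */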
1440 static int
1441 vmxnet3_rq_init(struct vmxnet3_rx_queue *rq,
1442                 struct vmxnet3_adapter  *adapter)
1443 {
1444         int i;
1445
1446         /* initialize buf_info */
1447         for (i = 0; i < rq->rx_ring[0].size; i++) {
1448
1449                 /* 1st buf for a pkt is skbuff */
1450                 if (i % adapter->rx_buf_per_pkt == 0) {
1451                         rq->buf_info[0][i].buf_type = VMXNET3_RX_BUF_SKB;
1452                         rq->buf_info[0][i].len = adapter->skb_buf_size;
1453                 } else { /* subsequent bufs for a pkt are frags */
1454                         rq->buf_info[0][i].buf_type = VMXNET3_RX_BUF_PAGE;
1455                         rq->buf_info[0][i].len = PAGE_SIZE;
1456                 }
1457         }
1458         for (i = 0; i < rq->rx_ring[1].size; i++) {
1459                 rq->buf_info[1][i].buf_type = VMXNET3_RX_BUF_PAGE;
1460                 rq->buf_info[1][i].len = PAGE_SIZE;
1461         }
1462
1463         /* reset internal state and allocate buffers for both rings */
1464         for (i = 0; i < 2; i++) {
1465                 rq->rx_ring[i].next2fill = rq->rx_ring[i].next2comp = 0;
1466
1467                 memset(rq->rx_ring[i].base, 0, rq->rx_ring[i].size *
1468                        sizeof(struct Vmxnet3_RxDesc));
1469                 rq->rx_ring[i].gen = VMXNET3_INIT_GEN;
1470         }
1471         if (vmxnet3_rq_alloc_rx_buf(rq, 0, rq->rx_ring[0].size - 1,
1472                                     adapter) == 0) {
1473                 /* the 1st ring must have at least one rx buffer */
1474                 return -ENOMEM;
1475         }
1476         vmxnet3_rq_alloc_rx_buf(rq, 1, rq->rx_ring[1].size - 1, adapter);
1477
1478         /* reset the comp ring */
1479         rq->comp_ring.next2proc = 0;
1480         memset(rq->comp_ring.base, 0, rq->comp_ring.size *
1481                sizeof(struct Vmxnet3_RxCompDesc));
1482         rq->comp_ring.gen = VMXNET3_INIT_GEN;
1483
1484         /* reset rxctx */
1485         rq->rx_ctx.skb = NULL;
1486
1487         /* stats are not reset */
1488         return 0;
1489 }
1490
1491
1492 static int
1493 vmxnet3_rq_init_all(struct vmxnet3_adapter *adapter)
1494 {
1495         int i, err = 0;
1496
1497         for (i = 0; i < adapter->num_rx_queues; i++) {
1498                 err = vmxnet3_rq_init(&adapter->rx_queue[i], adapter);
1499                 if (unlikely(err)) {
1500                         dev_err(&adapter->netdev->dev, "%s: failed to "
1501                                 "initialize rx queue %i\n",
1502                                 adapter->netdev->name, i);
1503                         break;
1504                 }
1505         }
1506         return err;
1507
1508 }
1509
1510
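/*
 * Allocate the DMA-coherent descriptor rings, the completion ring and
 * the buf_info array shared by both rings of an rx queue.  On any
 * failure the partially allocated resources are released via
 * vmxnet3_rq_destroy().
 */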
1511 static int
1512 vmxnet3_rq_create(struct vmxnet3_rx_queue *rq, struct vmxnet3_adapter *adapter)
1513 {
1514         int i;
1515         size_t sz;
1516         struct vmxnet3_rx_buf_info *bi;
1517
1518         for (i = 0; i < 2; i++) {
1519
1520                 sz = rq->rx_ring[i].size * sizeof(struct Vmxnet3_RxDesc);
1521                 rq->rx_ring[i].base = dma_alloc_coherent(
1522                                                 &adapter->pdev->dev, sz,
1523                                                 &rq->rx_ring[i].basePA,
1524                                                 GFP_KERNEL);
1525                 if (!rq->rx_ring[i].base) {
1526                         netdev_err(adapter->netdev,
1527                                    "failed to allocate rx ring %d\n", i);
1528                         goto err;
1529                 }
1530         }
1531
1532         sz = rq->comp_ring.size * sizeof(struct Vmxnet3_RxCompDesc);
1533         rq->comp_ring.base = dma_alloc_coherent(&adapter->pdev->dev, sz,
1534                                                 &rq->comp_ring.basePA,
1535                                                 GFP_KERNEL);
1536         if (!rq->comp_ring.base) {
1537                 netdev_err(adapter->netdev, "failed to allocate rx comp ring\n");
1538                 goto err;
1539         }
1540
1541         sz = sizeof(struct vmxnet3_rx_buf_info) * (rq->rx_ring[0].size +
1542                                                    rq->rx_ring[1].size);
1543         bi = dma_zalloc_coherent(&adapter->pdev->dev, sz, &rq->buf_info_pa,
1544                                  GFP_KERNEL);
1545         if (!bi)
1546                 goto err;
1547
1548         rq->buf_info[0] = bi;
1549         rq->buf_info[1] = bi + rq->rx_ring[0].size;
1550
1551         return 0;
1552
1553 err:
1554         vmxnet3_rq_destroy(rq, adapter);
1555         return -ENOMEM;
1556 }
1557
1558
1559 static int
1560 vmxnet3_rq_create_all(struct vmxnet3_adapter *adapter)
1561 {
1562         int i, err = 0;
1563
1564         for (i = 0; i < adapter->num_rx_queues; i++) {
1565                 err = vmxnet3_rq_create(&adapter->rx_queue[i], adapter);
1566                 if (unlikely(err)) {
1567                         dev_err(&adapter->netdev->dev,
1568                                 "%s: failed to create rx queue %i\n",
1569                                 adapter->netdev->name, i);
1570                         goto err_out;
1571                 }
1572         }
1573         return err;
1574 err_out:
1575         vmxnet3_rq_destroy_all(adapter);
1576         return err;
1577
1578 }
1579
1580 /* Multiple queue aware polling function for tx and rx */
1581
1582 static int
1583 vmxnet3_do_poll(struct vmxnet3_adapter *adapter, int budget)
1584 {
1585         int rcd_done = 0, i;
1586         if (unlikely(adapter->shared->ecr))
1587                 vmxnet3_process_events(adapter);
1588         for (i = 0; i < adapter->num_tx_queues; i++)
1589                 vmxnet3_tq_tx_complete(&adapter->tx_queue[i], adapter);
1590
1591         for (i = 0; i < adapter->num_rx_queues; i++)
1592                 rcd_done += vmxnet3_rq_rx_complete(&adapter->rx_queue[i],
1593                                                    adapter, budget);
1594         return rcd_done;
1595 }
1596
1597
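/*
 * NAPI poll handler scheduled from vmxnet3_intr() when a single
 * interrupt serves the whole device: completes tx on every tx queue,
 * processes rx on every rx queue, and re-enables all interrupts once
 * the budget is not exhausted.
 */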
1598 static int
1599 vmxnet3_poll(struct napi_struct *napi, int budget)
1600 {
1601         struct vmxnet3_rx_queue *rx_queue = container_of(napi,
1602                                           struct vmxnet3_rx_queue, napi);
1603         int rxd_done;
1604
1605         rxd_done = vmxnet3_do_poll(rx_queue->adapter, budget);
1606
1607         if (rxd_done < budget) {
1608                 napi_complete(napi);
1609                 vmxnet3_enable_all_intrs(rx_queue->adapter);
1610         }
1611         return rxd_done;
1612 }
1613
1614 /*
1615  * NAPI polling function for MSI-X mode with multiple Rx queues
1616  * Returns the number of NAPI credits consumed (# of rx descriptors processed)
1617  */
1618
1619 static int
1620 vmxnet3_poll_rx_only(struct napi_struct *napi, int budget)
1621 {
1622         struct vmxnet3_rx_queue *rq = container_of(napi,
1623                                                 struct vmxnet3_rx_queue, napi);
1624         struct vmxnet3_adapter *adapter = rq->adapter;
1625         int rxd_done;
1626
1627         /* When sharing interrupt with corresponding tx queue, process
1628          * tx completions in that queue as well
1629          */
1630         if (adapter->share_intr == VMXNET3_INTR_BUDDYSHARE) {
1631                 struct vmxnet3_tx_queue *tq =
1632                                 &adapter->tx_queue[rq - adapter->rx_queue];
1633                 vmxnet3_tq_tx_complete(tq, adapter);
1634         }
1635
1636         rxd_done = vmxnet3_rq_rx_complete(rq, adapter, budget);
1637
1638         if (rxd_done < budget) {
1639                 napi_complete(napi);
1640                 vmxnet3_enable_intr(adapter, rq->comp_ring.intr_idx);
1641         }
1642         return rxd_done;
1643 }
1644
1645
1646 #ifdef CONFIG_PCI_MSI
1647
1648 /*
1649  * Handle completion interrupts on tx queues
1650  * Returns whether or not the intr is handled
1651  */
1652
1653 static irqreturn_t
1654 vmxnet3_msix_tx(int irq, void *data)
1655 {
1656         struct vmxnet3_tx_queue *tq = data;
1657         struct vmxnet3_adapter *adapter = tq->adapter;
1658
1659         if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1660                 vmxnet3_disable_intr(adapter, tq->comp_ring.intr_idx);
1661
1662         /* Handle the case where only one irq is allocated for all tx queues */
1663         if (adapter->share_intr == VMXNET3_INTR_TXSHARE) {
1664                 int i;
1665                 for (i = 0; i < adapter->num_tx_queues; i++) {
1666                         struct vmxnet3_tx_queue *txq = &adapter->tx_queue[i];
1667                         vmxnet3_tq_tx_complete(txq, adapter);
1668                 }
1669         } else {
1670                 vmxnet3_tq_tx_complete(tq, adapter);
1671         }
1672         vmxnet3_enable_intr(adapter, tq->comp_ring.intr_idx);
1673
1674         return IRQ_HANDLED;
1675 }
1676
1677
1678 /*
1679  * Handle completion interrupts on rx queues. Returns whether or not the
1680  * intr is handled
1681  */
1682
1683 static irqreturn_t
1684 vmxnet3_msix_rx(int irq, void *data)
1685 {
1686         struct vmxnet3_rx_queue *rq = data;
1687         struct vmxnet3_adapter *adapter = rq->adapter;
1688
1689         /* disable intr if needed */
1690         if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1691                 vmxnet3_disable_intr(adapter, rq->comp_ring.intr_idx);
1692         napi_schedule(&rq->napi);
1693
1694         return IRQ_HANDLED;
1695 }
1696
1697 /*
1698  *----------------------------------------------------------------------------
1699  *
1700  * vmxnet3_msix_event --
1701  *
1702  *    vmxnet3 msix event intr handler
1703  *
1704  * Result:
1705  *    whether or not the intr is handled
1706  *
1707  *----------------------------------------------------------------------------
1708  */
1709
1710 static irqreturn_t
1711 vmxnet3_msix_event(int irq, void *data)
1712 {
1713         struct net_device *dev = data;
1714         struct vmxnet3_adapter *adapter = netdev_priv(dev);
1715
1716         /* disable intr if needed */
1717         if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1718                 vmxnet3_disable_intr(adapter, adapter->intr.event_intr_idx);
1719
1720         if (adapter->shared->ecr)
1721                 vmxnet3_process_events(adapter);
1722
1723         vmxnet3_enable_intr(adapter, adapter->intr.event_intr_idx);
1724
1725         return IRQ_HANDLED;
1726 }
1727
1728 #endif /* CONFIG_PCI_MSI  */
1729
1730
1731 /* Interrupt handler for vmxnet3  */
1732 static irqreturn_t
1733 vmxnet3_intr(int irq, void *dev_id)
1734 {
1735         struct net_device *dev = dev_id;
1736         struct vmxnet3_adapter *adapter = netdev_priv(dev);
1737
1738         if (adapter->intr.type == VMXNET3_IT_INTX) {
1739                 u32 icr = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_ICR);
1740                 if (unlikely(icr == 0))
1741                         /* not ours */
1742                         return IRQ_NONE;
1743         }
1744
1745
1746         /* disable intr if needed */
1747         if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1748                 vmxnet3_disable_all_intrs(adapter);
1749
1750         napi_schedule(&adapter->rx_queue[0].napi);
1751
1752         return IRQ_HANDLED;
1753 }
1754
1755 #ifdef CONFIG_NET_POLL_CONTROLLER
1756
1757 /* netpoll callback. */
1758 static void
1759 vmxnet3_netpoll(struct net_device *netdev)
1760 {
1761         struct vmxnet3_adapter *adapter = netdev_priv(netdev);
1762
1763         if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1764                 vmxnet3_disable_all_intrs(adapter);
1765
1766         vmxnet3_do_poll(adapter, adapter->rx_queue[0].rx_ring[0].size);
1767         vmxnet3_enable_all_intrs(adapter);
1768
1769 }
1770 #endif  /* CONFIG_NET_POLL_CONTROLLER */
1771
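/*
 * Request an irq for every vector that was set up: in MSI-X mode each
 * tx and rx queue gets its own vector (unless vectors are shared), plus
 * one vector for device events; in MSI/INTx mode a single irq serves
 * the whole device and the number of rx queues is forced to 1.  Also
 * records the interrupt index used by each completion ring.
 */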
1772 static int
1773 vmxnet3_request_irqs(struct vmxnet3_adapter *adapter)
1774 {
1775         struct vmxnet3_intr *intr = &adapter->intr;
1776         int err = 0, i;
1777         int vector = 0;
1778
1779 #ifdef CONFIG_PCI_MSI
1780         if (adapter->intr.type == VMXNET3_IT_MSIX) {
1781                 for (i = 0; i < adapter->num_tx_queues; i++) {
1782                         if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE) {
1783                                 sprintf(adapter->tx_queue[i].name, "%s-tx-%d",
1784                                         adapter->netdev->name, vector);
1785                                 err = request_irq(
1786                                               intr->msix_entries[vector].vector,
1787                                               vmxnet3_msix_tx, 0,
1788                                               adapter->tx_queue[i].name,
1789                                               &adapter->tx_queue[i]);
1790                         } else {
1791                                 sprintf(adapter->tx_queue[i].name, "%s-rxtx-%d",
1792                                         adapter->netdev->name, vector);
1793                         }
1794                         if (err) {
1795                                 dev_err(&adapter->netdev->dev,
1796                                         "Failed to request irq for MSIX, %s, "
1797                                         "error %d\n",
1798                                         adapter->tx_queue[i].name, err);
1799                                 return err;
1800                         }
1801
1802                         /* Handle the case where only 1 MSIx was allocated for
1803                          * all tx queues */
1804                         if (adapter->share_intr == VMXNET3_INTR_TXSHARE) {
1805                                 for (; i < adapter->num_tx_queues; i++)
1806                                         adapter->tx_queue[i].comp_ring.intr_idx
1807                                                                 = vector;
1808                                 vector++;
1809                                 break;
1810                         } else {
1811                                 adapter->tx_queue[i].comp_ring.intr_idx
1812                                                                 = vector++;
1813                         }
1814                 }
1815                 if (adapter->share_intr == VMXNET3_INTR_BUDDYSHARE)
1816                         vector = 0;
1817
1818                 for (i = 0; i < adapter->num_rx_queues; i++) {
1819                         if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE)
1820                                 sprintf(adapter->rx_queue[i].name, "%s-rx-%d",
1821                                         adapter->netdev->name, vector);
1822                         else
1823                                 sprintf(adapter->rx_queue[i].name, "%s-rxtx-%d",
1824                                         adapter->netdev->name, vector);
1825                         err = request_irq(intr->msix_entries[vector].vector,
1826                                           vmxnet3_msix_rx, 0,
1827                                           adapter->rx_queue[i].name,
1828                                           &(adapter->rx_queue[i]));
1829                         if (err) {
1830                                 netdev_err(adapter->netdev,
1831                                            "Failed to request irq for MSIX, "
1832                                            "%s, error %d\n",
1833                                            adapter->rx_queue[i].name, err);
1834                                 return err;
1835                         }
1836
1837                         adapter->rx_queue[i].comp_ring.intr_idx = vector++;
1838                 }
1839
1840                 sprintf(intr->event_msi_vector_name, "%s-event-%d",
1841                         adapter->netdev->name, vector);
1842                 err = request_irq(intr->msix_entries[vector].vector,
1843                                   vmxnet3_msix_event, 0,
1844                                   intr->event_msi_vector_name, adapter->netdev);
1845                 intr->event_intr_idx = vector;
1846
1847         } else if (intr->type == VMXNET3_IT_MSI) {
1848                 adapter->num_rx_queues = 1;
1849                 err = request_irq(adapter->pdev->irq, vmxnet3_intr, 0,
1850                                   adapter->netdev->name, adapter->netdev);
1851         } else {
1852 #endif
1853                 adapter->num_rx_queues = 1;
1854                 err = request_irq(adapter->pdev->irq, vmxnet3_intr,
1855                                   IRQF_SHARED, adapter->netdev->name,
1856                                   adapter->netdev);
1857 #ifdef CONFIG_PCI_MSI
1858         }
1859 #endif
1860         intr->num_intrs = vector + 1;
1861         if (err) {
1862                 netdev_err(adapter->netdev,
1863                            "Failed to request irq (intr type:%d), error %d\n",
1864                            intr->type, err);
1865         } else {
1866                 /* Number of rx queues will not change after this */
1867                 for (i = 0; i < adapter->num_rx_queues; i++) {
1868                         struct vmxnet3_rx_queue *rq = &adapter->rx_queue[i];
1869                         rq->qid = i;
1870                         rq->qid2 = i + adapter->num_rx_queues;
1871                 }
1872
1873
1874
1875                 /* init our intr settings */
1876                 for (i = 0; i < intr->num_intrs; i++)
1877                         intr->mod_levels[i] = UPT1_IML_ADAPTIVE;
1878                 if (adapter->intr.type != VMXNET3_IT_MSIX) {
1879                         adapter->intr.event_intr_idx = 0;
1880                         for (i = 0; i < adapter->num_tx_queues; i++)
1881                                 adapter->tx_queue[i].comp_ring.intr_idx = 0;
1882                         adapter->rx_queue[0].comp_ring.intr_idx = 0;
1883                 }
1884
1885                 netdev_info(adapter->netdev,
1886                             "intr type %u, mode %u, %u vectors allocated\n",
1887                             intr->type, intr->mask_mode, intr->num_intrs);
1888         }
1889
1890         return err;
1891 }
1892
1893
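/*
 * Free every irq requested by vmxnet3_request_irqs(), walking the MSI-X
 * tx and rx vectors in the order they were assigned followed by the
 * event vector, or freeing the single MSI/INTx irq.
 */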
1894 static void
1895 vmxnet3_free_irqs(struct vmxnet3_adapter *adapter)
1896 {
1897         struct vmxnet3_intr *intr = &adapter->intr;
1898         BUG_ON(intr->type == VMXNET3_IT_AUTO || intr->num_intrs <= 0);
1899
1900         switch (intr->type) {
1901 #ifdef CONFIG_PCI_MSI
1902         case VMXNET3_IT_MSIX:
1903         {
1904                 int i, vector = 0;
1905
1906                 if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE) {
1907                         for (i = 0; i < adapter->num_tx_queues; i++) {
1908                                 free_irq(intr->msix_entries[vector++].vector,
1909                                          &(adapter->tx_queue[i]));
1910                                 if (adapter->share_intr == VMXNET3_INTR_TXSHARE)
1911                                         break;
1912                         }
1913                 }
1914
1915                 for (i = 0; i < adapter->num_rx_queues; i++) {
1916                         free_irq(intr->msix_entries[vector++].vector,
1917                                  &(adapter->rx_queue[i]));
1918                 }
1919
1920                 free_irq(intr->msix_entries[vector].vector,
1921                          adapter->netdev);
1922                 BUG_ON(vector >= intr->num_intrs);
1923                 break;
1924         }
1925 #endif
1926         case VMXNET3_IT_MSI:
1927                 free_irq(adapter->pdev->irq, adapter->netdev);
1928                 break;
1929         case VMXNET3_IT_INTX:
1930                 free_irq(adapter->pdev->irq, adapter->netdev);
1931                 break;
1932         default:
1933                 BUG();
1934         }
1935 }
1936
1937
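/*
 * Rebuild the device VLAN filter table from the driver's active_vlans
 * bitmap.  Entry 0 is always set so that untagged packets are accepted.
 */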
1938 static void
1939 vmxnet3_restore_vlan(struct vmxnet3_adapter *adapter)
1940 {
1941         u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
1942         u16 vid;
1943
1944         /* allow untagged pkts */
1945         VMXNET3_SET_VFTABLE_ENTRY(vfTable, 0);
1946
1947         for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
1948                 VMXNET3_SET_VFTABLE_ENTRY(vfTable, vid);
1949 }
1950
1951
1952 static int
1953 vmxnet3_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1954 {
1955         struct vmxnet3_adapter *adapter = netdev_priv(netdev);
1956
1957         if (!(netdev->flags & IFF_PROMISC)) {
1958                 u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
1959                 unsigned long flags;
1960
1961                 VMXNET3_SET_VFTABLE_ENTRY(vfTable, vid);
1962                 spin_lock_irqsave(&adapter->cmd_lock, flags);
1963                 VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
1964                                        VMXNET3_CMD_UPDATE_VLAN_FILTERS);
1965                 spin_unlock_irqrestore(&adapter->cmd_lock, flags);
1966         }
1967
1968         set_bit(vid, adapter->active_vlans);
1969
1970         return 0;
1971 }
1972
1973
1974 static int
1975 vmxnet3_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid)
1976 {
1977         struct vmxnet3_adapter *adapter = netdev_priv(netdev);
1978
1979         if (!(netdev->flags & IFF_PROMISC)) {
1980                 u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
1981                 unsigned long flags;
1982
1983                 VMXNET3_CLEAR_VFTABLE_ENTRY(vfTable, vid);
1984                 spin_lock_irqsave(&adapter->cmd_lock, flags);
1985                 VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
1986                                        VMXNET3_CMD_UPDATE_VLAN_FILTERS);
1987                 spin_unlock_irqrestore(&adapter->cmd_lock, flags);
1988         }
1989
1990         clear_bit(vid, adapter->active_vlans);
1991
1992         return 0;
1993 }
1994
1995
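/*
 * Copy the netdev multicast list into a flat array of ETH_ALEN-byte
 * entries suitable for handing to the device.  Returns NULL if the
 * allocation fails or the table would not fit in the 16-bit mfTableLen
 * field.
 */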
1996 static u8 *
1997 vmxnet3_copy_mc(struct net_device *netdev)
1998 {
1999         u8 *buf = NULL;
2000         u32 sz = netdev_mc_count(netdev) * ETH_ALEN;
2001
2002         /* struct Vmxnet3_RxFilterConf.mfTableLen is u16. */
2003         if (sz <= 0xffff) {
2004                 /* We may be called with BH disabled */
2005                 buf = kmalloc(sz, GFP_ATOMIC);
2006                 if (buf) {
2007                         struct netdev_hw_addr *ha;
2008                         int i = 0;
2009
2010                         netdev_for_each_mc_addr(ha, netdev)
2011                                 memcpy(buf + i++ * ETH_ALEN, ha->addr,
2012                                        ETH_ALEN);
2013                 }
2014         }
2015         return buf;
2016 }
2017
2018
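/*
 * Program the device rx filter: derive the new rx mode from the netdev
 * flags (promiscuous, broadcast, all-multi), map the multicast table if
 * one is needed, issue UPDATE_RX_MODE and UPDATE_VLAN_FILTERS when the
 * mode changed, and always issue UPDATE_MAC_FILTERS, all under cmd_lock.
 */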
2019 static void
2020 vmxnet3_set_mc(struct net_device *netdev)
2021 {
2022         struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2023         unsigned long flags;
2024         struct Vmxnet3_RxFilterConf *rxConf =
2025                                         &adapter->shared->devRead.rxFilterConf;
2026         u8 *new_table = NULL;
2027         dma_addr_t new_table_pa = 0;
2028         u32 new_mode = VMXNET3_RXM_UCAST;
2029
2030         if (netdev->flags & IFF_PROMISC) {
2031                 u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
2032                 memset(vfTable, 0, VMXNET3_VFT_SIZE * sizeof(*vfTable));
2033
2034                 new_mode |= VMXNET3_RXM_PROMISC;
2035         } else {
2036                 vmxnet3_restore_vlan(adapter);
2037         }
2038
2039         if (netdev->flags & IFF_BROADCAST)
2040                 new_mode |= VMXNET3_RXM_BCAST;
2041
2042         if (netdev->flags & IFF_ALLMULTI)
2043                 new_mode |= VMXNET3_RXM_ALL_MULTI;
2044         else
2045                 if (!netdev_mc_empty(netdev)) {
2046                         new_table = vmxnet3_copy_mc(netdev);
2047                         if (new_table) {
2048                                 new_mode |= VMXNET3_RXM_MCAST;
2049                                 rxConf->mfTableLen = cpu_to_le16(
2050                                         netdev_mc_count(netdev) * ETH_ALEN);
2051                                 new_table_pa = dma_map_single(
2052                                                         &adapter->pdev->dev,
2053                                                         new_table,
2054                                                         rxConf->mfTableLen,
2055                                                         PCI_DMA_TODEVICE);
2056                                 rxConf->mfTablePA = cpu_to_le64(new_table_pa);
2057                         } else {
2058                                 netdev_info(netdev, "failed to copy mcast list"
2059                                             ", setting ALL_MULTI\n");
2060                                 new_mode |= VMXNET3_RXM_ALL_MULTI;
2061                         }
2062                 }
2063
2064
2065         if (!(new_mode & VMXNET3_RXM_MCAST)) {
2066                 rxConf->mfTableLen = 0;
2067                 rxConf->mfTablePA = 0;
2068         }
2069
2070         spin_lock_irqsave(&adapter->cmd_lock, flags);
2071         if (new_mode != rxConf->rxMode) {
2072                 rxConf->rxMode = cpu_to_le32(new_mode);
2073                 VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2074                                        VMXNET3_CMD_UPDATE_RX_MODE);
2075                 VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2076                                        VMXNET3_CMD_UPDATE_VLAN_FILTERS);
2077         }
2078
2079         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2080                                VMXNET3_CMD_UPDATE_MAC_FILTERS);
2081         spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2082
2083         if (new_table) {
2084                 dma_unmap_single(&adapter->pdev->dev, new_table_pa,
2085                                  rxConf->mfTableLen, PCI_DMA_TODEVICE);
2086                 kfree(new_table);
2087         }
2088 }
2089
2090 void
2091 vmxnet3_rq_destroy_all(struct vmxnet3_adapter *adapter)
2092 {
2093         int i;
2094
2095         for (i = 0; i < adapter->num_rx_queues; i++)
2096                 vmxnet3_rq_destroy(&adapter->rx_queue[i], adapter);
2097 }
2098
2099
2100 /*
2101  *   Set up driver_shared based on settings in adapter.
2102  */
2103
2104 static void
2105 vmxnet3_setup_driver_shared(struct vmxnet3_adapter *adapter)
2106 {
2107         struct Vmxnet3_DriverShared *shared = adapter->shared;
2108         struct Vmxnet3_DSDevRead *devRead = &shared->devRead;
2109         struct Vmxnet3_TxQueueConf *tqc;
2110         struct Vmxnet3_RxQueueConf *rqc;
2111         int i;
2112
2113         memset(shared, 0, sizeof(*shared));
2114
2115         /* driver settings */
2116         shared->magic = cpu_to_le32(VMXNET3_REV1_MAGIC);
2117         devRead->misc.driverInfo.version = cpu_to_le32(
2118                                                 VMXNET3_DRIVER_VERSION_NUM);
2119         devRead->misc.driverInfo.gos.gosBits = (sizeof(void *) == 4 ?
2120                                 VMXNET3_GOS_BITS_32 : VMXNET3_GOS_BITS_64);
2121         devRead->misc.driverInfo.gos.gosType = VMXNET3_GOS_TYPE_LINUX;
2122         *((u32 *)&devRead->misc.driverInfo.gos) = cpu_to_le32(
2123                                 *((u32 *)&devRead->misc.driverInfo.gos));
2124         devRead->misc.driverInfo.vmxnet3RevSpt = cpu_to_le32(1);
2125         devRead->misc.driverInfo.uptVerSpt = cpu_to_le32(1);
2126
2127         devRead->misc.ddPA = cpu_to_le64(adapter->adapter_pa);
2128         devRead->misc.ddLen = cpu_to_le32(sizeof(struct vmxnet3_adapter));
2129
2130         /* set up feature flags */
2131         if (adapter->netdev->features & NETIF_F_RXCSUM)
2132                 devRead->misc.uptFeatures |= UPT1_F_RXCSUM;
2133
2134         if (adapter->netdev->features & NETIF_F_LRO) {
2135                 devRead->misc.uptFeatures |= UPT1_F_LRO;
2136                 devRead->misc.maxNumRxSG = cpu_to_le16(1 + MAX_SKB_FRAGS);
2137         }
2138         if (adapter->netdev->features & NETIF_F_HW_VLAN_CTAG_RX)
2139                 devRead->misc.uptFeatures |= UPT1_F_RXVLAN;
2140
2141         devRead->misc.mtu = cpu_to_le32(adapter->netdev->mtu);
2142         devRead->misc.queueDescPA = cpu_to_le64(adapter->queue_desc_pa);
2143         devRead->misc.queueDescLen = cpu_to_le32(
2144                 adapter->num_tx_queues * sizeof(struct Vmxnet3_TxQueueDesc) +
2145                 adapter->num_rx_queues * sizeof(struct Vmxnet3_RxQueueDesc));
2146
2147         /* tx queue settings */
2148         devRead->misc.numTxQueues =  adapter->num_tx_queues;
2149         for (i = 0; i < adapter->num_tx_queues; i++) {
2150                 struct vmxnet3_tx_queue *tq = &adapter->tx_queue[i];
2151                 BUG_ON(adapter->tx_queue[i].tx_ring.base == NULL);
2152                 tqc = &adapter->tqd_start[i].conf;
2153                 tqc->txRingBasePA   = cpu_to_le64(tq->tx_ring.basePA);
2154                 tqc->dataRingBasePA = cpu_to_le64(tq->data_ring.basePA);
2155                 tqc->compRingBasePA = cpu_to_le64(tq->comp_ring.basePA);
2156                 tqc->ddPA           = cpu_to_le64(tq->buf_info_pa);
2157                 tqc->txRingSize     = cpu_to_le32(tq->tx_ring.size);
2158                 tqc->dataRingSize   = cpu_to_le32(tq->data_ring.size);
2159                 tqc->compRingSize   = cpu_to_le32(tq->comp_ring.size);
2160                 tqc->ddLen          = cpu_to_le32(
2161                                         sizeof(struct vmxnet3_tx_buf_info) *
2162                                         tqc->txRingSize);
2163                 tqc->intrIdx        = tq->comp_ring.intr_idx;
2164         }
2165
2166         /* rx queue settings */
2167         devRead->misc.numRxQueues = adapter->num_rx_queues;
2168         for (i = 0; i < adapter->num_rx_queues; i++) {
2169                 struct vmxnet3_rx_queue *rq = &adapter->rx_queue[i];
2170                 rqc = &adapter->rqd_start[i].conf;
2171                 rqc->rxRingBasePA[0] = cpu_to_le64(rq->rx_ring[0].basePA);
2172                 rqc->rxRingBasePA[1] = cpu_to_le64(rq->rx_ring[1].basePA);
2173                 rqc->compRingBasePA  = cpu_to_le64(rq->comp_ring.basePA);
2174                 rqc->ddPA            = cpu_to_le64(rq->buf_info_pa);
2175                 rqc->rxRingSize[0]   = cpu_to_le32(rq->rx_ring[0].size);
2176                 rqc->rxRingSize[1]   = cpu_to_le32(rq->rx_ring[1].size);
2177                 rqc->compRingSize    = cpu_to_le32(rq->comp_ring.size);
2178                 rqc->ddLen           = cpu_to_le32(
2179                                         sizeof(struct vmxnet3_rx_buf_info) *
2180                                         (rqc->rxRingSize[0] +
2181                                          rqc->rxRingSize[1]));
2182                 rqc->intrIdx         = rq->comp_ring.intr_idx;
2183         }
2184
2185 #ifdef VMXNET3_RSS
2186         memset(adapter->rss_conf, 0, sizeof(*adapter->rss_conf));
2187
2188         if (adapter->rss) {
2189                 struct UPT1_RSSConf *rssConf = adapter->rss_conf;
2190                 static const uint8_t rss_key[UPT1_RSS_MAX_KEY_SIZE] = {
2191                         0x3b, 0x56, 0xd1, 0x56, 0x13, 0x4a, 0xe7, 0xac,
2192                         0xe8, 0x79, 0x09, 0x75, 0xe8, 0x65, 0x79, 0x28,
2193                         0x35, 0x12, 0xb9, 0x56, 0x7c, 0x76, 0x4b, 0x70,
2194                         0xd8, 0x56, 0xa3, 0x18, 0x9b, 0x0a, 0xee, 0xf3,
2195                         0x96, 0xa6, 0x9f, 0x8f, 0x9e, 0x8c, 0x90, 0xc9,
2196                 };
2197
2198                 devRead->misc.uptFeatures |= UPT1_F_RSS;
2199                 devRead->misc.numRxQueues = adapter->num_rx_queues;
2200                 rssConf->hashType = UPT1_RSS_HASH_TYPE_TCP_IPV4 |
2201                                     UPT1_RSS_HASH_TYPE_IPV4 |
2202                                     UPT1_RSS_HASH_TYPE_TCP_IPV6 |
2203                                     UPT1_RSS_HASH_TYPE_IPV6;
2204                 rssConf->hashFunc = UPT1_RSS_HASH_FUNC_TOEPLITZ;
2205                 rssConf->hashKeySize = UPT1_RSS_MAX_KEY_SIZE;
2206                 rssConf->indTableSize = VMXNET3_RSS_IND_TABLE_SIZE;
2207                 memcpy(rssConf->hashKey, rss_key, sizeof(rss_key));
2208
2209                 for (i = 0; i < rssConf->indTableSize; i++)
2210                         rssConf->indTable[i] = ethtool_rxfh_indir_default(
2211                                 i, adapter->num_rx_queues);
2212
2213                 devRead->rssConfDesc.confVer = 1;
2214                 devRead->rssConfDesc.confLen = cpu_to_le32(sizeof(*rssConf));
2215                 devRead->rssConfDesc.confPA =
2216                         cpu_to_le64(adapter->rss_conf_pa);
2217         }
2218
2219 #endif /* VMXNET3_RSS */
2220
2221         /* intr settings */
2222         devRead->intrConf.autoMask = adapter->intr.mask_mode ==
2223                                      VMXNET3_IMM_AUTO;
2224         devRead->intrConf.numIntrs = adapter->intr.num_intrs;
2225         for (i = 0; i < adapter->intr.num_intrs; i++)
2226                 devRead->intrConf.modLevels[i] = adapter->intr.mod_levels[i];
2227
2228         devRead->intrConf.eventIntrIdx = adapter->intr.event_intr_idx;
2229         devRead->intrConf.intrCtrl |= cpu_to_le32(VMXNET3_IC_DISABLE_ALL);
2230
2231         /* rx filter settings */
2232         devRead->rxFilterConf.rxMode = 0;
2233         vmxnet3_restore_vlan(adapter);
2234         vmxnet3_write_mac_addr(adapter, adapter->netdev->dev_addr);
2235
2236         /* the rest are already zeroed */
2237 }
2238
2239
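/*
 * Bring the device to the active state: initialize the tx/rx queues,
 * request irqs, fill in driver_shared and hand its address to the
 * device, issue VMXNET3_CMD_ACTIVATE_DEV, prime the rx producer
 * registers, apply the rx filter, and finally enable NAPI and
 * interrupts.  On failure everything set up so far is torn down.
 */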
2240 int
2241 vmxnet3_activate_dev(struct vmxnet3_adapter *adapter)
2242 {
2243         int err, i;
2244         u32 ret;
2245         unsigned long flags;
2246
2247         netdev_dbg(adapter->netdev, "%s: skb_buf_size %d, rx_buf_per_pkt %d,"
2248                 " ring sizes %u %u %u\n", adapter->netdev->name,
2249                 adapter->skb_buf_size, adapter->rx_buf_per_pkt,
2250                 adapter->tx_queue[0].tx_ring.size,
2251                 adapter->rx_queue[0].rx_ring[0].size,
2252                 adapter->rx_queue[0].rx_ring[1].size);
2253
2254         vmxnet3_tq_init_all(adapter);
2255         err = vmxnet3_rq_init_all(adapter);
2256         if (err) {
2257                 netdev_err(adapter->netdev,
2258                            "Failed to init rx queue: error %d\n", err);
2259                 goto rq_err;
2260         }
2261
2262         err = vmxnet3_request_irqs(adapter);
2263         if (err) {
2264                 netdev_err(adapter->netdev,
2265                            "Failed to set up irqs: error %d\n", err);
2266                 goto irq_err;
2267         }
2268
2269         vmxnet3_setup_driver_shared(adapter);
2270
2271         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAL, VMXNET3_GET_ADDR_LO(
2272                                adapter->shared_pa));
2273         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAH, VMXNET3_GET_ADDR_HI(
2274                                adapter->shared_pa));
2275         spin_lock_irqsave(&adapter->cmd_lock, flags);
2276         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2277                                VMXNET3_CMD_ACTIVATE_DEV);
2278         ret = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD);
2279         spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2280
2281         if (ret != 0) {
2282                 netdev_err(adapter->netdev,
2283                            "Failed to activate dev: error %u\n", ret);
2284                 err = -EINVAL;
2285                 goto activate_err;
2286         }
2287
2288         for (i = 0; i < adapter->num_rx_queues; i++) {
2289                 VMXNET3_WRITE_BAR0_REG(adapter,
2290                                 VMXNET3_REG_RXPROD + i * VMXNET3_REG_ALIGN,
2291                                 adapter->rx_queue[i].rx_ring[0].next2fill);
2292                 VMXNET3_WRITE_BAR0_REG(adapter, (VMXNET3_REG_RXPROD2 +
2293                                 (i * VMXNET3_REG_ALIGN)),
2294                                 adapter->rx_queue[i].rx_ring[1].next2fill);
2295         }
2296
2297         /* Apply the rx filter settings last. */
2298         vmxnet3_set_mc(adapter->netdev);
2299
2300         /*
2301          * Check link state when first activating device. It will start the
2302          * tx queue if the link is up.
2303          */
2304         vmxnet3_check_link(adapter, true);
2305         for (i = 0; i < adapter->num_rx_queues; i++)
2306                 napi_enable(&adapter->rx_queue[i].napi);
2307         vmxnet3_enable_all_intrs(adapter);
2308         clear_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state);
2309         return 0;
2310
2311 activate_err:
2312         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAL, 0);
2313         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAH, 0);
2314         vmxnet3_free_irqs(adapter);
2315 irq_err:
2316 rq_err:
2317         /* free up buffers we allocated */
2318         vmxnet3_rq_cleanup_all(adapter);
2319         return err;
2320 }
2321
2322
2323 void
2324 vmxnet3_reset_dev(struct vmxnet3_adapter *adapter)
2325 {
2326         unsigned long flags;
2327         spin_lock_irqsave(&adapter->cmd_lock, flags);
2328         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_RESET_DEV);
2329         spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2330 }
2331
2332
2333 int
2334 vmxnet3_quiesce_dev(struct vmxnet3_adapter *adapter)
2335 {
2336         int i;
2337         unsigned long flags;
2338         if (test_and_set_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state))
2339                 return 0;
2340
2341
2342         spin_lock_irqsave(&adapter->cmd_lock, flags);
2343         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2344                                VMXNET3_CMD_QUIESCE_DEV);
2345         spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2346         vmxnet3_disable_all_intrs(adapter);
2347
2348         for (i = 0; i < adapter->num_rx_queues; i++)
2349                 napi_disable(&adapter->rx_queue[i].napi);
2350         netif_tx_disable(adapter->netdev);
2351         adapter->link_speed = 0;
2352         netif_carrier_off(adapter->netdev);
2353
2354         vmxnet3_tq_cleanup_all(adapter);
2355         vmxnet3_rq_cleanup_all(adapter);
2356         vmxnet3_free_irqs(adapter);
2357         return 0;
2358 }
2359
2360
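/*
 * Program the MAC address into the device.  The first four bytes are
 * written to VMXNET3_REG_MACL and the remaining two to the low 16 bits
 * of VMXNET3_REG_MACH; e.g. on a little-endian host, 00:0c:29:aa:bb:cc
 * would be written as MACL = 0xaa290c00 and MACH = 0x0000ccbb.
 */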
2361 static void
2362 vmxnet3_write_mac_addr(struct vmxnet3_adapter *adapter, u8 *mac)
2363 {
2364         u32 tmp;
2365
2366         tmp = *(u32 *)mac;
2367         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_MACL, tmp);
2368
2369         tmp = (mac[5] << 8) | mac[4];
2370         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_MACH, tmp);
2371 }
2372
2373
2374 static int
2375 vmxnet3_set_mac_addr(struct net_device *netdev, void *p)
2376 {
2377         struct sockaddr *addr = p;
2378         struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2379
2380         memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
2381         vmxnet3_write_mac_addr(adapter, addr->sa_data);
2382
2383         return 0;
2384 }
2385
2386
2387 /* ==================== initialization and cleanup routines ============ */
2388
2389 static int
2390 vmxnet3_alloc_pci_resources(struct vmxnet3_adapter *adapter, bool *dma64)
2391 {
2392         int err;
2393         unsigned long mmio_start, mmio_len;
2394         struct pci_dev *pdev = adapter->pdev;
2395
2396         err = pci_enable_device(pdev);
2397         if (err) {
2398                 dev_err(&pdev->dev, "Failed to enable adapter: error %d\n", err);
2399                 return err;
2400         }
2401
2402         if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) {
2403                 if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)) != 0) {
2404                         dev_err(&pdev->dev,
2405                                 "pci_set_consistent_dma_mask failed\n");
2406                         err = -EIO;
2407                         goto err_set_mask;
2408                 }
2409                 *dma64 = true;
2410         } else {
2411                 if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0) {
2412                         dev_err(&pdev->dev,
2413                                 "pci_set_dma_mask failed\n");
2414                         err = -EIO;
2415                         goto err_set_mask;
2416                 }
2417                 *dma64 = false;
2418         }
2419
2420         err = pci_request_selected_regions(pdev, (1 << 2) - 1,
2421                                            vmxnet3_driver_name);
2422         if (err) {
2423                 dev_err(&pdev->dev,
2424                         "Failed to request region for adapter: error %d\n", err);
2425                 goto err_set_mask;
2426         }
2427
2428         pci_set_master(pdev);
2429
2430         mmio_start = pci_resource_start(pdev, 0);
2431         mmio_len = pci_resource_len(pdev, 0);
2432         adapter->hw_addr0 = ioremap(mmio_start, mmio_len);
2433         if (!adapter->hw_addr0) {
2434                 dev_err(&pdev->dev, "Failed to map bar0\n");
2435                 err = -EIO;
2436                 goto err_ioremap;
2437         }
2438
2439         mmio_start = pci_resource_start(pdev, 1);
2440         mmio_len = pci_resource_len(pdev, 1);
2441         adapter->hw_addr1 = ioremap(mmio_start, mmio_len);
2442         if (!adapter->hw_addr1) {
2443                 dev_err(&pdev->dev, "Failed to map bar1\n");
2444                 err = -EIO;
2445                 goto err_bar1;
2446         }
2447         return 0;
2448
2449 err_bar1:
2450         iounmap(adapter->hw_addr0);
2451 err_ioremap:
2452         pci_release_selected_regions(pdev, (1 << 2) - 1);
2453 err_set_mask:
2454         pci_disable_device(pdev);
2455         return err;
2456 }
2457
2458
2459 static void
2460 vmxnet3_free_pci_resources(struct vmxnet3_adapter *adapter)
2461 {
2462         BUG_ON(!adapter->pdev);
2463
2464         iounmap(adapter->hw_addr0);
2465         iounmap(adapter->hw_addr1);
2466         pci_release_selected_regions(adapter->pdev, (1 << 2) - 1);
2467         pci_disable_device(adapter->pdev);
2468 }
2469
2470
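/*
 * Derive skb_buf_size and rx_buf_per_pkt from the current MTU: packets
 * that fit in a single skb buffer need one descriptor, while larger
 * MTUs add page-sized body buffers.  Ring 0 is then rounded up to a
 * multiple of rx_buf_per_pkt * VMXNET3_RING_SIZE_ALIGN and the
 * resulting ring and completion-ring sizes are copied to every rx
 * queue.
 */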
2471 static void
2472 vmxnet3_adjust_rx_ring_size(struct vmxnet3_adapter *adapter)
2473 {
2474         size_t sz, i, ring0_size, ring1_size, comp_size;
2475         struct vmxnet3_rx_queue *rq = &adapter->rx_queue[0];
2476
2477
2478         if (adapter->netdev->mtu <= VMXNET3_MAX_SKB_BUF_SIZE -
2479                                     VMXNET3_MAX_ETH_HDR_SIZE) {
2480                 adapter->skb_buf_size = adapter->netdev->mtu +
2481                                         VMXNET3_MAX_ETH_HDR_SIZE;
2482                 if (adapter->skb_buf_size < VMXNET3_MIN_T0_BUF_SIZE)
2483                         adapter->skb_buf_size = VMXNET3_MIN_T0_BUF_SIZE;
2484
2485                 adapter->rx_buf_per_pkt = 1;
2486         } else {
2487                 adapter->skb_buf_size = VMXNET3_MAX_SKB_BUF_SIZE;
2488                 sz = adapter->netdev->mtu - VMXNET3_MAX_SKB_BUF_SIZE +
2489                                             VMXNET3_MAX_ETH_HDR_SIZE;
2490                 adapter->rx_buf_per_pkt = 1 + (sz + PAGE_SIZE - 1) / PAGE_SIZE;
2491         }
2492
2493         /*
2494          * for simplicity, force the ring0 size to be a multiple of
2495          * rx_buf_per_pkt * VMXNET3_RING_SIZE_ALIGN
2496          */
2497         sz = adapter->rx_buf_per_pkt * VMXNET3_RING_SIZE_ALIGN;
2498         ring0_size = adapter->rx_queue[0].rx_ring[0].size;
2499         ring0_size = (ring0_size + sz - 1) / sz * sz;
2500         ring0_size = min_t(u32, ring0_size, VMXNET3_RX_RING_MAX_SIZE /
2501                            sz * sz);
2502         ring1_size = adapter->rx_queue[0].rx_ring[1].size;
2503         comp_size = ring0_size + ring1_size;
2504
2505         for (i = 0; i < adapter->num_rx_queues; i++) {
2506                 rq = &adapter->rx_queue[i];
2507                 rq->rx_ring[0].size = ring0_size;
2508                 rq->rx_ring[1].size = ring1_size;
2509                 rq->comp_ring.size = comp_size;
2510         }
2511 }
2512
2513
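/*
 * Allocate all tx and rx queues with the requested ring sizes (the rx
 * ring sizes are first adjusted for the MTU).  Failing to create a tx
 * queue is fatal; failing to create rx queue i > 0 merely reduces
 * num_rx_queues to i.
 */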
2514 int
2515 vmxnet3_create_queues(struct vmxnet3_adapter *adapter, u32 tx_ring_size,
2516                       u32 rx_ring_size, u32 rx_ring2_size)
2517 {
2518         int err = 0, i;
2519
2520         for (i = 0; i < adapter->num_tx_queues; i++) {
2521                 struct vmxnet3_tx_queue *tq = &adapter->tx_queue[i];
2522                 tq->tx_ring.size   = tx_ring_size;
2523                 tq->data_ring.size = tx_ring_size;
2524                 tq->comp_ring.size = tx_ring_size;
2525                 tq->shared = &adapter->tqd_start[i].ctrl;
2526                 tq->stopped = true;
2527                 tq->adapter = adapter;
2528                 tq->qid = i;
2529                 err = vmxnet3_tq_create(tq, adapter);
2530                 /*
2531                  * Too late to change num_tx_queues. We cannot make do with
2532                  * fewer queues than we asked for.
2533                  */
2534                 if (err)
2535                         goto queue_err;
2536         }
2537
2538         adapter->rx_queue[0].rx_ring[0].size = rx_ring_size;
2539         adapter->rx_queue[0].rx_ring[1].size = rx_ring2_size;
2540         vmxnet3_adjust_rx_ring_size(adapter);
2541         for (i = 0; i < adapter->num_rx_queues; i++) {
2542                 struct vmxnet3_rx_queue *rq = &adapter->rx_queue[i];
2543                 /* qid and qid2 for rx queues will be assigned later when num
2544                  * of rx queues is finalized after allocating intrs */
2545                 rq->shared = &adapter->rqd_start[i].ctrl;
2546                 rq->adapter = adapter;
2547                 err = vmxnet3_rq_create(rq, adapter);
2548                 if (err) {
2549                         if (i == 0) {
2550                                 netdev_err(adapter->netdev,
2551                                            "Could not allocate any rx queues. "
2552                                            "Aborting.\n");
2553                                 goto queue_err;
2554                         } else {
2555                                 netdev_info(adapter->netdev,
2556                                             "Number of rx queues changed "
2557                                             "to: %d.\n", i);
2558                                 adapter->num_rx_queues = i;
2559                                 err = 0;
2560                                 break;
2561                         }
2562                 }
2563         }
2564         return err;
2565 queue_err:
2566         vmxnet3_tq_destroy_all(adapter);
2567         return err;
2568 }
2569
2570 static int
2571 vmxnet3_open(struct net_device *netdev)
2572 {
2573         struct vmxnet3_adapter *adapter;
2574         int err, i;
2575
2576         adapter = netdev_priv(netdev);
2577
2578         for (i = 0; i < adapter->num_tx_queues; i++)
2579                 spin_lock_init(&adapter->tx_queue[i].tx_lock);
2580
2581         err = vmxnet3_create_queues(adapter, VMXNET3_DEF_TX_RING_SIZE,
2582                                     VMXNET3_DEF_RX_RING_SIZE,
2583                                     VMXNET3_DEF_RX_RING_SIZE);
2584         if (err)
2585                 goto queue_err;
2586
2587         err = vmxnet3_activate_dev(adapter);
2588         if (err)
2589                 goto activate_err;
2590
2591         return 0;
2592
2593 activate_err:
2594         vmxnet3_rq_destroy_all(adapter);
2595         vmxnet3_tq_destroy_all(adapter);
2596 queue_err:
2597         return err;
2598 }
2599
2600
2601 static int
2602 vmxnet3_close(struct net_device *netdev)
2603 {
2604         struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2605
2606         /*
2607          * Reset_work may be in the middle of resetting the device, wait for its
2608          * completion.
2609          */
2610         while (test_and_set_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state))
2611                 msleep(1);
2612
2613         vmxnet3_quiesce_dev(adapter);
2614
2615         vmxnet3_rq_destroy_all(adapter);
2616         vmxnet3_tq_destroy_all(adapter);
2617
2618         clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state);
2619
2620
2621         return 0;
2622 }
2623
2624
2625 void
2626 vmxnet3_force_close(struct vmxnet3_adapter *adapter)
2627 {
2628         int i;
2629
2630         /*
2631          * we must clear VMXNET3_STATE_BIT_RESETTING, otherwise
2632          * vmxnet3_close() will deadlock.
2633          */
2634         BUG_ON(test_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state));
2635
2636         /* we need to enable NAPI, otherwise dev_close will deadlock */
2637         for (i = 0; i < adapter->num_rx_queues; i++)
2638                 napi_enable(&adapter->rx_queue[i].napi);
2639         dev_close(adapter->netdev);
2640 }
2641
2642
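/*
 * MTU change handler: if the interface is running, quiesce and reset
 * the device, re-create the rx queues sized for the new MTU and
 * re-activate.  Any failure forces the device closed.
 */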
2643 static int
2644 vmxnet3_change_mtu(struct net_device *netdev, int new_mtu)
2645 {
2646         struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2647         int err = 0;
2648
2649         if (new_mtu < VMXNET3_MIN_MTU || new_mtu > VMXNET3_MAX_MTU)
2650                 return -EINVAL;
2651
2652         netdev->mtu = new_mtu;
2653
2654         /*
2655          * Reset_work may be in the middle of resetting the device, wait for its
2656          * completion.
2657          */
2658         while (test_and_set_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state))
2659                 msleep(1);
2660
2661         if (netif_running(netdev)) {
2662                 vmxnet3_quiesce_dev(adapter);
2663                 vmxnet3_reset_dev(adapter);
2664
2665                 /* we need to re-create the rx queue based on the new mtu */
2666                 vmxnet3_rq_destroy_all(adapter);
2667                 vmxnet3_adjust_rx_ring_size(adapter);
2668                 err = vmxnet3_rq_create_all(adapter);
2669                 if (err) {
2670                         netdev_err(netdev,
2671                                    "failed to re-create rx queues, "
2672                                    "error %d. Closing it.\n", err);
2673                         goto out;
2674                 }
2675
2676                 err = vmxnet3_activate_dev(adapter);
2677                 if (err) {
2678                         netdev_err(netdev,
2679                                    "failed to re-activate, error %d. "
2680                                    "Closing it\n", err);
2681                         goto out;
2682                 }
2683         }
2684
2685 out:
2686         clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state);
2687         if (err)
2688                 vmxnet3_force_close(adapter);
2689
2690         return err;
2691 }
2692
2693
2694 static void
2695 vmxnet3_declare_features(struct vmxnet3_adapter *adapter, bool dma64)
2696 {
2697         struct net_device *netdev = adapter->netdev;
2698
2699         netdev->hw_features = NETIF_F_SG | NETIF_F_RXCSUM |
2700                 NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_CTAG_TX |
2701                 NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_TSO | NETIF_F_TSO6 |
2702                 NETIF_F_LRO;
2703         if (dma64)
2704                 netdev->hw_features |= NETIF_F_HIGHDMA;
2705         netdev->vlan_features = netdev->hw_features &
2706                                 ~(NETIF_F_HW_VLAN_CTAG_TX |
2707                                   NETIF_F_HW_VLAN_CTAG_RX);
2708         netdev->features = netdev->hw_features | NETIF_F_HW_VLAN_CTAG_FILTER;
2709 }
2710
2711
2712 static void
2713 vmxnet3_read_mac_addr(struct vmxnet3_adapter *adapter, u8 *mac)
2714 {
2715         u32 tmp;
2716
2717         tmp = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_MACL);
2718         *(u32 *)mac = tmp;
2719
2720         tmp = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_MACH);
2721         mac[4] = tmp & 0xff;
2722         mac[5] = (tmp >> 8) & 0xff;
2723 }
2724
2725 #ifdef CONFIG_PCI_MSI
2726
2727 /*
2728  * Enable MSI-X vectors.
2729  * Returns:
2730  *      0 on successful enabling of the required number of vectors,
2731  *      VMXNET3_LINUX_MIN_MSIX_VECT when only the minimum number of
2732  *       required vectors could be enabled,
2733  *      otherwise the number of vectors which could be enabled (this number
2734  *       is smaller than VMXNET3_LINUX_MIN_MSIX_VECT).
2735  */
2736
2737 static int
2738 vmxnet3_acquire_msix_vectors(struct vmxnet3_adapter *adapter,
2739                              int vectors)
2740 {
2741         int err = 0, vector_threshold;
2742         vector_threshold = VMXNET3_LINUX_MIN_MSIX_VECT;
2743
2744         while (vectors >= vector_threshold) {
2745                 err = pci_enable_msix(adapter->pdev, adapter->intr.msix_entries,
2746                                       vectors);
2747                 if (!err) {
2748                         adapter->intr.num_intrs = vectors;
2749                         return 0;
2750                 } else if (err < 0) {
2751                         dev_err(&adapter->netdev->dev,
2752                                    "Failed to enable MSI-X, error: %d\n", err);
2753                         vectors = 0;
2754                 } else if (err < vector_threshold) {
2755                         break;
2756                 } else {
2757                         /* If we fail to enable the required number of MSI-X
2758                          * vectors, try enabling the minimum number required.
2759                          */
2760                         dev_err(&adapter->netdev->dev,
2761                                 "Failed to enable %d MSI-X, trying %d instead\n",
2762                                     vectors, vector_threshold);
2763                         vectors = vector_threshold;
2764                 }
2765         }
2766
2767         dev_info(&adapter->pdev->dev,
2768                  "Number of MSI-X interrupts which can be allocated "
2769                  "is lower than min threshold required.\n");
2770         return err;
2771 }
2772
2773
2774 #endif /* CONFIG_PCI_MSI */
2775
2776 static void
2777 vmxnet3_alloc_intr_resources(struct vmxnet3_adapter *adapter)
2778 {
2779         u32 cfg;
2780         unsigned long flags;
2781
2782         /* intr settings */
2783         spin_lock_irqsave(&adapter->cmd_lock, flags);
2784         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2785                                VMXNET3_CMD_GET_CONF_INTR);
2786         cfg = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD);
2787         spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2788         adapter->intr.type = cfg & 0x3;
2789         adapter->intr.mask_mode = (cfg >> 2) & 0x3;
2790
2791         if (adapter->intr.type == VMXNET3_IT_AUTO) {
2792                 adapter->intr.type = VMXNET3_IT_MSIX;
2793         }
2794
2795 #ifdef CONFIG_PCI_MSI
2796         if (adapter->intr.type == VMXNET3_IT_MSIX) {
2797                 int vector, err = 0;
2798
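                /* Vector budget: one per tx queue (or a single shared tx
                 * vector with TXSHARE), one per rx queue unless rx queues
                 * piggyback on the tx vectors (BUDDYSHARE), plus one for
                 * link events; never less than VMXNET3_LINUX_MIN_MSIX_VECT.
                 */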
2799                 adapter->intr.num_intrs = (adapter->share_intr ==
2800                                            VMXNET3_INTR_TXSHARE) ? 1 :
2801                                            adapter->num_tx_queues;
2802                 adapter->intr.num_intrs += (adapter->share_intr ==
2803                                            VMXNET3_INTR_BUDDYSHARE) ? 0 :
2804                                            adapter->num_rx_queues;
2805                 adapter->intr.num_intrs += 1;           /* for link event */
2806
2807                 adapter->intr.num_intrs = (adapter->intr.num_intrs >
2808                                            VMXNET3_LINUX_MIN_MSIX_VECT
2809                                            ? adapter->intr.num_intrs :
2810                                            VMXNET3_LINUX_MIN_MSIX_VECT);
2811
2812                 for (vector = 0; vector < adapter->intr.num_intrs; vector++)
2813                         adapter->intr.msix_entries[vector].entry = vector;
2814
2815                 err = vmxnet3_acquire_msix_vectors(adapter,
2816                                                    adapter->intr.num_intrs);
2817                 /* If we cannot allocate one MSIx vector per queue
2818                  * then limit the number of rx queues to 1
2819                  */
2820                 if (err == VMXNET3_LINUX_MIN_MSIX_VECT) {
2821                         if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE
2822                             || adapter->num_rx_queues != 1) {
2823                                 adapter->share_intr = VMXNET3_INTR_TXSHARE;
2824                                 netdev_err(adapter->netdev,
2825                                            "Limiting number of rx queues to 1\n");
2826                                 adapter->num_rx_queues = 1;
2827                                 adapter->intr.num_intrs =
2828                                                 VMXNET3_LINUX_MIN_MSIX_VECT;
2829                         }
2830                         return;
2831                 }
2832                 if (!err)
2833                         return;
2834
2835                 /* If we cannot allocate MSIx vectors use only one rx queue */
2836                 dev_info(&adapter->pdev->dev,
2837                          "Failed to enable MSI-X, error %d. "
2838                          "Limiting #rx queues to 1, try MSI.\n", err);
2839
2840                 adapter->intr.type = VMXNET3_IT_MSI;
2841         }
2842
2843         if (adapter->intr.type == VMXNET3_IT_MSI) {
2844                 int err;
2845                 err = pci_enable_msi(adapter->pdev);
2846                 if (!err) {
2847                         adapter->num_rx_queues = 1;
2848                         adapter->intr.num_intrs = 1;
2849                         return;
2850                 }
2851         }
2852 #endif /* CONFIG_PCI_MSI */
2853
2854         adapter->num_rx_queues = 1;
2855         dev_info(&adapter->netdev->dev,
2856                  "Using INTx interrupt, #Rx queues: 1.\n");
2857         adapter->intr.type = VMXNET3_IT_INTX;
2858
2859         /* INTx uses a single interrupt for the whole device */
2860         adapter->intr.num_intrs = 1;
2861 }
2862
2863
2864 static void
2865 vmxnet3_free_intr_resources(struct vmxnet3_adapter *adapter)
2866 {
2867         if (adapter->intr.type == VMXNET3_IT_MSIX)
2868                 pci_disable_msix(adapter->pdev);
2869         else if (adapter->intr.type == VMXNET3_IT_MSI)
2870                 pci_disable_msi(adapter->pdev);
2871         else
2872                 BUG_ON(adapter->intr.type != VMXNET3_IT_INTX);
2873 }
2874
2875
2876 static void
2877 vmxnet3_tx_timeout(struct net_device *netdev)
2878 {
2879         struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2880         adapter->tx_timeout_count++;
2881
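        /* Defer the reset to the work queue: vmxnet3_reset_work() needs
         * process context since it takes rtnl_lock.
         */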
2882         netdev_err(adapter->netdev, "tx hang\n");
2883         schedule_work(&adapter->work);
2884         netif_wake_queue(adapter->netdev);
2885 }
2886
2887
2888 static void
2889 vmxnet3_reset_work(struct work_struct *data)
2890 {
2891         struct vmxnet3_adapter *adapter;
2892
2893         adapter = container_of(data, struct vmxnet3_adapter, work);
2894
2895         /* if another thread is resetting the device, no need to proceed */
2896         if (test_and_set_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state))
2897                 return;
2898
2899         /* if the device is closed, we must leave it alone */
2900         rtnl_lock();
2901         if (netif_running(adapter->netdev)) {
2902                 netdev_notice(adapter->netdev, "resetting\n");
2903                 vmxnet3_quiesce_dev(adapter);
2904                 vmxnet3_reset_dev(adapter);
2905                 vmxnet3_activate_dev(adapter);
2906         } else {
2907                 netdev_info(adapter->netdev, "already closed\n");
2908         }
2909         rtnl_unlock();
2910
2911         clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state);
2912 }
2913
2914
2915 static int
2916 vmxnet3_probe_device(struct pci_dev *pdev,
2917                      const struct pci_device_id *id)
2918 {
2919         static const struct net_device_ops vmxnet3_netdev_ops = {
2920                 .ndo_open = vmxnet3_open,
2921                 .ndo_stop = vmxnet3_close,
2922                 .ndo_start_xmit = vmxnet3_xmit_frame,
2923                 .ndo_set_mac_address = vmxnet3_set_mac_addr,
2924                 .ndo_change_mtu = vmxnet3_change_mtu,
2925                 .ndo_set_features = vmxnet3_set_features,
2926                 .ndo_get_stats64 = vmxnet3_get_stats64,
2927                 .ndo_tx_timeout = vmxnet3_tx_timeout,
2928                 .ndo_set_rx_mode = vmxnet3_set_mc,
2929                 .ndo_vlan_rx_add_vid = vmxnet3_vlan_rx_add_vid,
2930                 .ndo_vlan_rx_kill_vid = vmxnet3_vlan_rx_kill_vid,
2931 #ifdef CONFIG_NET_POLL_CONTROLLER
2932                 .ndo_poll_controller = vmxnet3_netpoll,
2933 #endif
2934         };
2935         int err;
2936         bool dma64 = false; /* quiet a bogus "uninitialized" warning */
2937         u32 ver;
2938         struct net_device *netdev;
2939         struct vmxnet3_adapter *adapter;
2940         u8 mac[ETH_ALEN];
2941         int size;
2942         int num_tx_queues;
2943         int num_rx_queues;
2944
2945         if (!pci_msi_enabled())
2946                 enable_mq = 0;
2947
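        /* Queue counts are capped by the device maximum and the number of
         * online CPUs, then rounded down to a power of two; without MSI
         * support we fall back to a single queue of each kind.
         */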
2948 #ifdef VMXNET3_RSS
2949         if (enable_mq)
2950                 num_rx_queues = min(VMXNET3_DEVICE_MAX_RX_QUEUES,
2951                                     (int)num_online_cpus());
2952         else
2953 #endif
2954                 num_rx_queues = 1;
2955         num_rx_queues = rounddown_pow_of_two(num_rx_queues);
2956
2957         if (enable_mq)
2958                 num_tx_queues = min(VMXNET3_DEVICE_MAX_TX_QUEUES,
2959                                     (int)num_online_cpus());
2960         else
2961                 num_tx_queues = 1;
2962
2963         num_tx_queues = rounddown_pow_of_two(num_tx_queues);
2964         netdev = alloc_etherdev_mq(sizeof(struct vmxnet3_adapter),
2965                                    max(num_tx_queues, num_rx_queues));
2966         dev_info(&pdev->dev,
2967                  "# of Tx queues : %d, # of Rx queues : %d\n",
2968                  num_tx_queues, num_rx_queues);
2969
2970         if (!netdev)
2971                 return -ENOMEM;
2972
2973         pci_set_drvdata(pdev, netdev);
2974         adapter = netdev_priv(netdev);
2975         adapter->netdev = netdev;
2976         adapter->pdev = pdev;
2977
2978         spin_lock_init(&adapter->cmd_lock);
2979         adapter->adapter_pa = dma_map_single(&adapter->pdev->dev, adapter,
2980                                              sizeof(struct vmxnet3_adapter),
2981                                              PCI_DMA_TODEVICE);
2982         adapter->shared = dma_alloc_coherent(
2983                                 &adapter->pdev->dev,
2984                                 sizeof(struct Vmxnet3_DriverShared),
2985                                 &adapter->shared_pa, GFP_KERNEL);
2986         if (!adapter->shared) {
2987                 dev_err(&pdev->dev, "Failed to allocate memory\n");
2988                 err = -ENOMEM;
2989                 goto err_alloc_shared;
2990         }
2991
2992         adapter->num_rx_queues = num_rx_queues;
2993         adapter->num_tx_queues = num_tx_queues;
2994         adapter->rx_buf_per_pkt = 1;
2995
2996         size = sizeof(struct Vmxnet3_TxQueueDesc) * adapter->num_tx_queues;
2997         size += sizeof(struct Vmxnet3_RxQueueDesc) * adapter->num_rx_queues;
2998         adapter->tqd_start = dma_alloc_coherent(&adapter->pdev->dev, size,
2999                                                 &adapter->queue_desc_pa,
3000                                                 GFP_KERNEL);
3001
3002         if (!adapter->tqd_start) {
3003                 dev_err(&pdev->dev, "Failed to allocate memory\n");
3004                 err = -ENOMEM;
3005                 goto err_alloc_queue_desc;
3006         }
3007         adapter->rqd_start = (struct Vmxnet3_RxQueueDesc *)(adapter->tqd_start +
3008                                                             adapter->num_tx_queues);
3009
3010         adapter->pm_conf = dma_alloc_coherent(&adapter->pdev->dev,
3011                                               sizeof(struct Vmxnet3_PMConf),
3012                                               &adapter->pm_conf_pa,
3013                                               GFP_KERNEL);
3014         if (adapter->pm_conf == NULL) {
3015                 err = -ENOMEM;
3016                 goto err_alloc_pm;
3017         }
3018
3019 #ifdef VMXNET3_RSS
3020
3021         adapter->rss_conf = dma_alloc_coherent(&adapter->pdev->dev,
3022                                                sizeof(struct UPT1_RSSConf),
3023                                                &adapter->rss_conf_pa,
3024                                                GFP_KERNEL);
3025         if (adapter->rss_conf == NULL) {
3026                 err = -ENOMEM;
3027                 goto err_alloc_rss;
3028         }
3029 #endif /* VMXNET3_RSS */
3030
3031         err = vmxnet3_alloc_pci_resources(adapter, &dma64);
3032         if (err < 0)
3033                 goto err_alloc_pci;
3034
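        /* VRRS/UVRS advertise the hardware and UPT revisions the device
         * supports; bit 0 means revision 1, and writing 1 back selects it.
         */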
3035         ver = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_VRRS);
3036         if (ver & 1) {
3037                 VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_VRRS, 1);
3038         } else {
3039                 dev_err(&pdev->dev,
3040                         "Incompatible h/w version (0x%x) for adapter\n", ver);
3041                 err = -EBUSY;
3042                 goto err_ver;
3043         }
3044
3045         ver = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_UVRS);
3046         if (ver & 1) {
3047                 VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_UVRS, 1);
3048         } else {
3049                 dev_err(&pdev->dev,
3050                         "Incompatible upt version (0x%x) for adapter\n", ver);
3051                 err = -EBUSY;
3052                 goto err_ver;
3053         }
3054
3055         SET_NETDEV_DEV(netdev, &pdev->dev);
3056         vmxnet3_declare_features(adapter, dma64);
3057
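        /* With matching tx and rx queue counts each tx/rx pair can share one
         * MSI-X vector (BUDDYSHARE); otherwise every queue gets its own.
         */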
3058         if (adapter->num_tx_queues == adapter->num_rx_queues)
3059                 adapter->share_intr = VMXNET3_INTR_BUDDYSHARE;
3060         else
3061                 adapter->share_intr = VMXNET3_INTR_DONTSHARE;
3062
3063         vmxnet3_alloc_intr_resources(adapter);
3064
3065 #ifdef VMXNET3_RSS
3066         if (adapter->num_rx_queues > 1 &&
3067             adapter->intr.type == VMXNET3_IT_MSIX) {
3068                 adapter->rss = true;
3069                 netdev->hw_features |= NETIF_F_RXHASH;
3070                 netdev->features |= NETIF_F_RXHASH;
3071                 dev_dbg(&pdev->dev, "RSS is enabled.\n");
3072         } else {
3073                 adapter->rss = false;
3074         }
3075 #endif
3076
3077         vmxnet3_read_mac_addr(adapter, mac);
3078         memcpy(netdev->dev_addr,  mac, netdev->addr_len);
3079
3080         netdev->netdev_ops = &vmxnet3_netdev_ops;
3081         vmxnet3_set_ethtool_ops(netdev);
3082         netdev->watchdog_timeo = 5 * HZ;
3083
3084         INIT_WORK(&adapter->work, vmxnet3_reset_work);
3085         set_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state);
3086
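        /* With MSI-X each rx queue gets its own rx-only NAPI poll handler;
         * with MSI or INTx a single NAPI instance services the whole device.
         */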
3087         if (adapter->intr.type == VMXNET3_IT_MSIX) {
3088                 int i;
3089                 for (i = 0; i < adapter->num_rx_queues; i++) {
3090                         netif_napi_add(adapter->netdev,
3091                                        &adapter->rx_queue[i].napi,
3092                                        vmxnet3_poll_rx_only, 64);
3093                 }
3094         } else {
3095                 netif_napi_add(adapter->netdev, &adapter->rx_queue[0].napi,
3096                                vmxnet3_poll, 64);
3097         }
3098
3099         netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
3100         netif_set_real_num_rx_queues(adapter->netdev, adapter->num_rx_queues);
3101
3102         netif_carrier_off(netdev);
3103         err = register_netdev(netdev);
3104
3105         if (err) {
3106                 dev_err(&pdev->dev, "Failed to register adapter\n");
3107                 goto err_register;
3108         }
3109
3110         vmxnet3_check_link(adapter, false);
3111         return 0;
3112
3113 err_register:
3114         vmxnet3_free_intr_resources(adapter);
3115 err_ver:
3116         vmxnet3_free_pci_resources(adapter);
3117 err_alloc_pci:
3118 #ifdef VMXNET3_RSS
3119         dma_free_coherent(&adapter->pdev->dev, sizeof(struct UPT1_RSSConf),
3120                           adapter->rss_conf, adapter->rss_conf_pa);
3121 err_alloc_rss:
3122 #endif
3123         dma_free_coherent(&adapter->pdev->dev, sizeof(struct Vmxnet3_PMConf),
3124                           adapter->pm_conf, adapter->pm_conf_pa);
3125 err_alloc_pm:
3126         dma_free_coherent(&adapter->pdev->dev, size, adapter->tqd_start,
3127                           adapter->queue_desc_pa);
3128 err_alloc_queue_desc:
3129         dma_free_coherent(&adapter->pdev->dev,
3130                           sizeof(struct Vmxnet3_DriverShared),
3131                           adapter->shared, adapter->shared_pa);
3132 err_alloc_shared:
3133         dma_unmap_single(&adapter->pdev->dev, adapter->adapter_pa,
3134                          sizeof(struct vmxnet3_adapter), PCI_DMA_TODEVICE);
3135         free_netdev(netdev);
3136         return err;
3137 }
3138
3139
3140 static void
3141 vmxnet3_remove_device(struct pci_dev *pdev)
3142 {
3143         struct net_device *netdev = pci_get_drvdata(pdev);
3144         struct vmxnet3_adapter *adapter = netdev_priv(netdev);
3145         int size = 0;
3146         int num_rx_queues;
3147
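        /* Recompute the rx queue count the same way probe did so the queue
         * descriptor area is freed with the size it was allocated with.
         */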
3148 #ifdef VMXNET3_RSS
3149         if (enable_mq)
3150                 num_rx_queues = min(VMXNET3_DEVICE_MAX_RX_QUEUES,
3151                                     (int)num_online_cpus());
3152         else
3153 #endif
3154                 num_rx_queues = 1;
3155         num_rx_queues = rounddown_pow_of_two(num_rx_queues);
3156
3157         cancel_work_sync(&adapter->work);
3158
3159         unregister_netdev(netdev);
3160
3161         vmxnet3_free_intr_resources(adapter);
3162         vmxnet3_free_pci_resources(adapter);
3163 #ifdef VMXNET3_RSS
3164         dma_free_coherent(&adapter->pdev->dev, sizeof(struct UPT1_RSSConf),
3165                           adapter->rss_conf, adapter->rss_conf_pa);
3166 #endif
3167         dma_free_coherent(&adapter->pdev->dev, sizeof(struct Vmxnet3_PMConf),
3168                           adapter->pm_conf, adapter->pm_conf_pa);
3169
3170         size = sizeof(struct Vmxnet3_TxQueueDesc) * adapter->num_tx_queues;
3171         size += sizeof(struct Vmxnet3_RxQueueDesc) * num_rx_queues;
3172         dma_free_coherent(&adapter->pdev->dev, size, adapter->tqd_start,
3173                           adapter->queue_desc_pa);
3174         dma_free_coherent(&adapter->pdev->dev,
3175                           sizeof(struct Vmxnet3_DriverShared),
3176                           adapter->shared, adapter->shared_pa);
3177         dma_unmap_single(&adapter->pdev->dev, adapter->adapter_pa,
3178                          sizeof(struct vmxnet3_adapter), PCI_DMA_TODEVICE);
3179         free_netdev(netdev);
3180 }
3181
3182
3183 #ifdef CONFIG_PM
3184
3185 static int
3186 vmxnet3_suspend(struct device *device)
3187 {
3188         struct pci_dev *pdev = to_pci_dev(device);
3189         struct net_device *netdev = pci_get_drvdata(pdev);
3190         struct vmxnet3_adapter *adapter = netdev_priv(netdev);
3191         struct Vmxnet3_PMConf *pmConf;
3192         struct ethhdr *ehdr;
3193         struct arphdr *ahdr;
3194         u8 *arpreq;
3195         struct in_device *in_dev;
3196         struct in_ifaddr *ifa;
3197         unsigned long flags;
3198         int i = 0;
3199
3200         if (!netif_running(netdev))
3201                 return 0;
3202
3203         for (i = 0; i < adapter->num_rx_queues; i++)
3204                 napi_disable(&adapter->rx_queue[i].napi);
3205
3206         vmxnet3_disable_all_intrs(adapter);
3207         vmxnet3_free_irqs(adapter);
3208         vmxnet3_free_intr_resources(adapter);
3209
3210         netif_device_detach(netdev);
3211         netif_tx_stop_all_queues(netdev);
3212
3213         /* Create wake-up filters. */
3214         pmConf = adapter->pm_conf;
3215         memset(pmConf, 0, sizeof(*pmConf));
3216
3217         if (adapter->wol & WAKE_UCAST) {
3218                 pmConf->filters[i].patternSize = ETH_ALEN;
3219                 pmConf->filters[i].maskSize = 1;
3220                 memcpy(pmConf->filters[i].pattern, netdev->dev_addr, ETH_ALEN);
3221                 pmConf->filters[i].mask[0] = 0x3F; /* LSB ETH_ALEN bits */
3222
3223                 pmConf->wakeUpEvents |= VMXNET3_PM_WAKEUP_FILTER;
3224                 i++;
3225         }
3226
3227         if (adapter->wol & WAKE_ARP) {
3228                 in_dev = in_dev_get(netdev);
3229                 if (!in_dev)
3230                         goto skip_arp;
3231
3232                 ifa = (struct in_ifaddr *)in_dev->ifa_list;
3233                 if (!ifa)
3234                         goto skip_arp;
3235
3236                 pmConf->filters[i].patternSize = ETH_HLEN + /* Ethernet header */
3237                         sizeof(struct arphdr) +         /* ARP header */
3238                         2 * ETH_ALEN +          /* 2 Ethernet addresses */
3239                         2 * sizeof(u32);        /* 2 IPv4 addresses */
3240                 pmConf->filters[i].maskSize =
3241                         (pmConf->filters[i].patternSize - 1) / 8 + 1;
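                /* Each mask byte is a bitmap over eight pattern bytes: bit N
                 * of mask[k] marks pattern byte 8 * k + N as significant
                 * (e.g. the 0x3F above covers the six MAC address bytes).
                 */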
3242
3243                 /* ETH_P_ARP in Ethernet header. */
3244                 ehdr = (struct ethhdr *)pmConf->filters[i].pattern;
3245                 ehdr->h_proto = htons(ETH_P_ARP);
3246
3247                 /* ARPOP_REQUEST in ARP header. */
3248                 ahdr = (struct arphdr *)&pmConf->filters[i].pattern[ETH_HLEN];
3249                 ahdr->ar_op = htons(ARPOP_REQUEST);
3250                 arpreq = (u8 *)(ahdr + 1);
3251
3252                 /* The unicast IPv4 address in the 'tip' field (past sender MAC, sender IP, target MAC). */
3253                 arpreq += 2 * ETH_ALEN + sizeof(u32);
3254                 *(u32 *)arpreq = ifa->ifa_address;
3255
3256                 /* The mask for the relevant bits. */
3257                 pmConf->filters[i].mask[0] = 0x00;
3258                 pmConf->filters[i].mask[1] = 0x30; /* ETH_P_ARP */
3259                 pmConf->filters[i].mask[2] = 0x30; /* ARPOP_REQUEST */
3260                 pmConf->filters[i].mask[3] = 0x00;
3261                 pmConf->filters[i].mask[4] = 0xC0; /* IPv4 TIP */
3262                 pmConf->filters[i].mask[5] = 0x03; /* IPv4 TIP */
3263                 in_dev_put(in_dev);
3264
3265                 pmConf->wakeUpEvents |= VMXNET3_PM_WAKEUP_FILTER;
3266                 i++;
3267         }
3268
3269 skip_arp:
3270         if (adapter->wol & WAKE_MAGIC)
3271                 pmConf->wakeUpEvents |= VMXNET3_PM_WAKEUP_MAGIC;
3272
3273         pmConf->numFilters = i;
3274
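        /* Hand the filters to the device: point the shared PM config
         * descriptor at the DMA-able PMConf area, then issue
         * VMXNET3_CMD_UPDATE_PMCFG below.
         */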
3275         adapter->shared->devRead.pmConfDesc.confVer = cpu_to_le32(1);
3276         adapter->shared->devRead.pmConfDesc.confLen = cpu_to_le32(sizeof(
3277                                                                   *pmConf));
3278         adapter->shared->devRead.pmConfDesc.confPA =
3279                 cpu_to_le64(adapter->pm_conf_pa);
3280
3281         spin_lock_irqsave(&adapter->cmd_lock, flags);
3282         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
3283                                VMXNET3_CMD_UPDATE_PMCFG);
3284         spin_unlock_irqrestore(&adapter->cmd_lock, flags);
3285
3286         pci_save_state(pdev);
3287         pci_enable_wake(pdev, pci_choose_state(pdev, PMSG_SUSPEND),
3288                         adapter->wol);
3289         pci_disable_device(pdev);
3290         pci_set_power_state(pdev, pci_choose_state(pdev, PMSG_SUSPEND));
3291
3292         return 0;
3293 }
3294
3295
3296 static int
3297 vmxnet3_resume(struct device *device)
3298 {
3299         int err, i = 0;
3300         unsigned long flags;
3301         struct pci_dev *pdev = to_pci_dev(device);
3302         struct net_device *netdev = pci_get_drvdata(pdev);
3303         struct vmxnet3_adapter *adapter = netdev_priv(netdev);
3304         struct Vmxnet3_PMConf *pmConf;
3305
3306         if (!netif_running(netdev))
3307                 return 0;
3308
3309         /* Destroy wake-up filters. */
3310         pmConf = adapter->pm_conf;
3311         memset(pmConf, 0, sizeof(*pmConf));
3312
3313         adapter->shared->devRead.pmConfDesc.confVer = cpu_to_le32(1);
3314         adapter->shared->devRead.pmConfDesc.confLen = cpu_to_le32(sizeof(
3315                                                                   *pmConf));
3316         adapter->shared->devRead.pmConfDesc.confPA =
3317                 cpu_to_le64(adapter->pm_conf_pa);
3318
3319         netif_device_attach(netdev);
3320         pci_set_power_state(pdev, PCI_D0);
3321         pci_restore_state(pdev);
3322         err = pci_enable_device_mem(pdev);
3323         if (err != 0)
3324                 return err;
3325
3326         pci_enable_wake(pdev, PCI_D0, 0);
3327
3328         spin_lock_irqsave(&adapter->cmd_lock, flags);
3329         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
3330                                VMXNET3_CMD_UPDATE_PMCFG);
3331         spin_unlock_irqrestore(&adapter->cmd_lock, flags);
3332         vmxnet3_alloc_intr_resources(adapter);
3333         vmxnet3_request_irqs(adapter);
3334         for (i = 0; i < adapter->num_rx_queues; i++)
3335                 napi_enable(&adapter->rx_queue[i].napi);
3336         vmxnet3_enable_all_intrs(adapter);
3337
3338         return 0;
3339 }
3340
3341 static const struct dev_pm_ops vmxnet3_pm_ops = {
3342         .suspend = vmxnet3_suspend,
3343         .resume = vmxnet3_resume,
3344 };
3345 #endif
3346
3347 static struct pci_driver vmxnet3_driver = {
3348         .name           = vmxnet3_driver_name,
3349         .id_table       = vmxnet3_pciid_table,
3350         .probe          = vmxnet3_probe_device,
3351         .remove         = vmxnet3_remove_device,
3352 #ifdef CONFIG_PM
3353         .driver.pm      = &vmxnet3_pm_ops,
3354 #endif
3355 };
3356
3357
3358 static int __init
3359 vmxnet3_init_module(void)
3360 {
3361         pr_info("%s - version %s\n", VMXNET3_DRIVER_DESC,
3362                 VMXNET3_DRIVER_VERSION_REPORT);
3363         return pci_register_driver(&vmxnet3_driver);
3364 }
3365
3366 module_init(vmxnet3_init_module);
3367
3368
3369 static void
3370 vmxnet3_exit_module(void)
3371 {
3372         pci_unregister_driver(&vmxnet3_driver);
3373 }
3374
3375 module_exit(vmxnet3_exit_module);
3376
3377 MODULE_AUTHOR("VMware, Inc.");
3378 MODULE_DESCRIPTION(VMXNET3_DRIVER_DESC);
3379 MODULE_LICENSE("GPL v2");
3380 MODULE_VERSION(VMXNET3_DRIVER_VERSION_STRING);