Merge remote-tracking branches 'asoc/fix/adsp', 'asoc/fix/arizona', 'asoc/fix/atmel...
[linux-drm-fsl-dcu.git] / drivers / ntb / ntb_hw.c
1 /*
2  * This file is provided under a dual BSD/GPLv2 license.  When using or
3  *   redistributing this file, you may do so under either license.
4  *
5  *   GPL LICENSE SUMMARY
6  *
7  *   Copyright(c) 2012 Intel Corporation. All rights reserved.
8  *
9  *   This program is free software; you can redistribute it and/or modify
10  *   it under the terms of version 2 of the GNU General Public License as
11  *   published by the Free Software Foundation.
12  *
13  *   BSD LICENSE
14  *
15  *   Copyright(c) 2012 Intel Corporation. All rights reserved.
16  *
17  *   Redistribution and use in source and binary forms, with or without
18  *   modification, are permitted provided that the following conditions
19  *   are met:
20  *
21  *     * Redistributions of source code must retain the above copyright
22  *       notice, this list of conditions and the following disclaimer.
23  *     * Redistributions in binary form must reproduce the above copy
24  *       notice, this list of conditions and the following disclaimer in
25  *       the documentation and/or other materials provided with the
26  *       distribution.
27  *     * Neither the name of Intel Corporation nor the names of its
28  *       contributors may be used to endorse or promote products derived
29  *       from this software without specific prior written permission.
30  *
31  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
32  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
33  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
34  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
35  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
36  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
37  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
38  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
39  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
40  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
41  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
42  *
43  * Intel PCIe NTB Linux driver
44  *
45  * Contact Information:
46  * Jon Mason <jon.mason@intel.com>
47  */
48 #include <linux/debugfs.h>
49 #include <linux/delay.h>
50 #include <linux/init.h>
51 #include <linux/interrupt.h>
52 #include <linux/module.h>
53 #include <linux/pci.h>
54 #include <linux/random.h>
55 #include <linux/slab.h>
56 #include "ntb_hw.h"
57 #include "ntb_regs.h"
58
/* Driver name and version strings fed to the module metadata macros below. */
#define NTB_NAME        "Intel(R) PCI-E Non-Transparent Bridge Driver"
#define NTB_VER         "1.0"

MODULE_DESCRIPTION(NTB_NAME);
MODULE_VERSION(NTB_VER);
MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Intel Corporation");

/*
 * Module parameter (mode 0644), enabled by default.  ntb_xeon_setup() checks
 * it: when set, B2B connections reach the remote scratchpad and doorbell
 * registers through the second memory window, and RP connections are refused
 * with -EINVAL.
 */
static bool xeon_errata_workaround = true;
module_param(xeon_errata_workaround, bool, 0644);
MODULE_PARM_DESC(xeon_errata_workaround, "Workaround for the Xeon Errata");
70
/*
 * Connection type, stored in ntb_device.conn_type.  ntb_xeon_setup() compares
 * these values against the connection-type field of the PPD config register.
 */
enum {
        NTB_CONN_TRANSPARENT = 0,
        NTB_CONN_B2B,
        NTB_CONN_RP,
};

/*
 * Device type, stored in ntb_device.dev_type.  ntb_xeon_setup() picks
 * NTB_DEV_USD when SNB_PPD_DEV_TYPE is set in the PPD register, NTB_DEV_DSD
 * otherwise.  NOTE(review): presumably "upstream"/"downstream" side device -
 * confirm against the hardware documentation.
 */
enum {
        NTB_DEV_USD = 0,
        NTB_DEV_DSD,
};

/*
 * Hardware flavour, stored in ntb_device.hw_type.  SNB_HW is set by
 * ntb_xeon_setup(); BWD_HW selects the BWD-specific register paths, e.g. in
 * ntb_ring_doorbell() and ntb_link_status().
 */
enum {
        SNB_HW = 0,
        BWD_HW,
};
86
/* NOTE(review): presumably the driver's debugfs root directory; its users are
 * not visible in this part of the file.
 */
static struct dentry *debugfs_dir;

/* Base delay handed to msleep() in bwd_link_recovery(); a random amount
 * smaller than this value is added on top of it.
 */
#define BWD_LINK_RECOVERY_TIME  500

/* Translate memory window 0,1 to BAR 2,4.  The argument is parenthesized so
 * that an expression such as "a + b" is multiplied as a whole.
 */
#define MW_TO_BAR(mw)   ((mw) * NTB_MAX_NUM_MW + 2)
93
/*
 * PCI IDs handled by this driver: the BWD B2B device plus the B2B, PS and SS
 * variants of JSF, SNB, IVT and HSX, all with the Intel vendor ID.  The list
 * ends with a zeroed sentinel entry and is exported for module autoloading
 * through MODULE_DEVICE_TABLE().
 */
static DEFINE_PCI_DEVICE_TABLE(ntb_pci_tbl) = {
        {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_BWD)},
        {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_JSF)},
        {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_SNB)},
        {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_IVT)},
        {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_HSX)},
        {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_JSF)},
        {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_SNB)},
        {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_IVT)},
        {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_HSX)},
        {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_JSF)},
        {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_SNB)},
        {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_IVT)},
        {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_HSX)},
        {0}
};
MODULE_DEVICE_TABLE(pci, ntb_pci_tbl);
111
112 /**
113  * ntb_register_event_callback() - register event callback
114  * @ndev: pointer to ntb_device instance
115  * @func: callback function to register
116  *
117  * This function registers a callback for any HW driver events such as link
118  * up/down, power management notices and etc.
119  *
120  * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
121  */
122 int ntb_register_event_callback(struct ntb_device *ndev,
123                             void (*func)(void *handle, enum ntb_hw_event event))
124 {
125         if (ndev->event_cb)
126                 return -EINVAL;
127
128         ndev->event_cb = func;
129
130         return 0;
131 }
132
133 /**
134  * ntb_unregister_event_callback() - unregisters the event callback
135  * @ndev: pointer to ntb_device instance
136  *
137  * This function unregisters the existing callback from transport
138  */
139 void ntb_unregister_event_callback(struct ntb_device *ndev)
140 {
141         ndev->event_cb = NULL;
142 }
143
144 static void ntb_irq_work(unsigned long data)
145 {
146         struct ntb_db_cb *db_cb = (struct ntb_db_cb *)data;
147         int rc;
148
149         rc = db_cb->callback(db_cb->data, db_cb->db_num);
150         if (rc)
151                 tasklet_schedule(&db_cb->irq_work);
152         else {
153                 struct ntb_device *ndev = db_cb->ndev;
154                 unsigned long mask;
155
156                 mask = readw(ndev->reg_ofs.ldb_mask);
157                 clear_bit(db_cb->db_num * ndev->bits_per_vector, &mask);
158                 writew(mask, ndev->reg_ofs.ldb_mask);
159         }
160 }
161
162 /**
163  * ntb_register_db_callback() - register a callback for doorbell interrupt
164  * @ndev: pointer to ntb_device instance
165  * @idx: doorbell index to register callback, zero based
166  * @data: pointer to be returned to caller with every callback
167  * @func: callback function to register
168  *
169  * This function registers a callback function for the doorbell interrupt
170  * on the primary side. The function will unmask the doorbell as well to
171  * allow interrupt.
172  *
173  * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
174  */
175 int ntb_register_db_callback(struct ntb_device *ndev, unsigned int idx,
176                              void *data, int (*func)(void *data, int db_num))
177 {
178         unsigned long mask;
179
180         if (idx >= ndev->max_cbs || ndev->db_cb[idx].callback) {
181                 dev_warn(&ndev->pdev->dev, "Invalid Index.\n");
182                 return -EINVAL;
183         }
184
185         ndev->db_cb[idx].callback = func;
186         ndev->db_cb[idx].data = data;
187         ndev->db_cb[idx].ndev = ndev;
188
189         tasklet_init(&ndev->db_cb[idx].irq_work, ntb_irq_work,
190                      (unsigned long) &ndev->db_cb[idx]);
191
192         /* unmask interrupt */
193         mask = readw(ndev->reg_ofs.ldb_mask);
194         clear_bit(idx * ndev->bits_per_vector, &mask);
195         writew(mask, ndev->reg_ofs.ldb_mask);
196
197         return 0;
198 }
199
200 /**
201  * ntb_unregister_db_callback() - unregister a callback for doorbell interrupt
202  * @ndev: pointer to ntb_device instance
203  * @idx: doorbell index to register callback, zero based
204  *
205  * This function unregisters a callback function for the doorbell interrupt
206  * on the primary side. The function will also mask the said doorbell.
207  */
208 void ntb_unregister_db_callback(struct ntb_device *ndev, unsigned int idx)
209 {
210         unsigned long mask;
211
212         if (idx >= ndev->max_cbs || !ndev->db_cb[idx].callback)
213                 return;
214
215         mask = readw(ndev->reg_ofs.ldb_mask);
216         set_bit(idx * ndev->bits_per_vector, &mask);
217         writew(mask, ndev->reg_ofs.ldb_mask);
218
219         tasklet_disable(&ndev->db_cb[idx].irq_work);
220
221         ndev->db_cb[idx].callback = NULL;
222 }
223
224 /**
225  * ntb_find_transport() - find the transport pointer
226  * @transport: pointer to pci device
227  *
228  * Given the pci device pointer, return the transport pointer passed in when
229  * the transport attached when it was inited.
230  *
231  * RETURNS: pointer to transport.
232  */
233 void *ntb_find_transport(struct pci_dev *pdev)
234 {
235         struct ntb_device *ndev = pci_get_drvdata(pdev);
236         return ndev->ntb_transport;
237 }
238
239 /**
240  * ntb_register_transport() - Register NTB transport with NTB HW driver
241  * @transport: transport identifier
242  *
243  * This function allows a transport to reserve the hardware driver for
244  * NTB usage.
245  *
246  * RETURNS: pointer to ntb_device, NULL on error.
247  */
248 struct ntb_device *ntb_register_transport(struct pci_dev *pdev, void *transport)
249 {
250         struct ntb_device *ndev = pci_get_drvdata(pdev);
251
252         if (ndev->ntb_transport)
253                 return NULL;
254
255         ndev->ntb_transport = transport;
256         return ndev;
257 }
258
259 /**
260  * ntb_unregister_transport() - Unregister the transport with the NTB HW driver
261  * @ndev - ntb_device of the transport to be freed
262  *
263  * This function unregisters the transport from the HW driver and performs any
264  * necessary cleanups.
265  */
266 void ntb_unregister_transport(struct ntb_device *ndev)
267 {
268         int i;
269
270         if (!ndev->ntb_transport)
271                 return;
272
273         for (i = 0; i < ndev->max_cbs; i++)
274                 ntb_unregister_db_callback(ndev, i);
275
276         ntb_unregister_event_callback(ndev);
277         ndev->ntb_transport = NULL;
278 }
279
280 /**
281  * ntb_write_local_spad() - write to the secondary scratchpad register
282  * @ndev: pointer to ntb_device instance
283  * @idx: index to the scratchpad register, 0 based
284  * @val: the data value to put into the register
285  *
286  * This function allows writing of a 32bit value to the indexed scratchpad
287  * register. This writes over the data mirrored to the local scratchpad register
288  * by the remote system.
289  *
290  * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
291  */
292 int ntb_write_local_spad(struct ntb_device *ndev, unsigned int idx, u32 val)
293 {
294         if (idx >= ndev->limits.max_spads)
295                 return -EINVAL;
296
297         dev_dbg(&ndev->pdev->dev, "Writing %x to local scratch pad index %d\n",
298                 val, idx);
299         writel(val, ndev->reg_ofs.spad_read + idx * 4);
300
301         return 0;
302 }
303
304 /**
305  * ntb_read_local_spad() - read from the primary scratchpad register
306  * @ndev: pointer to ntb_device instance
307  * @idx: index to scratchpad register, 0 based
308  * @val: pointer to 32bit integer for storing the register value
309  *
310  * This function allows reading of the 32bit scratchpad register on
311  * the primary (internal) side.  This allows the local system to read data
312  * written and mirrored to the scratchpad register by the remote system.
313  *
314  * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
315  */
316 int ntb_read_local_spad(struct ntb_device *ndev, unsigned int idx, u32 *val)
317 {
318         if (idx >= ndev->limits.max_spads)
319                 return -EINVAL;
320
321         *val = readl(ndev->reg_ofs.spad_write + idx * 4);
322         dev_dbg(&ndev->pdev->dev,
323                 "Reading %x from local scratch pad index %d\n", *val, idx);
324
325         return 0;
326 }
327
328 /**
329  * ntb_write_remote_spad() - write to the secondary scratchpad register
330  * @ndev: pointer to ntb_device instance
331  * @idx: index to the scratchpad register, 0 based
332  * @val: the data value to put into the register
333  *
334  * This function allows writing of a 32bit value to the indexed scratchpad
335  * register. The register resides on the secondary (external) side.  This allows
336  * the local system to write data to be mirrored to the remote systems
337  * scratchpad register.
338  *
339  * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
340  */
341 int ntb_write_remote_spad(struct ntb_device *ndev, unsigned int idx, u32 val)
342 {
343         if (idx >= ndev->limits.max_spads)
344                 return -EINVAL;
345
346         dev_dbg(&ndev->pdev->dev, "Writing %x to remote scratch pad index %d\n",
347                 val, idx);
348         writel(val, ndev->reg_ofs.spad_write + idx * 4);
349
350         return 0;
351 }
352
353 /**
354  * ntb_read_remote_spad() - read from the primary scratchpad register
355  * @ndev: pointer to ntb_device instance
356  * @idx: index to scratchpad register, 0 based
357  * @val: pointer to 32bit integer for storing the register value
358  *
359  * This function allows reading of the 32bit scratchpad register on
360  * the primary (internal) side.  This alloows the local system to read the data
361  * it wrote to be mirrored on the remote system.
362  *
363  * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
364  */
365 int ntb_read_remote_spad(struct ntb_device *ndev, unsigned int idx, u32 *val)
366 {
367         if (idx >= ndev->limits.max_spads)
368                 return -EINVAL;
369
370         *val = readl(ndev->reg_ofs.spad_read + idx * 4);
371         dev_dbg(&ndev->pdev->dev,
372                 "Reading %x from remote scratch pad index %d\n", *val, idx);
373
374         return 0;
375 }
376
377 /**
378  * ntb_get_mw_base() - get addr for the NTB memory window
379  * @ndev: pointer to ntb_device instance
380  * @mw: memory window number
381  *
382  * This function provides the base address of the memory window specified.
383  *
384  * RETURNS: address, or NULL on error.
385  */
386 resource_size_t ntb_get_mw_base(struct ntb_device *ndev, unsigned int mw)
387 {
388         if (mw >= ntb_max_mw(ndev))
389                 return 0;
390
391         return pci_resource_start(ndev->pdev, MW_TO_BAR(mw));
392 }
393
394 /**
395  * ntb_get_mw_vbase() - get virtual addr for the NTB memory window
396  * @ndev: pointer to ntb_device instance
397  * @mw: memory window number
398  *
399  * This function provides the base virtual address of the memory window
400  * specified.
401  *
402  * RETURNS: pointer to virtual address, or NULL on error.
403  */
404 void __iomem *ntb_get_mw_vbase(struct ntb_device *ndev, unsigned int mw)
405 {
406         if (mw >= ntb_max_mw(ndev))
407                 return NULL;
408
409         return ndev->mw[mw].vbase;
410 }
411
412 /**
413  * ntb_get_mw_size() - return size of NTB memory window
414  * @ndev: pointer to ntb_device instance
415  * @mw: memory window number
416  *
417  * This function provides the physical size of the memory window specified
418  *
419  * RETURNS: the size of the memory window or zero on error
420  */
421 u64 ntb_get_mw_size(struct ntb_device *ndev, unsigned int mw)
422 {
423         if (mw >= ntb_max_mw(ndev))
424                 return 0;
425
426         return ndev->mw[mw].bar_sz;
427 }
428
429 /**
430  * ntb_set_mw_addr - set the memory window address
431  * @ndev: pointer to ntb_device instance
432  * @mw: memory window number
433  * @addr: base address for data
434  *
435  * This function sets the base physical address of the memory window.  This
436  * memory address is where data from the remote system will be transfered into
437  * or out of depending on how the transport is configured.
438  */
439 void ntb_set_mw_addr(struct ntb_device *ndev, unsigned int mw, u64 addr)
440 {
441         if (mw >= ntb_max_mw(ndev))
442                 return;
443
444         dev_dbg(&ndev->pdev->dev, "Writing addr %Lx to BAR %d\n", addr,
445                 MW_TO_BAR(mw));
446
447         ndev->mw[mw].phys_addr = addr;
448
449         switch (MW_TO_BAR(mw)) {
450         case NTB_BAR_23:
451                 writeq(addr, ndev->reg_ofs.bar2_xlat);
452                 break;
453         case NTB_BAR_45:
454                 writeq(addr, ndev->reg_ofs.bar4_xlat);
455                 break;
456         }
457 }
458
459 /**
460  * ntb_ring_doorbell() - Set the doorbell on the secondary/external side
461  * @ndev: pointer to ntb_device instance
462  * @db: doorbell to ring
463  *
464  * This function allows triggering of a doorbell on the secondary/external
465  * side that will initiate an interrupt on the remote host
466  *
467  * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
468  */
469 void ntb_ring_doorbell(struct ntb_device *ndev, unsigned int db)
470 {
471         dev_dbg(&ndev->pdev->dev, "%s: ringing doorbell %d\n", __func__, db);
472
473         if (ndev->hw_type == BWD_HW)
474                 writeq((u64) 1 << db, ndev->reg_ofs.rdb);
475         else
476                 writew(((1 << ndev->bits_per_vector) - 1) <<
477                        (db * ndev->bits_per_vector), ndev->reg_ofs.rdb);
478 }
479
/*
 * Run the BWD link recovery register sequence: reset the ModPhy lanes, sleep
 * 100ms, write back several error status registers and finally clear the
 * FORCEDETECT bit in LTSSMSTATEJMP.  Sleeps, so it must be called from a
 * context that may block (it is called from the bwd_link_recovery() work
 * handler).
 */
static void bwd_recover_link(struct ntb_device *ndev)
{
        u32 status;

        /* Driver resets the NTB ModPhy lanes - magic! */
        writeb(0xe0, ndev->reg_base + BWD_MODPHY_PCSREG6);
        writeb(0x40, ndev->reg_base + BWD_MODPHY_PCSREG4);
        writeb(0x60, ndev->reg_base + BWD_MODPHY_PCSREG4);
        writeb(0x60, ndev->reg_base + BWD_MODPHY_PCSREG6);

        /* Driver waits 100ms to allow the NTB ModPhy to settle */
        msleep(100);

        /* Clear AER Errors, write to clear */
        status = readl(ndev->reg_base + BWD_ERRCORSTS_OFFSET);
        dev_dbg(&ndev->pdev->dev, "ERRCORSTS = %x\n", status);
        /* Only the PCI_ERR_COR_REP_ROLL bit, if set, is written back */
        status &= PCI_ERR_COR_REP_ROLL;
        writel(status, ndev->reg_base + BWD_ERRCORSTS_OFFSET);

        /* Clear unexpected electrical idle event in LTSSM, write to clear */
        status = readl(ndev->reg_base + BWD_LTSSMERRSTS0_OFFSET);
        dev_dbg(&ndev->pdev->dev, "LTSSMERRSTS0 = %x\n", status);
        status |= BWD_LTSSMERRSTS0_UNEXPECTEDEI;
        writel(status, ndev->reg_base + BWD_LTSSMERRSTS0_OFFSET);

        /* Clear DeSkew Buffer error, write to clear */
        status = readl(ndev->reg_base + BWD_DESKEWSTS_OFFSET);
        dev_dbg(&ndev->pdev->dev, "DESKEWSTS = %x\n", status);
        status |= BWD_DESKEWSTS_DBERR;
        writel(status, ndev->reg_base + BWD_DESKEWSTS_OFFSET);

        /* Write back only the BWD_IBIST_ERR_OFLOW bit, if set.
         * NOTE(review): presumably write-to-clear like the registers above -
         * confirm against the hardware documentation.
         */
        status = readl(ndev->reg_base + BWD_IBSTERRRCRVSTS0_OFFSET);
        dev_dbg(&ndev->pdev->dev, "IBSTERRRCRVSTS0 = %x\n", status);
        status &= BWD_IBIST_ERR_OFLOW;
        writel(status, ndev->reg_base + BWD_IBSTERRRCRVSTS0_OFFSET);

        /* Releases the NTB state machine to allow the link to retrain */
        status = readl(ndev->reg_base + BWD_LTSSMSTATEJMP_OFFSET);
        dev_dbg(&ndev->pdev->dev, "LTSSMSTATEJMP = %x\n", status);
        status &= ~BWD_LTSSMSTATEJMP_FORCEDETECT;
        writel(status, ndev->reg_base + BWD_LTSSMSTATEJMP_OFFSET);
}
522
523 static void ntb_link_event(struct ntb_device *ndev, int link_state)
524 {
525         unsigned int event;
526
527         if (ndev->link_status == link_state)
528                 return;
529
530         if (link_state == NTB_LINK_UP) {
531                 u16 status;
532
533                 dev_info(&ndev->pdev->dev, "Link Up\n");
534                 ndev->link_status = NTB_LINK_UP;
535                 event = NTB_EVENT_HW_LINK_UP;
536
537                 if (ndev->hw_type == BWD_HW ||
538                     ndev->conn_type == NTB_CONN_TRANSPARENT)
539                         status = readw(ndev->reg_ofs.lnk_stat);
540                 else {
541                         int rc = pci_read_config_word(ndev->pdev,
542                                                       SNB_LINK_STATUS_OFFSET,
543                                                       &status);
544                         if (rc)
545                                 return;
546                 }
547
548                 ndev->link_width = (status & NTB_LINK_WIDTH_MASK) >> 4;
549                 ndev->link_speed = (status & NTB_LINK_SPEED_MASK);
550                 dev_info(&ndev->pdev->dev, "Link Width %d, Link Speed %d\n",
551                          ndev->link_width, ndev->link_speed);
552         } else {
553                 dev_info(&ndev->pdev->dev, "Link Down\n");
554                 ndev->link_status = NTB_LINK_DOWN;
555                 event = NTB_EVENT_HW_LINK_DOWN;
556                 /* Don't modify link width/speed, we need it in link recovery */
557         }
558
559         /* notify the upper layer if we have an event change */
560         if (ndev->event_cb)
561                 ndev->event_cb(ndev->ntb_transport, event);
562 }
563
564 static int ntb_link_status(struct ntb_device *ndev)
565 {
566         int link_state;
567
568         if (ndev->hw_type == BWD_HW) {
569                 u32 ntb_cntl;
570
571                 ntb_cntl = readl(ndev->reg_ofs.lnk_cntl);
572                 if (ntb_cntl & BWD_CNTL_LINK_DOWN)
573                         link_state = NTB_LINK_DOWN;
574                 else
575                         link_state = NTB_LINK_UP;
576         } else {
577                 u16 status;
578                 int rc;
579
580                 rc = pci_read_config_word(ndev->pdev, SNB_LINK_STATUS_OFFSET,
581                                           &status);
582                 if (rc)
583                         return rc;
584
585                 if (status & NTB_LINK_STATUS_ACTIVE)
586                         link_state = NTB_LINK_UP;
587                 else
588                         link_state = NTB_LINK_DOWN;
589         }
590
591         ntb_link_event(ndev, link_state);
592
593         return 0;
594 }
595
/*
 * Delayed-work handler for ndev->lr_timer.
 *
 * Runs the recovery sequence, sleeps for a randomised interval and then
 * checks whether recovery succeeded.  On success the link poll work
 * (hb_timer) is scheduled again; otherwise this handler reschedules itself
 * after NTB_HB_TIMEOUT.
 */
static void bwd_link_recovery(struct work_struct *work)
{
        struct ntb_device *ndev = container_of(work, struct ntb_device,
                                               lr_timer.work);
        u32 status32;

        bwd_recover_link(ndev);
        /* There is a potential race between the 2 NTB devices recovering at the
         * same time.  If the times are the same, the link will not recover and
         * the driver will be stuck in this loop forever.  Add a random interval
         * to the recovery time to prevent this race.
         */
        msleep(BWD_LINK_RECOVERY_TIME + prandom_u32() % BWD_LINK_RECOVERY_TIME);

        /* FORCEDETECT still set: try again */
        status32 = readl(ndev->reg_base + BWD_LTSSMSTATEJMP_OFFSET);
        if (status32 & BWD_LTSSMSTATEJMP_FORCEDETECT)
                goto retry;

        /* IBIST overflow error still flagged: try again */
        status32 = readl(ndev->reg_base + BWD_IBSTERRRCRVSTS0_OFFSET);
        if (status32 & BWD_IBIST_ERR_OFLOW)
                goto retry;

        /* If the link is up, it must have come back with the width and speed
         * recorded before it went down; otherwise try again.
         */
        status32 = readl(ndev->reg_ofs.lnk_cntl);
        if (!(status32 & BWD_CNTL_LINK_DOWN)) {
                unsigned char speed, width;
                u16 status16;

                status16 = readw(ndev->reg_ofs.lnk_stat);
                width = (status16 & NTB_LINK_WIDTH_MASK) >> 4;
                speed = (status16 & NTB_LINK_SPEED_MASK);
                if (ndev->link_width != width || ndev->link_speed != speed)
                        goto retry;
        }

        /* Recovery done: hand control back to the link poll work */
        schedule_delayed_work(&ndev->hb_timer, NTB_HB_TIMEOUT);
        return;

retry:
        schedule_delayed_work(&ndev->lr_timer, NTB_HB_TIMEOUT);
}
636
637 /* BWD doesn't have link status interrupt, poll on that platform */
638 static void bwd_link_poll(struct work_struct *work)
639 {
640         struct ntb_device *ndev = container_of(work, struct ntb_device,
641                                                hb_timer.work);
642         unsigned long ts = jiffies;
643
644         /* If we haven't gotten an interrupt in a while, check the BWD link
645          * status bit
646          */
647         if (ts > ndev->last_ts + NTB_HB_TIMEOUT) {
648                 int rc = ntb_link_status(ndev);
649                 if (rc)
650                         dev_err(&ndev->pdev->dev,
651                                 "Error determining link status\n");
652
653                 /* Check to see if a link error is the cause of the link down */
654                 if (ndev->link_status == NTB_LINK_DOWN) {
655                         u32 status32 = readl(ndev->reg_base +
656                                              BWD_LTSSMSTATEJMP_OFFSET);
657                         if (status32 & BWD_LTSSMSTATEJMP_FORCEDETECT) {
658                                 schedule_delayed_work(&ndev->lr_timer, 0);
659                                 return;
660                         }
661                 }
662         }
663
664         schedule_delayed_work(&ndev->hb_timer, NTB_HB_TIMEOUT);
665 }
666
667 static int ntb_xeon_setup(struct ntb_device *ndev)
668 {
669         int rc;
670         u8 val;
671
672         ndev->hw_type = SNB_HW;
673
674         rc = pci_read_config_byte(ndev->pdev, NTB_PPD_OFFSET, &val);
675         if (rc)
676                 return rc;
677
678         if (val & SNB_PPD_DEV_TYPE)
679                 ndev->dev_type = NTB_DEV_USD;
680         else
681                 ndev->dev_type = NTB_DEV_DSD;
682
683         switch (val & SNB_PPD_CONN_TYPE) {
684         case NTB_CONN_B2B:
685                 dev_info(&ndev->pdev->dev, "Conn Type = B2B\n");
686                 ndev->conn_type = NTB_CONN_B2B;
687                 ndev->reg_ofs.ldb = ndev->reg_base + SNB_PDOORBELL_OFFSET;
688                 ndev->reg_ofs.ldb_mask = ndev->reg_base + SNB_PDBMSK_OFFSET;
689                 ndev->reg_ofs.spad_read = ndev->reg_base + SNB_SPAD_OFFSET;
690                 ndev->reg_ofs.bar2_xlat = ndev->reg_base + SNB_SBAR2XLAT_OFFSET;
691                 ndev->reg_ofs.bar4_xlat = ndev->reg_base + SNB_SBAR4XLAT_OFFSET;
692                 ndev->limits.max_spads = SNB_MAX_B2B_SPADS;
693
694                 /* There is a Xeon hardware errata related to writes to
695                  * SDOORBELL or B2BDOORBELL in conjunction with inbound access
696                  * to NTB MMIO Space, which may hang the system.  To workaround
697                  * this use the second memory window to access the interrupt and
698                  * scratch pad registers on the remote system.
699                  */
700                 if (xeon_errata_workaround) {
701                         if (!ndev->mw[1].bar_sz)
702                                 return -EINVAL;
703
704                         ndev->limits.max_mw = SNB_ERRATA_MAX_MW;
705                         ndev->limits.max_db_bits = SNB_MAX_DB_BITS;
706                         ndev->reg_ofs.spad_write = ndev->mw[1].vbase +
707                                                    SNB_SPAD_OFFSET;
708                         ndev->reg_ofs.rdb = ndev->mw[1].vbase +
709                                             SNB_PDOORBELL_OFFSET;
710
711                         /* Set the Limit register to 4k, the minimum size, to
712                          * prevent an illegal access
713                          */
714                         writeq(ndev->mw[1].bar_sz + 0x1000, ndev->reg_base +
715                                SNB_PBAR4LMT_OFFSET);
716                         /* HW errata on the Limit registers.  They can only be
717                          * written when the base register is 4GB aligned and
718                          * < 32bit.  This should already be the case based on the
719                          * driver defaults, but write the Limit registers first
720                          * just in case.
721                          */
722                 } else {
723                         ndev->limits.max_mw = SNB_MAX_MW;
724
725                         /* HW Errata on bit 14 of b2bdoorbell register.  Writes
726                          * will not be mirrored to the remote system.  Shrink
727                          * the number of bits by one, since bit 14 is the last
728                          * bit.
729                          */
730                         ndev->limits.max_db_bits = SNB_MAX_DB_BITS - 1;
731                         ndev->reg_ofs.spad_write = ndev->reg_base +
732                                                    SNB_B2B_SPAD_OFFSET;
733                         ndev->reg_ofs.rdb = ndev->reg_base +
734                                             SNB_B2B_DOORBELL_OFFSET;
735
736                         /* Disable the Limit register, just incase it is set to
737                          * something silly
738                          */
739                         writeq(0, ndev->reg_base + SNB_PBAR4LMT_OFFSET);
740                         /* HW errata on the Limit registers.  They can only be
741                          * written when the base register is 4GB aligned and
742                          * < 32bit.  This should already be the case based on the
743                          * driver defaults, but write the Limit registers first
744                          * just in case.
745                          */
746                 }
747
748                 /* The Xeon errata workaround requires setting SBAR Base
749                  * addresses to known values, so that the PBAR XLAT can be
750                  * pointed at SBAR0 of the remote system.
751                  */
752                 if (ndev->dev_type == NTB_DEV_USD) {
753                         writeq(SNB_MBAR23_DSD_ADDR, ndev->reg_base +
754                                SNB_PBAR2XLAT_OFFSET);
755                         if (xeon_errata_workaround)
756                                 writeq(SNB_MBAR01_DSD_ADDR, ndev->reg_base +
757                                        SNB_PBAR4XLAT_OFFSET);
758                         else {
759                                 writeq(SNB_MBAR45_DSD_ADDR, ndev->reg_base +
760                                        SNB_PBAR4XLAT_OFFSET);
761                                 /* B2B_XLAT_OFFSET is a 64bit register, but can
762                                  * only take 32bit writes
763                                  */
764                                 writel(SNB_MBAR01_DSD_ADDR & 0xffffffff,
765                                        ndev->reg_base + SNB_B2B_XLAT_OFFSETL);
766                                 writel(SNB_MBAR01_DSD_ADDR >> 32,
767                                        ndev->reg_base + SNB_B2B_XLAT_OFFSETU);
768                         }
769
770                         writeq(SNB_MBAR01_USD_ADDR, ndev->reg_base +
771                                SNB_SBAR0BASE_OFFSET);
772                         writeq(SNB_MBAR23_USD_ADDR, ndev->reg_base +
773                                SNB_SBAR2BASE_OFFSET);
774                         writeq(SNB_MBAR45_USD_ADDR, ndev->reg_base +
775                                SNB_SBAR4BASE_OFFSET);
776                 } else {
777                         writeq(SNB_MBAR23_USD_ADDR, ndev->reg_base +
778                                SNB_PBAR2XLAT_OFFSET);
779                         if (xeon_errata_workaround)
780                                 writeq(SNB_MBAR01_USD_ADDR, ndev->reg_base +
781                                        SNB_PBAR4XLAT_OFFSET);
782                         else {
783                                 writeq(SNB_MBAR45_USD_ADDR, ndev->reg_base +
784                                        SNB_PBAR4XLAT_OFFSET);
785                                 /* B2B_XLAT_OFFSET is a 64bit register, but can
786                                  * only take 32bit writes
787                                  */
788                                 writel(SNB_MBAR01_DSD_ADDR & 0xffffffff,
789                                        ndev->reg_base + SNB_B2B_XLAT_OFFSETL);
790                                 writel(SNB_MBAR01_USD_ADDR >> 32,
791                                        ndev->reg_base + SNB_B2B_XLAT_OFFSETU);
792                         }
793                         writeq(SNB_MBAR01_DSD_ADDR, ndev->reg_base +
794                                SNB_SBAR0BASE_OFFSET);
795                         writeq(SNB_MBAR23_DSD_ADDR, ndev->reg_base +
796                                SNB_SBAR2BASE_OFFSET);
797                         writeq(SNB_MBAR45_DSD_ADDR, ndev->reg_base +
798                                SNB_SBAR4BASE_OFFSET);
799                 }
800                 break;
801         case NTB_CONN_RP:
802                 dev_info(&ndev->pdev->dev, "Conn Type = RP\n");
803                 ndev->conn_type = NTB_CONN_RP;
804
805                 if (xeon_errata_workaround) {
806                         dev_err(&ndev->pdev->dev, 
807                                 "NTB-RP disabled due to hardware errata.  To disregard this warning and potentially lock-up the system, add the parameter 'xeon_errata_workaround=0'.\n");
808                         return -EINVAL;
809                 }
810
811                 /* Scratch pads need to have exclusive access from the primary
812                  * or secondary side.  Halve the num spads so that each side can
813                  * have an equal amount.
814                  */
815                 ndev->limits.max_spads = SNB_MAX_COMPAT_SPADS / 2;
816                 ndev->limits.max_db_bits = SNB_MAX_DB_BITS;
817                 /* Note: The SDOORBELL is the cause of the errata.  You REALLY
818                  * don't want to touch it.
819                  */
820                 ndev->reg_ofs.rdb = ndev->reg_base + SNB_SDOORBELL_OFFSET;
821                 ndev->reg_ofs.ldb = ndev->reg_base + SNB_PDOORBELL_OFFSET;
822                 ndev->reg_ofs.ldb_mask = ndev->reg_base + SNB_PDBMSK_OFFSET;
823                 /* Offset the start of the spads to correspond to whether it is
824                  * primary or secondary
825                  */
826                 ndev->reg_ofs.spad_write = ndev->reg_base + SNB_SPAD_OFFSET +
827                                            ndev->limits.max_spads * 4;
828                 ndev->reg_ofs.spad_read = ndev->reg_base + SNB_SPAD_OFFSET;
829                 ndev->reg_ofs.bar2_xlat = ndev->reg_base + SNB_SBAR2XLAT_OFFSET;
830                 ndev->reg_ofs.bar4_xlat = ndev->reg_base + SNB_SBAR4XLAT_OFFSET;
831                 ndev->limits.max_mw = SNB_MAX_MW;
832                 break;
833         case NTB_CONN_TRANSPARENT:
834                 dev_info(&ndev->pdev->dev, "Conn Type = TRANSPARENT\n");
835                 ndev->conn_type = NTB_CONN_TRANSPARENT;
836                 /* Scratch pads need to have exclusive access from the primary
837                  * or secondary side.  Halve the num spads so that each side can
838                  * have an equal amount.
839                  */
840                 ndev->limits.max_spads = SNB_MAX_COMPAT_SPADS / 2;
841                 ndev->limits.max_db_bits = SNB_MAX_DB_BITS;
842                 ndev->reg_ofs.rdb = ndev->reg_base + SNB_PDOORBELL_OFFSET;
843                 ndev->reg_ofs.ldb = ndev->reg_base + SNB_SDOORBELL_OFFSET;
844                 ndev->reg_ofs.ldb_mask = ndev->reg_base + SNB_SDBMSK_OFFSET;
845                 ndev->reg_ofs.spad_write = ndev->reg_base + SNB_SPAD_OFFSET;
846                 /* Offset the start of the spads to correspond to whether it is
847                  * primary or secondary
848                  */
849                 ndev->reg_ofs.spad_read = ndev->reg_base + SNB_SPAD_OFFSET +
850                                           ndev->limits.max_spads * 4;
851                 ndev->reg_ofs.bar2_xlat = ndev->reg_base + SNB_PBAR2XLAT_OFFSET;
852                 ndev->reg_ofs.bar4_xlat = ndev->reg_base + SNB_PBAR4XLAT_OFFSET;
853
854                 ndev->limits.max_mw = SNB_MAX_MW;
855                 break;
856         default:
857                 /* Most likely caused by the remote NTB-RP device not being
858                  * configured
859                  */
860                 dev_err(&ndev->pdev->dev, "Unknown PPD %x\n", val);
861                 return -EINVAL;
862         }
863
864         ndev->reg_ofs.lnk_cntl = ndev->reg_base + SNB_NTBCNTL_OFFSET;
865         ndev->reg_ofs.lnk_stat = ndev->reg_base + SNB_SLINK_STATUS_OFFSET;
866         ndev->reg_ofs.spci_cmd = ndev->reg_base + SNB_PCICMD_OFFSET;
867
868         ndev->limits.msix_cnt = SNB_MSIX_CNT;
869         ndev->bits_per_vector = SNB_DB_BITS_PER_VEC;
870
871         return 0;
872 }
873
874 static int ntb_bwd_setup(struct ntb_device *ndev)
875 {
876         int rc;
877         u32 val;
878
879         ndev->hw_type = BWD_HW;
880
881         rc = pci_read_config_dword(ndev->pdev, NTB_PPD_OFFSET, &val);
882         if (rc)
883                 return rc;
884
885         switch ((val & BWD_PPD_CONN_TYPE) >> 8) {
886         case NTB_CONN_B2B:
887                 ndev->conn_type = NTB_CONN_B2B;
888                 break;
889         case NTB_CONN_RP:
890         default:
891                 dev_err(&ndev->pdev->dev, "Unsupported NTB configuration\n");
892                 return -EINVAL;
893         }
894
895         if (val & BWD_PPD_DEV_TYPE)
896                 ndev->dev_type = NTB_DEV_DSD;
897         else
898                 ndev->dev_type = NTB_DEV_USD;
899
900         /* Initiate PCI-E link training */
901         rc = pci_write_config_dword(ndev->pdev, NTB_PPD_OFFSET,
902                                     val | BWD_PPD_INIT_LINK);
903         if (rc)
904                 return rc;
905
906         ndev->reg_ofs.ldb = ndev->reg_base + BWD_PDOORBELL_OFFSET;
907         ndev->reg_ofs.ldb_mask = ndev->reg_base + BWD_PDBMSK_OFFSET;
908         ndev->reg_ofs.rdb = ndev->reg_base + BWD_B2B_DOORBELL_OFFSET;
909         ndev->reg_ofs.bar2_xlat = ndev->reg_base + BWD_SBAR2XLAT_OFFSET;
910         ndev->reg_ofs.bar4_xlat = ndev->reg_base + BWD_SBAR4XLAT_OFFSET;
911         ndev->reg_ofs.lnk_cntl = ndev->reg_base + BWD_NTBCNTL_OFFSET;
912         ndev->reg_ofs.lnk_stat = ndev->reg_base + BWD_LINK_STATUS_OFFSET;
913         ndev->reg_ofs.spad_read = ndev->reg_base + BWD_SPAD_OFFSET;
914         ndev->reg_ofs.spad_write = ndev->reg_base + BWD_B2B_SPAD_OFFSET;
915         ndev->reg_ofs.spci_cmd = ndev->reg_base + BWD_PCICMD_OFFSET;
916         ndev->limits.max_mw = BWD_MAX_MW;
917         ndev->limits.max_spads = BWD_MAX_SPADS;
918         ndev->limits.max_db_bits = BWD_MAX_DB_BITS;
919         ndev->limits.msix_cnt = BWD_MSIX_CNT;
920         ndev->bits_per_vector = BWD_DB_BITS_PER_VEC;
921
922         /* Since bwd doesn't have a link interrupt, setup a poll timer */
923         INIT_DELAYED_WORK(&ndev->hb_timer, bwd_link_poll);
924         INIT_DELAYED_WORK(&ndev->lr_timer, bwd_link_recovery);
925         schedule_delayed_work(&ndev->hb_timer, NTB_HB_TIMEOUT);
926
927         return 0;
928 }
929
930 static int ntb_device_setup(struct ntb_device *ndev)
931 {
932         int rc;
933
934         switch (ndev->pdev->device) {
935         case PCI_DEVICE_ID_INTEL_NTB_SS_JSF:
936         case PCI_DEVICE_ID_INTEL_NTB_SS_SNB:
937         case PCI_DEVICE_ID_INTEL_NTB_SS_IVT:
938         case PCI_DEVICE_ID_INTEL_NTB_SS_HSX:
939         case PCI_DEVICE_ID_INTEL_NTB_PS_JSF:
940         case PCI_DEVICE_ID_INTEL_NTB_PS_SNB:
941         case PCI_DEVICE_ID_INTEL_NTB_PS_IVT:
942         case PCI_DEVICE_ID_INTEL_NTB_PS_HSX:
943         case PCI_DEVICE_ID_INTEL_NTB_B2B_JSF:
944         case PCI_DEVICE_ID_INTEL_NTB_B2B_SNB:
945         case PCI_DEVICE_ID_INTEL_NTB_B2B_IVT:
946         case PCI_DEVICE_ID_INTEL_NTB_B2B_HSX:
947                 rc = ntb_xeon_setup(ndev);
948                 break;
949         case PCI_DEVICE_ID_INTEL_NTB_B2B_BWD:
950                 rc = ntb_bwd_setup(ndev);
951                 break;
952         default:
953                 rc = -ENODEV;
954         }
955
956         if (rc)
957                 return rc;
958
959         dev_info(&ndev->pdev->dev, "Device Type = %s\n",
960                  ndev->dev_type == NTB_DEV_USD ? "USD/DSP" : "DSD/USP");
961
962         if (ndev->conn_type == NTB_CONN_B2B)
963                 /* Enable Bus Master and Memory Space on the secondary side */
964                 writew(PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER,
965                        ndev->reg_ofs.spci_cmd);
966
967         return 0;
968 }
969
970 static void ntb_device_free(struct ntb_device *ndev)
971 {
972         if (ndev->hw_type == BWD_HW) {
973                 cancel_delayed_work_sync(&ndev->hb_timer);
974                 cancel_delayed_work_sync(&ndev->lr_timer);
975         }
976 }
977
978 static irqreturn_t bwd_callback_msix_irq(int irq, void *data)
979 {
980         struct ntb_db_cb *db_cb = data;
981         struct ntb_device *ndev = db_cb->ndev;
982         unsigned long mask;
983
984         dev_dbg(&ndev->pdev->dev, "MSI-X irq %d received for DB %d\n", irq,
985                 db_cb->db_num);
986
987         mask = readw(ndev->reg_ofs.ldb_mask);
988         set_bit(db_cb->db_num * ndev->bits_per_vector, &mask);
989         writew(mask, ndev->reg_ofs.ldb_mask);
990
991         tasklet_schedule(&db_cb->irq_work);
992
993         /* No need to check for the specific HB irq, any interrupt means
994          * we're connected.
995          */
996         ndev->last_ts = jiffies;
997
998         writeq((u64) 1 << db_cb->db_num, ndev->reg_ofs.ldb);
999
1000         return IRQ_HANDLED;
1001 }
1002
1003 static irqreturn_t xeon_callback_msix_irq(int irq, void *data)
1004 {
1005         struct ntb_db_cb *db_cb = data;
1006         struct ntb_device *ndev = db_cb->ndev;
1007         unsigned long mask;
1008
1009         dev_dbg(&ndev->pdev->dev, "MSI-X irq %d received for DB %d\n", irq,
1010                 db_cb->db_num);
1011
1012         mask = readw(ndev->reg_ofs.ldb_mask);
1013         set_bit(db_cb->db_num * ndev->bits_per_vector, &mask);
1014         writew(mask, ndev->reg_ofs.ldb_mask);
1015
1016         tasklet_schedule(&db_cb->irq_work);
1017
1018         /* On Sandybridge, there are 16 bits in the interrupt register
1019          * but only 4 vectors.  So, 5 bits are assigned to the first 3
1020          * vectors, with the 4th having a single bit for link
1021          * interrupts.
1022          */
1023         writew(((1 << ndev->bits_per_vector) - 1) <<
1024                (db_cb->db_num * ndev->bits_per_vector), ndev->reg_ofs.ldb);
1025
1026         return IRQ_HANDLED;
1027 }
1028
1029 /* Since we do not have a HW doorbell in BWD, this is only used in JF/JT */
1030 static irqreturn_t xeon_event_msix_irq(int irq, void *dev)
1031 {
1032         struct ntb_device *ndev = dev;
1033         int rc;
1034
1035         dev_dbg(&ndev->pdev->dev, "MSI-X irq %d received for Events\n", irq);
1036
1037         rc = ntb_link_status(ndev);
1038         if (rc)
1039                 dev_err(&ndev->pdev->dev, "Error determining link status\n");
1040
1041         /* bit 15 is always the link bit */
1042         writew(1 << SNB_LINK_DB, ndev->reg_ofs.ldb);
1043
1044         return IRQ_HANDLED;
1045 }
1046
1047 static irqreturn_t ntb_interrupt(int irq, void *dev)
1048 {
1049         struct ntb_device *ndev = dev;
1050         unsigned int i = 0;
1051
1052         if (ndev->hw_type == BWD_HW) {
1053                 u64 ldb = readq(ndev->reg_ofs.ldb);
1054
1055                 dev_dbg(&ndev->pdev->dev, "irq %d - ldb = %Lx\n", irq, ldb);
1056
1057                 while (ldb) {
1058                         i = __ffs(ldb);
1059                         ldb &= ldb - 1;
1060                         bwd_callback_msix_irq(irq, &ndev->db_cb[i]);
1061                 }
1062         } else {
1063                 u16 ldb = readw(ndev->reg_ofs.ldb);
1064
1065                 dev_dbg(&ndev->pdev->dev, "irq %d - ldb = %x\n", irq, ldb);
1066
1067                 if (ldb & SNB_DB_HW_LINK) {
1068                         xeon_event_msix_irq(irq, dev);
1069                         ldb &= ~SNB_DB_HW_LINK;
1070                 }
1071
1072                 while (ldb) {
1073                         i = __ffs(ldb);
1074                         ldb &= ldb - 1;
1075                         xeon_callback_msix_irq(irq, &ndev->db_cb[i]);
1076                 }
1077         }
1078
1079         return IRQ_HANDLED;
1080 }
1081
1082 static int ntb_setup_msix(struct ntb_device *ndev)
1083 {
1084         struct pci_dev *pdev = ndev->pdev;
1085         struct msix_entry *msix;
1086         int msix_entries;
1087         int rc, i;
1088         u16 val;
1089
1090         if (!pdev->msix_cap) {
1091                 rc = -EIO;
1092                 goto err;
1093         }
1094
1095         rc = pci_read_config_word(pdev, pdev->msix_cap + PCI_MSIX_FLAGS, &val);
1096         if (rc)
1097                 goto err;
1098
1099         msix_entries = msix_table_size(val);
1100         if (msix_entries > ndev->limits.msix_cnt) {
1101                 rc = -EINVAL;
1102                 goto err;
1103         }
1104
1105         ndev->msix_entries = kmalloc(sizeof(struct msix_entry) * msix_entries,
1106                                      GFP_KERNEL);
1107         if (!ndev->msix_entries) {
1108                 rc = -ENOMEM;
1109                 goto err;
1110         }
1111
1112         for (i = 0; i < msix_entries; i++)
1113                 ndev->msix_entries[i].entry = i;
1114
1115         rc = pci_enable_msix(pdev, ndev->msix_entries, msix_entries);
1116         if (rc < 0)
1117                 goto err1;
1118         if (rc > 0) {
1119                 /* On SNB, the link interrupt is always tied to 4th vector.  If
1120                  * we can't get all 4, then we can't use MSI-X.
1121                  */
1122                 if (ndev->hw_type != BWD_HW) {
1123                         rc = -EIO;
1124                         goto err1;
1125                 }
1126
1127                 dev_warn(&pdev->dev,
1128                          "Only %d MSI-X vectors.  Limiting the number of queues to that number.\n",
1129                          rc);
1130                 msix_entries = rc;
1131
1132                 rc = pci_enable_msix(pdev, ndev->msix_entries, msix_entries);
1133                 if (rc)
1134                         goto err1;
1135         }
1136
1137         for (i = 0; i < msix_entries; i++) {
1138                 msix = &ndev->msix_entries[i];
1139                 WARN_ON(!msix->vector);
1140
1141                 /* Use the last MSI-X vector for Link status */
1142                 if (ndev->hw_type == BWD_HW) {
1143                         rc = request_irq(msix->vector, bwd_callback_msix_irq, 0,
1144                                          "ntb-callback-msix", &ndev->db_cb[i]);
1145                         if (rc)
1146                                 goto err2;
1147                 } else {
1148                         if (i == msix_entries - 1) {
1149                                 rc = request_irq(msix->vector,
1150                                                  xeon_event_msix_irq, 0,
1151                                                  "ntb-event-msix", ndev);
1152                                 if (rc)
1153                                         goto err2;
1154                         } else {
1155                                 rc = request_irq(msix->vector,
1156                                                  xeon_callback_msix_irq, 0,
1157                                                  "ntb-callback-msix",
1158                                                  &ndev->db_cb[i]);
1159                                 if (rc)
1160                                         goto err2;
1161                         }
1162                 }
1163         }
1164
1165         ndev->num_msix = msix_entries;
1166         if (ndev->hw_type == BWD_HW)
1167                 ndev->max_cbs = msix_entries;
1168         else
1169                 ndev->max_cbs = msix_entries - 1;
1170
1171         return 0;
1172
1173 err2:
1174         while (--i >= 0) {
1175                 msix = &ndev->msix_entries[i];
1176                 if (ndev->hw_type != BWD_HW && i == ndev->num_msix - 1)
1177                         free_irq(msix->vector, ndev);
1178                 else
1179                         free_irq(msix->vector, &ndev->db_cb[i]);
1180         }
1181         pci_disable_msix(pdev);
1182 err1:
1183         kfree(ndev->msix_entries);
1184         dev_err(&pdev->dev, "Error allocating MSI-X interrupt\n");
1185 err:
1186         ndev->num_msix = 0;
1187         return rc;
1188 }
1189
1190 static int ntb_setup_msi(struct ntb_device *ndev)
1191 {
1192         struct pci_dev *pdev = ndev->pdev;
1193         int rc;
1194
1195         rc = pci_enable_msi(pdev);
1196         if (rc)
1197                 return rc;
1198
1199         rc = request_irq(pdev->irq, ntb_interrupt, 0, "ntb-msi", ndev);
1200         if (rc) {
1201                 pci_disable_msi(pdev);
1202                 dev_err(&pdev->dev, "Error allocating MSI interrupt\n");
1203                 return rc;
1204         }
1205
1206         return 0;
1207 }
1208
1209 static int ntb_setup_intx(struct ntb_device *ndev)
1210 {
1211         struct pci_dev *pdev = ndev->pdev;
1212         int rc;
1213
1214         pci_msi_off(pdev);
1215
1216         /* Verify intx is enabled */
1217         pci_intx(pdev, 1);
1218
1219         rc = request_irq(pdev->irq, ntb_interrupt, IRQF_SHARED, "ntb-intx",
1220                          ndev);
1221         if (rc)
1222                 return rc;
1223
1224         return 0;
1225 }
1226
1227 static int ntb_setup_interrupts(struct ntb_device *ndev)
1228 {
1229         int rc;
1230
1231         /* On BWD, disable all interrupts.  On SNB, disable all but Link
1232          * Interrupt.  The rest will be unmasked as callbacks are registered.
1233          */
1234         if (ndev->hw_type == BWD_HW)
1235                 writeq(~0, ndev->reg_ofs.ldb_mask);
1236         else {
1237                 u16 var = 1 << SNB_LINK_DB;
1238                 writew(~var, ndev->reg_ofs.ldb_mask);
1239         }
1240
1241         rc = ntb_setup_msix(ndev);
1242         if (!rc)
1243                 goto done;
1244
1245         ndev->bits_per_vector = 1;
1246         ndev->max_cbs = ndev->limits.max_db_bits;
1247
1248         rc = ntb_setup_msi(ndev);
1249         if (!rc)
1250                 goto done;
1251
1252         rc = ntb_setup_intx(ndev);
1253         if (rc) {
1254                 dev_err(&ndev->pdev->dev, "no usable interrupts\n");
1255                 return rc;
1256         }
1257
1258 done:
1259         return 0;
1260 }
1261
1262 static void ntb_free_interrupts(struct ntb_device *ndev)
1263 {
1264         struct pci_dev *pdev = ndev->pdev;
1265
1266         /* mask interrupts */
1267         if (ndev->hw_type == BWD_HW)
1268                 writeq(~0, ndev->reg_ofs.ldb_mask);
1269         else
1270                 writew(~0, ndev->reg_ofs.ldb_mask);
1271
1272         if (ndev->num_msix) {
1273                 struct msix_entry *msix;
1274                 u32 i;
1275
1276                 for (i = 0; i < ndev->num_msix; i++) {
1277                         msix = &ndev->msix_entries[i];
1278                         if (ndev->hw_type != BWD_HW && i == ndev->num_msix - 1)
1279                                 free_irq(msix->vector, ndev);
1280                         else
1281                                 free_irq(msix->vector, &ndev->db_cb[i]);
1282                 }
1283                 pci_disable_msix(pdev);
1284         } else {
1285                 free_irq(pdev->irq, ndev);
1286
1287                 if (pci_dev_msi_enabled(pdev))
1288                         pci_disable_msi(pdev);
1289         }
1290 }
1291
1292 static int ntb_create_callbacks(struct ntb_device *ndev)
1293 {
1294         int i;
1295
1296         /* Chicken-egg issue.  We won't know how many callbacks are necessary
1297          * until we see how many MSI-X vectors we get, but these pointers need
1298          * to be passed into the MSI-X register function.  So, we allocate the
1299          * max, knowing that they might not all be used, to work around this.
1300          */
1301         ndev->db_cb = kcalloc(ndev->limits.max_db_bits,
1302                               sizeof(struct ntb_db_cb),
1303                               GFP_KERNEL);
1304         if (!ndev->db_cb)
1305                 return -ENOMEM;
1306
1307         for (i = 0; i < ndev->limits.max_db_bits; i++) {
1308                 ndev->db_cb[i].db_num = i;
1309                 ndev->db_cb[i].ndev = ndev;
1310         }
1311
1312         return 0;
1313 }
1314
1315 static void ntb_free_callbacks(struct ntb_device *ndev)
1316 {
1317         int i;
1318
1319         for (i = 0; i < ndev->limits.max_db_bits; i++)
1320                 ntb_unregister_db_callback(ndev, i);
1321
1322         kfree(ndev->db_cb);
1323 }
1324
1325 static void ntb_setup_debugfs(struct ntb_device *ndev)
1326 {
1327         if (!debugfs_initialized())
1328                 return;
1329
1330         if (!debugfs_dir)
1331                 debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL);
1332
1333         ndev->debugfs_dir = debugfs_create_dir(pci_name(ndev->pdev),
1334                                                debugfs_dir);
1335 }
1336
1337 static void ntb_free_debugfs(struct ntb_device *ndev)
1338 {
1339         debugfs_remove_recursive(ndev->debugfs_dir);
1340
1341         if (debugfs_dir && simple_empty(debugfs_dir)) {
1342                 debugfs_remove_recursive(debugfs_dir);
1343                 debugfs_dir = NULL;
1344         }
1345 }
1346
1347 static void ntb_hw_link_up(struct ntb_device *ndev)
1348 {
1349         if (ndev->conn_type == NTB_CONN_TRANSPARENT)
1350                 ntb_link_event(ndev, NTB_LINK_UP);
1351         else {
1352                 u32 ntb_cntl;
1353
1354                 /* Let's bring the NTB link up */
1355                 ntb_cntl = readl(ndev->reg_ofs.lnk_cntl);
1356                 ntb_cntl &= ~(NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK);
1357                 ntb_cntl |= NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP;
1358                 ntb_cntl |= NTB_CNTL_P2S_BAR45_SNOOP | NTB_CNTL_S2P_BAR45_SNOOP;
1359                 writel(ntb_cntl, ndev->reg_ofs.lnk_cntl);
1360         }
1361 }
1362
1363 static void ntb_hw_link_down(struct ntb_device *ndev)
1364 {
1365         u32 ntb_cntl;
1366
1367         if (ndev->conn_type == NTB_CONN_TRANSPARENT) {
1368                 ntb_link_event(ndev, NTB_LINK_DOWN);
1369                 return;
1370         }
1371
1372         /* Bring NTB link down */
1373         ntb_cntl = readl(ndev->reg_ofs.lnk_cntl);
1374         ntb_cntl &= ~(NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP);
1375         ntb_cntl &= ~(NTB_CNTL_P2S_BAR45_SNOOP | NTB_CNTL_S2P_BAR45_SNOOP);
1376         ntb_cntl |= NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK;
1377         writel(ntb_cntl, ndev->reg_ofs.lnk_cntl);
1378 }
1379
1380 static int ntb_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
1381 {
1382         struct ntb_device *ndev;
1383         int rc, i;
1384
1385         ndev = kzalloc(sizeof(struct ntb_device), GFP_KERNEL);
1386         if (!ndev)
1387                 return -ENOMEM;
1388
1389         ndev->pdev = pdev;
1390         ndev->link_status = NTB_LINK_DOWN;
1391         pci_set_drvdata(pdev, ndev);
1392         ntb_setup_debugfs(ndev);
1393
1394         rc = pci_enable_device(pdev);
1395         if (rc)
1396                 goto err;
1397
1398         pci_set_master(ndev->pdev);
1399
1400         rc = pci_request_selected_regions(pdev, NTB_BAR_MASK, KBUILD_MODNAME);
1401         if (rc)
1402                 goto err1;
1403
1404         ndev->reg_base = pci_ioremap_bar(pdev, NTB_BAR_MMIO);
1405         if (!ndev->reg_base) {
1406                 dev_warn(&pdev->dev, "Cannot remap BAR 0\n");
1407                 rc = -EIO;
1408                 goto err2;
1409         }
1410
1411         for (i = 0; i < NTB_MAX_NUM_MW; i++) {
1412                 ndev->mw[i].bar_sz = pci_resource_len(pdev, MW_TO_BAR(i));
1413                 ndev->mw[i].vbase =
1414                     ioremap_wc(pci_resource_start(pdev, MW_TO_BAR(i)),
1415                                ndev->mw[i].bar_sz);
1416                 dev_info(&pdev->dev, "MW %d size %llu\n", i,
1417                          (unsigned long long) ndev->mw[i].bar_sz);
1418                 if (!ndev->mw[i].vbase) {
1419                         dev_warn(&pdev->dev, "Cannot remap BAR %d\n",
1420                                  MW_TO_BAR(i));
1421                         rc = -EIO;
1422                         goto err3;
1423                 }
1424         }
1425
1426         rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
1427         if (rc) {
1428                 rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
1429                 if (rc)
1430                         goto err3;
1431
1432                 dev_warn(&pdev->dev, "Cannot DMA highmem\n");
1433         }
1434
1435         rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
1436         if (rc) {
1437                 rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
1438                 if (rc)
1439                         goto err3;
1440
1441                 dev_warn(&pdev->dev, "Cannot DMA consistent highmem\n");
1442         }
1443
1444         rc = ntb_device_setup(ndev);
1445         if (rc)
1446                 goto err3;
1447
1448         rc = ntb_create_callbacks(ndev);
1449         if (rc)
1450                 goto err4;
1451
1452         rc = ntb_setup_interrupts(ndev);
1453         if (rc)
1454                 goto err5;
1455
1456         /* The scratchpad registers keep the values between rmmod/insmod,
1457          * blast them now
1458          */
1459         for (i = 0; i < ndev->limits.max_spads; i++) {
1460                 ntb_write_local_spad(ndev, i, 0);
1461                 ntb_write_remote_spad(ndev, i, 0);
1462         }
1463
1464         rc = ntb_transport_init(pdev);
1465         if (rc)
1466                 goto err6;
1467
1468         ntb_hw_link_up(ndev);
1469
1470         return 0;
1471
1472 err6:
1473         ntb_free_interrupts(ndev);
1474 err5:
1475         ntb_free_callbacks(ndev);
1476 err4:
1477         ntb_device_free(ndev);
1478 err3:
1479         for (i--; i >= 0; i--)
1480                 iounmap(ndev->mw[i].vbase);
1481         iounmap(ndev->reg_base);
1482 err2:
1483         pci_release_selected_regions(pdev, NTB_BAR_MASK);
1484 err1:
1485         pci_disable_device(pdev);
1486 err:
1487         ntb_free_debugfs(ndev);
1488         kfree(ndev);
1489
1490         dev_err(&pdev->dev, "Error loading %s module\n", KBUILD_MODNAME);
1491         return rc;
1492 }
1493
1494 static void ntb_pci_remove(struct pci_dev *pdev)
1495 {
1496         struct ntb_device *ndev = pci_get_drvdata(pdev);
1497         int i;
1498
1499         ntb_hw_link_down(ndev);
1500
1501         ntb_transport_free(ndev->ntb_transport);
1502
1503         ntb_free_interrupts(ndev);
1504         ntb_free_callbacks(ndev);
1505         ntb_device_free(ndev);
1506
1507         for (i = 0; i < NTB_MAX_NUM_MW; i++)
1508                 iounmap(ndev->mw[i].vbase);
1509
1510         iounmap(ndev->reg_base);
1511         pci_release_selected_regions(pdev, NTB_BAR_MASK);
1512         pci_disable_device(pdev);
1513         ntb_free_debugfs(ndev);
1514         kfree(ndev);
1515 }
1516
1517 static struct pci_driver ntb_pci_driver = {
1518         .name = KBUILD_MODNAME,
1519         .id_table = ntb_pci_tbl,
1520         .probe = ntb_pci_probe,
1521         .remove = ntb_pci_remove,
1522 };
1523 module_pci_driver(ntb_pci_driver);