/*
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

/*
 * This file contains all of the code that is specific to the HFI chip
 */

#include <linux/pci.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/module.h>

#include "hfi.h"
#include "trace.h"
#include "mad.h"
#include "pio.h"
#include "sdma.h"
#include "eprom.h"

#define NUM_IB_PORTS 1

uint kdeth_qp;
module_param_named(kdeth_qp, kdeth_qp, uint, S_IRUGO);
MODULE_PARM_DESC(kdeth_qp, "Set the KDETH queue pair prefix");

uint num_vls = HFI1_MAX_VLS_SUPPORTED;
module_param(num_vls, uint, S_IRUGO);
MODULE_PARM_DESC(num_vls, "Set number of Virtual Lanes to use (1-8)");

/*
 * Default time to aggregate two 10K packets from the idle state
 * (timer not running). The timer starts at the end of the first packet,
 * so only the time for one 10K packet and header plus a bit extra is needed.
 * 10 * 1024 + 64 header bytes = 10304 bytes
 * 10304 bytes / 12.5 GB/s = 824.32 ns
 */
uint rcv_intr_timeout = (824 + 16); /* 16 is for coalescing interrupt */
module_param(rcv_intr_timeout, uint, S_IRUGO);
MODULE_PARM_DESC(rcv_intr_timeout, "Receive interrupt mitigation timeout in ns");

uint rcv_intr_count = 16; /* same as qib */
module_param(rcv_intr_count, uint, S_IRUGO);
MODULE_PARM_DESC(rcv_intr_count, "Receive interrupt mitigation count");

ushort link_crc_mask = SUPPORTED_CRCS;
module_param(link_crc_mask, ushort, S_IRUGO);
MODULE_PARM_DESC(link_crc_mask, "CRCs to use on the link");

uint loopback;
module_param_named(loopback, loopback, uint, S_IRUGO);
MODULE_PARM_DESC(loopback, "Put into loopback mode (1 = serdes, 3 = external cable)");

/* Other driver tunables */
uint rcv_intr_dynamic = 1; /* enable dynamic mode for rcv int mitigation */
static ushort crc_14b_sideband = 1;
static uint use_flr = 1;
uint quick_linkup; /* skip LNI */

struct flag_table {
        u64 flag;       /* the flag */
        char *str;      /* description string */
        u16 extra;      /* extra information */
        u16 unused0;
        u32 unused1;
};

/* str must be a string constant */
#define FLAG_ENTRY(str, extra, flag) {flag, str, extra}
#define FLAG_ENTRY0(str, flag) {flag, str, 0}
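
/*
 * Illustrative sketch, not part of the driver: tables built from
 * FLAG_ENTRY()/FLAG_ENTRY0() are typically consumed by a decoder that
 * walks the table, tests each flag bit against a status value, and
 * appends the matching description strings.  The function name and
 * buffer handling below are assumptions for illustration only, so the
 * block is compiled out.
 */
#if 0   /* example only */
static char *example_flag_string(char *buf, size_t len, u64 status,
                                 struct flag_table *table, int nentries)
{
        char *p = buf;
        int i;

        *p = '\0';
        for (i = 0; i < nentries; i++) {
                if (status & table[i].flag) {
                        if (p != buf)   /* separate matches with commas */
                                p += scnprintf(p, len - (p - buf), ",");
                        p += scnprintf(p, len - (p - buf), "%s",
                                       table[i].str);
                }
        }
        return buf;
}
#endif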

/* Send Error Consequences */
#define SEC_WRITE_DROPPED       0x1
#define SEC_PACKET_DROPPED      0x2
#define SEC_SC_HALTED           0x4     /* per-context only */
#define SEC_SPC_FREEZE          0x8     /* per-HFI only */

#define VL15CTXT                  1
#define MIN_KERNEL_KCTXTS         2
#define NUM_MAP_REGS             32

/* Bit offset into the GUID which carries HFI id information */
#define GUID_HFI_INDEX_SHIFT     39

/* extract the emulation revision */
#define emulator_rev(dd) ((dd)->irev >> 8)
/* parallel and serial emulation versions are 3 and 4 respectively */
#define is_emulator_p(dd) ((((dd)->irev) & 0xf) == 3)
#define is_emulator_s(dd) ((((dd)->irev) & 0xf) == 4)

/* RSM fields */

/* packet type */
#define IB_PACKET_TYPE         2ull
#define QW_SHIFT               6ull
/* QPN[7..1] */
#define QPN_WIDTH              7ull

/* LRH.BTH: QW 0, OFFSET 48 - for match */
#define LRH_BTH_QW             0ull
#define LRH_BTH_BIT_OFFSET     48ull
#define LRH_BTH_OFFSET(off)    ((LRH_BTH_QW << QW_SHIFT) | (off))
#define LRH_BTH_MATCH_OFFSET   LRH_BTH_OFFSET(LRH_BTH_BIT_OFFSET)
#define LRH_BTH_SELECT
#define LRH_BTH_MASK           3ull
#define LRH_BTH_VALUE          2ull

/* LRH.SC[3..0] QW 0, OFFSET 56 - for match */
#define LRH_SC_QW              0ull
#define LRH_SC_BIT_OFFSET      56ull
#define LRH_SC_OFFSET(off)     ((LRH_SC_QW << QW_SHIFT) | (off))
#define LRH_SC_MATCH_OFFSET    LRH_SC_OFFSET(LRH_SC_BIT_OFFSET)
#define LRH_SC_MASK            128ull
#define LRH_SC_VALUE           0ull

/* SC[n..0] QW 0, OFFSET 60 - for select */
#define LRH_SC_SELECT_OFFSET  ((LRH_SC_QW << QW_SHIFT) | (60ull))

/* QPN[m+n:1] QW 1, OFFSET 1 */
#define QPN_SELECT_OFFSET      ((1ull << QW_SHIFT) | (1ull))
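
/*
 * For example, LRH_BTH_MATCH_OFFSET encodes QW 0, bit offset 48 as
 * (0 << 6) | 48 = 48, and QPN_SELECT_OFFSET encodes QW 1, bit offset 1
 * as (1 << 6) | 1 = 65.
 */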

/* defines to build power on SC2VL table */
#define SC2VL_VAL( \
        num, \
        sc0, sc0val, \
        sc1, sc1val, \
        sc2, sc2val, \
        sc3, sc3val, \
        sc4, sc4val, \
        sc5, sc5val, \
        sc6, sc6val, \
        sc7, sc7val) \
( \
        ((u64)(sc0val) << SEND_SC2VLT##num##_SC##sc0##_SHIFT) | \
        ((u64)(sc1val) << SEND_SC2VLT##num##_SC##sc1##_SHIFT) | \
        ((u64)(sc2val) << SEND_SC2VLT##num##_SC##sc2##_SHIFT) | \
        ((u64)(sc3val) << SEND_SC2VLT##num##_SC##sc3##_SHIFT) | \
        ((u64)(sc4val) << SEND_SC2VLT##num##_SC##sc4##_SHIFT) | \
        ((u64)(sc5val) << SEND_SC2VLT##num##_SC##sc5##_SHIFT) | \
        ((u64)(sc6val) << SEND_SC2VLT##num##_SC##sc6##_SHIFT) | \
        ((u64)(sc7val) << SEND_SC2VLT##num##_SC##sc7##_SHIFT)   \
)
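
/*
 * Illustrative example (assumed arguments): SC2VL_VAL(0, 0, 0, 1, 1,
 * 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7) builds a SendSC2VLt0 value that
 * maps SC0..SC7 straight through to VL0..VL7 by expanding to
 * ((u64)0 << SEND_SC2VLT0_SC0_SHIFT) |
 * ((u64)1 << SEND_SC2VLT0_SC1_SHIFT) | ... |
 * ((u64)7 << SEND_SC2VLT0_SC7_SHIFT).
 */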

#define DC_SC_VL_VAL( \
        range, \
        e0, e0val, \
        e1, e1val, \
        e2, e2val, \
        e3, e3val, \
        e4, e4val, \
        e5, e5val, \
        e6, e6val, \
        e7, e7val, \
        e8, e8val, \
        e9, e9val, \
        e10, e10val, \
        e11, e11val, \
        e12, e12val, \
        e13, e13val, \
        e14, e14val, \
        e15, e15val) \
( \
        ((u64)(e0val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e0##_SHIFT) | \
        ((u64)(e1val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e1##_SHIFT) | \
        ((u64)(e2val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e2##_SHIFT) | \
        ((u64)(e3val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e3##_SHIFT) | \
        ((u64)(e4val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e4##_SHIFT) | \
        ((u64)(e5val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e5##_SHIFT) | \
        ((u64)(e6val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e6##_SHIFT) | \
        ((u64)(e7val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e7##_SHIFT) | \
        ((u64)(e8val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e8##_SHIFT) | \
        ((u64)(e9val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e9##_SHIFT) | \
        ((u64)(e10val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e10##_SHIFT) | \
        ((u64)(e11val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e11##_SHIFT) | \
        ((u64)(e12val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e12##_SHIFT) | \
        ((u64)(e13val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e13##_SHIFT) | \
        ((u64)(e14val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e14##_SHIFT) | \
        ((u64)(e15val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e15##_SHIFT) \
)

/* all CceStatus sub-block freeze bits */
#define ALL_FROZE (CCE_STATUS_SDMA_FROZE_SMASK \
                        | CCE_STATUS_RXE_FROZE_SMASK \
                        | CCE_STATUS_TXE_FROZE_SMASK \
                        | CCE_STATUS_TXE_PIO_FROZE_SMASK)
/* all CceStatus sub-block TXE pause bits */
#define ALL_TXE_PAUSE (CCE_STATUS_TXE_PIO_PAUSED_SMASK \
                        | CCE_STATUS_TXE_PAUSED_SMASK \
                        | CCE_STATUS_SDMA_PAUSED_SMASK)
/* all CceStatus sub-block RXE pause bits */
#define ALL_RXE_PAUSE CCE_STATUS_RXE_PAUSED_SMASK

/*
 * CCE Error flags.
 */
static struct flag_table cce_err_status_flags[] = {
/* 0*/  FLAG_ENTRY0("CceCsrParityErr",
                CCE_ERR_STATUS_CCE_CSR_PARITY_ERR_SMASK),
/* 1*/  FLAG_ENTRY0("CceCsrReadBadAddrErr",
                CCE_ERR_STATUS_CCE_CSR_READ_BAD_ADDR_ERR_SMASK),
/* 2*/  FLAG_ENTRY0("CceCsrWriteBadAddrErr",
                CCE_ERR_STATUS_CCE_CSR_WRITE_BAD_ADDR_ERR_SMASK),
/* 3*/  FLAG_ENTRY0("CceTrgtAsyncFifoParityErr",
                CCE_ERR_STATUS_CCE_TRGT_ASYNC_FIFO_PARITY_ERR_SMASK),
/* 4*/  FLAG_ENTRY0("CceTrgtAccessErr",
                CCE_ERR_STATUS_CCE_TRGT_ACCESS_ERR_SMASK),
/* 5*/  FLAG_ENTRY0("CceRspdDataParityErr",
                CCE_ERR_STATUS_CCE_RSPD_DATA_PARITY_ERR_SMASK),
/* 6*/  FLAG_ENTRY0("CceCli0AsyncFifoParityErr",
                CCE_ERR_STATUS_CCE_CLI0_ASYNC_FIFO_PARITY_ERR_SMASK),
/* 7*/  FLAG_ENTRY0("CceCsrCfgBusParityErr",
                CCE_ERR_STATUS_CCE_CSR_CFG_BUS_PARITY_ERR_SMASK),
/* 8*/  FLAG_ENTRY0("CceCli2AsyncFifoParityErr",
                CCE_ERR_STATUS_CCE_CLI2_ASYNC_FIFO_PARITY_ERR_SMASK),
/* 9*/  FLAG_ENTRY0("CceCli1AsyncFifoPioCrdtParityErr",
            CCE_ERR_STATUS_CCE_CLI1_ASYNC_FIFO_PIO_CRDT_PARITY_ERR_SMASK),
/*10*/  FLAG_ENTRY0("CceCli1AsyncFifoSdmaHdParityErr",
            CCE_ERR_STATUS_CCE_CLI1_ASYNC_FIFO_SDMA_HD_PARITY_ERR_SMASK),
/*11*/  FLAG_ENTRY0("CceCli1AsyncFifoRxdmaParityError",
            CCE_ERR_STATUS_CCE_CLI1_ASYNC_FIFO_RXDMA_PARITY_ERROR_SMASK),
/*12*/  FLAG_ENTRY0("CceCli1AsyncFifoDbgParityError",
                CCE_ERR_STATUS_CCE_CLI1_ASYNC_FIFO_DBG_PARITY_ERROR_SMASK),
/*13*/  FLAG_ENTRY0("PcicRetryMemCorErr",
                CCE_ERR_STATUS_PCIC_RETRY_MEM_COR_ERR_SMASK),
/*14*/  FLAG_ENTRY0("PcicRetrySotMemCorErr",
                CCE_ERR_STATUS_PCIC_RETRY_SOT_MEM_COR_ERR_SMASK),
/*15*/  FLAG_ENTRY0("PcicPostHdQCorErr",
                CCE_ERR_STATUS_PCIC_POST_HD_QCOR_ERR_SMASK),
/*16*/  FLAG_ENTRY0("PcicPostDatQCorErr",
                CCE_ERR_STATUS_PCIC_POST_DAT_QCOR_ERR_SMASK),
/*17*/  FLAG_ENTRY0("PcicCplHdQCorErr",
                CCE_ERR_STATUS_PCIC_CPL_HD_QCOR_ERR_SMASK),
/*18*/  FLAG_ENTRY0("PcicCplDatQCorErr",
                CCE_ERR_STATUS_PCIC_CPL_DAT_QCOR_ERR_SMASK),
/*19*/  FLAG_ENTRY0("PcicNPostHQParityErr",
                CCE_ERR_STATUS_PCIC_NPOST_HQ_PARITY_ERR_SMASK),
/*20*/  FLAG_ENTRY0("PcicNPostDatQParityErr",
                CCE_ERR_STATUS_PCIC_NPOST_DAT_QPARITY_ERR_SMASK),
/*21*/  FLAG_ENTRY0("PcicRetryMemUncErr",
                CCE_ERR_STATUS_PCIC_RETRY_MEM_UNC_ERR_SMASK),
/*22*/  FLAG_ENTRY0("PcicRetrySotMemUncErr",
                CCE_ERR_STATUS_PCIC_RETRY_SOT_MEM_UNC_ERR_SMASK),
/*23*/  FLAG_ENTRY0("PcicPostHdQUncErr",
                CCE_ERR_STATUS_PCIC_POST_HD_QUNC_ERR_SMASK),
/*24*/  FLAG_ENTRY0("PcicPostDatQUncErr",
                CCE_ERR_STATUS_PCIC_POST_DAT_QUNC_ERR_SMASK),
/*25*/  FLAG_ENTRY0("PcicCplHdQUncErr",
                CCE_ERR_STATUS_PCIC_CPL_HD_QUNC_ERR_SMASK),
/*26*/  FLAG_ENTRY0("PcicCplDatQUncErr",
                CCE_ERR_STATUS_PCIC_CPL_DAT_QUNC_ERR_SMASK),
/*27*/  FLAG_ENTRY0("PcicTransmitFrontParityErr",
                CCE_ERR_STATUS_PCIC_TRANSMIT_FRONT_PARITY_ERR_SMASK),
/*28*/  FLAG_ENTRY0("PcicTransmitBackParityErr",
                CCE_ERR_STATUS_PCIC_TRANSMIT_BACK_PARITY_ERR_SMASK),
/*29*/  FLAG_ENTRY0("PcicReceiveParityErr",
                CCE_ERR_STATUS_PCIC_RECEIVE_PARITY_ERR_SMASK),
/*30*/  FLAG_ENTRY0("CceTrgtCplTimeoutErr",
                CCE_ERR_STATUS_CCE_TRGT_CPL_TIMEOUT_ERR_SMASK),
/*31*/  FLAG_ENTRY0("LATriggered",
                CCE_ERR_STATUS_LA_TRIGGERED_SMASK),
/*32*/  FLAG_ENTRY0("CceSegReadBadAddrErr",
                CCE_ERR_STATUS_CCE_SEG_READ_BAD_ADDR_ERR_SMASK),
/*33*/  FLAG_ENTRY0("CceSegWriteBadAddrErr",
                CCE_ERR_STATUS_CCE_SEG_WRITE_BAD_ADDR_ERR_SMASK),
/*34*/  FLAG_ENTRY0("CceRcplAsyncFifoParityErr",
                CCE_ERR_STATUS_CCE_RCPL_ASYNC_FIFO_PARITY_ERR_SMASK),
/*35*/  FLAG_ENTRY0("CceRxdmaConvFifoParityErr",
                CCE_ERR_STATUS_CCE_RXDMA_CONV_FIFO_PARITY_ERR_SMASK),
/*36*/  FLAG_ENTRY0("CceMsixTableCorErr",
                CCE_ERR_STATUS_CCE_MSIX_TABLE_COR_ERR_SMASK),
/*37*/  FLAG_ENTRY0("CceMsixTableUncErr",
                CCE_ERR_STATUS_CCE_MSIX_TABLE_UNC_ERR_SMASK),
/*38*/  FLAG_ENTRY0("CceIntMapCorErr",
                CCE_ERR_STATUS_CCE_INT_MAP_COR_ERR_SMASK),
/*39*/  FLAG_ENTRY0("CceIntMapUncErr",
                CCE_ERR_STATUS_CCE_INT_MAP_UNC_ERR_SMASK),
/*40*/  FLAG_ENTRY0("CceMsixCsrParityErr",
                CCE_ERR_STATUS_CCE_MSIX_CSR_PARITY_ERR_SMASK),
/*41-63 reserved*/
};

/*
 * Misc Error flags
 */
#define MES(text) MISC_ERR_STATUS_MISC_##text##_ERR_SMASK
static struct flag_table misc_err_status_flags[] = {
/* 0*/  FLAG_ENTRY0("CSR_PARITY", MES(CSR_PARITY)),
/* 1*/  FLAG_ENTRY0("CSR_READ_BAD_ADDR", MES(CSR_READ_BAD_ADDR)),
/* 2*/  FLAG_ENTRY0("CSR_WRITE_BAD_ADDR", MES(CSR_WRITE_BAD_ADDR)),
/* 3*/  FLAG_ENTRY0("SBUS_WRITE_FAILED", MES(SBUS_WRITE_FAILED)),
/* 4*/  FLAG_ENTRY0("KEY_MISMATCH", MES(KEY_MISMATCH)),
/* 5*/  FLAG_ENTRY0("FW_AUTH_FAILED", MES(FW_AUTH_FAILED)),
/* 6*/  FLAG_ENTRY0("EFUSE_CSR_PARITY", MES(EFUSE_CSR_PARITY)),
/* 7*/  FLAG_ENTRY0("EFUSE_READ_BAD_ADDR", MES(EFUSE_READ_BAD_ADDR)),
/* 8*/  FLAG_ENTRY0("EFUSE_WRITE", MES(EFUSE_WRITE)),
/* 9*/  FLAG_ENTRY0("EFUSE_DONE_PARITY", MES(EFUSE_DONE_PARITY)),
/*10*/  FLAG_ENTRY0("INVALID_EEP_CMD", MES(INVALID_EEP_CMD)),
/*11*/  FLAG_ENTRY0("MBIST_FAIL", MES(MBIST_FAIL)),
/*12*/  FLAG_ENTRY0("PLL_LOCK_FAIL", MES(PLL_LOCK_FAIL))
};

/*
 * TXE PIO Error flags and consequences
 */
static struct flag_table pio_err_status_flags[] = {
/* 0*/  FLAG_ENTRY("PioWriteBadCtxt",
        SEC_WRITE_DROPPED,
        SEND_PIO_ERR_STATUS_PIO_WRITE_BAD_CTXT_ERR_SMASK),
/* 1*/  FLAG_ENTRY("PioWriteAddrParity",
        SEC_SPC_FREEZE,
        SEND_PIO_ERR_STATUS_PIO_WRITE_ADDR_PARITY_ERR_SMASK),
/* 2*/  FLAG_ENTRY("PioCsrParity",
        SEC_SPC_FREEZE,
        SEND_PIO_ERR_STATUS_PIO_CSR_PARITY_ERR_SMASK),
/* 3*/  FLAG_ENTRY("PioSbMemFifo0",
        SEC_SPC_FREEZE,
        SEND_PIO_ERR_STATUS_PIO_SB_MEM_FIFO0_ERR_SMASK),
/* 4*/  FLAG_ENTRY("PioSbMemFifo1",
        SEC_SPC_FREEZE,
        SEND_PIO_ERR_STATUS_PIO_SB_MEM_FIFO1_ERR_SMASK),
/* 5*/  FLAG_ENTRY("PioPccFifoParity",
        SEC_SPC_FREEZE,
        SEND_PIO_ERR_STATUS_PIO_PCC_FIFO_PARITY_ERR_SMASK),
/* 6*/  FLAG_ENTRY("PioPecFifoParity",
        SEC_SPC_FREEZE,
        SEND_PIO_ERR_STATUS_PIO_PEC_FIFO_PARITY_ERR_SMASK),
/* 7*/  FLAG_ENTRY("PioSbrdctlCrrelParity",
        SEC_SPC_FREEZE,
        SEND_PIO_ERR_STATUS_PIO_SBRDCTL_CRREL_PARITY_ERR_SMASK),
/* 8*/  FLAG_ENTRY("PioSbrdctrlCrrelFifoParity",
        SEC_SPC_FREEZE,
        SEND_PIO_ERR_STATUS_PIO_SBRDCTRL_CRREL_FIFO_PARITY_ERR_SMASK),
/* 9*/  FLAG_ENTRY("PioPktEvictFifoParityErr",
        SEC_SPC_FREEZE,
        SEND_PIO_ERR_STATUS_PIO_PKT_EVICT_FIFO_PARITY_ERR_SMASK),
/*10*/  FLAG_ENTRY("PioSmPktResetParity",
        SEC_SPC_FREEZE,
        SEND_PIO_ERR_STATUS_PIO_SM_PKT_RESET_PARITY_ERR_SMASK),
/*11*/  FLAG_ENTRY("PioVlLenMemBank0Unc",
        SEC_SPC_FREEZE,
        SEND_PIO_ERR_STATUS_PIO_VL_LEN_MEM_BANK0_UNC_ERR_SMASK),
/*12*/  FLAG_ENTRY("PioVlLenMemBank1Unc",
        SEC_SPC_FREEZE,
        SEND_PIO_ERR_STATUS_PIO_VL_LEN_MEM_BANK1_UNC_ERR_SMASK),
/*13*/  FLAG_ENTRY("PioVlLenMemBank0Cor",
        0,
        SEND_PIO_ERR_STATUS_PIO_VL_LEN_MEM_BANK0_COR_ERR_SMASK),
/*14*/  FLAG_ENTRY("PioVlLenMemBank1Cor",
        0,
        SEND_PIO_ERR_STATUS_PIO_VL_LEN_MEM_BANK1_COR_ERR_SMASK),
/*15*/  FLAG_ENTRY("PioCreditRetFifoParity",
        SEC_SPC_FREEZE,
        SEND_PIO_ERR_STATUS_PIO_CREDIT_RET_FIFO_PARITY_ERR_SMASK),
/*16*/  FLAG_ENTRY("PioPpmcPblFifo",
        SEC_SPC_FREEZE,
        SEND_PIO_ERR_STATUS_PIO_PPMC_PBL_FIFO_ERR_SMASK),
/*17*/  FLAG_ENTRY("PioInitSmIn",
        0,
        SEND_PIO_ERR_STATUS_PIO_INIT_SM_IN_ERR_SMASK),
/*18*/  FLAG_ENTRY("PioPktEvictSmOrArbSm",
        SEC_SPC_FREEZE,
        SEND_PIO_ERR_STATUS_PIO_PKT_EVICT_SM_OR_ARB_SM_ERR_SMASK),
/*19*/  FLAG_ENTRY("PioHostAddrMemUnc",
        SEC_SPC_FREEZE,
        SEND_PIO_ERR_STATUS_PIO_HOST_ADDR_MEM_UNC_ERR_SMASK),
/*20*/  FLAG_ENTRY("PioHostAddrMemCor",
        0,
        SEND_PIO_ERR_STATUS_PIO_HOST_ADDR_MEM_COR_ERR_SMASK),
/*21*/  FLAG_ENTRY("PioWriteDataParity",
        SEC_SPC_FREEZE,
        SEND_PIO_ERR_STATUS_PIO_WRITE_DATA_PARITY_ERR_SMASK),
/*22*/  FLAG_ENTRY("PioStateMachine",
        SEC_SPC_FREEZE,
        SEND_PIO_ERR_STATUS_PIO_STATE_MACHINE_ERR_SMASK),
/*23*/  FLAG_ENTRY("PioWriteQwValidParity",
        SEC_WRITE_DROPPED | SEC_SPC_FREEZE,
        SEND_PIO_ERR_STATUS_PIO_WRITE_QW_VALID_PARITY_ERR_SMASK),
/*24*/  FLAG_ENTRY("PioBlockQwCountParity",
        SEC_WRITE_DROPPED | SEC_SPC_FREEZE,
        SEND_PIO_ERR_STATUS_PIO_BLOCK_QW_COUNT_PARITY_ERR_SMASK),
/*25*/  FLAG_ENTRY("PioVlfVlLenParity",
        SEC_SPC_FREEZE,
        SEND_PIO_ERR_STATUS_PIO_VLF_VL_LEN_PARITY_ERR_SMASK),
/*26*/  FLAG_ENTRY("PioVlfSopParity",
        SEC_SPC_FREEZE,
        SEND_PIO_ERR_STATUS_PIO_VLF_SOP_PARITY_ERR_SMASK),
/*27*/  FLAG_ENTRY("PioVlFifoParity",
        SEC_SPC_FREEZE,
        SEND_PIO_ERR_STATUS_PIO_VL_FIFO_PARITY_ERR_SMASK),
/*28*/  FLAG_ENTRY("PioPpmcBqcMemParity",
        SEC_SPC_FREEZE,
        SEND_PIO_ERR_STATUS_PIO_PPMC_BQC_MEM_PARITY_ERR_SMASK),
/*29*/  FLAG_ENTRY("PioPpmcSopLen",
        SEC_SPC_FREEZE,
        SEND_PIO_ERR_STATUS_PIO_PPMC_SOP_LEN_ERR_SMASK),
/*30-31 reserved*/
/*32*/  FLAG_ENTRY("PioCurrentFreeCntParity",
        SEC_SPC_FREEZE,
        SEND_PIO_ERR_STATUS_PIO_CURRENT_FREE_CNT_PARITY_ERR_SMASK),
/*33*/  FLAG_ENTRY("PioLastReturnedCntParity",
        SEC_SPC_FREEZE,
        SEND_PIO_ERR_STATUS_PIO_LAST_RETURNED_CNT_PARITY_ERR_SMASK),
/*34*/  FLAG_ENTRY("PioPccSopHeadParity",
        SEC_SPC_FREEZE,
        SEND_PIO_ERR_STATUS_PIO_PCC_SOP_HEAD_PARITY_ERR_SMASK),
/*35*/  FLAG_ENTRY("PioPecSopHeadParityErr",
        SEC_SPC_FREEZE,
        SEND_PIO_ERR_STATUS_PIO_PEC_SOP_HEAD_PARITY_ERR_SMASK),
/*36-63 reserved*/
};

/* TXE PIO errors that cause an SPC freeze */
#define ALL_PIO_FREEZE_ERR \
        (SEND_PIO_ERR_STATUS_PIO_WRITE_ADDR_PARITY_ERR_SMASK \
        | SEND_PIO_ERR_STATUS_PIO_CSR_PARITY_ERR_SMASK \
        | SEND_PIO_ERR_STATUS_PIO_SB_MEM_FIFO0_ERR_SMASK \
        | SEND_PIO_ERR_STATUS_PIO_SB_MEM_FIFO1_ERR_SMASK \
        | SEND_PIO_ERR_STATUS_PIO_PCC_FIFO_PARITY_ERR_SMASK \
        | SEND_PIO_ERR_STATUS_PIO_PEC_FIFO_PARITY_ERR_SMASK \
        | SEND_PIO_ERR_STATUS_PIO_SBRDCTL_CRREL_PARITY_ERR_SMASK \
        | SEND_PIO_ERR_STATUS_PIO_SBRDCTRL_CRREL_FIFO_PARITY_ERR_SMASK \
        | SEND_PIO_ERR_STATUS_PIO_PKT_EVICT_FIFO_PARITY_ERR_SMASK \
        | SEND_PIO_ERR_STATUS_PIO_SM_PKT_RESET_PARITY_ERR_SMASK \
        | SEND_PIO_ERR_STATUS_PIO_VL_LEN_MEM_BANK0_UNC_ERR_SMASK \
        | SEND_PIO_ERR_STATUS_PIO_VL_LEN_MEM_BANK1_UNC_ERR_SMASK \
        | SEND_PIO_ERR_STATUS_PIO_CREDIT_RET_FIFO_PARITY_ERR_SMASK \
        | SEND_PIO_ERR_STATUS_PIO_PPMC_PBL_FIFO_ERR_SMASK \
        | SEND_PIO_ERR_STATUS_PIO_PKT_EVICT_SM_OR_ARB_SM_ERR_SMASK \
        | SEND_PIO_ERR_STATUS_PIO_HOST_ADDR_MEM_UNC_ERR_SMASK \
        | SEND_PIO_ERR_STATUS_PIO_WRITE_DATA_PARITY_ERR_SMASK \
        | SEND_PIO_ERR_STATUS_PIO_STATE_MACHINE_ERR_SMASK \
        | SEND_PIO_ERR_STATUS_PIO_WRITE_QW_VALID_PARITY_ERR_SMASK \
        | SEND_PIO_ERR_STATUS_PIO_BLOCK_QW_COUNT_PARITY_ERR_SMASK \
        | SEND_PIO_ERR_STATUS_PIO_VLF_VL_LEN_PARITY_ERR_SMASK \
        | SEND_PIO_ERR_STATUS_PIO_VLF_SOP_PARITY_ERR_SMASK \
        | SEND_PIO_ERR_STATUS_PIO_VL_FIFO_PARITY_ERR_SMASK \
        | SEND_PIO_ERR_STATUS_PIO_PPMC_BQC_MEM_PARITY_ERR_SMASK \
        | SEND_PIO_ERR_STATUS_PIO_PPMC_SOP_LEN_ERR_SMASK \
        | SEND_PIO_ERR_STATUS_PIO_CURRENT_FREE_CNT_PARITY_ERR_SMASK \
        | SEND_PIO_ERR_STATUS_PIO_LAST_RETURNED_CNT_PARITY_ERR_SMASK \
        | SEND_PIO_ERR_STATUS_PIO_PCC_SOP_HEAD_PARITY_ERR_SMASK \
        | SEND_PIO_ERR_STATUS_PIO_PEC_SOP_HEAD_PARITY_ERR_SMASK)

/*
 * TXE SDMA Error flags
 */
static struct flag_table sdma_err_status_flags[] = {
/* 0*/  FLAG_ENTRY0("SDmaRpyTagErr",
                SEND_DMA_ERR_STATUS_SDMA_RPY_TAG_ERR_SMASK),
/* 1*/  FLAG_ENTRY0("SDmaCsrParityErr",
                SEND_DMA_ERR_STATUS_SDMA_CSR_PARITY_ERR_SMASK),
/* 2*/  FLAG_ENTRY0("SDmaPcieReqTrackingUncErr",
                SEND_DMA_ERR_STATUS_SDMA_PCIE_REQ_TRACKING_UNC_ERR_SMASK),
/* 3*/  FLAG_ENTRY0("SDmaPcieReqTrackingCorErr",
                SEND_DMA_ERR_STATUS_SDMA_PCIE_REQ_TRACKING_COR_ERR_SMASK),
/*04-63 reserved*/
};

/* TXE SDMA errors that cause an SPC freeze */
#define ALL_SDMA_FREEZE_ERR  \
                (SEND_DMA_ERR_STATUS_SDMA_RPY_TAG_ERR_SMASK \
                | SEND_DMA_ERR_STATUS_SDMA_CSR_PARITY_ERR_SMASK \
                | SEND_DMA_ERR_STATUS_SDMA_PCIE_REQ_TRACKING_UNC_ERR_SMASK)

/*
 * TXE Egress Error flags
 */
#define SEES(text) SEND_EGRESS_ERR_STATUS_##text##_ERR_SMASK
static struct flag_table egress_err_status_flags[] = {
/* 0*/  FLAG_ENTRY0("TxPktIntegrityMemCorErr", SEES(TX_PKT_INTEGRITY_MEM_COR)),
/* 1*/  FLAG_ENTRY0("TxPktIntegrityMemUncErr", SEES(TX_PKT_INTEGRITY_MEM_UNC)),
/* 2 reserved */
/* 3*/  FLAG_ENTRY0("TxEgressFifoUnderrunOrParityErr",
                SEES(TX_EGRESS_FIFO_UNDERRUN_OR_PARITY)),
/* 4*/  FLAG_ENTRY0("TxLinkdownErr", SEES(TX_LINKDOWN)),
/* 5*/  FLAG_ENTRY0("TxIncorrectLinkStateErr", SEES(TX_INCORRECT_LINK_STATE)),
/* 6 reserved */
/* 7*/  FLAG_ENTRY0("TxPioLaunchIntfParityErr",
                SEES(TX_PIO_LAUNCH_INTF_PARITY)),
/* 8*/  FLAG_ENTRY0("TxSdmaLaunchIntfParityErr",
                SEES(TX_SDMA_LAUNCH_INTF_PARITY)),
/* 9-10 reserved */
/*11*/  FLAG_ENTRY0("TxSbrdCtlStateMachineParityErr",
                SEES(TX_SBRD_CTL_STATE_MACHINE_PARITY)),
/*12*/  FLAG_ENTRY0("TxIllegalVLErr", SEES(TX_ILLEGAL_VL)),
/*13*/  FLAG_ENTRY0("TxLaunchCsrParityErr", SEES(TX_LAUNCH_CSR_PARITY)),
/*14*/  FLAG_ENTRY0("TxSbrdCtlCsrParityErr", SEES(TX_SBRD_CTL_CSR_PARITY)),
/*15*/  FLAG_ENTRY0("TxConfigParityErr", SEES(TX_CONFIG_PARITY)),
/*16*/  FLAG_ENTRY0("TxSdma0DisallowedPacketErr",
                SEES(TX_SDMA0_DISALLOWED_PACKET)),
/*17*/  FLAG_ENTRY0("TxSdma1DisallowedPacketErr",
                SEES(TX_SDMA1_DISALLOWED_PACKET)),
/*18*/  FLAG_ENTRY0("TxSdma2DisallowedPacketErr",
                SEES(TX_SDMA2_DISALLOWED_PACKET)),
/*19*/  FLAG_ENTRY0("TxSdma3DisallowedPacketErr",
                SEES(TX_SDMA3_DISALLOWED_PACKET)),
/*20*/  FLAG_ENTRY0("TxSdma4DisallowedPacketErr",
                SEES(TX_SDMA4_DISALLOWED_PACKET)),
/*21*/  FLAG_ENTRY0("TxSdma5DisallowedPacketErr",
                SEES(TX_SDMA5_DISALLOWED_PACKET)),
/*22*/  FLAG_ENTRY0("TxSdma6DisallowedPacketErr",
                SEES(TX_SDMA6_DISALLOWED_PACKET)),
/*23*/  FLAG_ENTRY0("TxSdma7DisallowedPacketErr",
                SEES(TX_SDMA7_DISALLOWED_PACKET)),
/*24*/  FLAG_ENTRY0("TxSdma8DisallowedPacketErr",
                SEES(TX_SDMA8_DISALLOWED_PACKET)),
/*25*/  FLAG_ENTRY0("TxSdma9DisallowedPacketErr",
                SEES(TX_SDMA9_DISALLOWED_PACKET)),
/*26*/  FLAG_ENTRY0("TxSdma10DisallowedPacketErr",
                SEES(TX_SDMA10_DISALLOWED_PACKET)),
/*27*/  FLAG_ENTRY0("TxSdma11DisallowedPacketErr",
                SEES(TX_SDMA11_DISALLOWED_PACKET)),
/*28*/  FLAG_ENTRY0("TxSdma12DisallowedPacketErr",
                SEES(TX_SDMA12_DISALLOWED_PACKET)),
/*29*/  FLAG_ENTRY0("TxSdma13DisallowedPacketErr",
                SEES(TX_SDMA13_DISALLOWED_PACKET)),
/*30*/  FLAG_ENTRY0("TxSdma14DisallowedPacketErr",
                SEES(TX_SDMA14_DISALLOWED_PACKET)),
/*31*/  FLAG_ENTRY0("TxSdma15DisallowedPacketErr",
                SEES(TX_SDMA15_DISALLOWED_PACKET)),
/*32*/  FLAG_ENTRY0("TxLaunchFifo0UncOrParityErr",
                SEES(TX_LAUNCH_FIFO0_UNC_OR_PARITY)),
/*33*/  FLAG_ENTRY0("TxLaunchFifo1UncOrParityErr",
                SEES(TX_LAUNCH_FIFO1_UNC_OR_PARITY)),
/*34*/  FLAG_ENTRY0("TxLaunchFifo2UncOrParityErr",
                SEES(TX_LAUNCH_FIFO2_UNC_OR_PARITY)),
/*35*/  FLAG_ENTRY0("TxLaunchFifo3UncOrParityErr",
                SEES(TX_LAUNCH_FIFO3_UNC_OR_PARITY)),
/*36*/  FLAG_ENTRY0("TxLaunchFifo4UncOrParityErr",
                SEES(TX_LAUNCH_FIFO4_UNC_OR_PARITY)),
/*37*/  FLAG_ENTRY0("TxLaunchFifo5UncOrParityErr",
                SEES(TX_LAUNCH_FIFO5_UNC_OR_PARITY)),
/*38*/  FLAG_ENTRY0("TxLaunchFifo6UncOrParityErr",
                SEES(TX_LAUNCH_FIFO6_UNC_OR_PARITY)),
/*39*/  FLAG_ENTRY0("TxLaunchFifo7UncOrParityErr",
                SEES(TX_LAUNCH_FIFO7_UNC_OR_PARITY)),
/*40*/  FLAG_ENTRY0("TxLaunchFifo8UncOrParityErr",
                SEES(TX_LAUNCH_FIFO8_UNC_OR_PARITY)),
/*41*/  FLAG_ENTRY0("TxCreditReturnParityErr", SEES(TX_CREDIT_RETURN_PARITY)),
/*42*/  FLAG_ENTRY0("TxSbHdrUncErr", SEES(TX_SB_HDR_UNC)),
/*43*/  FLAG_ENTRY0("TxReadSdmaMemoryUncErr", SEES(TX_READ_SDMA_MEMORY_UNC)),
/*44*/  FLAG_ENTRY0("TxReadPioMemoryUncErr", SEES(TX_READ_PIO_MEMORY_UNC)),
/*45*/  FLAG_ENTRY0("TxEgressFifoUncErr", SEES(TX_EGRESS_FIFO_UNC)),
/*46*/  FLAG_ENTRY0("TxHcrcInsertionErr", SEES(TX_HCRC_INSERTION)),
/*47*/  FLAG_ENTRY0("TxCreditReturnVLErr", SEES(TX_CREDIT_RETURN_VL)),
/*48*/  FLAG_ENTRY0("TxLaunchFifo0CorErr", SEES(TX_LAUNCH_FIFO0_COR)),
/*49*/  FLAG_ENTRY0("TxLaunchFifo1CorErr", SEES(TX_LAUNCH_FIFO1_COR)),
/*50*/  FLAG_ENTRY0("TxLaunchFifo2CorErr", SEES(TX_LAUNCH_FIFO2_COR)),
/*51*/  FLAG_ENTRY0("TxLaunchFifo3CorErr", SEES(TX_LAUNCH_FIFO3_COR)),
/*52*/  FLAG_ENTRY0("TxLaunchFifo4CorErr", SEES(TX_LAUNCH_FIFO4_COR)),
/*53*/  FLAG_ENTRY0("TxLaunchFifo5CorErr", SEES(TX_LAUNCH_FIFO5_COR)),
/*54*/  FLAG_ENTRY0("TxLaunchFifo6CorErr", SEES(TX_LAUNCH_FIFO6_COR)),
/*55*/  FLAG_ENTRY0("TxLaunchFifo7CorErr", SEES(TX_LAUNCH_FIFO7_COR)),
/*56*/  FLAG_ENTRY0("TxLaunchFifo8CorErr", SEES(TX_LAUNCH_FIFO8_COR)),
/*57*/  FLAG_ENTRY0("TxCreditOverrunErr", SEES(TX_CREDIT_OVERRUN)),
/*58*/  FLAG_ENTRY0("TxSbHdrCorErr", SEES(TX_SB_HDR_COR)),
/*59*/  FLAG_ENTRY0("TxReadSdmaMemoryCorErr", SEES(TX_READ_SDMA_MEMORY_COR)),
/*60*/  FLAG_ENTRY0("TxReadPioMemoryCorErr", SEES(TX_READ_PIO_MEMORY_COR)),
/*61*/  FLAG_ENTRY0("TxEgressFifoCorErr", SEES(TX_EGRESS_FIFO_COR)),
/*62*/  FLAG_ENTRY0("TxReadSdmaMemoryCsrUncErr",
                SEES(TX_READ_SDMA_MEMORY_CSR_UNC)),
/*63*/  FLAG_ENTRY0("TxReadPioMemoryCsrUncErr",
                SEES(TX_READ_PIO_MEMORY_CSR_UNC)),
};

/*
 * TXE Egress Error Info flags
 */
#define SEEI(text) SEND_EGRESS_ERR_INFO_##text##_ERR_SMASK
static struct flag_table egress_err_info_flags[] = {
/* 0*/  FLAG_ENTRY0("Reserved", 0ull),
/* 1*/  FLAG_ENTRY0("VLErr", SEEI(VL)),
/* 2*/  FLAG_ENTRY0("JobKeyErr", SEEI(JOB_KEY)),
/* 3*/  FLAG_ENTRY0("JobKeyErr", SEEI(JOB_KEY)),
/* 4*/  FLAG_ENTRY0("PartitionKeyErr", SEEI(PARTITION_KEY)),
/* 5*/  FLAG_ENTRY0("SLIDErr", SEEI(SLID)),
/* 6*/  FLAG_ENTRY0("OpcodeErr", SEEI(OPCODE)),
/* 7*/  FLAG_ENTRY0("VLMappingErr", SEEI(VL_MAPPING)),
/* 8*/  FLAG_ENTRY0("RawErr", SEEI(RAW)),
/* 9*/  FLAG_ENTRY0("RawIPv6Err", SEEI(RAW_IPV6)),
/*10*/  FLAG_ENTRY0("GRHErr", SEEI(GRH)),
/*11*/  FLAG_ENTRY0("BypassErr", SEEI(BYPASS)),
/*12*/  FLAG_ENTRY0("KDETHPacketsErr", SEEI(KDETH_PACKETS)),
/*13*/  FLAG_ENTRY0("NonKDETHPacketsErr", SEEI(NON_KDETH_PACKETS)),
/*14*/  FLAG_ENTRY0("TooSmallIBPacketsErr", SEEI(TOO_SMALL_IB_PACKETS)),
/*15*/  FLAG_ENTRY0("TooSmallBypassPacketsErr", SEEI(TOO_SMALL_BYPASS_PACKETS)),
/*16*/  FLAG_ENTRY0("PbcTestErr", SEEI(PBC_TEST)),
/*17*/  FLAG_ENTRY0("BadPktLenErr", SEEI(BAD_PKT_LEN)),
/*18*/  FLAG_ENTRY0("TooLongIBPacketErr", SEEI(TOO_LONG_IB_PACKET)),
/*19*/  FLAG_ENTRY0("TooLongBypassPacketsErr", SEEI(TOO_LONG_BYPASS_PACKETS)),
/*20*/  FLAG_ENTRY0("PbcStaticRateControlErr", SEEI(PBC_STATIC_RATE_CONTROL)),
/*21*/  FLAG_ENTRY0("BypassBadPktLenErr", SEEI(BAD_PKT_LEN)),
};

/* TXE Egress errors that cause an SPC freeze */
#define ALL_TXE_EGRESS_FREEZE_ERR \
        (SEES(TX_EGRESS_FIFO_UNDERRUN_OR_PARITY) \
        | SEES(TX_PIO_LAUNCH_INTF_PARITY) \
        | SEES(TX_SDMA_LAUNCH_INTF_PARITY) \
        | SEES(TX_SBRD_CTL_STATE_MACHINE_PARITY) \
        | SEES(TX_LAUNCH_CSR_PARITY) \
        | SEES(TX_SBRD_CTL_CSR_PARITY) \
        | SEES(TX_CONFIG_PARITY) \
        | SEES(TX_LAUNCH_FIFO0_UNC_OR_PARITY) \
        | SEES(TX_LAUNCH_FIFO1_UNC_OR_PARITY) \
        | SEES(TX_LAUNCH_FIFO2_UNC_OR_PARITY) \
        | SEES(TX_LAUNCH_FIFO3_UNC_OR_PARITY) \
        | SEES(TX_LAUNCH_FIFO4_UNC_OR_PARITY) \
        | SEES(TX_LAUNCH_FIFO5_UNC_OR_PARITY) \
        | SEES(TX_LAUNCH_FIFO6_UNC_OR_PARITY) \
        | SEES(TX_LAUNCH_FIFO7_UNC_OR_PARITY) \
        | SEES(TX_LAUNCH_FIFO8_UNC_OR_PARITY) \
        | SEES(TX_CREDIT_RETURN_PARITY))

/*
 * TXE Send error flags
 */
#define SES(name) SEND_ERR_STATUS_SEND_##name##_ERR_SMASK
static struct flag_table send_err_status_flags[] = {
/* 0*/  FLAG_ENTRY0("SendCsrParityErr", SES(CSR_PARITY)),
/* 1*/  FLAG_ENTRY0("SendCsrReadBadAddrErr", SES(CSR_READ_BAD_ADDR)),
/* 2*/  FLAG_ENTRY0("SendCsrWriteBadAddrErr", SES(CSR_WRITE_BAD_ADDR))
};

/*
 * TXE Send Context Error flags and consequences
 */
static struct flag_table sc_err_status_flags[] = {
/* 0*/  FLAG_ENTRY("InconsistentSop",
                SEC_PACKET_DROPPED | SEC_SC_HALTED,
                SEND_CTXT_ERR_STATUS_PIO_INCONSISTENT_SOP_ERR_SMASK),
/* 1*/  FLAG_ENTRY("DisallowedPacket",
                SEC_PACKET_DROPPED | SEC_SC_HALTED,
                SEND_CTXT_ERR_STATUS_PIO_DISALLOWED_PACKET_ERR_SMASK),
/* 2*/  FLAG_ENTRY("WriteCrossesBoundary",
                SEC_WRITE_DROPPED | SEC_SC_HALTED,
                SEND_CTXT_ERR_STATUS_PIO_WRITE_CROSSES_BOUNDARY_ERR_SMASK),
/* 3*/  FLAG_ENTRY("WriteOverflow",
                SEC_WRITE_DROPPED | SEC_SC_HALTED,
                SEND_CTXT_ERR_STATUS_PIO_WRITE_OVERFLOW_ERR_SMASK),
/* 4*/  FLAG_ENTRY("WriteOutOfBounds",
                SEC_WRITE_DROPPED | SEC_SC_HALTED,
                SEND_CTXT_ERR_STATUS_PIO_WRITE_OUT_OF_BOUNDS_ERR_SMASK),
/* 5-63 reserved*/
};

/*
 * RXE Receive Error flags
 */
#define RXES(name) RCV_ERR_STATUS_RX_##name##_ERR_SMASK
static struct flag_table rxe_err_status_flags[] = {
/* 0*/  FLAG_ENTRY0("RxDmaCsrCorErr", RXES(DMA_CSR_COR)),
/* 1*/  FLAG_ENTRY0("RxDcIntfParityErr", RXES(DC_INTF_PARITY)),
/* 2*/  FLAG_ENTRY0("RxRcvHdrUncErr", RXES(RCV_HDR_UNC)),
/* 3*/  FLAG_ENTRY0("RxRcvHdrCorErr", RXES(RCV_HDR_COR)),
/* 4*/  FLAG_ENTRY0("RxRcvDataUncErr", RXES(RCV_DATA_UNC)),
/* 5*/  FLAG_ENTRY0("RxRcvDataCorErr", RXES(RCV_DATA_COR)),
/* 6*/  FLAG_ENTRY0("RxRcvQpMapTableUncErr", RXES(RCV_QP_MAP_TABLE_UNC)),
/* 7*/  FLAG_ENTRY0("RxRcvQpMapTableCorErr", RXES(RCV_QP_MAP_TABLE_COR)),
/* 8*/  FLAG_ENTRY0("RxRcvCsrParityErr", RXES(RCV_CSR_PARITY)),
/* 9*/  FLAG_ENTRY0("RxDcSopEopParityErr", RXES(DC_SOP_EOP_PARITY)),
/*10*/  FLAG_ENTRY0("RxDmaFlagUncErr", RXES(DMA_FLAG_UNC)),
/*11*/  FLAG_ENTRY0("RxDmaFlagCorErr", RXES(DMA_FLAG_COR)),
/*12*/  FLAG_ENTRY0("RxRcvFsmEncodingErr", RXES(RCV_FSM_ENCODING)),
/*13*/  FLAG_ENTRY0("RxRbufFreeListUncErr", RXES(RBUF_FREE_LIST_UNC)),
/*14*/  FLAG_ENTRY0("RxRbufFreeListCorErr", RXES(RBUF_FREE_LIST_COR)),
/*15*/  FLAG_ENTRY0("RxRbufLookupDesRegUncErr", RXES(RBUF_LOOKUP_DES_REG_UNC)),
/*16*/  FLAG_ENTRY0("RxRbufLookupDesRegUncCorErr",
                RXES(RBUF_LOOKUP_DES_REG_UNC_COR)),
/*17*/  FLAG_ENTRY0("RxRbufLookupDesUncErr", RXES(RBUF_LOOKUP_DES_UNC)),
/*18*/  FLAG_ENTRY0("RxRbufLookupDesCorErr", RXES(RBUF_LOOKUP_DES_COR)),
/*19*/  FLAG_ENTRY0("RxRbufBlockListReadUncErr",
                RXES(RBUF_BLOCK_LIST_READ_UNC)),
/*20*/  FLAG_ENTRY0("RxRbufBlockListReadCorErr",
                RXES(RBUF_BLOCK_LIST_READ_COR)),
/*21*/  FLAG_ENTRY0("RxRbufCsrQHeadBufNumParityErr",
                RXES(RBUF_CSR_QHEAD_BUF_NUM_PARITY)),
/*22*/  FLAG_ENTRY0("RxRbufCsrQEntCntParityErr",
                RXES(RBUF_CSR_QENT_CNT_PARITY)),
/*23*/  FLAG_ENTRY0("RxRbufCsrQNextBufParityErr",
                RXES(RBUF_CSR_QNEXT_BUF_PARITY)),
/*24*/  FLAG_ENTRY0("RxRbufCsrQVldBitParityErr",
                RXES(RBUF_CSR_QVLD_BIT_PARITY)),
/*25*/  FLAG_ENTRY0("RxRbufCsrQHdPtrParityErr", RXES(RBUF_CSR_QHD_PTR_PARITY)),
/*26*/  FLAG_ENTRY0("RxRbufCsrQTlPtrParityErr", RXES(RBUF_CSR_QTL_PTR_PARITY)),
/*27*/  FLAG_ENTRY0("RxRbufCsrQNumOfPktParityErr",
                RXES(RBUF_CSR_QNUM_OF_PKT_PARITY)),
/*28*/  FLAG_ENTRY0("RxRbufCsrQEOPDWParityErr", RXES(RBUF_CSR_QEOPDW_PARITY)),
/*29*/  FLAG_ENTRY0("RxRbufCtxIdParityErr", RXES(RBUF_CTX_ID_PARITY)),
/*30*/  FLAG_ENTRY0("RxRBufBadLookupErr", RXES(RBUF_BAD_LOOKUP)),
/*31*/  FLAG_ENTRY0("RxRbufFullErr", RXES(RBUF_FULL)),
/*32*/  FLAG_ENTRY0("RxRbufEmptyErr", RXES(RBUF_EMPTY)),
/*33*/  FLAG_ENTRY0("RxRbufFlRdAddrParityErr", RXES(RBUF_FL_RD_ADDR_PARITY)),
/*34*/  FLAG_ENTRY0("RxRbufFlWrAddrParityErr", RXES(RBUF_FL_WR_ADDR_PARITY)),
/*35*/  FLAG_ENTRY0("RxRbufFlInitdoneParityErr",
                RXES(RBUF_FL_INITDONE_PARITY)),
/*36*/  FLAG_ENTRY0("RxRbufFlInitWrAddrParityErr",
                RXES(RBUF_FL_INIT_WR_ADDR_PARITY)),
/*37*/  FLAG_ENTRY0("RxRbufNextFreeBufUncErr", RXES(RBUF_NEXT_FREE_BUF_UNC)),
/*38*/  FLAG_ENTRY0("RxRbufNextFreeBufCorErr", RXES(RBUF_NEXT_FREE_BUF_COR)),
/*39*/  FLAG_ENTRY0("RxLookupDesPart1UncErr", RXES(LOOKUP_DES_PART1_UNC)),
/*40*/  FLAG_ENTRY0("RxLookupDesPart1UncCorErr",
                RXES(LOOKUP_DES_PART1_UNC_COR)),
/*41*/  FLAG_ENTRY0("RxLookupDesPart2ParityErr",
                RXES(LOOKUP_DES_PART2_PARITY)),
/*42*/  FLAG_ENTRY0("RxLookupRcvArrayUncErr", RXES(LOOKUP_RCV_ARRAY_UNC)),
/*43*/  FLAG_ENTRY0("RxLookupRcvArrayCorErr", RXES(LOOKUP_RCV_ARRAY_COR)),
/*44*/  FLAG_ENTRY0("RxLookupCsrParityErr", RXES(LOOKUP_CSR_PARITY)),
/*45*/  FLAG_ENTRY0("RxHqIntrCsrParityErr", RXES(HQ_INTR_CSR_PARITY)),
/*46*/  FLAG_ENTRY0("RxHqIntrFsmErr", RXES(HQ_INTR_FSM)),
/*47*/  FLAG_ENTRY0("RxRbufDescPart1UncErr", RXES(RBUF_DESC_PART1_UNC)),
/*48*/  FLAG_ENTRY0("RxRbufDescPart1CorErr", RXES(RBUF_DESC_PART1_COR)),
/*49*/  FLAG_ENTRY0("RxRbufDescPart2UncErr", RXES(RBUF_DESC_PART2_UNC)),
/*50*/  FLAG_ENTRY0("RxRbufDescPart2CorErr", RXES(RBUF_DESC_PART2_COR)),
/*51*/  FLAG_ENTRY0("RxDmaHdrFifoRdUncErr", RXES(DMA_HDR_FIFO_RD_UNC)),
/*52*/  FLAG_ENTRY0("RxDmaHdrFifoRdCorErr", RXES(DMA_HDR_FIFO_RD_COR)),
/*53*/  FLAG_ENTRY0("RxDmaDataFifoRdUncErr", RXES(DMA_DATA_FIFO_RD_UNC)),
/*54*/  FLAG_ENTRY0("RxDmaDataFifoRdCorErr", RXES(DMA_DATA_FIFO_RD_COR)),
/*55*/  FLAG_ENTRY0("RxRbufDataUncErr", RXES(RBUF_DATA_UNC)),
/*56*/  FLAG_ENTRY0("RxRbufDataCorErr", RXES(RBUF_DATA_COR)),
/*57*/  FLAG_ENTRY0("RxDmaCsrParityErr", RXES(DMA_CSR_PARITY)),
/*58*/  FLAG_ENTRY0("RxDmaEqFsmEncodingErr", RXES(DMA_EQ_FSM_ENCODING)),
/*59*/  FLAG_ENTRY0("RxDmaDqFsmEncodingErr", RXES(DMA_DQ_FSM_ENCODING)),
/*60*/  FLAG_ENTRY0("RxDmaCsrUncErr", RXES(DMA_CSR_UNC)),
/*61*/  FLAG_ENTRY0("RxCsrReadBadAddrErr", RXES(CSR_READ_BAD_ADDR)),
/*62*/  FLAG_ENTRY0("RxCsrWriteBadAddrErr", RXES(CSR_WRITE_BAD_ADDR)),
/*63*/  FLAG_ENTRY0("RxCsrParityErr", RXES(CSR_PARITY))
};

/* RXE errors that will trigger an SPC freeze */
#define ALL_RXE_FREEZE_ERR  \
        (RCV_ERR_STATUS_RX_RCV_QP_MAP_TABLE_UNC_ERR_SMASK \
        | RCV_ERR_STATUS_RX_RCV_CSR_PARITY_ERR_SMASK \
        | RCV_ERR_STATUS_RX_DMA_FLAG_UNC_ERR_SMASK \
        | RCV_ERR_STATUS_RX_RCV_FSM_ENCODING_ERR_SMASK \
        | RCV_ERR_STATUS_RX_RBUF_FREE_LIST_UNC_ERR_SMASK \
        | RCV_ERR_STATUS_RX_RBUF_LOOKUP_DES_REG_UNC_ERR_SMASK \
        | RCV_ERR_STATUS_RX_RBUF_LOOKUP_DES_REG_UNC_COR_ERR_SMASK \
        | RCV_ERR_STATUS_RX_RBUF_LOOKUP_DES_UNC_ERR_SMASK \
        | RCV_ERR_STATUS_RX_RBUF_BLOCK_LIST_READ_UNC_ERR_SMASK \
        | RCV_ERR_STATUS_RX_RBUF_CSR_QHEAD_BUF_NUM_PARITY_ERR_SMASK \
        | RCV_ERR_STATUS_RX_RBUF_CSR_QENT_CNT_PARITY_ERR_SMASK \
        | RCV_ERR_STATUS_RX_RBUF_CSR_QNEXT_BUF_PARITY_ERR_SMASK \
        | RCV_ERR_STATUS_RX_RBUF_CSR_QVLD_BIT_PARITY_ERR_SMASK \
        | RCV_ERR_STATUS_RX_RBUF_CSR_QHD_PTR_PARITY_ERR_SMASK \
        | RCV_ERR_STATUS_RX_RBUF_CSR_QTL_PTR_PARITY_ERR_SMASK \
        | RCV_ERR_STATUS_RX_RBUF_CSR_QNUM_OF_PKT_PARITY_ERR_SMASK \
        | RCV_ERR_STATUS_RX_RBUF_CSR_QEOPDW_PARITY_ERR_SMASK \
        | RCV_ERR_STATUS_RX_RBUF_CTX_ID_PARITY_ERR_SMASK \
        | RCV_ERR_STATUS_RX_RBUF_BAD_LOOKUP_ERR_SMASK \
        | RCV_ERR_STATUS_RX_RBUF_FULL_ERR_SMASK \
        | RCV_ERR_STATUS_RX_RBUF_EMPTY_ERR_SMASK \
        | RCV_ERR_STATUS_RX_RBUF_FL_RD_ADDR_PARITY_ERR_SMASK \
        | RCV_ERR_STATUS_RX_RBUF_FL_WR_ADDR_PARITY_ERR_SMASK \
        | RCV_ERR_STATUS_RX_RBUF_FL_INITDONE_PARITY_ERR_SMASK \
        | RCV_ERR_STATUS_RX_RBUF_FL_INIT_WR_ADDR_PARITY_ERR_SMASK \
        | RCV_ERR_STATUS_RX_RBUF_NEXT_FREE_BUF_UNC_ERR_SMASK \
        | RCV_ERR_STATUS_RX_LOOKUP_DES_PART1_UNC_ERR_SMASK \
        | RCV_ERR_STATUS_RX_LOOKUP_DES_PART1_UNC_COR_ERR_SMASK \
        | RCV_ERR_STATUS_RX_LOOKUP_DES_PART2_PARITY_ERR_SMASK \
        | RCV_ERR_STATUS_RX_LOOKUP_RCV_ARRAY_UNC_ERR_SMASK \
        | RCV_ERR_STATUS_RX_LOOKUP_CSR_PARITY_ERR_SMASK \
        | RCV_ERR_STATUS_RX_HQ_INTR_CSR_PARITY_ERR_SMASK \
        | RCV_ERR_STATUS_RX_HQ_INTR_FSM_ERR_SMASK \
        | RCV_ERR_STATUS_RX_RBUF_DESC_PART1_UNC_ERR_SMASK \
        | RCV_ERR_STATUS_RX_RBUF_DESC_PART1_COR_ERR_SMASK \
        | RCV_ERR_STATUS_RX_RBUF_DESC_PART2_UNC_ERR_SMASK \
        | RCV_ERR_STATUS_RX_DMA_HDR_FIFO_RD_UNC_ERR_SMASK \
        | RCV_ERR_STATUS_RX_DMA_DATA_FIFO_RD_UNC_ERR_SMASK \
        | RCV_ERR_STATUS_RX_RBUF_DATA_UNC_ERR_SMASK \
        | RCV_ERR_STATUS_RX_DMA_CSR_PARITY_ERR_SMASK \
        | RCV_ERR_STATUS_RX_DMA_EQ_FSM_ENCODING_ERR_SMASK \
        | RCV_ERR_STATUS_RX_DMA_DQ_FSM_ENCODING_ERR_SMASK \
        | RCV_ERR_STATUS_RX_DMA_CSR_UNC_ERR_SMASK \
        | RCV_ERR_STATUS_RX_CSR_PARITY_ERR_SMASK)

#define RXE_FREEZE_ABORT_MASK \
        (RCV_ERR_STATUS_RX_DMA_CSR_UNC_ERR_SMASK | \
        RCV_ERR_STATUS_RX_DMA_HDR_FIFO_RD_UNC_ERR_SMASK | \
        RCV_ERR_STATUS_RX_DMA_DATA_FIFO_RD_UNC_ERR_SMASK)

/*
 * DCC Error Flags
 */
#define DCCE(name) DCC_ERR_FLG_##name##_SMASK
static struct flag_table dcc_err_flags[] = {
        FLAG_ENTRY0("bad_l2_err", DCCE(BAD_L2_ERR)),
        FLAG_ENTRY0("bad_sc_err", DCCE(BAD_SC_ERR)),
        FLAG_ENTRY0("bad_mid_tail_err", DCCE(BAD_MID_TAIL_ERR)),
        FLAG_ENTRY0("bad_preemption_err", DCCE(BAD_PREEMPTION_ERR)),
        FLAG_ENTRY0("preemption_err", DCCE(PREEMPTION_ERR)),
        FLAG_ENTRY0("preemptionvl15_err", DCCE(PREEMPTIONVL15_ERR)),
        FLAG_ENTRY0("bad_vl_marker_err", DCCE(BAD_VL_MARKER_ERR)),
        FLAG_ENTRY0("bad_dlid_target_err", DCCE(BAD_DLID_TARGET_ERR)),
        FLAG_ENTRY0("bad_lver_err", DCCE(BAD_LVER_ERR)),
        FLAG_ENTRY0("uncorrectable_err", DCCE(UNCORRECTABLE_ERR)),
        FLAG_ENTRY0("bad_crdt_ack_err", DCCE(BAD_CRDT_ACK_ERR)),
        FLAG_ENTRY0("unsup_pkt_type", DCCE(UNSUP_PKT_TYPE)),
        FLAG_ENTRY0("bad_ctrl_flit_err", DCCE(BAD_CTRL_FLIT_ERR)),
        FLAG_ENTRY0("event_cntr_parity_err", DCCE(EVENT_CNTR_PARITY_ERR)),
        FLAG_ENTRY0("event_cntr_rollover_err", DCCE(EVENT_CNTR_ROLLOVER_ERR)),
        FLAG_ENTRY0("link_err", DCCE(LINK_ERR)),
        FLAG_ENTRY0("misc_cntr_rollover_err", DCCE(MISC_CNTR_ROLLOVER_ERR)),
        FLAG_ENTRY0("bad_ctrl_dist_err", DCCE(BAD_CTRL_DIST_ERR)),
        FLAG_ENTRY0("bad_tail_dist_err", DCCE(BAD_TAIL_DIST_ERR)),
        FLAG_ENTRY0("bad_head_dist_err", DCCE(BAD_HEAD_DIST_ERR)),
        FLAG_ENTRY0("nonvl15_state_err", DCCE(NONVL15_STATE_ERR)),
        FLAG_ENTRY0("vl15_multi_err", DCCE(VL15_MULTI_ERR)),
        FLAG_ENTRY0("bad_pkt_length_err", DCCE(BAD_PKT_LENGTH_ERR)),
        FLAG_ENTRY0("unsup_vl_err", DCCE(UNSUP_VL_ERR)),
        FLAG_ENTRY0("perm_nvl15_err", DCCE(PERM_NVL15_ERR)),
        FLAG_ENTRY0("slid_zero_err", DCCE(SLID_ZERO_ERR)),
        FLAG_ENTRY0("dlid_zero_err", DCCE(DLID_ZERO_ERR)),
        FLAG_ENTRY0("length_mtu_err", DCCE(LENGTH_MTU_ERR)),
        FLAG_ENTRY0("rx_early_drop_err", DCCE(RX_EARLY_DROP_ERR)),
        FLAG_ENTRY0("late_short_err", DCCE(LATE_SHORT_ERR)),
        FLAG_ENTRY0("late_long_err", DCCE(LATE_LONG_ERR)),
        FLAG_ENTRY0("late_ebp_err", DCCE(LATE_EBP_ERR)),
        FLAG_ENTRY0("fpe_tx_fifo_ovflw_err", DCCE(FPE_TX_FIFO_OVFLW_ERR)),
        FLAG_ENTRY0("fpe_tx_fifo_unflw_err", DCCE(FPE_TX_FIFO_UNFLW_ERR)),
        FLAG_ENTRY0("csr_access_blocked_host", DCCE(CSR_ACCESS_BLOCKED_HOST)),
        FLAG_ENTRY0("csr_access_blocked_uc", DCCE(CSR_ACCESS_BLOCKED_UC)),
        FLAG_ENTRY0("tx_ctrl_parity_err", DCCE(TX_CTRL_PARITY_ERR)),
        FLAG_ENTRY0("tx_ctrl_parity_mbe_err", DCCE(TX_CTRL_PARITY_MBE_ERR)),
        FLAG_ENTRY0("tx_sc_parity_err", DCCE(TX_SC_PARITY_ERR)),
        FLAG_ENTRY0("rx_ctrl_parity_mbe_err", DCCE(RX_CTRL_PARITY_MBE_ERR)),
        FLAG_ENTRY0("csr_parity_err", DCCE(CSR_PARITY_ERR)),
        FLAG_ENTRY0("csr_inval_addr", DCCE(CSR_INVAL_ADDR)),
        FLAG_ENTRY0("tx_byte_shft_parity_err", DCCE(TX_BYTE_SHFT_PARITY_ERR)),
        FLAG_ENTRY0("rx_byte_shft_parity_err", DCCE(RX_BYTE_SHFT_PARITY_ERR)),
        FLAG_ENTRY0("fmconfig_err", DCCE(FMCONFIG_ERR)),
        FLAG_ENTRY0("rcvport_err", DCCE(RCVPORT_ERR)),
};

/*
 * LCB error flags
 */
#define LCBE(name) DC_LCB_ERR_FLG_##name##_SMASK
static struct flag_table lcb_err_flags[] = {
/* 0*/  FLAG_ENTRY0("CSR_PARITY_ERR", LCBE(CSR_PARITY_ERR)),
/* 1*/  FLAG_ENTRY0("INVALID_CSR_ADDR", LCBE(INVALID_CSR_ADDR)),
/* 2*/  FLAG_ENTRY0("RST_FOR_FAILED_DESKEW", LCBE(RST_FOR_FAILED_DESKEW)),
/* 3*/  FLAG_ENTRY0("ALL_LNS_FAILED_REINIT_TEST",
                LCBE(ALL_LNS_FAILED_REINIT_TEST)),
/* 4*/  FLAG_ENTRY0("LOST_REINIT_STALL_OR_TOS", LCBE(LOST_REINIT_STALL_OR_TOS)),
/* 5*/  FLAG_ENTRY0("TX_LESS_THAN_FOUR_LNS", LCBE(TX_LESS_THAN_FOUR_LNS)),
/* 6*/  FLAG_ENTRY0("RX_LESS_THAN_FOUR_LNS", LCBE(RX_LESS_THAN_FOUR_LNS)),
/* 7*/  FLAG_ENTRY0("SEQ_CRC_ERR", LCBE(SEQ_CRC_ERR)),
/* 8*/  FLAG_ENTRY0("REINIT_FROM_PEER", LCBE(REINIT_FROM_PEER)),
/* 9*/  FLAG_ENTRY0("REINIT_FOR_LN_DEGRADE", LCBE(REINIT_FOR_LN_DEGRADE)),
/*10*/  FLAG_ENTRY0("CRC_ERR_CNT_HIT_LIMIT", LCBE(CRC_ERR_CNT_HIT_LIMIT)),
/*11*/  FLAG_ENTRY0("RCLK_STOPPED", LCBE(RCLK_STOPPED)),
/*12*/  FLAG_ENTRY0("UNEXPECTED_REPLAY_MARKER", LCBE(UNEXPECTED_REPLAY_MARKER)),
/*13*/  FLAG_ENTRY0("UNEXPECTED_ROUND_TRIP_MARKER",
                LCBE(UNEXPECTED_ROUND_TRIP_MARKER)),
/*14*/  FLAG_ENTRY0("ILLEGAL_NULL_LTP", LCBE(ILLEGAL_NULL_LTP)),
/*15*/  FLAG_ENTRY0("ILLEGAL_FLIT_ENCODING", LCBE(ILLEGAL_FLIT_ENCODING)),
/*16*/  FLAG_ENTRY0("FLIT_INPUT_BUF_OFLW", LCBE(FLIT_INPUT_BUF_OFLW)),
/*17*/  FLAG_ENTRY0("VL_ACK_INPUT_BUF_OFLW", LCBE(VL_ACK_INPUT_BUF_OFLW)),
/*18*/  FLAG_ENTRY0("VL_ACK_INPUT_PARITY_ERR", LCBE(VL_ACK_INPUT_PARITY_ERR)),
/*19*/  FLAG_ENTRY0("VL_ACK_INPUT_WRONG_CRC_MODE",
                LCBE(VL_ACK_INPUT_WRONG_CRC_MODE)),
/*20*/  FLAG_ENTRY0("FLIT_INPUT_BUF_MBE", LCBE(FLIT_INPUT_BUF_MBE)),
/*21*/  FLAG_ENTRY0("FLIT_INPUT_BUF_SBE", LCBE(FLIT_INPUT_BUF_SBE)),
/*22*/  FLAG_ENTRY0("REPLAY_BUF_MBE", LCBE(REPLAY_BUF_MBE)),
/*23*/  FLAG_ENTRY0("REPLAY_BUF_SBE", LCBE(REPLAY_BUF_SBE)),
/*24*/  FLAG_ENTRY0("CREDIT_RETURN_FLIT_MBE", LCBE(CREDIT_RETURN_FLIT_MBE)),
/*25*/  FLAG_ENTRY0("RST_FOR_LINK_TIMEOUT", LCBE(RST_FOR_LINK_TIMEOUT)),
/*26*/  FLAG_ENTRY0("RST_FOR_INCOMPLT_RND_TRIP",
                LCBE(RST_FOR_INCOMPLT_RND_TRIP)),
/*27*/  FLAG_ENTRY0("HOLD_REINIT", LCBE(HOLD_REINIT)),
/*28*/  FLAG_ENTRY0("NEG_EDGE_LINK_TRANSFER_ACTIVE",
                LCBE(NEG_EDGE_LINK_TRANSFER_ACTIVE)),
/*29*/  FLAG_ENTRY0("REDUNDANT_FLIT_PARITY_ERR",
                LCBE(REDUNDANT_FLIT_PARITY_ERR))
};

/*
 * DC8051 Error Flags
 */
#define D8E(name) DC_DC8051_ERR_FLG_##name##_SMASK
static struct flag_table dc8051_err_flags[] = {
        FLAG_ENTRY0("SET_BY_8051", D8E(SET_BY_8051)),
        FLAG_ENTRY0("LOST_8051_HEART_BEAT", D8E(LOST_8051_HEART_BEAT)),
        FLAG_ENTRY0("CRAM_MBE", D8E(CRAM_MBE)),
        FLAG_ENTRY0("CRAM_SBE", D8E(CRAM_SBE)),
        FLAG_ENTRY0("DRAM_MBE", D8E(DRAM_MBE)),
        FLAG_ENTRY0("DRAM_SBE", D8E(DRAM_SBE)),
        FLAG_ENTRY0("IRAM_MBE", D8E(IRAM_MBE)),
        FLAG_ENTRY0("IRAM_SBE", D8E(IRAM_SBE)),
        FLAG_ENTRY0("UNMATCHED_SECURE_MSG_ACROSS_BCC_LANES",
                D8E(UNMATCHED_SECURE_MSG_ACROSS_BCC_LANES)),
        FLAG_ENTRY0("INVALID_CSR_ADDR", D8E(INVALID_CSR_ADDR)),
};

/*
 * DC8051 Information Error flags
 *
 * Flags in DC8051_DBG_ERR_INFO_SET_BY_8051.ERROR field.
 */
static struct flag_table dc8051_info_err_flags[] = {
        FLAG_ENTRY0("Spico ROM check failed",  SPICO_ROM_FAILED),
        FLAG_ENTRY0("Unknown frame received",  UNKNOWN_FRAME),
        FLAG_ENTRY0("Target BER not met",      TARGET_BER_NOT_MET),
        FLAG_ENTRY0("Serdes internal loopback failure",
                                        FAILED_SERDES_INTERNAL_LOOPBACK),
        FLAG_ENTRY0("Failed SerDes init",      FAILED_SERDES_INIT),
        FLAG_ENTRY0("Failed LNI(Polling)",     FAILED_LNI_POLLING),
        FLAG_ENTRY0("Failed LNI(Debounce)",    FAILED_LNI_DEBOUNCE),
        FLAG_ENTRY0("Failed LNI(EstbComm)",    FAILED_LNI_ESTBCOMM),
        FLAG_ENTRY0("Failed LNI(OptEq)",       FAILED_LNI_OPTEQ),
        FLAG_ENTRY0("Failed LNI(VerifyCap_1)", FAILED_LNI_VERIFY_CAP1),
        FLAG_ENTRY0("Failed LNI(VerifyCap_2)", FAILED_LNI_VERIFY_CAP2),
        FLAG_ENTRY0("Failed LNI(ConfigLT)",    FAILED_LNI_CONFIGLT)
};

/*
 * DC8051 Information Host Information flags
 *
 * Flags in DC8051_DBG_ERR_INFO_SET_BY_8051.HOST_MSG field.
 */
static struct flag_table dc8051_info_host_msg_flags[] = {
        FLAG_ENTRY0("Host request done", 0x0001),
        FLAG_ENTRY0("BC SMA message", 0x0002),
        FLAG_ENTRY0("BC PWR_MGM message", 0x0004),
        FLAG_ENTRY0("BC Unknown message (BCC)", 0x0008),
        FLAG_ENTRY0("BC Unknown message (LCB)", 0x0010),
        FLAG_ENTRY0("External device config request", 0x0020),
        FLAG_ENTRY0("VerifyCap all frames received", 0x0040),
        FLAG_ENTRY0("LinkUp achieved", 0x0080),
        FLAG_ENTRY0("Link going down", 0x0100),
};

static u32 encoded_size(u32 size);
static u32 chip_to_opa_lstate(struct hfi1_devdata *dd, u32 chip_lstate);
static int set_physical_link_state(struct hfi1_devdata *dd, u64 state);
static void read_vc_remote_phy(struct hfi1_devdata *dd, u8 *power_management,
                               u8 *continuous);
static void read_vc_remote_fabric(struct hfi1_devdata *dd, u8 *vau, u8 *z,
                                  u8 *vcu, u16 *vl15buf, u8 *crc_sizes);
static void read_vc_remote_link_width(struct hfi1_devdata *dd,
                                      u8 *remote_tx_rate, u16 *link_widths);
static void read_vc_local_link_width(struct hfi1_devdata *dd, u8 *misc_bits,
                                     u8 *flag_bits, u16 *link_widths);
static void read_remote_device_id(struct hfi1_devdata *dd, u16 *device_id,
                                  u8 *device_rev);
static void read_mgmt_allowed(struct hfi1_devdata *dd, u8 *mgmt_allowed);
static void read_local_lni(struct hfi1_devdata *dd, u8 *enable_lane_rx);
static int read_tx_settings(struct hfi1_devdata *dd, u8 *enable_lane_tx,
                            u8 *tx_polarity_inversion,
                            u8 *rx_polarity_inversion, u8 *max_rate);
static void handle_sdma_eng_err(struct hfi1_devdata *dd,
                                unsigned int context, u64 err_status);
static void handle_qsfp_int(struct hfi1_devdata *dd, u32 source, u64 reg);
static void handle_dcc_err(struct hfi1_devdata *dd,
                           unsigned int context, u64 err_status);
static void handle_lcb_err(struct hfi1_devdata *dd,
                           unsigned int context, u64 err_status);
static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg);
static void handle_cce_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
static void handle_rxe_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
static void handle_misc_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
static void handle_pio_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
static void handle_sdma_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
static void handle_egress_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
static void handle_txe_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
static void set_partition_keys(struct hfi1_pportdata *);
static const char *link_state_name(u32 state);
static const char *link_state_reason_name(struct hfi1_pportdata *ppd,
                                          u32 state);
static int do_8051_command(struct hfi1_devdata *dd, u32 type, u64 in_data,
                           u64 *out_data);
static int read_idle_sma(struct hfi1_devdata *dd, u64 *data);
static int thermal_init(struct hfi1_devdata *dd);

static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state,
                                  int msecs);
static void read_planned_down_reason_code(struct hfi1_devdata *dd, u8 *pdrrc);
static void handle_temp_err(struct hfi1_devdata *);
static void dc_shutdown(struct hfi1_devdata *);
static void dc_start(struct hfi1_devdata *);

/*
 * Error interrupt table entry.  This is used as input to the interrupt
 * "clear down" routine used for all second tier error interrupt registers.
 * Second tier interrupt registers have a single bit representing them
 * in the top-level CceIntStatus.
 */
struct err_reg_info {
        u32 status;             /* status CSR offset */
        u32 clear;              /* clear CSR offset */
        u32 mask;               /* mask CSR offset */
        void (*handler)(struct hfi1_devdata *dd, u32 source, u64 reg);
        const char *desc;
};
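
/*
 * Illustrative sketch, not the driver's actual routine: a generic
 * "clear down" handler for a second tier error register reads the
 * status CSR, writes the asserted bits back to the clear CSR, and then
 * hands the value to the per-register handler.  read_csr()/write_csr()
 * are the driver's CSR accessors; the function name below is an
 * assumption, so the block is compiled out.
 */
#if 0   /* example only */
static void example_clear_down(struct hfi1_devdata *dd,
                               const struct err_reg_info *eri, u32 source)
{
        u64 reg = read_csr(dd, eri->status);

        write_csr(dd, eri->clear, reg); /* ack the asserted bits */
        if (eri->handler)
                eri->handler(dd, source, reg);
}
#endif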

#define NUM_MISC_ERRS (IS_GENERAL_ERR_END - IS_GENERAL_ERR_START)
#define NUM_DC_ERRS (IS_DC_END - IS_DC_START)
#define NUM_VARIOUS (IS_VARIOUS_END - IS_VARIOUS_START)

/*
 * Helpers for building HFI and DC error interrupt table entries.  Different
 * helpers are needed because of inconsistent register names.
 */
#define EE(reg, handler, desc) \
        { reg##_STATUS, reg##_CLEAR, reg##_MASK, \
                handler, desc }
#define DC_EE1(reg, handler, desc) \
        { reg##_FLG, reg##_FLG_CLR, reg##_FLG_EN, handler, desc }
#define DC_EE2(reg, handler, desc) \
        { reg##_FLG, reg##_CLR, reg##_EN, handler, desc }
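
/*
 * For example, EE(CCE_ERR, handle_cce_err, "CceErr") expands to
 * { CCE_ERR_STATUS, CCE_ERR_CLEAR, CCE_ERR_MASK,
 *   handle_cce_err, "CceErr" }.
 */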
1059
1060 /*
1061  * Table of the "misc" grouping of error interrupts.  Each entry refers to
1062  * another register containing more information.
1063  */
1064 static const struct err_reg_info misc_errs[NUM_MISC_ERRS] = {
1065 /* 0*/  EE(CCE_ERR,             handle_cce_err,    "CceErr"),
1066 /* 1*/  EE(RCV_ERR,             handle_rxe_err,    "RxeErr"),
1067 /* 2*/  EE(MISC_ERR,    handle_misc_err,   "MiscErr"),
1068 /* 3*/  { 0, 0, 0, NULL }, /* reserved */
1069 /* 4*/  EE(SEND_PIO_ERR,    handle_pio_err,    "PioErr"),
1070 /* 5*/  EE(SEND_DMA_ERR,    handle_sdma_err,   "SDmaErr"),
1071 /* 6*/  EE(SEND_EGRESS_ERR, handle_egress_err, "EgressErr"),
1072 /* 7*/  EE(SEND_ERR,    handle_txe_err,    "TxeErr")
1073         /* the rest are reserved */
1074 };
1075
1076 /*
1077  * Index into the Various section of the interrupt sources
1078  * corresponding to the Critical Temperature interrupt.
1079  */
1080 #define TCRIT_INT_SOURCE 4
1081
1082 /*
1083  * SDMA error interrupt entry - refers to another register containing more
1084  * information.
1085  */
1086 static const struct err_reg_info sdma_eng_err =
1087         EE(SEND_DMA_ENG_ERR, handle_sdma_eng_err, "SDmaEngErr");
1088
1089 static const struct err_reg_info various_err[NUM_VARIOUS] = {
1090 /* 0*/  { 0, 0, 0, NULL }, /* PbcInt */
1091 /* 1*/  { 0, 0, 0, NULL }, /* GpioAssertInt */
1092 /* 2*/  EE(ASIC_QSFP1,  handle_qsfp_int,        "QSFP1"),
1093 /* 3*/  EE(ASIC_QSFP2,  handle_qsfp_int,        "QSFP2"),
1094 /* 4*/  { 0, 0, 0, NULL }, /* TCritInt */
1095         /* rest are reserved */
1096 };
1097
1098 /*
1099  * The DC encoding of mtu_cap for 10K MTU in the DCC_CFG_PORT_CONFIG
1100  * register cannot be derived from the MTU value because 10K is not
1101  * a power of 2. Therefore, we need a constant. Everything else can
1102  * be calculated.
1103  */
1104 #define DCC_CFG_PORT_MTU_CAP_10240 7
1105
1106 /*
1107  * Table of the DC grouping of error interrupts.  Each entry refers to
1108  * another register containing more information.
1109  */
1110 static const struct err_reg_info dc_errs[NUM_DC_ERRS] = {
1111 /* 0*/  DC_EE1(DCC_ERR,         handle_dcc_err,        "DCC Err"),
1112 /* 1*/  DC_EE2(DC_LCB_ERR,      handle_lcb_err,        "LCB Err"),
1113 /* 2*/  DC_EE2(DC_DC8051_ERR,   handle_8051_interrupt, "DC8051 Interrupt"),
1114 /* 3*/  /* dc_lbm_int - special, see is_dc_int() */
1115         /* the rest are reserved */
1116 };
1117
1118 struct cntr_entry {
1119         /*
1120          * counter name
1121          */
1122         char *name;
1123
1124         /*
1125          * csr to read for name (if applicable)
1126          */
1127         u64 csr;
1128
1129         /*
1130          * offset into dd or ppd to store the counter's value
1131          */
1132         int offset;
1133
1134         /*
1135          * flags
1136          */
1137         u8 flags;
1138
1139         /*
1140          * accessor for stat element, context either dd or ppd
1141          */
1142         u64 (*rw_cntr)(const struct cntr_entry *, void *context,
1143                        int vl, int mode, u64 data);
1147 };
1148
1149 #define C_RCV_HDR_OVF_FIRST C_RCV_HDR_OVF_0
1150 #define C_RCV_HDR_OVF_LAST C_RCV_HDR_OVF_159
1151
1152 #define CNTR_ELEM(name, csr, offset, flags, accessor) \
1153 { \
1154         name, \
1155         csr, \
1156         offset, \
1157         flags, \
1158         accessor \
1159 }
1160
1161 /* 32bit RXE */
1162 #define RXE32_PORT_CNTR_ELEM(name, counter, flags) \
1163 CNTR_ELEM(#name, \
1164           (counter * 8 + RCV_COUNTER_ARRAY32), \
1165           0, flags | CNTR_32BIT, \
1166           port_access_u32_csr)
1167
1168 #define RXE32_DEV_CNTR_ELEM(name, counter, flags) \
1169 CNTR_ELEM(#name, \
1170           (counter * 8 + RCV_COUNTER_ARRAY32), \
1171           0, flags | CNTR_32BIT, \
1172           dev_access_u32_csr)
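
/*
 * Sketch of the addressing scheme encoded above: the counter arrays are
 * arrays of 64-bit CSRs, so entry N of an array based at BASE lives at
 * BASE + 8 * N.  For example, RXE32_DEV_CNTR_ELEM(RcvOverflow,
 * RCV_BUF_OVFL_CNT, CNTR_SYNTH) reads the CSR at
 * RCV_COUNTER_ARRAY32 + 8 * RCV_BUF_OVFL_CNT.
 */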
1173
1174 /* 64bit RXE */
1175 #define RXE64_PORT_CNTR_ELEM(name, counter, flags) \
1176 CNTR_ELEM(#name, \
1177           (counter * 8 + RCV_COUNTER_ARRAY64), \
1178           0, flags, \
1179           port_access_u64_csr)
1180
1181 #define RXE64_DEV_CNTR_ELEM(name, counter, flags) \
1182 CNTR_ELEM(#name, \
1183           (counter * 8 + RCV_COUNTER_ARRAY64), \
1184           0, flags, \
1185           dev_access_u64_csr)
1186
1187 #define OVR_LBL(ctx) C_RCV_HDR_OVF_ ## ctx
1188 #define OVR_ELM(ctx) \
1189 CNTR_ELEM("RcvHdrOvr" #ctx, \
1190           (RCV_HDR_OVFL_CNT + ctx * 0x100), \
1191           0, CNTR_NORMAL, port_access_u64_csr)
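
/*
 * Illustrative expansion only: OVR_ELM(3) produces
 * CNTR_ELEM("RcvHdrOvr3", RCV_HDR_OVFL_CNT + 3 * 0x100, 0, CNTR_NORMAL,
 * port_access_u64_csr); each receive context's overflow counter CSR
 * sits 0x100 bytes past the previous context's.
 */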
1192
1193 /* 32bit TXE */
1194 #define TXE32_PORT_CNTR_ELEM(name, counter, flags) \
1195 CNTR_ELEM(#name, \
1196           (counter * 8 + SEND_COUNTER_ARRAY32), \
1197           0, flags | CNTR_32BIT, \
1198           port_access_u32_csr)
1199
1200 /* 64bit TXE */
1201 #define TXE64_PORT_CNTR_ELEM(name, counter, flags) \
1202 CNTR_ELEM(#name, \
1203           (counter * 8 + SEND_COUNTER_ARRAY64), \
1204           0, flags, \
1205           port_access_u64_csr)
1206
1207 #define TX64_DEV_CNTR_ELEM(name, counter, flags) \
1208 CNTR_ELEM(#name, \
1209           (counter * 8 + SEND_COUNTER_ARRAY64), \
1210           0, flags, \
1211           dev_access_u64_csr)
1213
1214 /* CCE */
1215 #define CCE_PERF_DEV_CNTR_ELEM(name, counter, flags) \
1216 CNTR_ELEM(#name, \
1217           (counter * 8 + CCE_COUNTER_ARRAY32), \
1218           0, flags | CNTR_32BIT, \
1219           dev_access_u32_csr)
1220
1221 #define CCE_INT_DEV_CNTR_ELEM(name, counter, flags) \
1222 CNTR_ELEM(#name, \
1223           (counter * 8 + CCE_INT_COUNTER_ARRAY32), \
1224           0, flags | CNTR_32BIT, \
1225           dev_access_u32_csr)
1226
1227 /* DC */
1228 #define DC_PERF_CNTR(name, counter, flags) \
1229 CNTR_ELEM(#name, \
1230           counter, \
1231           0, \
1232           flags, \
1233           dev_access_u64_csr)
1234
1235 #define DC_PERF_CNTR_LCB(name, counter, flags) \
1236 CNTR_ELEM(#name, \
1237           counter, \
1238           0, \
1239           flags, \
1240           dc_access_lcb_cntr)
1241
1242 /* ibp counters */
1243 #define SW_IBP_CNTR(name, cntr) \
1244 CNTR_ELEM(#name, \
1245           0, \
1246           0, \
1247           CNTR_SYNTH, \
1248           access_ibp_##cntr)
1249
1250 u64 read_csr(const struct hfi1_devdata *dd, u32 offset)
1251 {
1252         u64 val;
1253
1254         if (dd->flags & HFI1_PRESENT) {
1255                 val = readq((void __iomem *)dd->kregbase + offset);
1256                 return val;
1257         }
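        /* not present: return all-ones, as a read of a removed device would */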
1258         return -1;
1259 }
1260
1261 void write_csr(const struct hfi1_devdata *dd, u32 offset, u64 value)
1262 {
1263         if (dd->flags & HFI1_PRESENT)
1264                 writeq(value, (void __iomem *)dd->kregbase + offset);
1265 }
1266
1267 void __iomem *get_csr_addr(
1268         struct hfi1_devdata *dd,
1269         u32 offset)
1270 {
1271         return (void __iomem *)dd->kregbase + offset;
1272 }
1273
1274 static inline u64 read_write_csr(const struct hfi1_devdata *dd, u32 csr,
1275                                  int mode, u64 value)
1276 {
1277         u64 ret;
1278
1280         if (mode == CNTR_MODE_R) {
1281                 ret = read_csr(dd, csr);
1282         } else if (mode == CNTR_MODE_W) {
1283                 write_csr(dd, csr, value);
1284                 ret = value;
1285         } else {
1286                 dd_dev_err(dd, "Invalid cntr register access mode");
1287                 return 0;
1288         }
1289
1290         hfi1_cdbg(CNTR, "csr 0x%x val 0x%llx mode %d", csr, ret, mode);
1291         return ret;
1292 }
1293
1294 /* Dev Access */
1295 static u64 dev_access_u32_csr(const struct cntr_entry *entry,
1296                             void *context, int vl, int mode, u64 data)
1297 {
1298         struct hfi1_devdata *dd = (struct hfi1_devdata *)context;
1299
1300         if (vl != CNTR_INVALID_VL)
1301                 return 0;
1302         return read_write_csr(dd, entry->csr, mode, data);
1303 }
1304
1305 static u64 dev_access_u64_csr(const struct cntr_entry *entry, void *context,
1306                             int vl, int mode, u64 data)
1307 {
1308         struct hfi1_devdata *dd = (struct hfi1_devdata *)context;
1309
1310         u64 val = 0;
1311         u64 csr = entry->csr;
1312
1313         if (entry->flags & CNTR_VL) {
1314                 if (vl == CNTR_INVALID_VL)
1315                         return 0;
1316                 csr += 8 * vl;
1317         } else {
1318                 if (vl != CNTR_INVALID_VL)
1319                         return 0;
1320         }
1321
1322         val = read_write_csr(dd, csr, mode, data);
1323         return val;
1324 }
1325
1326 static u64 dc_access_lcb_cntr(const struct cntr_entry *entry, void *context,
1327                             int vl, int mode, u64 data)
1328 {
1329         struct hfi1_devdata *dd = (struct hfi1_devdata *)context;
1330         u32 csr = entry->csr;
1331         int ret = 0;
1332
1333         if (vl != CNTR_INVALID_VL)
1334                 return 0;
1335         if (mode == CNTR_MODE_R)
1336                 ret = read_lcb_csr(dd, csr, &data);
1337         else if (mode == CNTR_MODE_W)
1338                 ret = write_lcb_csr(dd, csr, data);
1339
1340         if (ret) {
1341                 dd_dev_err(dd, "Could not acquire LCB for counter 0x%x", csr);
1342                 return 0;
1343         }
1344
1345         hfi1_cdbg(CNTR, "csr 0x%x val 0x%llx mode %d", csr, data, mode);
1346         return data;
1347 }
1348
1349 /* Port Access */
1350 static u64 port_access_u32_csr(const struct cntr_entry *entry, void *context,
1351                              int vl, int mode, u64 data)
1352 {
1353         struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)context;
1354
1355         if (vl != CNTR_INVALID_VL)
1356                 return 0;
1357         return read_write_csr(ppd->dd, entry->csr, mode, data);
1358 }
1359
1360 static u64 port_access_u64_csr(const struct cntr_entry *entry,
1361                              void *context, int vl, int mode, u64 data)
1362 {
1363         struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)context;
1364         u64 val;
1365         u64 csr = entry->csr;
1366
1367         if (entry->flags & CNTR_VL) {
1368                 if (vl == CNTR_INVALID_VL)
1369                         return 0;
1370                 csr += 8 * vl;
1371         } else {
1372                 if (vl != CNTR_INVALID_VL)
1373                         return 0;
1374         }
1375         val = read_write_csr(ppd->dd, csr, mode, data);
1376         return val;
1377 }
1378
1379 /* Software defined */
1380 static inline u64 read_write_sw(struct hfi1_devdata *dd, u64 *cntr, int mode,
1381                                 u64 data)
1382 {
1383         u64 ret;
1384
1385         if (mode == CNTR_MODE_R) {
1386                 ret = *cntr;
1387         } else if (mode == CNTR_MODE_W) {
1388                 *cntr = data;
1389                 ret = data;
1390         } else {
1391                 dd_dev_err(dd, "Invalid cntr sw access mode");
1392                 return 0;
1393         }
1394
1395         hfi1_cdbg(CNTR, "val 0x%llx mode %d", ret, mode);
1396
1397         return ret;
1398 }
1399
1400 static u64 access_sw_link_dn_cnt(const struct cntr_entry *entry, void *context,
1401                                int vl, int mode, u64 data)
1402 {
1403         struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)context;
1404
1405         if (vl != CNTR_INVALID_VL)
1406                 return 0;
1407         return read_write_sw(ppd->dd, &ppd->link_downed, mode, data);
1408 }
1409
1410 static u64 access_sw_link_up_cnt(const struct cntr_entry *entry, void *context,
1411                                int vl, int mode, u64 data)
1412 {
1413         struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)context;
1414
1415         if (vl != CNTR_INVALID_VL)
1416                 return 0;
1417         return read_write_sw(ppd->dd, &ppd->link_up, mode, data);
1418 }
1419
1420 static u64 access_sw_xmit_discards(const struct cntr_entry *entry,
1421                                     void *context, int vl, int mode, u64 data)
1422 {
1423         struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)context;
1424
1425         if (vl != CNTR_INVALID_VL)
1426                 return 0;
1427
1428         return read_write_sw(ppd->dd, &ppd->port_xmit_discards, mode, data);
1429 }
1430
1431 static u64 access_xmit_constraint_errs(const struct cntr_entry *entry,
1432                                      void *context, int vl, int mode, u64 data)
1433 {
1434         struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)context;
1435
1436         if (vl != CNTR_INVALID_VL)
1437                 return 0;
1438
1439         return read_write_sw(ppd->dd, &ppd->port_xmit_constraint_errors,
1440                              mode, data);
1441 }
1442
1443 static u64 access_rcv_constraint_errs(const struct cntr_entry *entry,
1444                                      void *context, int vl, int mode, u64 data)
1445 {
1446         struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)context;
1447
1448         if (vl != CNTR_INVALID_VL)
1449                 return 0;
1450
1451         return read_write_sw(ppd->dd, &ppd->port_rcv_constraint_errors,
1452                              mode, data);
1453 }
1454
1455 u64 get_all_cpu_total(u64 __percpu *cntr)
1456 {
1457         int cpu;
1458         u64 counter = 0;
1459
1460         for_each_possible_cpu(cpu)
1461                 counter += *per_cpu_ptr(cntr, cpu);
1462         return counter;
1463 }
1464
1465 static u64 read_write_cpu(struct hfi1_devdata *dd, u64 *z_val,
1466                           u64 __percpu *cntr,
1467                           int vl, int mode, u64 data)
1468 {
1470         u64 ret = 0;
1471
1472         if (vl != CNTR_INVALID_VL)
1473                 return 0;
1474
1475         if (mode == CNTR_MODE_R) {
1476                 ret = get_all_cpu_total(cntr) - *z_val;
1477         } else if (mode == CNTR_MODE_W) {
1478                 /* A write can only zero the counter */
1479                 if (data == 0)
1480                         *z_val = get_all_cpu_total(cntr);
1481                 else
1482                         dd_dev_err(dd, "Per CPU cntrs can only be zeroed");
1483         } else {
1484                 dd_dev_err(dd, "Invalid cntr sw cpu access mode");
1485                 return 0;
1486         }
1487
1488         return ret;
1489 }
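
/*
 * Worked example of the zeroing scheme above (numbers invented for
 * illustration): if the per-CPU total is 100 when a zeroing write
 * arrives, *z_val is set to 100; a later read taken when the total has
 * grown to 130 reports 130 - 100 = 30.  The per-CPU variables are never
 * written here, so "zeroing" needs no cross-CPU synchronization.
 */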
1490
1491 static u64 access_sw_cpu_intr(const struct cntr_entry *entry,
1492                               void *context, int vl, int mode, u64 data)
1493 {
1494         struct hfi1_devdata *dd = (struct hfi1_devdata *)context;
1495
1496         return read_write_cpu(dd, &dd->z_int_counter, dd->int_counter, vl,
1497                               mode, data);
1498 }
1499
1500 static u64 access_sw_cpu_rcv_limit(const struct cntr_entry *entry,
1501                               void *context, int vl, int mode, u64 data)
1502 {
1503         struct hfi1_devdata *dd = (struct hfi1_devdata *)context;
1504
1505         return read_write_cpu(dd, &dd->z_rcv_limit, dd->rcv_limit, vl,
1506                               mode, data);
1507 }
1508
1509 static u64 access_sw_pio_wait(const struct cntr_entry *entry,
1510                               void *context, int vl, int mode, u64 data)
1511 {
1512         struct hfi1_devdata *dd = (struct hfi1_devdata *)context;
1513
1514         return dd->verbs_dev.n_piowait;
1515 }
1516
1517 static u64 access_sw_vtx_wait(const struct cntr_entry *entry,
1518                               void *context, int vl, int mode, u64 data)
1519 {
1520         struct hfi1_devdata *dd = (struct hfi1_devdata *)context;
1521
1522         return dd->verbs_dev.n_txwait;
1523 }
1524
1525 static u64 access_sw_kmem_wait(const struct cntr_entry *entry,
1526                                void *context, int vl, int mode, u64 data)
1527 {
1528         struct hfi1_devdata *dd = (struct hfi1_devdata *)context;
1529
1530         return dd->verbs_dev.n_kmem_wait;
1531 }
1532
1533 #define def_access_sw_cpu(cntr) \
1534 static u64 access_sw_cpu_##cntr(const struct cntr_entry *entry,               \
1535                               void *context, int vl, int mode, u64 data)      \
1536 {                                                                             \
1537         struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)context;        \
1538         return read_write_cpu(ppd->dd, &ppd->ibport_data.z_ ##cntr,           \
1539                               ppd->ibport_data.cntr, vl,                      \
1540                               mode, data);                                    \
1541 }
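
/*
 * Illustrative expansion only: def_access_sw_cpu(rc_acks) defines
 * access_sw_cpu_rc_acks(), which hands read_write_cpu() the
 * ppd->ibport_data.z_rc_acks baseline and the per-CPU
 * ppd->ibport_data.rc_acks counter.
 */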
1542
1543 def_access_sw_cpu(rc_acks);
1544 def_access_sw_cpu(rc_qacks);
1545 def_access_sw_cpu(rc_delayed_comp);
1546
1547 #define def_access_ibp_counter(cntr) \
1548 static u64 access_ibp_##cntr(const struct cntr_entry *entry,                  \
1549                                 void *context, int vl, int mode, u64 data)    \
1550 {                                                                             \
1551         struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)context;        \
1552                                                                               \
1553         if (vl != CNTR_INVALID_VL)                                            \
1554                 return 0;                                                     \
1555                                                                               \
1556         return read_write_sw(ppd->dd, &ppd->ibport_data.n_ ##cntr,            \
1557                              mode, data);                                     \
1558 }
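
/*
 * Illustrative expansion only: def_access_ibp_counter(rc_resends)
 * defines access_ibp_rc_resends(), a read_write_sw() wrapper around
 * ppd->ibport_data.n_rc_resends; the SW_IBP_CNTR() entries in
 * port_cntrs[] below reference these generated functions by name.
 */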
1559
1560 def_access_ibp_counter(loop_pkts);
1561 def_access_ibp_counter(rc_resends);
1562 def_access_ibp_counter(rnr_naks);
1563 def_access_ibp_counter(other_naks);
1564 def_access_ibp_counter(rc_timeouts);
1565 def_access_ibp_counter(pkt_drops);
1566 def_access_ibp_counter(dmawait);
1567 def_access_ibp_counter(rc_seqnak);
1568 def_access_ibp_counter(rc_dupreq);
1569 def_access_ibp_counter(rdma_seq);
1570 def_access_ibp_counter(unaligned);
1571 def_access_ibp_counter(seq_naks);
1572
1573 static struct cntr_entry dev_cntrs[DEV_CNTR_LAST] = {
1574 [C_RCV_OVF] = RXE32_DEV_CNTR_ELEM(RcvOverflow, RCV_BUF_OVFL_CNT, CNTR_SYNTH),
1575 [C_RX_TID_FULL] = RXE32_DEV_CNTR_ELEM(RxTIDFullEr, RCV_TID_FULL_ERR_CNT,
1576                         CNTR_NORMAL),
1577 [C_RX_TID_INVALID] = RXE32_DEV_CNTR_ELEM(RxTIDInvalid, RCV_TID_VALID_ERR_CNT,
1578                         CNTR_NORMAL),
1579 [C_RX_TID_FLGMS] = RXE32_DEV_CNTR_ELEM(RxTidFLGMs,
1580                         RCV_TID_FLOW_GEN_MISMATCH_CNT,
1581                         CNTR_NORMAL),
1582 [C_RX_CTX_RHQS] = RXE32_DEV_CNTR_ELEM(RxCtxRHQS, RCV_CONTEXT_RHQ_STALL,
1583                         CNTR_NORMAL),
1584 [C_RX_CTX_EGRS] = RXE32_DEV_CNTR_ELEM(RxCtxEgrS, RCV_CONTEXT_EGR_STALL,
1585                         CNTR_NORMAL),
1586 [C_RCV_TID_FLSMS] = RXE32_DEV_CNTR_ELEM(RxTidFLSMs,
1587                         RCV_TID_FLOW_SEQ_MISMATCH_CNT, CNTR_NORMAL),
1588 [C_CCE_PCI_CR_ST] = CCE_PERF_DEV_CNTR_ELEM(CcePciCrSt,
1589                         CCE_PCIE_POSTED_CRDT_STALL_CNT, CNTR_NORMAL),
1590 [C_CCE_PCI_TR_ST] = CCE_PERF_DEV_CNTR_ELEM(CcePciTrSt, CCE_PCIE_TRGT_STALL_CNT,
1591                         CNTR_NORMAL),
1592 [C_CCE_PIO_WR_ST] = CCE_PERF_DEV_CNTR_ELEM(CcePioWrSt, CCE_PIO_WR_STALL_CNT,
1593                         CNTR_NORMAL),
1594 [C_CCE_ERR_INT] = CCE_INT_DEV_CNTR_ELEM(CceErrInt, CCE_ERR_INT_CNT,
1595                         CNTR_NORMAL),
1596 [C_CCE_SDMA_INT] = CCE_INT_DEV_CNTR_ELEM(CceSdmaInt, CCE_SDMA_INT_CNT,
1597                         CNTR_NORMAL),
1598 [C_CCE_MISC_INT] = CCE_INT_DEV_CNTR_ELEM(CceMiscInt, CCE_MISC_INT_CNT,
1599                         CNTR_NORMAL),
1600 [C_CCE_RCV_AV_INT] = CCE_INT_DEV_CNTR_ELEM(CceRcvAvInt, CCE_RCV_AVAIL_INT_CNT,
1601                         CNTR_NORMAL),
1602 [C_CCE_RCV_URG_INT] = CCE_INT_DEV_CNTR_ELEM(CceRcvUrgInt,
1603                         CCE_RCV_URGENT_INT_CNT, CNTR_NORMAL),
1604 [C_CCE_SEND_CR_INT] = CCE_INT_DEV_CNTR_ELEM(CceSndCrInt,
1605                         CCE_SEND_CREDIT_INT_CNT, CNTR_NORMAL),
1606 [C_DC_UNC_ERR] = DC_PERF_CNTR(DcUnctblErr, DCC_ERR_UNCORRECTABLE_CNT,
1607                               CNTR_SYNTH),
1608 [C_DC_RCV_ERR] = DC_PERF_CNTR(DcRecvErr, DCC_ERR_PORTRCV_ERR_CNT, CNTR_SYNTH),
1609 [C_DC_FM_CFG_ERR] = DC_PERF_CNTR(DcFmCfgErr, DCC_ERR_FMCONFIG_ERR_CNT,
1610                                  CNTR_SYNTH),
1611 [C_DC_RMT_PHY_ERR] = DC_PERF_CNTR(DcRmtPhyErr, DCC_ERR_RCVREMOTE_PHY_ERR_CNT,
1612                                   CNTR_SYNTH),
1613 [C_DC_DROPPED_PKT] = DC_PERF_CNTR(DcDroppedPkt, DCC_ERR_DROPPED_PKT_CNT,
1614                                   CNTR_SYNTH),
1615 [C_DC_MC_XMIT_PKTS] = DC_PERF_CNTR(DcMcXmitPkts,
1616                                    DCC_PRF_PORT_XMIT_MULTICAST_CNT, CNTR_SYNTH),
1617 [C_DC_MC_RCV_PKTS] = DC_PERF_CNTR(DcMcRcvPkts,
1618                                   DCC_PRF_PORT_RCV_MULTICAST_PKT_CNT,
1619                                   CNTR_SYNTH),
1620 [C_DC_XMIT_CERR] = DC_PERF_CNTR(DcXmitCorr,
1621                                 DCC_PRF_PORT_XMIT_CORRECTABLE_CNT, CNTR_SYNTH),
1622 [C_DC_RCV_CERR] = DC_PERF_CNTR(DcRcvCorrCnt, DCC_PRF_PORT_RCV_CORRECTABLE_CNT,
1623                                CNTR_SYNTH),
1624 [C_DC_RCV_FCC] = DC_PERF_CNTR(DcRxFCntl, DCC_PRF_RX_FLOW_CRTL_CNT,
1625                               CNTR_SYNTH),
1626 [C_DC_XMIT_FCC] = DC_PERF_CNTR(DcXmitFCntl, DCC_PRF_TX_FLOW_CRTL_CNT,
1627                                CNTR_SYNTH),
1628 [C_DC_XMIT_FLITS] = DC_PERF_CNTR(DcXmitFlits, DCC_PRF_PORT_XMIT_DATA_CNT,
1629                                  CNTR_SYNTH),
1630 [C_DC_RCV_FLITS] = DC_PERF_CNTR(DcRcvFlits, DCC_PRF_PORT_RCV_DATA_CNT,
1631                                 CNTR_SYNTH),
1632 [C_DC_XMIT_PKTS] = DC_PERF_CNTR(DcXmitPkts, DCC_PRF_PORT_XMIT_PKTS_CNT,
1633                                 CNTR_SYNTH),
1634 [C_DC_RCV_PKTS] = DC_PERF_CNTR(DcRcvPkts, DCC_PRF_PORT_RCV_PKTS_CNT,
1635                                CNTR_SYNTH),
1636 [C_DC_RX_FLIT_VL] = DC_PERF_CNTR(DcRxFlitVl, DCC_PRF_PORT_VL_RCV_DATA_CNT,
1637                                  CNTR_SYNTH | CNTR_VL),
1638 [C_DC_RX_PKT_VL] = DC_PERF_CNTR(DcRxPktVl, DCC_PRF_PORT_VL_RCV_PKTS_CNT,
1639                                 CNTR_SYNTH | CNTR_VL),
1640 [C_DC_RCV_FCN] = DC_PERF_CNTR(DcRcvFcn, DCC_PRF_PORT_RCV_FECN_CNT, CNTR_SYNTH),
1641 [C_DC_RCV_FCN_VL] = DC_PERF_CNTR(DcRcvFcnVl, DCC_PRF_PORT_VL_RCV_FECN_CNT,
1642                                  CNTR_SYNTH | CNTR_VL),
1643 [C_DC_RCV_BCN] = DC_PERF_CNTR(DcRcvBcn, DCC_PRF_PORT_RCV_BECN_CNT, CNTR_SYNTH),
1644 [C_DC_RCV_BCN_VL] = DC_PERF_CNTR(DcRcvBcnVl, DCC_PRF_PORT_VL_RCV_BECN_CNT,
1645                                  CNTR_SYNTH | CNTR_VL),
1646 [C_DC_RCV_BBL] = DC_PERF_CNTR(DcRcvBbl, DCC_PRF_PORT_RCV_BUBBLE_CNT,
1647                               CNTR_SYNTH),
1648 [C_DC_RCV_BBL_VL] = DC_PERF_CNTR(DcRcvBblVl, DCC_PRF_PORT_VL_RCV_BUBBLE_CNT,
1649                                  CNTR_SYNTH | CNTR_VL),
1650 [C_DC_MARK_FECN] = DC_PERF_CNTR(DcMarkFcn, DCC_PRF_PORT_MARK_FECN_CNT,
1651                                 CNTR_SYNTH),
1652 [C_DC_MARK_FECN_VL] = DC_PERF_CNTR(DcMarkFcnVl, DCC_PRF_PORT_VL_MARK_FECN_CNT,
1653                                    CNTR_SYNTH | CNTR_VL),
1654 [C_DC_TOTAL_CRC] =
1655         DC_PERF_CNTR_LCB(DcTotCrc, DC_LCB_ERR_INFO_TOTAL_CRC_ERR,
1656                          CNTR_SYNTH),
1657 [C_DC_CRC_LN0] = DC_PERF_CNTR_LCB(DcCrcLn0, DC_LCB_ERR_INFO_CRC_ERR_LN0,
1658                                   CNTR_SYNTH),
1659 [C_DC_CRC_LN1] = DC_PERF_CNTR_LCB(DcCrcLn1, DC_LCB_ERR_INFO_CRC_ERR_LN1,
1660                                   CNTR_SYNTH),
1661 [C_DC_CRC_LN2] = DC_PERF_CNTR_LCB(DcCrcLn2, DC_LCB_ERR_INFO_CRC_ERR_LN2,
1662                                   CNTR_SYNTH),
1663 [C_DC_CRC_LN3] = DC_PERF_CNTR_LCB(DcCrcLn3, DC_LCB_ERR_INFO_CRC_ERR_LN3,
1664                                   CNTR_SYNTH),
1665 [C_DC_CRC_MULT_LN] =
1666         DC_PERF_CNTR_LCB(DcMultLn, DC_LCB_ERR_INFO_CRC_ERR_MULTI_LN,
1667                          CNTR_SYNTH),
1668 [C_DC_TX_REPLAY] = DC_PERF_CNTR_LCB(DcTxReplay, DC_LCB_ERR_INFO_TX_REPLAY_CNT,
1669                                     CNTR_SYNTH),
1670 [C_DC_RX_REPLAY] = DC_PERF_CNTR_LCB(DcRxReplay, DC_LCB_ERR_INFO_RX_REPLAY_CNT,
1671                                     CNTR_SYNTH),
1672 [C_DC_SEQ_CRC_CNT] =
1673         DC_PERF_CNTR_LCB(DcLinkSeqCrc, DC_LCB_ERR_INFO_SEQ_CRC_CNT,
1674                          CNTR_SYNTH),
1675 [C_DC_ESC0_ONLY_CNT] =
1676         DC_PERF_CNTR_LCB(DcEsc0, DC_LCB_ERR_INFO_ESCAPE_0_ONLY_CNT,
1677                          CNTR_SYNTH),
1678 [C_DC_ESC0_PLUS1_CNT] =
1679         DC_PERF_CNTR_LCB(DcEsc1, DC_LCB_ERR_INFO_ESCAPE_0_PLUS1_CNT,
1680                          CNTR_SYNTH),
1681 [C_DC_ESC0_PLUS2_CNT] =
1682         DC_PERF_CNTR_LCB(DcEsc0Plus2, DC_LCB_ERR_INFO_ESCAPE_0_PLUS2_CNT,
1683                          CNTR_SYNTH),
1684 [C_DC_REINIT_FROM_PEER_CNT] =
1685         DC_PERF_CNTR_LCB(DcReinitPeer, DC_LCB_ERR_INFO_REINIT_FROM_PEER_CNT,
1686                          CNTR_SYNTH),
1687 [C_DC_SBE_CNT] = DC_PERF_CNTR_LCB(DcSbe, DC_LCB_ERR_INFO_SBE_CNT,
1688                                   CNTR_SYNTH),
1689 [C_DC_MISC_FLG_CNT] =
1690         DC_PERF_CNTR_LCB(DcMiscFlg, DC_LCB_ERR_INFO_MISC_FLG_CNT,
1691                          CNTR_SYNTH),
1692 [C_DC_PRF_GOOD_LTP_CNT] =
1693         DC_PERF_CNTR_LCB(DcGoodLTP, DC_LCB_PRF_GOOD_LTP_CNT, CNTR_SYNTH),
1694 [C_DC_PRF_ACCEPTED_LTP_CNT] =
1695         DC_PERF_CNTR_LCB(DcAccLTP, DC_LCB_PRF_ACCEPTED_LTP_CNT,
1696                          CNTR_SYNTH),
1697 [C_DC_PRF_RX_FLIT_CNT] =
1698         DC_PERF_CNTR_LCB(DcPrfRxFlit, DC_LCB_PRF_RX_FLIT_CNT, CNTR_SYNTH),
1699 [C_DC_PRF_TX_FLIT_CNT] =
1700         DC_PERF_CNTR_LCB(DcPrfTxFlit, DC_LCB_PRF_TX_FLIT_CNT, CNTR_SYNTH),
1701 [C_DC_PRF_CLK_CNTR] =
1702         DC_PERF_CNTR_LCB(DcPrfClk, DC_LCB_PRF_CLK_CNTR, CNTR_SYNTH),
1703 [C_DC_PG_DBG_FLIT_CRDTS_CNT] =
1704         DC_PERF_CNTR_LCB(DcFltCrdts, DC_LCB_PG_DBG_FLIT_CRDTS_CNT, CNTR_SYNTH),
1705 [C_DC_PG_STS_PAUSE_COMPLETE_CNT] =
1706         DC_PERF_CNTR_LCB(DcPauseComp, DC_LCB_PG_STS_PAUSE_COMPLETE_CNT,
1707                          CNTR_SYNTH),
1708 [C_DC_PG_STS_TX_SBE_CNT] =
1709         DC_PERF_CNTR_LCB(DcStsTxSbe, DC_LCB_PG_STS_TX_SBE_CNT, CNTR_SYNTH),
1710 [C_DC_PG_STS_TX_MBE_CNT] =
1711         DC_PERF_CNTR_LCB(DcStsTxMbe, DC_LCB_PG_STS_TX_MBE_CNT,
1712                          CNTR_SYNTH),
1713 [C_SW_CPU_INTR] = CNTR_ELEM("Intr", 0, 0, CNTR_NORMAL,
1714                             access_sw_cpu_intr),
1715 [C_SW_CPU_RCV_LIM] = CNTR_ELEM("RcvLimit", 0, 0, CNTR_NORMAL,
1716                             access_sw_cpu_rcv_limit),
1717 [C_SW_VTX_WAIT] = CNTR_ELEM("vTxWait", 0, 0, CNTR_NORMAL,
1718                             access_sw_vtx_wait),
1719 [C_SW_PIO_WAIT] = CNTR_ELEM("PioWait", 0, 0, CNTR_NORMAL,
1720                             access_sw_pio_wait),
1721 [C_SW_KMEM_WAIT] = CNTR_ELEM("KmemWait", 0, 0, CNTR_NORMAL,
1722                             access_sw_kmem_wait),
1723 };
1724
1725 static struct cntr_entry port_cntrs[PORT_CNTR_LAST] = {
1726 [C_TX_UNSUP_VL] = TXE32_PORT_CNTR_ELEM(TxUnVLErr, SEND_UNSUP_VL_ERR_CNT,
1727                         CNTR_NORMAL),
1728 [C_TX_INVAL_LEN] = TXE32_PORT_CNTR_ELEM(TxInvalLen, SEND_LEN_ERR_CNT,
1729                         CNTR_NORMAL),
1730 [C_TX_MM_LEN_ERR] = TXE32_PORT_CNTR_ELEM(TxMMLenErr, SEND_MAX_MIN_LEN_ERR_CNT,
1731                         CNTR_NORMAL),
1732 [C_TX_UNDERRUN] = TXE32_PORT_CNTR_ELEM(TxUnderrun, SEND_UNDERRUN_CNT,
1733                         CNTR_NORMAL),
1734 [C_TX_FLOW_STALL] = TXE32_PORT_CNTR_ELEM(TxFlowStall, SEND_FLOW_STALL_CNT,
1735                         CNTR_NORMAL),
1736 [C_TX_DROPPED] = TXE32_PORT_CNTR_ELEM(TxDropped, SEND_DROPPED_PKT_CNT,
1737                         CNTR_NORMAL),
1738 [C_TX_HDR_ERR] = TXE32_PORT_CNTR_ELEM(TxHdrErr, SEND_HEADERS_ERR_CNT,
1739                         CNTR_NORMAL),
1740 [C_TX_PKT] = TXE64_PORT_CNTR_ELEM(TxPkt, SEND_DATA_PKT_CNT, CNTR_NORMAL),
1741 [C_TX_WORDS] = TXE64_PORT_CNTR_ELEM(TxWords, SEND_DWORD_CNT, CNTR_NORMAL),
1742 [C_TX_WAIT] = TXE64_PORT_CNTR_ELEM(TxWait, SEND_WAIT_CNT, CNTR_SYNTH),
1743 [C_TX_FLIT_VL] = TXE64_PORT_CNTR_ELEM(TxFlitVL, SEND_DATA_VL0_CNT,
1744                         CNTR_SYNTH | CNTR_VL),
1745 [C_TX_PKT_VL] = TXE64_PORT_CNTR_ELEM(TxPktVL, SEND_DATA_PKT_VL0_CNT,
1746                         CNTR_SYNTH | CNTR_VL),
1747 [C_TX_WAIT_VL] = TXE64_PORT_CNTR_ELEM(TxWaitVL, SEND_WAIT_VL0_CNT,
1748                         CNTR_SYNTH | CNTR_VL),
1749 [C_RX_PKT] = RXE64_PORT_CNTR_ELEM(RxPkt, RCV_DATA_PKT_CNT, CNTR_NORMAL),
1750 [C_RX_WORDS] = RXE64_PORT_CNTR_ELEM(RxWords, RCV_DWORD_CNT, CNTR_NORMAL),
1751 [C_SW_LINK_DOWN] = CNTR_ELEM("SwLinkDown", 0, 0, CNTR_SYNTH | CNTR_32BIT,
1752                         access_sw_link_dn_cnt),
1753 [C_SW_LINK_UP] = CNTR_ELEM("SwLinkUp", 0, 0, CNTR_SYNTH | CNTR_32BIT,
1754                         access_sw_link_up_cnt),
1755 [C_SW_XMIT_DSCD] = CNTR_ELEM("XmitDscd", 0, 0, CNTR_SYNTH | CNTR_32BIT,
1756                         access_sw_xmit_discards),
1757 [C_SW_XMIT_DSCD_VL] = CNTR_ELEM("XmitDscdVl", 0, 0,
1758                         CNTR_SYNTH | CNTR_32BIT | CNTR_VL,
1759                         access_sw_xmit_discards),
1760 [C_SW_XMIT_CSTR_ERR] = CNTR_ELEM("XmitCstrErr", 0, 0, CNTR_SYNTH,
1761                         access_xmit_constraint_errs),
1762 [C_SW_RCV_CSTR_ERR] = CNTR_ELEM("RcvCstrErr", 0, 0, CNTR_SYNTH,
1763                         access_rcv_constraint_errs),
1764 [C_SW_IBP_LOOP_PKTS] = SW_IBP_CNTR(LoopPkts, loop_pkts),
1765 [C_SW_IBP_RC_RESENDS] = SW_IBP_CNTR(RcResend, rc_resends),
1766 [C_SW_IBP_RNR_NAKS] = SW_IBP_CNTR(RnrNak, rnr_naks),
1767 [C_SW_IBP_OTHER_NAKS] = SW_IBP_CNTR(OtherNak, other_naks),
1768 [C_SW_IBP_RC_TIMEOUTS] = SW_IBP_CNTR(RcTimeOut, rc_timeouts),
1769 [C_SW_IBP_PKT_DROPS] = SW_IBP_CNTR(PktDrop, pkt_drops),
1770 [C_SW_IBP_DMA_WAIT] = SW_IBP_CNTR(DmaWait, dmawait),
1771 [C_SW_IBP_RC_SEQNAK] = SW_IBP_CNTR(RcSeqNak, rc_seqnak),
1772 [C_SW_IBP_RC_DUPREQ] = SW_IBP_CNTR(RcDupRew, rc_dupreq),
1773 [C_SW_IBP_RDMA_SEQ] = SW_IBP_CNTR(RdmaSeq, rdma_seq),
1774 [C_SW_IBP_UNALIGNED] = SW_IBP_CNTR(Unaligned, unaligned),
1775 [C_SW_IBP_SEQ_NAK] = SW_IBP_CNTR(SeqNak, seq_naks),
1776 [C_SW_CPU_RC_ACKS] = CNTR_ELEM("RcAcks", 0, 0, CNTR_NORMAL,
1777                                access_sw_cpu_rc_acks),
1778 [C_SW_CPU_RC_QACKS] = CNTR_ELEM("RcQacks", 0, 0, CNTR_NORMAL,
1779                                access_sw_cpu_rc_qacks),
1780 [C_SW_CPU_RC_DELAYED_COMP] = CNTR_ELEM("RcDelayComp", 0, 0, CNTR_NORMAL,
1781                                access_sw_cpu_rc_delayed_comp),
1782 [OVR_LBL(0)] = OVR_ELM(0), [OVR_LBL(1)] = OVR_ELM(1),
1783 [OVR_LBL(2)] = OVR_ELM(2), [OVR_LBL(3)] = OVR_ELM(3),
1784 [OVR_LBL(4)] = OVR_ELM(4), [OVR_LBL(5)] = OVR_ELM(5),
1785 [OVR_LBL(6)] = OVR_ELM(6), [OVR_LBL(7)] = OVR_ELM(7),
1786 [OVR_LBL(8)] = OVR_ELM(8), [OVR_LBL(9)] = OVR_ELM(9),
1787 [OVR_LBL(10)] = OVR_ELM(10), [OVR_LBL(11)] = OVR_ELM(11),
1788 [OVR_LBL(12)] = OVR_ELM(12), [OVR_LBL(13)] = OVR_ELM(13),
1789 [OVR_LBL(14)] = OVR_ELM(14), [OVR_LBL(15)] = OVR_ELM(15),
1790 [OVR_LBL(16)] = OVR_ELM(16), [OVR_LBL(17)] = OVR_ELM(17),
1791 [OVR_LBL(18)] = OVR_ELM(18), [OVR_LBL(19)] = OVR_ELM(19),
1792 [OVR_LBL(20)] = OVR_ELM(20), [OVR_LBL(21)] = OVR_ELM(21),
1793 [OVR_LBL(22)] = OVR_ELM(22), [OVR_LBL(23)] = OVR_ELM(23),
1794 [OVR_LBL(24)] = OVR_ELM(24), [OVR_LBL(25)] = OVR_ELM(25),
1795 [OVR_LBL(26)] = OVR_ELM(26), [OVR_LBL(27)] = OVR_ELM(27),
1796 [OVR_LBL(28)] = OVR_ELM(28), [OVR_LBL(29)] = OVR_ELM(29),
1797 [OVR_LBL(30)] = OVR_ELM(30), [OVR_LBL(31)] = OVR_ELM(31),
1798 [OVR_LBL(32)] = OVR_ELM(32), [OVR_LBL(33)] = OVR_ELM(33),
1799 [OVR_LBL(34)] = OVR_ELM(34), [OVR_LBL(35)] = OVR_ELM(35),
1800 [OVR_LBL(36)] = OVR_ELM(36), [OVR_LBL(37)] = OVR_ELM(37),
1801 [OVR_LBL(38)] = OVR_ELM(38), [OVR_LBL(39)] = OVR_ELM(39),
1802 [OVR_LBL(40)] = OVR_ELM(40), [OVR_LBL(41)] = OVR_ELM(41),
1803 [OVR_LBL(42)] = OVR_ELM(42), [OVR_LBL(43)] = OVR_ELM(43),
1804 [OVR_LBL(44)] = OVR_ELM(44), [OVR_LBL(45)] = OVR_ELM(45),
1805 [OVR_LBL(46)] = OVR_ELM(46), [OVR_LBL(47)] = OVR_ELM(47),
1806 [OVR_LBL(48)] = OVR_ELM(48), [OVR_LBL(49)] = OVR_ELM(49),
1807 [OVR_LBL(50)] = OVR_ELM(50), [OVR_LBL(51)] = OVR_ELM(51),
1808 [OVR_LBL(52)] = OVR_ELM(52), [OVR_LBL(53)] = OVR_ELM(53),
1809 [OVR_LBL(54)] = OVR_ELM(54), [OVR_LBL(55)] = OVR_ELM(55),
1810 [OVR_LBL(56)] = OVR_ELM(56), [OVR_LBL(57)] = OVR_ELM(57),
1811 [OVR_LBL(58)] = OVR_ELM(58), [OVR_LBL(59)] = OVR_ELM(59),
1812 [OVR_LBL(60)] = OVR_ELM(60), [OVR_LBL(61)] = OVR_ELM(61),
1813 [OVR_LBL(62)] = OVR_ELM(62), [OVR_LBL(63)] = OVR_ELM(63),
1814 [OVR_LBL(64)] = OVR_ELM(64), [OVR_LBL(65)] = OVR_ELM(65),
1815 [OVR_LBL(66)] = OVR_ELM(66), [OVR_LBL(67)] = OVR_ELM(67),
1816 [OVR_LBL(68)] = OVR_ELM(68), [OVR_LBL(69)] = OVR_ELM(69),
1817 [OVR_LBL(70)] = OVR_ELM(70), [OVR_LBL(71)] = OVR_ELM(71),
1818 [OVR_LBL(72)] = OVR_ELM(72), [OVR_LBL(73)] = OVR_ELM(73),
1819 [OVR_LBL(74)] = OVR_ELM(74), [OVR_LBL(75)] = OVR_ELM(75),
1820 [OVR_LBL(76)] = OVR_ELM(76), [OVR_LBL(77)] = OVR_ELM(77),
1821 [OVR_LBL(78)] = OVR_ELM(78), [OVR_LBL(79)] = OVR_ELM(79),
1822 [OVR_LBL(80)] = OVR_ELM(80), [OVR_LBL(81)] = OVR_ELM(81),
1823 [OVR_LBL(82)] = OVR_ELM(82), [OVR_LBL(83)] = OVR_ELM(83),
1824 [OVR_LBL(84)] = OVR_ELM(84), [OVR_LBL(85)] = OVR_ELM(85),
1825 [OVR_LBL(86)] = OVR_ELM(86), [OVR_LBL(87)] = OVR_ELM(87),
1826 [OVR_LBL(88)] = OVR_ELM(88), [OVR_LBL(89)] = OVR_ELM(89),
1827 [OVR_LBL(90)] = OVR_ELM(90), [OVR_LBL(91)] = OVR_ELM(91),
1828 [OVR_LBL(92)] = OVR_ELM(92), [OVR_LBL(93)] = OVR_ELM(93),
1829 [OVR_LBL(94)] = OVR_ELM(94), [OVR_LBL(95)] = OVR_ELM(95),
1830 [OVR_LBL(96)] = OVR_ELM(96), [OVR_LBL(97)] = OVR_ELM(97),
1831 [OVR_LBL(98)] = OVR_ELM(98), [OVR_LBL(99)] = OVR_ELM(99),
1832 [OVR_LBL(100)] = OVR_ELM(100), [OVR_LBL(101)] = OVR_ELM(101),
1833 [OVR_LBL(102)] = OVR_ELM(102), [OVR_LBL(103)] = OVR_ELM(103),
1834 [OVR_LBL(104)] = OVR_ELM(104), [OVR_LBL(105)] = OVR_ELM(105),
1835 [OVR_LBL(106)] = OVR_ELM(106), [OVR_LBL(107)] = OVR_ELM(107),
1836 [OVR_LBL(108)] = OVR_ELM(108), [OVR_LBL(109)] = OVR_ELM(109),
1837 [OVR_LBL(110)] = OVR_ELM(110), [OVR_LBL(111)] = OVR_ELM(111),
1838 [OVR_LBL(112)] = OVR_ELM(112), [OVR_LBL(113)] = OVR_ELM(113),
1839 [OVR_LBL(114)] = OVR_ELM(114), [OVR_LBL(115)] = OVR_ELM(115),
1840 [OVR_LBL(116)] = OVR_ELM(116), [OVR_LBL(117)] = OVR_ELM(117),
1841 [OVR_LBL(118)] = OVR_ELM(118), [OVR_LBL(119)] = OVR_ELM(119),
1842 [OVR_LBL(120)] = OVR_ELM(120), [OVR_LBL(121)] = OVR_ELM(121),
1843 [OVR_LBL(122)] = OVR_ELM(122), [OVR_LBL(123)] = OVR_ELM(123),
1844 [OVR_LBL(124)] = OVR_ELM(124), [OVR_LBL(125)] = OVR_ELM(125),
1845 [OVR_LBL(126)] = OVR_ELM(126), [OVR_LBL(127)] = OVR_ELM(127),
1846 [OVR_LBL(128)] = OVR_ELM(128), [OVR_LBL(129)] = OVR_ELM(129),
1847 [OVR_LBL(130)] = OVR_ELM(130), [OVR_LBL(131)] = OVR_ELM(131),
1848 [OVR_LBL(132)] = OVR_ELM(132), [OVR_LBL(133)] = OVR_ELM(133),
1849 [OVR_LBL(134)] = OVR_ELM(134), [OVR_LBL(135)] = OVR_ELM(135),
1850 [OVR_LBL(136)] = OVR_ELM(136), [OVR_LBL(137)] = OVR_ELM(137),
1851 [OVR_LBL(138)] = OVR_ELM(138), [OVR_LBL(139)] = OVR_ELM(139),
1852 [OVR_LBL(140)] = OVR_ELM(140), [OVR_LBL(141)] = OVR_ELM(141),
1853 [OVR_LBL(142)] = OVR_ELM(142), [OVR_LBL(143)] = OVR_ELM(143),
1854 [OVR_LBL(144)] = OVR_ELM(144), [OVR_LBL(145)] = OVR_ELM(145),
1855 [OVR_LBL(146)] = OVR_ELM(146), [OVR_LBL(147)] = OVR_ELM(147),
1856 [OVR_LBL(148)] = OVR_ELM(148), [OVR_LBL(149)] = OVR_ELM(149),
1857 [OVR_LBL(150)] = OVR_ELM(150), [OVR_LBL(151)] = OVR_ELM(151),
1858 [OVR_LBL(152)] = OVR_ELM(152), [OVR_LBL(153)] = OVR_ELM(153),
1859 [OVR_LBL(154)] = OVR_ELM(154), [OVR_LBL(155)] = OVR_ELM(155),
1860 [OVR_LBL(156)] = OVR_ELM(156), [OVR_LBL(157)] = OVR_ELM(157),
1861 [OVR_LBL(158)] = OVR_ELM(158), [OVR_LBL(159)] = OVR_ELM(159),
1862 };
1863
1864 /* ======================================================================== */
1865
1866 /* return true if this is chip revision a0 */
1867 int is_a0(struct hfi1_devdata *dd)
1868 {
1869         return ((dd->revision >> CCE_REVISION_CHIP_REV_MINOR_SHIFT)
1870                         & CCE_REVISION_CHIP_REV_MINOR_MASK) == 0;
1871 }
1872
1873 /* return true if this is chip revision a */
1874 int is_ax(struct hfi1_devdata *dd)
1875 {
1876         u8 chip_rev_minor =
1877                 (dd->revision >> CCE_REVISION_CHIP_REV_MINOR_SHIFT)
1878                         & CCE_REVISION_CHIP_REV_MINOR_MASK;
1879         return (chip_rev_minor & 0xf0) == 0;
1880 }
1881
1882 /* return true if this is chip revision b */
1883 int is_bx(struct hfi1_devdata *dd)
1884 {
1885         u8 chip_rev_minor =
1886                 (dd->revision >> CCE_REVISION_CHIP_REV_MINOR_SHIFT)
1887                         & CCE_REVISION_CHIP_REV_MINOR_MASK;
1888         return !!(chip_rev_minor & 0x10);
1889 }
1890
1891 /*
1892  * Append string s to buffer buf.  Arguments curp and lenp are the current
1893  * position and remaining length, respectively.
1894  *
1895  * return 0 on success, 1 on out of room
1896  */
1897 static int append_str(char *buf, char **curp, int *lenp, const char *s)
1898 {
1899         char *p = *curp;
1900         int len = *lenp;
1901         int result = 0; /* success */
1902         char c;
1903
1904         /* add a comma, if this is not the first string in the buffer */
1905         if (p != buf) {
1906                 if (len == 0) {
1907                         result = 1; /* out of room */
1908                         goto done;
1909                 }
1910                 *p++ = ',';
1911                 len--;
1912         }
1913
1914         /* copy the string */
1915         while ((c = *s++) != 0) {
1916                 if (len == 0) {
1917                         result = 1; /* out of room */
1918                         goto done;
1919                 }
1920                 *p++ = c;
1921                 len--;
1922         }
1923
1924 done:
1925         /* write return values */
1926         *curp = p;
1927         *lenp = len;
1928
1929         return result;
1930 }
1931
1932 /*
1933  * Using the given flag table, print a comma separated string into
1934  * the buffer.  End in '*' if the buffer is too short.
1935  */
1936 static char *flag_string(char *buf, int buf_len, u64 flags,
1937                                 struct flag_table *table, int table_size)
1938 {
1939         char extra[32];
1940         char *p = buf;
1941         int len = buf_len;
1942         int no_room = 0;
1943         int i;
1944
1945         /* make sure there are at least 2 bytes so we can form "*" plus a nul */
1946         if (len < 2)
1947                 return "";
1948
1949         len--;  /* leave room for a nul */
1950         for (i = 0; i < table_size; i++) {
1951                 if (flags & table[i].flag) {
1952                         no_room = append_str(buf, &p, &len, table[i].str);
1953                         if (no_room)
1954                                 break;
1955                         flags &= ~table[i].flag;
1956                 }
1957         }
1958
1959         /* any undocumented bits left? */
1960         if (!no_room && flags) {
1961                 snprintf(extra, sizeof(extra), "bits 0x%llx", flags);
1962                 no_room = append_str(buf, &p, &len, extra);
1963         }
1964
1965         /* add '*' if we ran out of room */
1966         if (no_room) {
1967                 /* may need to back up to add space for a '*' */
1968                 if (len == 0)
1969                         --p;
1970                 *p++ = '*';
1971         }
1972
1973         /* add final nul - space already allocated above */
1974         *p = 0;
1975         return buf;
1976 }
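
/*
 * Worked example with a hypothetical two-entry table mapping bit 0 to
 * "FooErr" and bit 2 to "BarErr": flag_string(buf, 64, 0x7, table, 2)
 * yields "FooErr,BarErr,bits 0x2" - named bits first, then any bits the
 * table does not describe.  With buf_len 8 the same call comes back as
 * "FooErr*", the '*' flagging the truncation.
 */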
1977
1978 /* first 8 CCE error interrupt source names */
1979 static const char * const cce_misc_names[] = {
1980         "CceErrInt",            /* 0 */
1981         "RxeErrInt",            /* 1 */
1982         "MiscErrInt",           /* 2 */
1983         "Reserved3",            /* 3 */
1984         "PioErrInt",            /* 4 */
1985         "SDmaErrInt",           /* 5 */
1986         "EgressErrInt",         /* 6 */
1987         "TxeErrInt"             /* 7 */
1988 };
1989
1990 /*
1991  * Return the miscellaneous error interrupt name.
1992  */
1993 static char *is_misc_err_name(char *buf, size_t bsize, unsigned int source)
1994 {
1995         /* snprintf (unlike strncpy) guarantees nul termination */
1996         if (source < ARRAY_SIZE(cce_misc_names))
1997                 snprintf(buf, bsize, "%s", cce_misc_names[source]);
1998         else
1999                 snprintf(buf, bsize, "Reserved%u",
2000                         source + IS_GENERAL_ERR_START);
2002
2003         return buf;
2004 }
2005
2006 /*
2007  * Return the SDMA engine error interrupt name.
2008  */
2009 static char *is_sdma_eng_err_name(char *buf, size_t bsize, unsigned int source)
2010 {
2011         snprintf(buf, bsize, "SDmaEngErrInt%u", source);
2012         return buf;
2013 }
2014
2015 /*
2016  * Return the send context error interrupt name.
2017  */
2018 static char *is_sendctxt_err_name(char *buf, size_t bsize, unsigned int source)
2019 {
2020         snprintf(buf, bsize, "SendCtxtErrInt%u", source);
2021         return buf;
2022 }
2023
2024 static const char * const various_names[] = {
2025         "PbcInt",
2026         "GpioAssertInt",
2027         "Qsfp1Int",
2028         "Qsfp2Int",
2029         "TCritInt"
2030 };
2031
2032 /*
2033  * Return the various interrupt name.
2034  */
2035 static char *is_various_name(char *buf, size_t bsize, unsigned int source)
2036 {
2037         if (source < ARRAY_SIZE(various_names))
2038                 snprintf(buf, bsize, "%s", various_names[source]);
2039         else
2040                 snprintf(buf, bsize, "Reserved%u", source + IS_VARIOUS_START);
2041         return buf;
2042 }
2043
2044 /*
2045  * Return the DC interrupt name.
2046  */
2047 static char *is_dc_name(char *buf, size_t bsize, unsigned int source)
2048 {
2049         static const char * const dc_int_names[] = {
2050                 "common",
2051                 "lcb",
2052                 "8051",
2053                 "lbm"   /* local block merge */
2054         };
2055
2056         if (source < ARRAY_SIZE(dc_int_names))
2057                 snprintf(buf, bsize, "dc_%s_int", dc_int_names[source]);
2058         else
2059                 snprintf(buf, bsize, "DCInt%u", source);
2060         return buf;
2061 }
2062
2063 static const char * const sdma_int_names[] = {
2064         "SDmaInt",
2065         "SdmaIdleInt",
2066         "SdmaProgressInt",
2067 };
2068
2069 /*
2070  * Return the SDMA engine interrupt name.
2071  */
2072 static char *is_sdma_eng_name(char *buf, size_t bsize, unsigned int source)
2073 {
2074         /* what interrupt */
2075         unsigned int what = source / TXE_NUM_SDMA_ENGINES;
2076         /* which engine */
2077         unsigned int which = source % TXE_NUM_SDMA_ENGINES;
2078
2079         if (likely(what < ARRAY_SIZE(sdma_int_names)))
2080                 snprintf(buf, bsize, "%s%u", sdma_int_names[what], which);
2081         else
2082                 snprintf(buf, bsize, "Invalid SDMA interrupt %u", source);
2083         return buf;
2084 }
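
/*
 * Example of the encoding decoded above: sources 0..N-1 are
 * SDmaInt0..N-1, the next N are SdmaIdleInt0..N-1, and so on, where N
 * is TXE_NUM_SDMA_ENGINES.  If N were 16 (illustrative value only),
 * source 18 would decode as what = 1, which = 2: "SdmaIdleInt2".
 */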
2085
2086 /*
2087  * Return the receive available interrupt name.
2088  */
2089 static char *is_rcv_avail_name(char *buf, size_t bsize, unsigned int source)
2090 {
2091         snprintf(buf, bsize, "RcvAvailInt%u", source);
2092         return buf;
2093 }
2094
2095 /*
2096  * Return the receive urgent interrupt name.
2097  */
2098 static char *is_rcv_urgent_name(char *buf, size_t bsize, unsigned int source)
2099 {
2100         snprintf(buf, bsize, "RcvUrgentInt%u", source);
2101         return buf;
2102 }
2103
2104 /*
2105  * Return the send credit interrupt name.
2106  */
2107 static char *is_send_credit_name(char *buf, size_t bsize, unsigned int source)
2108 {
2109         snprintf(buf, bsize, "SendCreditInt%u", source);
2110         return buf;
2111 }
2112
2113 /*
2114  * Return the reserved interrupt name.
2115  */
2116 static char *is_reserved_name(char *buf, size_t bsize, unsigned int source)
2117 {
2118         snprintf(buf, bsize, "Reserved%u", source + IS_RESERVED_START);
2119         return buf;
2120 }
2121
2122 static char *cce_err_status_string(char *buf, int buf_len, u64 flags)
2123 {
2124         return flag_string(buf, buf_len, flags,
2125                         cce_err_status_flags, ARRAY_SIZE(cce_err_status_flags));
2126 }
2127
2128 static char *rxe_err_status_string(char *buf, int buf_len, u64 flags)
2129 {
2130         return flag_string(buf, buf_len, flags,
2131                         rxe_err_status_flags, ARRAY_SIZE(rxe_err_status_flags));
2132 }
2133
2134 static char *misc_err_status_string(char *buf, int buf_len, u64 flags)
2135 {
2136         return flag_string(buf, buf_len, flags, misc_err_status_flags,
2137                         ARRAY_SIZE(misc_err_status_flags));
2138 }
2139
2140 static char *pio_err_status_string(char *buf, int buf_len, u64 flags)
2141 {
2142         return flag_string(buf, buf_len, flags,
2143                         pio_err_status_flags, ARRAY_SIZE(pio_err_status_flags));
2144 }
2145
2146 static char *sdma_err_status_string(char *buf, int buf_len, u64 flags)
2147 {
2148         return flag_string(buf, buf_len, flags,
2149                         sdma_err_status_flags,
2150                         ARRAY_SIZE(sdma_err_status_flags));
2151 }
2152
2153 static char *egress_err_status_string(char *buf, int buf_len, u64 flags)
2154 {
2155         return flag_string(buf, buf_len, flags,
2156                 egress_err_status_flags, ARRAY_SIZE(egress_err_status_flags));
2157 }
2158
2159 static char *egress_err_info_string(char *buf, int buf_len, u64 flags)
2160 {
2161         return flag_string(buf, buf_len, flags,
2162                 egress_err_info_flags, ARRAY_SIZE(egress_err_info_flags));
2163 }
2164
2165 static char *send_err_status_string(char *buf, int buf_len, u64 flags)
2166 {
2167         return flag_string(buf, buf_len, flags,
2168                         send_err_status_flags,
2169                         ARRAY_SIZE(send_err_status_flags));
2170 }
2171
2172 static void handle_cce_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2173 {
2174         char buf[96];
2175
2176         /*
2177          * For most these errors, there is nothing that can be done except
2178          * report or record it.
2179          */
2180         dd_dev_info(dd, "CCE Error: %s\n",
2181                 cce_err_status_string(buf, sizeof(buf), reg));
2182
2183         if ((reg & CCE_ERR_STATUS_CCE_CLI2_ASYNC_FIFO_PARITY_ERR_SMASK) &&
2184             is_a0(dd) && (dd->icode != ICODE_FUNCTIONAL_SIMULATOR)) {
2185                 /*
2186                  * This error requires a manual drop into SPC freeze
2187                  * mode, then a fix up.
2188                  */
2188                 start_freeze_handling(dd->pport, FREEZE_SELF);
2189         }
2190 }
2191
2192 /*
2193  * Check counters for receive errors that do not have an interrupt
2194  * associated with them.
2195  */
2196 #define RCVERR_CHECK_TIME 10
2197 static void update_rcverr_timer(unsigned long opaque)
2198 {
2199         struct hfi1_devdata *dd = (struct hfi1_devdata *)opaque;
2200         struct hfi1_pportdata *ppd = dd->pport;
2201         u32 cur_ovfl_cnt = read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL);
2202
2203         if (dd->rcv_ovfl_cnt < cur_ovfl_cnt &&
2204                 ppd->port_error_action & OPA_PI_MASK_EX_BUFFER_OVERRUN) {
2205                 dd_dev_info(dd, "%s: PortErrorAction bounce\n", __func__);
2206                 set_link_down_reason(ppd,
2207                         OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN, 0,
2208                         OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN);
2209                 queue_work(ppd->hfi1_wq, &ppd->link_bounce_work);
2210         }
2211         dd->rcv_ovfl_cnt = cur_ovfl_cnt;
2212
2213         mod_timer(&dd->rcverr_timer, jiffies + HZ * RCVERR_CHECK_TIME);
2214 }
2215
2216 static int init_rcverr(struct hfi1_devdata *dd)
2217 {
2218         init_timer(&dd->rcverr_timer);
2219         dd->rcverr_timer.function = update_rcverr_timer;
2220         dd->rcverr_timer.data = (unsigned long) dd;
2221         /* Assume the hardware counter has been reset */
2222         dd->rcv_ovfl_cnt = 0;
2223         return mod_timer(&dd->rcverr_timer, jiffies + HZ * RCVERR_CHECK_TIME);
2224 }
2225
2226 static void free_rcverr(struct hfi1_devdata *dd)
2227 {
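        /* rcverr_timer.data doubles as an "initialized" flag here */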
2228         if (dd->rcverr_timer.data)
2229                 del_timer_sync(&dd->rcverr_timer);
2230         dd->rcverr_timer.data = 0;
2231 }
2232
2233 static void handle_rxe_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2234 {
2235         char buf[96];
2236
2237         dd_dev_info(dd, "Receive Error: %s\n",
2238                 rxe_err_status_string(buf, sizeof(buf), reg));
2239
2240         if (reg & ALL_RXE_FREEZE_ERR) {
2241                 int flags = 0;
2242
2243                 /*
2244                  * Freeze mode recovery is disabled for the errors
2245                  * in RXE_FREEZE_ABORT_MASK
2246                  */
2247                 if (is_a0(dd) && (reg & RXE_FREEZE_ABORT_MASK))
2248                         flags = FREEZE_ABORT;
2249
2250                 start_freeze_handling(dd->pport, flags);
2251         }
2252 }
2253
2254 static void handle_misc_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2255 {
2256         char buf[96];
2257
2258         dd_dev_info(dd, "Misc Error: %s",
2259                 misc_err_status_string(buf, sizeof(buf), reg));
2260 }
2261
2262 static void handle_pio_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2263 {
2264         char buf[96];
2265
2266         dd_dev_info(dd, "PIO Error: %s\n",
2267                 pio_err_status_string(buf, sizeof(buf), reg));
2268
2269         if (reg & ALL_PIO_FREEZE_ERR)
2270                 start_freeze_handling(dd->pport, 0);
2271 }
2272
2273 static void handle_sdma_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2274 {
2275         char buf[96];
2276
2277         dd_dev_info(dd, "SDMA Error: %s\n",
2278                 sdma_err_status_string(buf, sizeof(buf), reg));
2279
2280         if (reg & ALL_SDMA_FREEZE_ERR)
2281                 start_freeze_handling(dd->pport, 0);
2282 }
2283
2284 static void count_port_inactive(struct hfi1_devdata *dd)
2285 {
2286         struct hfi1_pportdata *ppd = dd->pport;
2287
2288         if (ppd->port_xmit_discards < ~(u64)0)
2289                 ppd->port_xmit_discards++;
2290 }
2291
2292 /*
2293  * We have had a "disallowed packet" error during egress. Determine the
2294  * integrity check which failed, and update the relevant error counter.
2295  *
2296  * Note that the SEND_EGRESS_ERR_INFO register has only a single
2297  * bit of state per integrity check, and so we can miss the reason for an
2298  * egress error if more than one packet fails the same integrity check
2299  * since we cleared the corresponding bit in SEND_EGRESS_ERR_INFO.
2300  */
2301 static void handle_send_egress_err_info(struct hfi1_devdata *dd)
2302 {
2303         struct hfi1_pportdata *ppd = dd->pport;
2304         u64 src = read_csr(dd, SEND_EGRESS_ERR_SOURCE); /* read first */
2305         u64 info = read_csr(dd, SEND_EGRESS_ERR_INFO);
2306         char buf[96];
2307
2308         /* clear down all observed info as quickly as possible after read */
2309         write_csr(dd, SEND_EGRESS_ERR_INFO, info);
2310
2311         dd_dev_info(dd,
2312                 "Egress Error Info: 0x%llx, %s Egress Error Src 0x%llx\n",
2313                 info, egress_err_info_string(buf, sizeof(buf), info), src);
2314
2315         /* Eventually add other counters for each bit */
2316
2317         if (info & SEND_EGRESS_ERR_INFO_TOO_LONG_IB_PACKET_ERR_SMASK) {
2318                 if (ppd->port_xmit_discards < ~(u64)0)
2319                         ppd->port_xmit_discards++;
2320         }
2321 }
2322
2323 /*
2324  * Input value is a bit position within the SEND_EGRESS_ERR_STATUS
2325  * register. Does it represent a 'port inactive' error?
2326  */
2327 static inline int port_inactive_err(u64 posn)
2328 {
2329         return (posn >= SEES(TX_LINKDOWN) &&
2330                 posn <= SEES(TX_INCORRECT_LINK_STATE));
2331 }
2332
2333 /*
2334  * Input value is a bit position within the SEND_EGRESS_ERR_STATUS
2335  * register. Does it represent a 'disallowed packet' error?
2336  */
2337 static inline int disallowed_pkt_err(u64 posn)
2338 {
2339         return (posn >= SEES(TX_SDMA0_DISALLOWED_PACKET) &&
2340                 posn <= SEES(TX_SDMA15_DISALLOWED_PACKET));
2341 }
2342
2343 static void handle_egress_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2344 {
2345         u64 reg_copy = reg, handled = 0;
2346         char buf[96];
2347
2348         if (reg & ALL_TXE_EGRESS_FREEZE_ERR)
2349                 start_freeze_handling(dd->pport, 0);
2350         if (is_a0(dd) && (reg &
2351                     SEND_EGRESS_ERR_STATUS_TX_CREDIT_RETURN_VL_ERR_SMASK)
2352                     && (dd->icode != ICODE_FUNCTIONAL_SIMULATOR))
2353                 start_freeze_handling(dd->pport, 0);
2354
2355         while (reg_copy) {
2356                 int posn = fls64(reg_copy);
2357                 /*
2358                  * fls64() returns a 1-based offset, but we generally
2359                  * want 0-based offsets.
2360                  */
2361                 int shift = posn - 1;
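                /* e.g. fls64(0x8) == 4, so shift == 3 names bit 3 */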
2362
2363                 if (port_inactive_err(shift)) {
2364                         count_port_inactive(dd);
2365                         handled |= (1ULL << shift);
2366                 } else if (disallowed_pkt_err(shift)) {
2367                         handle_send_egress_err_info(dd);
2368                         handled |= (1ULL << shift);
2369                 }
2370                 clear_bit(shift, (unsigned long *)&reg_copy);
2371         }
2372
2373         reg &= ~handled;
2374
2375         if (reg)
2376                 dd_dev_info(dd, "Egress Error: %s\n",
2377                         egress_err_status_string(buf, sizeof(buf), reg));
2378 }
2379
2380 static void handle_txe_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2381 {
2382         char buf[96];
2383
2384         dd_dev_info(dd, "Send Error: %s\n",
2385                 send_err_status_string(buf, sizeof(buf), reg));
2387 }
2388
2389 /*
2390  * The maximum number of times the error clear down will loop before
2391  * blocking a repeating error.  This value is arbitrary.
2392  */
2393 #define MAX_CLEAR_COUNT 20
2394
2395 /*
2396  * Clear and handle an error register.  All error interrupts are funneled
2397  * through here to have a central location to correctly handle single-
2398  * or multi-shot errors.
2399  *
2400  * For non per-context registers, call this routine with a context value
2401  * of 0 so the per-context offset is zero.
2402  *
2403  * If the handler loops too many times, assume that something is wrong
2404  * and can't be fixed, so mask the error bits.
2405  */
2406 static void interrupt_clear_down(struct hfi1_devdata *dd,
2407                                  u32 context,
2408                                  const struct err_reg_info *eri)
2409 {
2410         u64 reg;
2411         u32 count;
2412
2413         /* read in a loop until no more errors are seen */
2414         count = 0;
2415         while (1) {
2416                 reg = read_kctxt_csr(dd, context, eri->status);
2417                 if (reg == 0)
2418                         break;
2419                 write_kctxt_csr(dd, context, eri->clear, reg);
2420                 if (likely(eri->handler))
2421                         eri->handler(dd, context, reg);
2422                 count++;
2423                 if (count > MAX_CLEAR_COUNT) {
2424                         u64 mask;
2425
2426                         dd_dev_err(dd, "Repeating %s bits 0x%llx - masking\n",
2427                                 eri->desc, reg);
2428                         /*
2429                          * Read-modify-write so any other masked bits
2430                          * remain masked.
2431                          */
2432                         mask = read_kctxt_csr(dd, context, eri->mask);
2433                         mask &= ~reg;
2434                         write_kctxt_csr(dd, context, eri->mask, mask);
2435                         break;
2436                 }
2437         }
2438 }
2439
2440 /*
2441  * CCE block "misc" interrupt.  Source is < 16.
2442  */
2443 static void is_misc_err_int(struct hfi1_devdata *dd, unsigned int source)
2444 {
2445         const struct err_reg_info *eri = &misc_errs[source];
2446
2447         if (eri->handler) {
2448                 interrupt_clear_down(dd, 0, eri);
2449         } else {
2450                 dd_dev_err(dd, "Unexpected misc interrupt (%u) - reserved\n",
2451                         source);
2452         }
2453 }
2454
2455 static char *send_context_err_status_string(char *buf, int buf_len, u64 flags)
2456 {
2457         return flag_string(buf, buf_len, flags,
2458                         sc_err_status_flags, ARRAY_SIZE(sc_err_status_flags));
2459 }
2460
2461 /*
2462  * Send context error interrupt.  Source (hw_context) is < 160.
2463  *
2464  * All send context errors cause the send context to halt.  The normal
2465  * clear-down mechanism cannot be used because we cannot clear the
2466  * error bits until several other long-running items are done first.
2467  * This is OK because with the context halted, nothing else is going
2468  * to happen on it anyway.
2469  */
2470 static void is_sendctxt_err_int(struct hfi1_devdata *dd,
2471                                 unsigned int hw_context)
2472 {
2473         struct send_context_info *sci;
2474         struct send_context *sc;
2475         char flags[96];
2476         u64 status;
2477         u32 sw_index;
2478
2479         sw_index = dd->hw_to_sw[hw_context];
2480         if (sw_index >= dd->num_send_contexts) {
2481                 dd_dev_err(dd,
2482                         "out of range sw index %u for send context %u\n",
2483                         sw_index, hw_context);
2484                 return;
2485         }
2486         sci = &dd->send_contexts[sw_index];
2487         sc = sci->sc;
2488         if (!sc) {
2489                 dd_dev_err(dd, "%s: context %u(%u): no sc?\n", __func__,
2490                         sw_index, hw_context);
2491                 return;
2492         }
2493
2494         /* tell the software that a halt has begun */
2495         sc_stop(sc, SCF_HALTED);
2496
2497         status = read_kctxt_csr(dd, hw_context, SEND_CTXT_ERR_STATUS);
2498
2499         dd_dev_info(dd, "Send Context %u(%u) Error: %s\n", sw_index, hw_context,
2500                 send_context_err_status_string(flags, sizeof(flags), status));
2501
2502         if (status & SEND_CTXT_ERR_STATUS_PIO_DISALLOWED_PACKET_ERR_SMASK)
2503                 handle_send_egress_err_info(dd);
2504
2505         /*
2506          * Automatically restart halted kernel contexts out of interrupt
2507          * context.  User contexts must ask the driver to restart the context.
2508          */
2509         if (sc->type != SC_USER)
2510                 queue_work(dd->pport->hfi1_wq, &sc->halt_work);
2511 }
2512
2513 static void handle_sdma_eng_err(struct hfi1_devdata *dd,
2514                                 unsigned int source, u64 status)
2515 {
2516         struct sdma_engine *sde;
2517
2518         sde = &dd->per_sdma[source];
2519 #ifdef CONFIG_SDMA_VERBOSITY
2520         dd_dev_err(sde->dd, "CONFIG SDMA(%u) %s:%d %s()\n", sde->this_idx,
2521                    slashstrip(__FILE__), __LINE__, __func__);
2522         dd_dev_err(sde->dd, "CONFIG SDMA(%u) source: %u status 0x%llx\n",
2523                    sde->this_idx, source, (unsigned long long)status);
2524 #endif
2525         sdma_engine_error(sde, status);
2526 }
2527
2528 /*
2529  * CCE block SDMA error interrupt.  Source is < 16.
2530  */
2531 static void is_sdma_eng_err_int(struct hfi1_devdata *dd, unsigned int source)
2532 {
2533 #ifdef CONFIG_SDMA_VERBOSITY
2534         struct sdma_engine *sde = &dd->per_sdma[source];
2535
2536         dd_dev_err(dd, "CONFIG SDMA(%u) %s:%d %s()\n", sde->this_idx,
2537                    slashstrip(__FILE__), __LINE__, __func__);
2538         dd_dev_err(dd, "CONFIG SDMA(%u) source: %u\n", sde->this_idx,
2539                    source);
2540         sdma_dumpstate(sde);
2541 #endif
2542         interrupt_clear_down(dd, source, &sdma_eng_err);
2543 }
2544
2545 /*
2546  * CCE block "various" interrupt.  Source is < 8.
2547  */
2548 static void is_various_int(struct hfi1_devdata *dd, unsigned int source)
2549 {
2550         const struct err_reg_info *eri = &various_err[source];
2551
2552         /*
2553          * TCritInt cannot go through interrupt_clear_down()
2554          * because it is not a second tier interrupt. The handler
2555          * should be called directly.
2556          */
2557         if (source == TCRIT_INT_SOURCE)
2558                 handle_temp_err(dd);
2559         else if (eri->handler)
2560                 interrupt_clear_down(dd, 0, eri);
2561         else
2562                 dd_dev_info(dd,
2563                         "%s: Unimplemented/reserved interrupt %d\n",
2564                         __func__, source);
2565 }
2566
2567 static void handle_qsfp_int(struct hfi1_devdata *dd, u32 src_ctx, u64 reg)
2568 {
2569         /* source is always zero */
2570         struct hfi1_pportdata *ppd = dd->pport;
2571         unsigned long flags;
2572         u64 qsfp_int_mgmt = (u64)(QSFP_HFI0_INT_N | QSFP_HFI0_MODPRST_N);
2573
2574         if (reg & QSFP_HFI0_MODPRST_N) {
2575
2576                 dd_dev_info(dd, "%s: ModPresent triggered QSFP interrupt\n",
2577                                 __func__);
2578
2579                 if (!qsfp_mod_present(ppd)) {
2580                         ppd->driver_link_ready = 0;
2581                         /*
2582                          * Cable removed, reset all our information about the
2583                          * cache and cable capabilities
2584                          */
2585
2586                         spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
2587                         /*
2588                          * We don't set cache_refresh_required here as we expect
2589                          * an interrupt when a cable is inserted
2590                          */
2591                         ppd->qsfp_info.cache_valid = 0;
2592                         ppd->qsfp_info.qsfp_interrupt_functional = 0;
2593                         spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock,
2594                                                 flags);
2595                         write_csr(dd,
2596                                         dd->hfi1_id ?
2597                                                 ASIC_QSFP2_INVERT :
2598                                                 ASIC_QSFP1_INVERT,
2599                                 qsfp_int_mgmt);
2600                         if (ppd->host_link_state == HLS_DN_POLL) {
2601                                 /*
2602                                  * The link is still in POLL. This means
2603                                  * that the normal link down processing
2604                                  * will not happen. We have to do it here
2605                                  * before turning the DC off.
2606                                  */
2607                                 queue_work(ppd->hfi1_wq, &ppd->link_down_work);
2608                         }
2609                 } else {
2610                         spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
2611                         ppd->qsfp_info.cache_valid = 0;
2612                         ppd->qsfp_info.cache_refresh_required = 1;
2613                         spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock,
2614                                                 flags);
2615
2616                         qsfp_int_mgmt &= ~(u64)QSFP_HFI0_MODPRST_N;
2617                         write_csr(dd,
2618                                         dd->hfi1_id ?
2619                                                 ASIC_QSFP2_INVERT :
2620                                                 ASIC_QSFP1_INVERT,
2621                                 qsfp_int_mgmt);
2622                 }
2623         }
2624
2625         if (reg & QSFP_HFI0_INT_N) {
2626
2627                 dd_dev_info(dd, "%s: IntN triggered QSFP interrupt\n",
2628                                 __func__);
2629                 spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
2630                 ppd->qsfp_info.check_interrupt_flags = 1;
2631                 ppd->qsfp_info.qsfp_interrupt_functional = 1;
2632                 spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock, flags);
2633         }
2634
2635         /* Schedule the QSFP work only if there is a cable attached. */
2636         if (qsfp_mod_present(ppd))
2637                 queue_work(ppd->hfi1_wq, &ppd->qsfp_info.qsfp_work);
2638 }
2639
2640 static int request_host_lcb_access(struct hfi1_devdata *dd)
2641 {
2642         int ret;
2643
2644         ret = do_8051_command(dd, HCMD_MISC,
2645                 (u64)HCMD_MISC_REQUEST_LCB_ACCESS << LOAD_DATA_FIELD_ID_SHIFT,
2646                 NULL);
2647         if (ret != HCMD_SUCCESS) {
2648                 dd_dev_err(dd, "%s: command failed with error %d\n",
2649                         __func__, ret);
2650         }
2651         return ret == HCMD_SUCCESS ? 0 : -EBUSY;
2652 }
2653
2654 static int request_8051_lcb_access(struct hfi1_devdata *dd)
2655 {
2656         int ret;
2657
2658         ret = do_8051_command(dd, HCMD_MISC,
2659                 (u64)HCMD_MISC_GRANT_LCB_ACCESS << LOAD_DATA_FIELD_ID_SHIFT,
2660                 NULL);
2661         if (ret != HCMD_SUCCESS) {
2662                 dd_dev_err(dd, "%s: command failed with error %d\n",
2663                         __func__, ret);
2664         }
2665         return ret == HCMD_SUCCESS ? 0 : -EBUSY;
2666 }
2667
2668 /*
2669  * Set the LCB selector - allow host access.  The DCC selector always
2670  * points to the host.
2671  */
2672 static inline void set_host_lcb_access(struct hfi1_devdata *dd)
2673 {
2674         write_csr(dd, DC_DC8051_CFG_CSR_ACCESS_SEL,
2675                                 DC_DC8051_CFG_CSR_ACCESS_SEL_DCC_SMASK
2676                                 | DC_DC8051_CFG_CSR_ACCESS_SEL_LCB_SMASK);
2677 }
2678
2679 /*
2680  * Clear the LCB selector - allow 8051 access.  The DCC selector always
2681  * points to the host.
2682  */
2683 static inline void set_8051_lcb_access(struct hfi1_devdata *dd)
2684 {
2685         write_csr(dd, DC_DC8051_CFG_CSR_ACCESS_SEL,
2686                                 DC_DC8051_CFG_CSR_ACCESS_SEL_DCC_SMASK);
2687 }
2688
2689 /*
2690  * Acquire LCB access from the 8051.  If the host already has access,
2691  * just increment a counter.  Otherwise, inform the 8051 that the
2692  * host is taking access.
2693  *
2694  * Returns:
2695  *      0 on success
2696  *      -EBUSY if the 8051 has control and cannot be disturbed
2697  *      -errno if unable to acquire access from the 8051
2698  */
2699 int acquire_lcb_access(struct hfi1_devdata *dd, int sleep_ok)
2700 {
2701         struct hfi1_pportdata *ppd = dd->pport;
2702         int ret = 0;
2703
2704         /*
2705          * Use the host link state lock so the operation of this routine
2706          * { link state check, selector change, count increment } can occur
2707          * as a unit against a link state change.  Otherwise there is a
2708          * race between the state change and the count increment.
2709          */
2710         if (sleep_ok) {
2711                 mutex_lock(&ppd->hls_lock);
2712         } else {
2713                 while (!mutex_trylock(&ppd->hls_lock))
2714                         udelay(1);
2715         }
2716
2717         /* this access is valid only when the link is up */
2718         if ((ppd->host_link_state & HLS_UP) == 0) {
2719                 dd_dev_info(dd, "%s: link state %s not up\n",
2720                         __func__, link_state_name(ppd->host_link_state));
2721                 ret = -EBUSY;
2722                 goto done;
2723         }
2724
2725         if (dd->lcb_access_count == 0) {
2726                 ret = request_host_lcb_access(dd);
2727                 if (ret) {
2728                         dd_dev_err(dd,
2729                                 "%s: unable to acquire LCB access, err %d\n",
2730                                 __func__, ret);
2731                         goto done;
2732                 }
2733                 set_host_lcb_access(dd);
2734         }
2735         dd->lcb_access_count++;
2736 done:
2737         mutex_unlock(&ppd->hls_lock);
2738         return ret;
2739 }
2740
2741 /*
2742  * Release LCB access by decrementing the use count.  If the count is moving
2743  * from 1 to 0, inform the 8051 that it has control back.
2744  *
2745  * Returns:
2746  *      0 on success
2747  *      -errno if unable to release access to the 8051
2748  */
2749 int release_lcb_access(struct hfi1_devdata *dd, int sleep_ok)
2750 {
2751         int ret = 0;
2752
2753         /*
2754          * Use the host link state lock because the acquire needed it.
2755          * Here, we only need to keep { selector change, count decrement }
2756          * as a unit.
2757          */
2758         if (sleep_ok) {
2759                 mutex_lock(&dd->pport->hls_lock);
2760         } else {
2761                 while (!mutex_trylock(&dd->pport->hls_lock))
2762                         udelay(1);
2763         }
2764
2765         if (dd->lcb_access_count == 0) {
2766                 dd_dev_err(dd, "%s: LCB access count is zero.  Skipping.\n",
2767                         __func__);
2768                 goto done;
2769         }
2770
2771         if (dd->lcb_access_count == 1) {
2772                 set_8051_lcb_access(dd);
2773                 ret = request_8051_lcb_access(dd);
2774                 if (ret) {
2775                         dd_dev_err(dd,
2776                                 "%s: unable to release LCB access, err %d\n",
2777                                 __func__, ret);
2778                         /* restore host access if the grant didn't work */
2779                         set_host_lcb_access(dd);
2780                         goto done;
2781                 }
2782         }
2783         dd->lcb_access_count--;
2784 done:
2785         mutex_unlock(&dd->pport->hls_lock);
2786         return ret;
2787 }
2788
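/*
 * A minimal pairing sketch (illustrative, not driver code): LCB CSRs
 * are only safe to touch between a successful acquire_lcb_access()
 * and the matching release_lcb_access().  Assumes a caller that is
 * allowed to sleep (sleep_ok = 1).
 */
static int __maybe_unused example_read_lcb_csr(struct hfi1_devdata *dd,
                                               u32 offset, u64 *val)
{
        int ret;

        ret = acquire_lcb_access(dd, 1);
        if (ret)
                return ret;
        *val = read_csr(dd, offset);
        release_lcb_access(dd, 1);
        return 0;
}
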
2789 /*
2790  * Initialize LCB access variables and state.  Called during driver load,
2791  * after most of the initialization is finished.
2792  *
2793  * The DC default is LCB access on for the host.  The driver defaults to
2794  * leaving access to the 8051.  Assign access now - this constrains the call
2795  * to this routine to be after all LCB set-up is done.  In particular, after
2796  * hfi1_init_dd() -> set_up_interrupts() -> clear_all_interrupts()
2797  */
2798 static void init_lcb_access(struct hfi1_devdata *dd)
2799 {
2800         dd->lcb_access_count = 0;
2801 }
2802
2803 /*
2804  * Write a response back to an 8051 request.
2805  */
2806 static void hreq_response(struct hfi1_devdata *dd, u8 return_code, u16 rsp_data)
2807 {
2808         write_csr(dd, DC_DC8051_CFG_EXT_DEV_0,
2809                 DC_DC8051_CFG_EXT_DEV_0_COMPLETED_SMASK
2810                 | (u64)return_code << DC_DC8051_CFG_EXT_DEV_0_RETURN_CODE_SHIFT
2811                 | (u64)rsp_data << DC_DC8051_CFG_EXT_DEV_0_RSP_DATA_SHIFT);
2812 }
2813
2814 /*
2815  * Handle requests from the 8051.
2816  */
2817 static void handle_8051_request(struct hfi1_devdata *dd)
2818 {
2819         u64 reg;
2820         u16 data;
2821         u8 type;
2822
2823         reg = read_csr(dd, DC_DC8051_CFG_EXT_DEV_1);
2824         if ((reg & DC_DC8051_CFG_EXT_DEV_1_REQ_NEW_SMASK) == 0)
2825                 return; /* no request */
2826
2827         /* zero out COMPLETED so the response is seen */
2828         write_csr(dd, DC_DC8051_CFG_EXT_DEV_0, 0);
2829
2830         /* extract request details */
2831         type = (reg >> DC_DC8051_CFG_EXT_DEV_1_REQ_TYPE_SHIFT)
2832                         & DC_DC8051_CFG_EXT_DEV_1_REQ_TYPE_MASK;
2833         data = (reg >> DC_DC8051_CFG_EXT_DEV_1_REQ_DATA_SHIFT)
2834                         & DC_DC8051_CFG_EXT_DEV_1_REQ_DATA_MASK;
2835
2836         switch (type) {
2837         case HREQ_LOAD_CONFIG:
2838         case HREQ_SAVE_CONFIG:
2839         case HREQ_READ_CONFIG:
2840         case HREQ_SET_TX_EQ_ABS:
2841         case HREQ_SET_TX_EQ_REL:
2842         case HREQ_ENABLE:
2843                 dd_dev_info(dd, "8051 request: request 0x%x not supported\n",
2844                         type);
2845                 hreq_response(dd, HREQ_NOT_SUPPORTED, 0);
2846                 break;
2847
2848         case HREQ_CONFIG_DONE:
2849                 hreq_response(dd, HREQ_SUCCESS, 0);
2850                 break;
2851
2852         case HREQ_INTERFACE_TEST:
2853                 hreq_response(dd, HREQ_SUCCESS, data);
2854                 break;
2855
2856         default:
2857                 dd_dev_err(dd, "8051 request: unknown request 0x%x\n", type);
2858                 hreq_response(dd, HREQ_NOT_SUPPORTED, 0);
2859                 break;
2860         }
2861 }
2862
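/*
 * Handshake recap (illustrative): the 8051 raises REQ_NEW in
 * DC_DC8051_CFG_EXT_DEV_1 with a type/data pair; the host clears
 * COMPLETED, acts on the request, then posts COMPLETED plus a return
 * code and any response data through hreq_response() above.
 */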
2863 static void write_global_credit(struct hfi1_devdata *dd,
2864                                 u8 vau, u16 total, u16 shared)
2865 {
2866         write_csr(dd, SEND_CM_GLOBAL_CREDIT,
2867                 ((u64)total
2868                         << SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SHIFT)
2869                 | ((u64)shared
2870                         << SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SHIFT)
2871                 | ((u64)vau << SEND_CM_GLOBAL_CREDIT_AU_SHIFT));
2872 }
2873
2874 /*
2875  * Set up initial VL15 credits of the remote.  Assumes the rest of
2876  * the CM credit registers are zero from a previous global or credit reset .
2877  */
2878 void set_up_vl15(struct hfi1_devdata *dd, u8 vau, u16 vl15buf)
2879 {
2880         /* leave shared count at zero for both global and VL15 */
2881         write_global_credit(dd, vau, vl15buf, 0);
2882
2883         /* We may need some credits for another VL when sending packets
2884          * with the snoop interface. Dividing it down the middle for VL15
2885          * and VL0 should suffice.
2886          */
2887         if (unlikely(dd->hfi1_snoop.mode_flag == HFI1_PORT_SNOOP_MODE)) {
2888                 write_csr(dd, SEND_CM_CREDIT_VL15, (u64)(vl15buf >> 1)
2889                     << SEND_CM_CREDIT_VL15_DEDICATED_LIMIT_VL_SHIFT);
2890                 write_csr(dd, SEND_CM_CREDIT_VL, (u64)(vl15buf >> 1)
2891                     << SEND_CM_CREDIT_VL_DEDICATED_LIMIT_VL_SHIFT);
2892         } else {
2893                 write_csr(dd, SEND_CM_CREDIT_VL15, (u64)vl15buf
2894                         << SEND_CM_CREDIT_VL15_DEDICATED_LIMIT_VL_SHIFT);
2895         }
2896 }
2897
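/*
 * Worked example (illustrative only) of the snoop-mode split above:
 * with vl15buf = 9, VL15 and VL0 each get 9 >> 1 = 4 dedicated
 * credits and the odd credit is dropped.  The shared limit stays 0,
 * so the dedicated total never exceeds what the peer advertised.
 */
static void __maybe_unused example_vl15_snoop_split(u16 vl15buf)
{
        u16 per_vl = vl15buf >> 1;      /* each of VL15 and VL0 */

        WARN_ON(2 * per_vl > vl15buf);
}
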
2898 /*
2899  * Zero all credit details from the previous connection and
2900  * reset the CM manager's internal counters.
2901  */
2902 void reset_link_credits(struct hfi1_devdata *dd)
2903 {
2904         int i;
2905
2906         /* remove all previous VL credit limits */
2907         for (i = 0; i < TXE_NUM_DATA_VL; i++)
2908                 write_csr(dd, SEND_CM_CREDIT_VL + (8*i), 0);
2909         write_csr(dd, SEND_CM_CREDIT_VL15, 0);
2910         write_global_credit(dd, 0, 0, 0);
2911         /* reset the CM block */
2912         pio_send_control(dd, PSC_CM_RESET);
2913 }
2914
2915 /* convert a vCU to a CU */
2916 static u32 vcu_to_cu(u8 vcu)
2917 {
2918         return 1 << vcu;
2919 }
2920
2921 /* convert a CU to a vCU */
2922 static u8 cu_to_vcu(u32 cu)
2923 {
2924         return ilog2(cu);
2925 }
2926
2927 /* convert a vAU to an AU */
2928 static u32 vau_to_au(u8 vau)
2929 {
2930         return 8 * (1 << vau);
2931 }
2932
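/*
 * Illustrative checks of the encodings above, assuming only the
 * relationships used here (CU = 2^vCU, AU = 8 * 2^vAU): vCU 2 is a
 * credit unit of 4, and vAU 0/1 are 8/16 byte allocation units -
 * which is why a peer vAU of 0 is bumped to 1 in handle_verify_cap()
 * below.
 */
static void __maybe_unused example_unit_encodings(void)
{
        WARN_ON(vcu_to_cu(2) != 4);
        WARN_ON(cu_to_vcu(vcu_to_cu(2)) != 2);          /* round-trip */
        WARN_ON(vau_to_au(0) != 8 || vau_to_au(1) != 16);
}
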
2933 static void set_linkup_defaults(struct hfi1_pportdata *ppd)
2934 {
2935         ppd->sm_trap_qp = 0x0;
2936         ppd->sa_qp = 0x1;
2937 }
2938
2939 /*
2940  * Graceful LCB shutdown.  This leaves the LCB FIFOs in reset.
2941  */
2942 static void lcb_shutdown(struct hfi1_devdata *dd, int abort)
2943 {
2944         u64 reg;
2945
2946         /* clear lcb run: LCB_CFG_RUN.EN = 0 */
2947         write_csr(dd, DC_LCB_CFG_RUN, 0);
2948         /* set tx fifo reset: LCB_CFG_TX_FIFOS_RESET.VAL = 1 */
2949         write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET,
2950                 1ull << DC_LCB_CFG_TX_FIFOS_RESET_VAL_SHIFT);
2951         /* set dcc reset csr: DCC_CFG_RESET.{reset_lcb,reset_rx_fpe} = 1 */
2952         dd->lcb_err_en = read_csr(dd, DC_LCB_ERR_EN);
2953         reg = read_csr(dd, DCC_CFG_RESET);
2954         write_csr(dd, DCC_CFG_RESET,
2955                 reg
2956                 | (1ull << DCC_CFG_RESET_RESET_LCB_SHIFT)
2957                 | (1ull << DCC_CFG_RESET_RESET_RX_FPE_SHIFT));
2958         (void) read_csr(dd, DCC_CFG_RESET); /* make sure the write completed */
2959         if (!abort) {
2960                 udelay(1);    /* must hold for the longer of 16cclks or 20ns */
2961                 write_csr(dd, DCC_CFG_RESET, reg);
2962                 write_csr(dd, DC_LCB_ERR_EN, dd->lcb_err_en);
2963         }
2964 }
2965
2966 /*
2967  * This routine should be called after the link has been transitioned to
2968  * OFFLINE (OFFLINE state has the side effect of putting the SerDes into
2969  * reset).
2970  *
2971  * The expectation is that the caller of this routine would have taken
2972  * care of properly transitioning the link into the correct state.
2973  */
2974 static void dc_shutdown(struct hfi1_devdata *dd)
2975 {
2976         unsigned long flags;
2977
2978         spin_lock_irqsave(&dd->dc8051_lock, flags);
2979         if (dd->dc_shutdown) {
2980                 spin_unlock_irqrestore(&dd->dc8051_lock, flags);
2981                 return;
2982         }
2983         dd->dc_shutdown = 1;
2984         spin_unlock_irqrestore(&dd->dc8051_lock, flags);
2985         /* Shutdown the LCB */
2986         lcb_shutdown(dd, 1);
2987         /* Going to OFFLINE will have caused the 8051 to put the
2988          * SerDes into reset already.  We just need to shut down
2989          * the 8051 itself. */
2990         write_csr(dd, DC_DC8051_CFG_RST, 0x1);
2991 }
2992
2993 /* Calling this after the DC has been brought out of reset should not
2994  * do any damage. */
2995 static void dc_start(struct hfi1_devdata *dd)
2996 {
2997         unsigned long flags;
2998         int ret;
2999
3000         spin_lock_irqsave(&dd->dc8051_lock, flags);
3001         if (!dd->dc_shutdown)
3002                 goto done;
3003         spin_unlock_irqrestore(&dd->dc8051_lock, flags);
3004         /* Take the 8051 out of reset */
3005         write_csr(dd, DC_DC8051_CFG_RST, 0ull);
3006         /* Wait until 8051 is ready */
3007         ret = wait_fm_ready(dd, TIMEOUT_8051_START);
3008         if (ret) {
3009                 dd_dev_err(dd, "%s: timeout starting 8051 firmware\n",
3010                         __func__);
3011         }
3012         /* Take away reset for LCB and RX FPE (set in lcb_shutdown). */
3013         write_csr(dd, DCC_CFG_RESET, 0x10);
3014         /* lcb_shutdown() with abort=1 does not restore these */
3015         write_csr(dd, DC_LCB_ERR_EN, dd->lcb_err_en);
3016         spin_lock_irqsave(&dd->dc8051_lock, flags);
3017         dd->dc_shutdown = 0;
3018 done:
3019         spin_unlock_irqrestore(&dd->dc8051_lock, flags);
3020 }
3021
3022 /*
3023  * These LCB adjustments are for the Aurora SerDes core in the FPGA.
3024  */
3025 static void adjust_lcb_for_fpga_serdes(struct hfi1_devdata *dd)
3026 {
3027         u64 rx_radr, tx_radr;
3028         u32 version;
3029
3030         if (dd->icode != ICODE_FPGA_EMULATION)
3031                 return;
3032
3033         /*
3034          * These LCB defaults on emulator _s are good, nothing to do here:
3035          *      LCB_CFG_TX_FIFOS_RADR
3036          *      LCB_CFG_RX_FIFOS_RADR
3037          *      LCB_CFG_LN_DCLK
3038          *      LCB_CFG_IGNORE_LOST_RCLK
3039          */
3040         if (is_emulator_s(dd))
3041                 return;
3042         /* else this is _p */
3043
3044         version = emulator_rev(dd);
3045         if (!is_a0(dd))
3046                 version = 0x2d; /* all B0 use 0x2d or higher settings */
3047
3048         if (version <= 0x12) {
3049                 /* release 0x12 and below */
3050
3051                 /*
3052                  * LCB_CFG_RX_FIFOS_RADR.RST_VAL = 0x9
3053                  * LCB_CFG_RX_FIFOS_RADR.OK_TO_JUMP_VAL = 0x9
3054                  * LCB_CFG_RX_FIFOS_RADR.DO_NOT_JUMP_VAL = 0xa
3055                  */
3056                 rx_radr =
3057                       0xaull << DC_LCB_CFG_RX_FIFOS_RADR_DO_NOT_JUMP_VAL_SHIFT
3058                     | 0x9ull << DC_LCB_CFG_RX_FIFOS_RADR_OK_TO_JUMP_VAL_SHIFT
3059                     | 0x9ull << DC_LCB_CFG_RX_FIFOS_RADR_RST_VAL_SHIFT;
3060                 /*
3061                  * LCB_CFG_TX_FIFOS_RADR.ON_REINIT = 0 (default)
3062                  * LCB_CFG_TX_FIFOS_RADR.RST_VAL = 6
3063                  */
3064                 tx_radr = 6ull << DC_LCB_CFG_TX_FIFOS_RADR_RST_VAL_SHIFT;
3065         } else if (version <= 0x18) {
3066                 /* release 0x13 up to 0x18 */
3067                 /* LCB_CFG_RX_FIFOS_RADR = 0x988 */
3068                 rx_radr =
3069                       0x9ull << DC_LCB_CFG_RX_FIFOS_RADR_DO_NOT_JUMP_VAL_SHIFT
3070                     | 0x8ull << DC_LCB_CFG_RX_FIFOS_RADR_OK_TO_JUMP_VAL_SHIFT
3071                     | 0x8ull << DC_LCB_CFG_RX_FIFOS_RADR_RST_VAL_SHIFT;
3072                 tx_radr = 7ull << DC_LCB_CFG_TX_FIFOS_RADR_RST_VAL_SHIFT;
3073         } else if (version == 0x19) {
3074                 /* release 0x19 */
3075                 /* LCB_CFG_RX_FIFOS_RADR = 0xa99 */
3076                 rx_radr =
3077                       0xAull << DC_LCB_CFG_RX_FIFOS_RADR_DO_NOT_JUMP_VAL_SHIFT
3078                     | 0x9ull << DC_LCB_CFG_RX_FIFOS_RADR_OK_TO_JUMP_VAL_SHIFT
3079                     | 0x9ull << DC_LCB_CFG_RX_FIFOS_RADR_RST_VAL_SHIFT;
3080                 tx_radr = 3ull << DC_LCB_CFG_TX_FIFOS_RADR_RST_VAL_SHIFT;
3081         } else if (version == 0x1a) {
3082                 /* release 0x1a */
3083                 /* LCB_CFG_RX_FIFOS_RADR = 0x988 */
3084                 rx_radr =
3085                       0x9ull << DC_LCB_CFG_RX_FIFOS_RADR_DO_NOT_JUMP_VAL_SHIFT
3086                     | 0x8ull << DC_LCB_CFG_RX_FIFOS_RADR_OK_TO_JUMP_VAL_SHIFT
3087                     | 0x8ull << DC_LCB_CFG_RX_FIFOS_RADR_RST_VAL_SHIFT;
3088                 tx_radr = 7ull << DC_LCB_CFG_TX_FIFOS_RADR_RST_VAL_SHIFT;
3089                 write_csr(dd, DC_LCB_CFG_LN_DCLK, 1ull);
3090         } else {
3091                 /* release 0x1b and higher */
3092                 /* LCB_CFG_RX_FIFOS_RADR = 0x877 */
3093                 rx_radr =
3094                       0x8ull << DC_LCB_CFG_RX_FIFOS_RADR_DO_NOT_JUMP_VAL_SHIFT
3095                     | 0x7ull << DC_LCB_CFG_RX_FIFOS_RADR_OK_TO_JUMP_VAL_SHIFT
3096                     | 0x7ull << DC_LCB_CFG_RX_FIFOS_RADR_RST_VAL_SHIFT;
3097                 tx_radr = 3ull << DC_LCB_CFG_TX_FIFOS_RADR_RST_VAL_SHIFT;
3098         }
3099
3100         write_csr(dd, DC_LCB_CFG_RX_FIFOS_RADR, rx_radr);
3101         /* LCB_CFG_IGNORE_LOST_RCLK.EN = 1 */
3102         write_csr(dd, DC_LCB_CFG_IGNORE_LOST_RCLK,
3103                 DC_LCB_CFG_IGNORE_LOST_RCLK_EN_SMASK);
3104         write_csr(dd, DC_LCB_CFG_TX_FIFOS_RADR, tx_radr);
3105 }
3106
3107 /*
3108  * Handle a SMA idle message
3109  *
3110  * This is a work-queue function outside of the interrupt.
3111  */
3112 void handle_sma_message(struct work_struct *work)
3113 {
3114         struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3115                                                         sma_message_work);
3116         struct hfi1_devdata *dd = ppd->dd;
3117         u64 msg;
3118         int ret;
3119
3120         /* msg is bytes 1-4 of the 40-bit idle message - the command
3121          * code is stripped off */
3122         ret = read_idle_sma(dd, &msg);
3123         if (ret)
3124                 return;
3125         dd_dev_info(dd, "%s: SMA message 0x%llx\n", __func__, msg);
3126         /*
3127          * React to the SMA message.  Byte[1] (0 for us) is the command.
3128          */
3129         switch (msg & 0xff) {
3130         case SMA_IDLE_ARM:
3131                 /*
3132                  * See OPAv1 table 9-14 - HFI and External Switch Ports Key
3133                  * State Transitions
3134                  *
3135                  * Only expected in INIT or ARMED, discard otherwise.
3136                  */
3137                 if (ppd->host_link_state & (HLS_UP_INIT | HLS_UP_ARMED))
3138                         ppd->neighbor_normal = 1;
3139                 break;
3140         case SMA_IDLE_ACTIVE:
3141                 /*
3142                  * See OPAv1 table 9-14 - HFI and External Switch Ports Key
3143                  * State Transitions
3144                  *
3145                  * Can activate the node.  Discard otherwise.
3146                  */
3147                 if (ppd->host_link_state == HLS_UP_ARMED
3148                                         && ppd->is_active_optimize_enabled) {
3149                         ppd->neighbor_normal = 1;
3150                         ret = set_link_state(ppd, HLS_UP_ACTIVE);
3151                         if (ret)
3152                                 dd_dev_err(
3153                                         dd,
3154                                         "%s: received Active SMA idle message, couldn't set link to Active\n",
3155                                         __func__);
3156                 }
3157                 break;
3158         default:
3159                 dd_dev_err(dd,
3160                         "%s: received unexpected SMA idle message 0x%llx\n",
3161                         __func__, msg);
3162                 break;
3163         }
3164 }
3165
3166 static void adjust_rcvctrl(struct hfi1_devdata *dd, u64 add, u64 clear)
3167 {
3168         u64 rcvctrl;
3169         unsigned long flags;
3170
3171         spin_lock_irqsave(&dd->rcvctrl_lock, flags);
3172         rcvctrl = read_csr(dd, RCV_CTRL);
3173         rcvctrl |= add;
3174         rcvctrl &= ~clear;
3175         write_csr(dd, RCV_CTRL, rcvctrl);
3176         spin_unlock_irqrestore(&dd->rcvctrl_lock, flags);
3177 }
3178
3179 static inline void add_rcvctrl(struct hfi1_devdata *dd, u64 add)
3180 {
3181         adjust_rcvctrl(dd, add, 0);
3182 }
3183
3184 static inline void clear_rcvctrl(struct hfi1_devdata *dd, u64 clear)
3185 {
3186         adjust_rcvctrl(dd, 0, clear);
3187 }
3188
3189 /*
3190  * Called from all interrupt handlers to start handling an SPC freeze.
3191  */
3192 void start_freeze_handling(struct hfi1_pportdata *ppd, int flags)
3193 {
3194         struct hfi1_devdata *dd = ppd->dd;
3195         struct send_context *sc;
3196         int i;
3197
3198         if (flags & FREEZE_SELF)
3199                 write_csr(dd, CCE_CTRL, CCE_CTRL_SPC_FREEZE_SMASK);
3200
3201         /* enter frozen mode */
3202         dd->flags |= HFI1_FROZEN;
3203
3204         /* notify all SDMA engines that they are going into a freeze */
3205         sdma_freeze_notify(dd, !!(flags & FREEZE_LINK_DOWN));
3206
3207         /* do halt pre-handling on all enabled send contexts */
3208         for (i = 0; i < dd->num_send_contexts; i++) {
3209                 sc = dd->send_contexts[i].sc;
3210                 if (sc && (sc->flags & SCF_ENABLED))
3211                         sc_stop(sc, SCF_FROZEN | SCF_HALTED);
3212         }
3213
3214         /* Send contexts are frozen.  Notify user space. */
3215         hfi1_set_uevent_bits(ppd, _HFI1_EVENT_FROZEN_BIT);
3216
3217         if (flags & FREEZE_ABORT) {
3218                 dd_dev_err(dd,
3219                            "Aborted freeze recovery. Please REBOOT system\n");
3220                 return;
3221         }
3222         /* queue non-interrupt handler */
3223         queue_work(ppd->hfi1_wq, &ppd->freeze_work);
3224 }
3225
3226 /*
3227  * Wait until all 4 sub-blocks indicate that they have frozen or unfrozen,
3228  * depending on the "freeze" parameter.
3229  *
3230  * No need to return an error if it times out; our only option
3231  * is to proceed anyway.
3232  */
3233 static void wait_for_freeze_status(struct hfi1_devdata *dd, int freeze)
3234 {
3235         unsigned long timeout;
3236         u64 reg;
3237
3238         timeout = jiffies + msecs_to_jiffies(FREEZE_STATUS_TIMEOUT);
3239         while (1) {
3240                 reg = read_csr(dd, CCE_STATUS);
3241                 if (freeze) {
3242                         /* waiting until all indicators are set */
3243                         if ((reg & ALL_FROZE) == ALL_FROZE)
3244                                 return; /* all done */
3245                 } else {
3246                         /* waiting until all indicators are clear */
3247                         if ((reg & ALL_FROZE) == 0)
3248                                 return; /* all done */
3249                 }
3250
3251                 if (time_after(jiffies, timeout)) {
3252                         dd_dev_err(dd,
3253                                 "Time out waiting for SPC %sfreeze, bits 0x%llx, expecting 0x%llx, continuing",
3254                                 freeze ? "" : "un",
3255                                 reg & ALL_FROZE,
3256                                 freeze ? ALL_FROZE : 0ull);
3257                         return;
3258                 }
3259                 usleep_range(80, 120);
3260         }
3261 }
3262
3263 /*
3264  * Do all freeze handling for the RXE block.
3265  */
3266 static void rxe_freeze(struct hfi1_devdata *dd)
3267 {
3268         int i;
3269
3270         /* disable port */
3271         clear_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
3272
3273         /* disable all receive contexts */
3274         for (i = 0; i < dd->num_rcv_contexts; i++)
3275                 hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS, i);
3276 }
3277
3278 /*
3279  * Unfreeze handling for the RXE block - kernel contexts only.
3280  * This will also enable the port.  User contexts will do unfreeze
3281  * handling on a per-context basis as they call into the driver.
3282  */
3284 static void rxe_kernel_unfreeze(struct hfi1_devdata *dd)
3285 {
3286         int i;
3287
3288         /* enable all kernel contexts */
3289         for (i = 0; i < dd->n_krcv_queues; i++)
3290                 hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB, i);
3291
3292         /* enable port */
3293         add_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
3294 }
3295
3296 /*
3297  * Non-interrupt SPC freeze handling.
3298  *
3299  * This is a work-queue function outside of the triggering interrupt.
3300  */
3301 void handle_freeze(struct work_struct *work)
3302 {
3303         struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3304                                                                 freeze_work);
3305         struct hfi1_devdata *dd = ppd->dd;
3306
3307         /* wait for freeze indicators on all affected blocks */
3308         dd_dev_info(dd, "Entering SPC freeze\n");
3309         wait_for_freeze_status(dd, 1);
3310
3311         /* SPC is now frozen */
3312
3313         /* do send PIO freeze steps */
3314         pio_freeze(dd);
3315
3316         /* do send DMA freeze steps */
3317         sdma_freeze(dd);
3318
3319         /* do send egress freeze steps - nothing to do */
3320
3321         /* do receive freeze steps */
3322         rxe_freeze(dd);
3323
3324         /*
3325          * Unfreeze the hardware - clear the freeze, wait for each
3326          * block's frozen bit to clear, then clear the frozen flag.
3327          */
3328         write_csr(dd, CCE_CTRL, CCE_CTRL_SPC_UNFREEZE_SMASK);
3329         wait_for_freeze_status(dd, 0);
3330
3331         if (is_a0(dd)) {
3332                 write_csr(dd, CCE_CTRL, CCE_CTRL_SPC_FREEZE_SMASK);
3333                 wait_for_freeze_status(dd, 1);
3334                 write_csr(dd, CCE_CTRL, CCE_CTRL_SPC_UNFREEZE_SMASK);
3335                 wait_for_freeze_status(dd, 0);
3336         }
3337
3338         /* do send PIO unfreeze steps for kernel contexts */
3339         pio_kernel_unfreeze(dd);
3340
3341         /* do send DMA unfreeze steps */
3342         sdma_unfreeze(dd);
3343
3344         /* do send egress unfreeze steps - nothing to do */
3345
3346         /* do receive unfreeze steps for kernel contexts */
3347         rxe_kernel_unfreeze(dd);
3348
3349         /*
3350          * The unfreeze procedure touches global device registers when
3351          * it disables and re-enables RXE. Mark the device unfrozen
3352          * after all that is done so other parts of the driver waiting
3353          * for the device to unfreeze don't do things out of order.
3354          *
3355          * The above implies that the meaning of HFI1_FROZEN flag is
3356          * "Device has gone into freeze mode and freeze mode handling
3357          * is still in progress."
3358          *
3359          * The flag will be removed when freeze mode processing has
3360          * completed.
3361          */
3362         dd->flags &= ~HFI1_FROZEN;
3363         wake_up(&dd->event_queue);
3364
3365         /* no longer frozen */
3366         dd_dev_err(dd, "Exiting SPC freeze\n");
3367 }
3368
3369 /*
3370  * Handle a link up interrupt from the 8051.
3371  *
3372  * This is a work-queue function outside of the interrupt.
3373  */
3374 void handle_link_up(struct work_struct *work)
3375 {
3376         struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3377                                                                 link_up_work);
3378         set_link_state(ppd, HLS_UP_INIT);
3379
3380         /* cache the read of DC_LCB_STS_ROUND_TRIP_LTP_CNT */
3381         read_ltp_rtt(ppd->dd);
3382         /*
3383          * OPA specifies that certain counters are cleared on a transition
3384          * to link up, so do that.
3385          */
3386         clear_linkup_counters(ppd->dd);
3387         /*
3388          * And (re)set link up default values.
3389          */
3390         set_linkup_defaults(ppd);
3391
3392         /* enforce link speed enabled */
3393         if ((ppd->link_speed_active & ppd->link_speed_enabled) == 0) {
3394                 /* oops - current speed is not enabled, bounce */
3395                 dd_dev_err(ppd->dd,
3396                         "Link speed active 0x%x is outside enabled 0x%x, downing link\n",
3397                         ppd->link_speed_active, ppd->link_speed_enabled);
3398                 set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SPEED_POLICY, 0,
3399                         OPA_LINKDOWN_REASON_SPEED_POLICY);
3400                 set_link_state(ppd, HLS_DN_OFFLINE);
3401                 start_link(ppd);
3402         }
3403 }
3404
3405 /* Several pieces of LNI information were cached for SMA in ppd.
3406  * Reset these on link down. */
3407 static void reset_neighbor_info(struct hfi1_pportdata *ppd)
3408 {
3409         ppd->neighbor_guid = 0;
3410         ppd->neighbor_port_number = 0;
3411         ppd->neighbor_type = 0;
3412         ppd->neighbor_fm_security = 0;
3413 }
3414
3415 /*
3416  * Handle a link down interrupt from the 8051.
3417  *
3418  * This is a work-queue function outside of the interrupt.
3419  */
3420 void handle_link_down(struct work_struct *work)
3421 {
3422         u8 lcl_reason, neigh_reason = 0;
3423         struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3424                                                                 link_down_work);
3425
3426         /* go offline first, then deal with reasons */
3427         set_link_state(ppd, HLS_DN_OFFLINE);
3428
3429         lcl_reason = 0;
3430         read_planned_down_reason_code(ppd->dd, &neigh_reason);
3431
3432         /*
3433          * If no reason, assume peer-initiated but missed
3434          * LinkGoingDown idle flits.
3435          */
3436         if (neigh_reason == 0)
3437                 lcl_reason = OPA_LINKDOWN_REASON_NEIGHBOR_UNKNOWN;
3438
3439         set_link_down_reason(ppd, lcl_reason, neigh_reason, 0);
3440
3441         reset_neighbor_info(ppd);
3442
3443         /* disable the port */
3444         clear_rcvctrl(ppd->dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
3445
3446         /* If there is no cable attached, turn the DC off. Otherwise,
3447          * start the link bring-up. */
3448         if (!qsfp_mod_present(ppd))
3449                 dc_shutdown(ppd->dd);
3450         else
3451                 start_link(ppd);
3452 }
3453
3454 void handle_link_bounce(struct work_struct *work)
3455 {
3456         struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3457                                                         link_bounce_work);
3458
3459         /*
3460          * Only do something if the link is currently up.
3461          */
3462         if (ppd->host_link_state & HLS_UP) {
3463                 set_link_state(ppd, HLS_DN_OFFLINE);
3464                 start_link(ppd);
3465         } else {
3466                 dd_dev_info(ppd->dd, "%s: link not up (%s), nothing to do\n",
3467                         __func__, link_state_name(ppd->host_link_state));
3468         }
3469 }
3470
3471 /*
3472  * Mask conversion: Capability exchange to Port LTP.  The capability
3473  * exchange has an implicit 16b CRC that is mandatory.
3474  */
3475 static int cap_to_port_ltp(int cap)
3476 {
3477         int port_ltp = PORT_LTP_CRC_MODE_16; /* this mode is mandatory */
3478
3479         if (cap & CAP_CRC_14B)
3480                 port_ltp |= PORT_LTP_CRC_MODE_14;
3481         if (cap & CAP_CRC_48B)
3482                 port_ltp |= PORT_LTP_CRC_MODE_48;
3483         if (cap & CAP_CRC_12B_16B_PER_LANE)
3484                 port_ltp |= PORT_LTP_CRC_MODE_PER_LANE;
3485
3486         return port_ltp;
3487 }
3488
3489 /*
3490  * Convert an OPA Port LTP mask to capability mask
3491  */
3492 int port_ltp_to_cap(int port_ltp)
3493 {
3494         int cap_mask = 0;
3495
3496         if (port_ltp & PORT_LTP_CRC_MODE_14)
3497                 cap_mask |= CAP_CRC_14B;
3498         if (port_ltp & PORT_LTP_CRC_MODE_48)
3499                 cap_mask |= CAP_CRC_48B;
3500         if (port_ltp & PORT_LTP_CRC_MODE_PER_LANE)
3501                 cap_mask |= CAP_CRC_12B_16B_PER_LANE;
3502
3503         return cap_mask;
3504 }
3505
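/*
 * Illustrative property of the two conversions above: the optional
 * CRC modes round-trip, while the mandatory 16b mode is implicit on
 * the LTP side and never appears in the capability mask.
 */
static void __maybe_unused example_cap_ltp_round_trip(void)
{
        int cap = CAP_CRC_14B | CAP_CRC_48B;

        WARN_ON(port_ltp_to_cap(cap_to_port_ltp(cap)) != cap);
        WARN_ON(port_ltp_to_cap(PORT_LTP_CRC_MODE_16) != 0);
}
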
3506 /*
3507  * Convert a single DC LCB CRC mode to an OPA Port LTP mask.
3508  */
3509 static int lcb_to_port_ltp(int lcb_crc)
3510 {
3511         int port_ltp = 0;
3512
3513         if (lcb_crc == LCB_CRC_12B_16B_PER_LANE)
3514                 port_ltp = PORT_LTP_CRC_MODE_PER_LANE;
3515         else if (lcb_crc == LCB_CRC_48B)
3516                 port_ltp = PORT_LTP_CRC_MODE_48;
3517         else if (lcb_crc == LCB_CRC_14B)
3518                 port_ltp = PORT_LTP_CRC_MODE_14;
3519         else
3520                 port_ltp = PORT_LTP_CRC_MODE_16;
3521
3522         return port_ltp;
3523 }
3524
3525 /*
3526  * Our neighbor has indicated that we are allowed to act as a fabric
3527  * manager, so place the full management partition key in pkey array
3528  * position 2 (0-based; see OPAv1, section 20.2.2.6.8).  Note
3529  * that we should already have the limited management partition key in
3530  * array element 1, and also that the port is not yet up when
3531  * add_full_mgmt_pkey() is invoked.
3532  */
3533 static void add_full_mgmt_pkey(struct hfi1_pportdata *ppd)
3534 {
3535         struct hfi1_devdata *dd = ppd->dd;
3536
3537         /* Sanity check - ppd->pkeys[2] should be 0 */
3538         if (ppd->pkeys[2] != 0)
3539                 dd_dev_err(dd, "%s pkey[2] already set to 0x%x, resetting it to 0x%x\n",
3540                            __func__, ppd->pkeys[2], FULL_MGMT_P_KEY);
3541         ppd->pkeys[2] = FULL_MGMT_P_KEY;
3542         (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0);
3543 }
3544
3545 /*
3546  * Convert the given link width to the OPA link width bitmask.
3547  */
3548 static u16 link_width_to_bits(struct hfi1_devdata *dd, u16 width)
3549 {
3550         switch (width) {
3551         case 0:
3552                 /*
3553                  * Simulator and quick linkup do not set the width.
3554                  * Just set it to 4x without complaint.
3555                  */
3556                 if (dd->icode == ICODE_FUNCTIONAL_SIMULATOR || quick_linkup)
3557                         return OPA_LINK_WIDTH_4X;
3558                 return 0; /* no lanes up */
3559         case 1: return OPA_LINK_WIDTH_1X;
3560         case 2: return OPA_LINK_WIDTH_2X;
3561         case 3: return OPA_LINK_WIDTH_3X;
3562         default:
3563                 dd_dev_info(dd, "%s: invalid width %d, using 4\n",
3564                         __func__, width);
3565                 /* fall through */
3566         case 4: return OPA_LINK_WIDTH_4X;
3567         }
3568 }
3569
3570 /*
3571  * Do a population count on the bottom nibble.
3572  */
3573 static const u8 bit_counts[16] = {
3574         0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4
3575 };
3576 static inline u8 nibble_to_count(u8 nibble)
3577 {
3578         return bit_counts[nibble & 0xf];
3579 }
3580
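/*
 * Illustrative only: the table lookup above is a population count of
 * the low nibble, e.g. an enable_lane mask of 0xb (lanes 0, 1 and 3
 * active) is a width of 3.
 */
static void __maybe_unused example_nibble_count(void)
{
        WARN_ON(nibble_to_count(0xb) != 3);     /* 0b1011 -> 3 lanes */
        WARN_ON(nibble_to_count(0xf) != 4);     /* all four lanes */
}
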
3581 /*
3582  * Read the active lane information from the 8051 registers and return
3583  * their widths.
3584  *
3585  * Active lane information is found in these 8051 registers:
3586  *      enable_lane_tx
3587  *      enable_lane_rx
3588  */
3589 static void get_link_widths(struct hfi1_devdata *dd, u16 *tx_width,
3590                             u16 *rx_width)
3591 {
3592         u16 tx, rx;
3593         u8 enable_lane_rx;
3594         u8 enable_lane_tx;
3595         u8 tx_polarity_inversion;
3596         u8 rx_polarity_inversion;
3597         u8 max_rate;
3598
3599         /* read the active lanes */
3600         read_tx_settings(dd, &enable_lane_tx, &tx_polarity_inversion,
3601                                 &rx_polarity_inversion, &max_rate);
3602         read_local_lni(dd, &enable_lane_rx);
3603
3604         /* convert to counts */
3605         tx = nibble_to_count(enable_lane_tx);
3606         rx = nibble_to_count(enable_lane_rx);
3607
3608         /*
3609          * Set link_speed_active here, overriding what was set in
3610          * handle_verify_cap().  The ASIC 8051 firmware does not correctly
3611          * set the max_rate field in handle_verify_cap until v0.19.
3612          */
3613         if ((dd->icode == ICODE_RTL_SILICON)
3614                                 && (dd->dc8051_ver < dc8051_ver(0, 19))) {
3615                 /* max_rate: 0 = 12.5G, 1 = 25G */
3616                 switch (max_rate) {
3617                 case 0:
3618                         dd->pport[0].link_speed_active = OPA_LINK_SPEED_12_5G;
3619                         break;
3620                 default:
3621                         dd_dev_err(dd,
3622                                 "%s: unexpected max rate %d, using 25Gb\n",
3623                                 __func__, (int)max_rate);
3624                         /* fall through */
3625                 case 1:
3626                         dd->pport[0].link_speed_active = OPA_LINK_SPEED_25G;
3627                         break;
3628                 }
3629         }
3630
3631         dd_dev_info(dd,
3632                 "Fabric active lanes (width): tx 0x%x (%d), rx 0x%x (%d)\n",
3633                 enable_lane_tx, tx, enable_lane_rx, rx);
3634         *tx_width = link_width_to_bits(dd, tx);
3635         *rx_width = link_width_to_bits(dd, rx);
3636 }
3637
3638 /*
3639  * Read verify_cap_local_fm_link_width[1] to obtain the link widths.
3640  * Valid after the end of VerifyCap and during LinkUp.  Does not change
3641  * after link up.  I.e. look elsewhere for downgrade information.
3642  *
3643  * Bits are:
3644  *      + bits [7:4] contain the number of active transmitters
3645  *      + bits [3:0] contain the number of active receivers
3646  * These are numbers 1 through 4 and can be different values if the
3647  * link is asymmetric.
3648  *
3649  * verify_cap_local_fm_link_width[0] retains its original value.
3650  */
3651 static void get_linkup_widths(struct hfi1_devdata *dd, u16 *tx_width,
3652                               u16 *rx_width)
3653 {
3654         u16 widths, tx, rx;
3655         u8 misc_bits, local_flags;
3656         u16 active_tx, active_rx;
3657
3658         read_vc_local_link_width(dd, &misc_bits, &local_flags, &widths);
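        /*
         * 'widths' carries both bytes of the field; byte [1] (see the
         * comment above) is the upper byte, so its bits [7:4]/[3:0]
         * land at bits [15:12]/[11:8] of 'widths'.
         */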
3659         tx = widths >> 12;
3660         rx = (widths >> 8) & 0xf;
3661
3662         *tx_width = link_width_to_bits(dd, tx);
3663         *rx_width = link_width_to_bits(dd, rx);
3664
3665         /* print the active widths */
3666         get_link_widths(dd, &active_tx, &active_rx);
3667 }
3668
3669 /*
3670  * Set ppd->link_width_active and ppd->link_width_downgrade_active using
3671  * hardware information when the link first comes up.
3672  *
3673  * The link width is not available until after VerifyCap.AllFramesReceived
3674  * (the trigger for handle_verify_cap), so this is outside that routine
3675  * and should be called when the 8051 signals linkup.
3676  */
3677 void get_linkup_link_widths(struct hfi1_pportdata *ppd)
3678 {
3679         u16 tx_width, rx_width;
3680
3681         /* get end-of-LNI link widths */
3682         get_linkup_widths(ppd->dd, &tx_width, &rx_width);
3683
3684         /* use tx_width as the link is supposed to be symmetric on link up */
3685         ppd->link_width_active = tx_width;
3686         /* link width downgrade active (LWD.A) starts out matching LW.A */
3687         ppd->link_width_downgrade_tx_active = ppd->link_width_active;
3688         ppd->link_width_downgrade_rx_active = ppd->link_width_active;
3689         /* per OPA spec, on link up LWD.E resets to LWD.S */
3690         ppd->link_width_downgrade_enabled = ppd->link_width_downgrade_supported;
3691         /* cache the active egress rate (units [10^6 bits/sec]) */
3692         ppd->current_egress_rate = active_egress_rate(ppd);
3693 }
3694
3695 /*
3696  * Handle a verify capabilities interrupt from the 8051.
3697  *
3698  * This is a work-queue function outside of the interrupt.
3699  */
3700 void handle_verify_cap(struct work_struct *work)
3701 {
3702         struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3703                                                                 link_vc_work);
3704         struct hfi1_devdata *dd = ppd->dd;
3705         u64 reg;
3706         u8 power_management;
3707         u8 continuous;
3708         u8 vcu;
3709         u8 vau;
3710         u8 z;
3711         u16 vl15buf;
3712         u16 link_widths;
3713         u16 crc_mask;
3714         u16 crc_val;
3715         u16 device_id;
3716         u16 active_tx, active_rx;
3717         u8 partner_supported_crc;
3718         u8 remote_tx_rate;
3719         u8 device_rev;
3720
3721         set_link_state(ppd, HLS_VERIFY_CAP);
3722
3723         lcb_shutdown(dd, 0);
3724         adjust_lcb_for_fpga_serdes(dd);
3725
3726         /*
3727          * These are now valid:
3728          *      remote VerifyCap fields in the general LNI config
3729          *      CSR DC8051_STS_REMOTE_GUID
3730          *      CSR DC8051_STS_REMOTE_NODE_TYPE
3731          *      CSR DC8051_STS_REMOTE_FM_SECURITY
3732          *      CSR DC8051_STS_REMOTE_PORT_NO
3733          */
3734
3735         read_vc_remote_phy(dd, &power_management, &continuous);
3736         read_vc_remote_fabric(
3737                 dd,
3738                 &vau,
3739                 &z,
3740                 &vcu,
3741                 &vl15buf,
3742                 &partner_supported_crc);
3743         read_vc_remote_link_width(dd, &remote_tx_rate, &link_widths);
3744         read_remote_device_id(dd, &device_id, &device_rev);
3745         /*
3746          * And the 'MgmtAllowed' information, which is exchanged during
3747          * LNI, is also available at this point.
3748          */
3749         read_mgmt_allowed(dd, &ppd->mgmt_allowed);
3750         /* print the active widths */
3751         get_link_widths(dd, &active_tx, &active_rx);
3752         dd_dev_info(dd,
3753                 "Peer PHY: power management 0x%x, continuous updates 0x%x\n",
3754                 (int)power_management, (int)continuous);
3755         dd_dev_info(dd,
3756                 "Peer Fabric: vAU %d, Z %d, vCU %d, vl15 credits 0x%x, CRC sizes 0x%x\n",
3757                 (int)vau,
3758                 (int)z,
3759                 (int)vcu,
3760                 (int)vl15buf,
3761                 (int)partner_supported_crc);
3762         dd_dev_info(dd, "Peer Link Width: tx rate 0x%x, widths 0x%x\n",
3763                 (u32)remote_tx_rate, (u32)link_widths);
3764         dd_dev_info(dd, "Peer Device ID: 0x%04x, Revision 0x%02x\n",
3765                 (u32)device_id, (u32)device_rev);
3766         /*
3767          * The peer vAU value just read is the peer receiver value.  HFI does
3768          * not support a transmit vAU of 0 (AU == 8).  We advertised that
3769          * with Z=1 in the fabric capabilities sent to the peer.  The peer
3770          * will see our Z=1, and, if it advertised a vAU of 0, will move its
3771          * receive to vAU of 1 (AU == 16).  Do the same here.  We do not care
3772          * about the peer Z value - our sent vAU is 3 (hardwired) and is not
3773          * subject to the Z value exception.
3774          */
3775         if (vau == 0)
3776                 vau = 1;
3777         set_up_vl15(dd, vau, vl15buf);
3778
3779         /* set up the LCB CRC mode */
3780         crc_mask = ppd->port_crc_mode_enabled & partner_supported_crc;
3781
3782         /* order is important: use the lowest bit in common */
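        /*
         * For example: if we enabled 14b and 48b but the peer only
         * supports 48b, crc_mask is CAP_CRC_48B and LCB_CRC_48B is
         * chosen; with nothing in common, the mandatory 16b mode is
         * used.
         */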
3783         if (crc_mask & CAP_CRC_14B)
3784                 crc_val = LCB_CRC_14B;
3785         else if (crc_mask & CAP_CRC_48B)
3786                 crc_val = LCB_CRC_48B;
3787         else if (crc_mask & CAP_CRC_12B_16B_PER_LANE)
3788                 crc_val = LCB_CRC_12B_16B_PER_LANE;
3789         else
3790                 crc_val = LCB_CRC_16B;
3791
3792         dd_dev_info(dd, "Final LCB CRC mode: %d\n", (int)crc_val);
3793         write_csr(dd, DC_LCB_CFG_CRC_MODE,
3794                   (u64)crc_val << DC_LCB_CFG_CRC_MODE_TX_VAL_SHIFT);
3795
3796         /* set (14b only) or clear sideband credit */
3797         reg = read_csr(dd, SEND_CM_CTRL);
3798         if (crc_val == LCB_CRC_14B && crc_14b_sideband) {
3799                 write_csr(dd, SEND_CM_CTRL,
3800                         reg | SEND_CM_CTRL_FORCE_CREDIT_MODE_SMASK);
3801         } else {
3802                 write_csr(dd, SEND_CM_CTRL,
3803                         reg & ~SEND_CM_CTRL_FORCE_CREDIT_MODE_SMASK);
3804         }
3805
3806         ppd->link_speed_active = 0;     /* invalid value */
3807         if (dd->dc8051_ver < dc8051_ver(0, 20)) {
3808                 /* remote_tx_rate: 0 = 12.5G, 1 = 25G */
3809                 switch (remote_tx_rate) {
3810                 case 0:
3811                         ppd->link_speed_active = OPA_LINK_SPEED_12_5G;
3812                         break;
3813                 case 1:
3814                         ppd->link_speed_active = OPA_LINK_SPEED_25G;
3815                         break;
3816                 }
3817         } else {
3818                 /* actual rate is highest bit of the ANDed rates */
3819                 u8 rate = remote_tx_rate & ppd->local_tx_rate;
3820
3821                 if (rate & 2)
3822                         ppd->link_speed_active = OPA_LINK_SPEED_25G;
3823                 else if (rate & 1)
3824                         ppd->link_speed_active = OPA_LINK_SPEED_12_5G;
3825         }
3826         if (ppd->link_speed_active == 0) {
3827                 dd_dev_err(dd, "%s: unexpected remote tx rate %d, using 25Gb\n",
3828                         __func__, (int)remote_tx_rate);
3829                 ppd->link_speed_active = OPA_LINK_SPEED_25G;
3830         }
3831
3832         /*
3833          * Cache the values of the supported, enabled, and active
3834          * LTP CRC modes to return in 'portinfo' queries. But the bit
3835          * flags that are returned in the portinfo query differ from
3836          * what's in the link_crc_mask, crc_sizes, and crc_val
3837          * variables. Convert these here.
3838          */
3839         ppd->port_ltp_crc_mode = cap_to_port_ltp(link_crc_mask) << 8;
3840                 /* supported crc modes */
3841         ppd->port_ltp_crc_mode |=
3842                 cap_to_port_ltp(ppd->port_crc_mode_enabled) << 4;
3843                 /* enabled crc modes */
3844         ppd->port_ltp_crc_mode |= lcb_to_port_ltp(crc_val);
3845                 /* active crc mode */
3846
3847         /* set up the remote credit return table */
3848         assign_remote_cm_au_table(dd, vcu);
3849
3850         /*
3851          * The LCB is reset on entry to handle_verify_cap(), so this must
3852          * be applied on every link up.
3853          *
3854          * Adjust LCB error kill enable to kill the link if
3855          * these RBUF errors are seen:
3856          *      REPLAY_BUF_MBE_SMASK
3857          *      FLIT_INPUT_BUF_MBE_SMASK
3858          */
3859         if (is_a0(dd)) {                        /* fixed in B0 */
3860                 reg = read_csr(dd, DC_LCB_CFG_LINK_KILL_EN);
3861                 reg |= DC_LCB_CFG_LINK_KILL_EN_REPLAY_BUF_MBE_SMASK
3862                         | DC_LCB_CFG_LINK_KILL_EN_FLIT_INPUT_BUF_MBE_SMASK;
3863                 write_csr(dd, DC_LCB_CFG_LINK_KILL_EN, reg);
3864         }
3865
3866         /* pull LCB fifos out of reset - all fifo clocks must be stable */
3867         write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 0);
3868
3869         /* give 8051 access to the LCB CSRs */
3870         write_csr(dd, DC_LCB_ERR_EN, 0); /* mask LCB errors */
3871         set_8051_lcb_access(dd);
3872
3873         ppd->neighbor_guid =
3874                 read_csr(dd, DC_DC8051_STS_REMOTE_GUID);
3875         ppd->neighbor_port_number = read_csr(dd, DC_DC8051_STS_REMOTE_PORT_NO) &
3876                                         DC_DC8051_STS_REMOTE_PORT_NO_VAL_SMASK;
3877         ppd->neighbor_type =
3878                 read_csr(dd, DC_DC8051_STS_REMOTE_NODE_TYPE) &
3879                 DC_DC8051_STS_REMOTE_NODE_TYPE_VAL_MASK;
3880         ppd->neighbor_fm_security =
3881                 read_csr(dd, DC_DC8051_STS_REMOTE_FM_SECURITY) &
3882                 DC_DC8051_STS_LOCAL_FM_SECURITY_DISABLED_MASK;
3883         dd_dev_info(dd,
3884                 "Neighbor Guid: %llx Neighbor type %d MgmtAllowed %d FM security bypass %d\n",
3885                 ppd->neighbor_guid, ppd->neighbor_type,
3886                 ppd->mgmt_allowed, ppd->neighbor_fm_security);
3887         if (ppd->mgmt_allowed)
3888                 add_full_mgmt_pkey(ppd);
3889
3890         /* tell the 8051 to go to LinkUp */
3891         set_link_state(ppd, HLS_GOING_UP);
3892 }
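
/*
 * Illustrative sketch, not used by the driver: how the three LTP CRC
 * mode nibbles built in handle_verify_cap() pack into
 * port_ltp_crc_mode.  The 4-bit field width is an assumption inferred
 * from the shifts used above.
 */
static inline u16 pack_port_ltp_crc_mode(u16 supported, u16 enabled,
                                         u16 active)
{
        return ((supported & 0xf) << 8)         /* supported CRC modes */
                | ((enabled & 0xf) << 4)        /* enabled CRC modes */
                | (active & 0xf);               /* active CRC mode */
}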
3893
3894 /*
3895  * Apply the link width downgrade enabled policy against the current active
3896  * link widths.
3897  *
3898  * Called when the enabled policy changes or the active link widths change.
3899  */
3900 void apply_link_downgrade_policy(struct hfi1_pportdata *ppd, int refresh_widths)
3901 {
3902         int skip = 1;
3903         int do_bounce = 0;
3904         u16 lwde = ppd->link_width_downgrade_enabled;
3905         u16 tx, rx;
3906
3907         mutex_lock(&ppd->hls_lock);
3908         /* only apply if the link is up */
3909         if (ppd->host_link_state & HLS_UP)
3910                 skip = 0;
3911         mutex_unlock(&ppd->hls_lock);
3912         if (skip)
3913                 return;
3914
3915         if (refresh_widths) {
3916                 get_link_widths(ppd->dd, &tx, &rx);
3917                 ppd->link_width_downgrade_tx_active = tx;
3918                 ppd->link_width_downgrade_rx_active = rx;
3919         }
3920
3921         if (lwde == 0) {
3922                 /* downgrade is disabled */
3923
3924                 /* bounce if not at starting active width */
3925                 if ((ppd->link_width_active !=
3926                                         ppd->link_width_downgrade_tx_active)
3927                                 || (ppd->link_width_active !=
3928                                         ppd->link_width_downgrade_rx_active)) {
3929                         dd_dev_err(ppd->dd,
3930                                 "Link downgrade is disabled and link has downgraded, downing link\n");
3931                         dd_dev_err(ppd->dd,
3932                                 "  original 0x%x, tx active 0x%x, rx active 0x%x\n",
3933                                 ppd->link_width_active,
3934                                 ppd->link_width_downgrade_tx_active,
3935                                 ppd->link_width_downgrade_rx_active);
3936                         do_bounce = 1;
3937                 }
3938         } else if ((lwde & ppd->link_width_downgrade_tx_active) == 0
3939                 || (lwde & ppd->link_width_downgrade_rx_active) == 0) {
3940                 /* Tx or Rx is outside the enabled policy */
3941                 dd_dev_err(ppd->dd,
3942                         "Link is outside of downgrade allowed, downing link\n");
3943                 dd_dev_err(ppd->dd,
3944                         "  enabled 0x%x, tx active 0x%x, rx active 0x%x\n",
3945                         lwde,
3946                         ppd->link_width_downgrade_tx_active,
3947                         ppd->link_width_downgrade_rx_active);
3948                 do_bounce = 1;
3949         }
3950
3951         if (do_bounce) {
3952                 set_link_down_reason(ppd, OPA_LINKDOWN_REASON_WIDTH_POLICY, 0,
3953                   OPA_LINKDOWN_REASON_WIDTH_POLICY);
3954                 set_link_state(ppd, HLS_DN_OFFLINE);
3955                 start_link(ppd);
3956         }
3957 }
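
/*
 * Illustrative sketch, not used by the driver: the downgrade policy
 * test above reduces to "stay up only while every active width is
 * still within the enabled mask".  Width values are assumed to be bit
 * masks, as elsewhere in this file.  lwde == 0 (downgrade disabled)
 * is handled separately in apply_link_downgrade_policy().
 */
static inline int widths_within_policy(u16 lwde, u16 tx_active, u16 rx_active)
{
        return (lwde & tx_active) && (lwde & rx_active);
}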
3958
3959 /*
3960  * Handle a link downgrade interrupt from the 8051.
3961  *
3962  * This is a work-queue function outside of the interrupt.
3963  */
3964 void handle_link_downgrade(struct work_struct *work)
3965 {
3966         struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3967                                                         link_downgrade_work);
3968
3969         dd_dev_info(ppd->dd, "8051: Link width downgrade\n");
3970         apply_link_downgrade_policy(ppd, 1);
3971 }
3972
3973 static char *dcc_err_string(char *buf, int buf_len, u64 flags)
3974 {
3975         return flag_string(buf, buf_len, flags, dcc_err_flags,
3976                 ARRAY_SIZE(dcc_err_flags));
3977 }
3978
3979 static char *lcb_err_string(char *buf, int buf_len, u64 flags)
3980 {
3981         return flag_string(buf, buf_len, flags, lcb_err_flags,
3982                 ARRAY_SIZE(lcb_err_flags));
3983 }
3984
3985 static char *dc8051_err_string(char *buf, int buf_len, u64 flags)
3986 {
3987         return flag_string(buf, buf_len, flags, dc8051_err_flags,
3988                 ARRAY_SIZE(dc8051_err_flags));
3989 }
3990
3991 static char *dc8051_info_err_string(char *buf, int buf_len, u64 flags)
3992 {
3993         return flag_string(buf, buf_len, flags, dc8051_info_err_flags,
3994                 ARRAY_SIZE(dc8051_info_err_flags));
3995 }
3996
3997 static char *dc8051_info_host_msg_string(char *buf, int buf_len, u64 flags)
3998 {
3999         return flag_string(buf, buf_len, flags, dc8051_info_host_msg_flags,
4000                 ARRAY_SIZE(dc8051_info_host_msg_flags));
4001 }
4002
4003 static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg)
4004 {
4005         struct hfi1_pportdata *ppd = dd->pport;
4006         u64 info, err, host_msg;
4007         int queue_link_down = 0;
4008         char buf[96];
4009
4010         /* look at the flags */
4011         if (reg & DC_DC8051_ERR_FLG_SET_BY_8051_SMASK) {
4012                 /* 8051 information set by firmware */
4013                 /* read DC8051_DBG_ERR_INFO_SET_BY_8051 for details */
4014                 info = read_csr(dd, DC_DC8051_DBG_ERR_INFO_SET_BY_8051);
4015                 err = (info >> DC_DC8051_DBG_ERR_INFO_SET_BY_8051_ERROR_SHIFT)
4016                         & DC_DC8051_DBG_ERR_INFO_SET_BY_8051_ERROR_MASK;
4017                 host_msg = (info >>
4018                         DC_DC8051_DBG_ERR_INFO_SET_BY_8051_HOST_MSG_SHIFT)
4019                         & DC_DC8051_DBG_ERR_INFO_SET_BY_8051_HOST_MSG_MASK;
4020
4021                 /*
4022                  * Handle error flags.
4023                  */
4024                 if (err & FAILED_LNI) {
4025                         /*
4026                          * LNI error indications are cleared by the 8051
4027                          * only when starting polling.  Only pay attention
4028                          * to them when in the states that occur during
4029                          * LNI.
4030                          */
4031                         if (ppd->host_link_state
4032                             & (HLS_DN_POLL | HLS_VERIFY_CAP | HLS_GOING_UP)) {
4033                                 queue_link_down = 1;
4034                                 dd_dev_info(dd, "Link error: %s\n",
4035                                         dc8051_info_err_string(buf,
4036                                                 sizeof(buf),
4037                                                 err & FAILED_LNI));
4038                         }
4039                         err &= ~(u64)FAILED_LNI;
4040                 }
4041                 if (err) {
4042                         /* report remaining errors, but do not do anything */
4043                         dd_dev_err(dd, "8051 info error: %s\n",
4044                                 dc8051_info_err_string(buf, sizeof(buf), err));
4045                 }
4046
4047                 /*
4048                  * Handle host message flags.
4049                  */
4050                 if (host_msg & HOST_REQ_DONE) {
4051                         /*
4052                          * Presently, the driver does a busy wait for
4053                          * host requests to complete.  This is only an
4054                          * informational message.
4055                          * NOTE: The 8051 clears the host message
4056                          * information *on the next 8051 command*.
4057                          * Therefore, when linkup is achieved,
4058                          * this flag will still be set.
4059                          */
4060                         host_msg &= ~(u64)HOST_REQ_DONE;
4061                 }
4062                 if (host_msg & BC_SMA_MSG) {
4063                         queue_work(ppd->hfi1_wq, &ppd->sma_message_work);
4064                         host_msg &= ~(u64)BC_SMA_MSG;
4065                 }
4066                 if (host_msg & LINKUP_ACHIEVED) {
4067                         dd_dev_info(dd, "8051: Link up\n");
4068                         queue_work(ppd->hfi1_wq, &ppd->link_up_work);
4069                         host_msg &= ~(u64)LINKUP_ACHIEVED;
4070                 }
4071                 if (host_msg & EXT_DEVICE_CFG_REQ) {
4072                         handle_8051_request(dd);
4073                         host_msg &= ~(u64)EXT_DEVICE_CFG_REQ;
4074                 }
4075                 if (host_msg & VERIFY_CAP_FRAME) {
4076                         queue_work(ppd->hfi1_wq, &ppd->link_vc_work);
4077                         host_msg &= ~(u64)VERIFY_CAP_FRAME;
4078                 }
4079                 if (host_msg & LINK_GOING_DOWN) {
4080                         const char *extra = "";
4081                         /* no downgrade action needed if going down */
4082                         if (host_msg & LINK_WIDTH_DOWNGRADED) {
4083                                 host_msg &= ~(u64)LINK_WIDTH_DOWNGRADED;
4084                                 extra = " (ignoring downgrade)";
4085                         }
4086                         dd_dev_info(dd, "8051: Link down%s\n", extra);
4087                         queue_link_down = 1;
4088                         host_msg &= ~(u64)LINK_GOING_DOWN;
4089                 }
4090                 if (host_msg & LINK_WIDTH_DOWNGRADED) {
4091                         queue_work(ppd->hfi1_wq, &ppd->link_downgrade_work);
4092                         host_msg &= ~(u64)LINK_WIDTH_DOWNGRADED;
4093                 }
4094                 if (host_msg) {
4095                         /* report remaining messages, but do not do anything */
4096                         dd_dev_info(dd, "8051 info host message: %s\n",
4097                                 dc8051_info_host_msg_string(buf, sizeof(buf),
4098                                         host_msg));
4099                 }
4100
4101                 reg &= ~DC_DC8051_ERR_FLG_SET_BY_8051_SMASK;
4102         }
4103         if (reg & DC_DC8051_ERR_FLG_LOST_8051_HEART_BEAT_SMASK) {
4104                 /*
4105                  * Lost the 8051 heartbeat.  If this happens, we
4106                  * receive constant interrupts about it.  Disable
4107                  * the interrupt after the first.
4108                  */
4109                 dd_dev_err(dd, "Lost 8051 heartbeat\n");
4110                 write_csr(dd, DC_DC8051_ERR_EN,
4111                         read_csr(dd, DC_DC8051_ERR_EN)
4112                           & ~DC_DC8051_ERR_EN_LOST_8051_HEART_BEAT_SMASK);
4113
4114                 reg &= ~DC_DC8051_ERR_FLG_LOST_8051_HEART_BEAT_SMASK;
4115         }
4116         if (reg) {
4117                 /* report the error, but do not do anything */
4118                 dd_dev_err(dd, "8051 error: %s\n",
4119                         dc8051_err_string(buf, sizeof(buf), reg));
4120         }
4121
4122         if (queue_link_down) {
4123                 /* if the link is already going down or disabled, do not
4124                  * queue another */
4125                 if ((ppd->host_link_state
4126                                     & (HLS_GOING_OFFLINE|HLS_LINK_COOLDOWN))
4127                                 || ppd->link_enabled == 0) {
4128                         dd_dev_info(dd, "%s: not queuing link down\n",
4129                                 __func__);
4130                 } else {
4131                         queue_work(ppd->hfi1_wq, &ppd->link_down_work);
4132                 }
4133         }
4134 }
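
/*
 * Illustrative sketch, not used by the driver: the flag-consumption
 * pattern used throughout handle_8051_interrupt() - act on each known
 * bit, clear it from the working copy, then report whatever remains
 * as unhandled.  known_flag stands in for any of the host message or
 * error bits.
 */
static inline void consume_flags_sketch(struct hfi1_devdata *dd, u64 flags,
                                        u64 known_flag)
{
        if (flags & known_flag) {
                /* ...act on the flag here... */
                flags &= ~known_flag;   /* mark it handled */
        }
        if (flags)      /* anything left over is unexpected */
                dd_dev_info(dd, "unhandled flags 0x%llx\n", flags);
}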
4135
4136 static const char * const fm_config_txt[] = {
4137 [0] =
4138         "BadHeadDist: Distance violation between two head flits",
4139 [1] =
4140         "BadTailDist: Distance violation between two tail flits",
4141 [2] =
4142         "BadCtrlDist: Distance violation between two credit control flits",
4143 [3] =
4144         "BadCrdAck: Credit return for unsupported VL",
4145 [4] =
4146         "UnsupportedVLMarker: Received VL Marker",
4147 [5] =
4148         "BadPreempt: Exceeded the preemption nesting level",
4149 [6] =
4150         "BadControlFlit: Received unsupported control flit",
4151 /* no 7 */
4152 [8] =
4153         "UnsupportedVLMarker: Received VL Marker for unconfigured or disabled VL",
4154 };
4155
4156 static const char * const port_rcv_txt[] = {
4157 [1] =
4158         "BadPktLen: Illegal PktLen",
4159 [2] =
4160         "PktLenTooLong: Packet longer than PktLen",
4161 [3] =
4162         "PktLenTooShort: Packet shorter than PktLen",
4163 [4] =
4164         "BadSLID: Illegal SLID (0, using multicast as SLID, does not include security validation of SLID)",
4165 [5] =
4166         "BadDLID: Illegal DLID (0, doesn't match HFI)",
4167 [6] =
4168         "BadL2: Illegal L2 opcode",
4169 [7] =
4170         "BadSC: Unsupported SC",
4171 [9] =
4172         "BadRC: Illegal RC",
4173 [11] =
4174         "PreemptError: Preempting with same VL",
4175 [12] =
4176         "PreemptVL15: Preempting a VL15 packet",
4177 };
4178
4179 #define OPA_LDR_FMCONFIG_OFFSET 16
4180 #define OPA_LDR_PORTRCV_OFFSET 0
4181 static void handle_dcc_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
4182 {
4183         u64 info, hdr0, hdr1;
4184         const char *extra;
4185         char buf[96];
4186         struct hfi1_pportdata *ppd = dd->pport;
4187         u8 lcl_reason = 0;
4188         int do_bounce = 0;
4189
4190         if (reg & DCC_ERR_FLG_UNCORRECTABLE_ERR_SMASK) {
4191                 if (!(dd->err_info_uncorrectable & OPA_EI_STATUS_SMASK)) {
4192                         info = read_csr(dd, DCC_ERR_INFO_UNCORRECTABLE);
4193                         dd->err_info_uncorrectable = info & OPA_EI_CODE_SMASK;
4194                         /* set status bit */
4195                         dd->err_info_uncorrectable |= OPA_EI_STATUS_SMASK;
4196                 }
4197                 reg &= ~DCC_ERR_FLG_UNCORRECTABLE_ERR_SMASK;
4198         }
4199
4200         if (reg & DCC_ERR_FLG_LINK_ERR_SMASK) {
4202                 /* this counter saturates at (2^32) - 1 */
4203                 if (ppd->link_downed < (u32)UINT_MAX)
4204                         ppd->link_downed++;
4205                 reg &= ~DCC_ERR_FLG_LINK_ERR_SMASK;
4206         }
4207
4208         if (reg & DCC_ERR_FLG_FMCONFIG_ERR_SMASK) {
4209                 u8 reason_valid = 1;
4210
4211                 info = read_csr(dd, DCC_ERR_INFO_FMCONFIG);
4212                 if (!(dd->err_info_fmconfig & OPA_EI_STATUS_SMASK)) {
4213                         dd->err_info_fmconfig = info & OPA_EI_CODE_SMASK;
4214                         /* set status bit */
4215                         dd->err_info_fmconfig |= OPA_EI_STATUS_SMASK;
4216                 }
4217                 switch (info) {
4218                 case 0:
4219                 case 1:
4220                 case 2:
4221                 case 3:
4222                 case 4:
4223                 case 5:
4224                 case 6:
4225                         extra = fm_config_txt[info];
4226                         break;
4227                 case 8:
4228                         extra = fm_config_txt[info];
4229                         if (ppd->port_error_action &
4230                             OPA_PI_MASK_FM_CFG_UNSUPPORTED_VL_MARKER) {
4231                                 do_bounce = 1;
4232                                 /*
4233                                  * lcl_reason cannot be derived from info
4234                                  * for this error
4235                                  */
4236                                 lcl_reason =
4237                                   OPA_LINKDOWN_REASON_UNSUPPORTED_VL_MARKER;
4238                         }
4239                         break;
4240                 default:
4241                         reason_valid = 0;
4242                         snprintf(buf, sizeof(buf), "reserved%llu", info);
4243                         extra = buf;
4244                         break;
4245                 }
4246
4247                 if (reason_valid && !do_bounce) {
4248                         do_bounce = ppd->port_error_action &
4249                                         (1 << (OPA_LDR_FMCONFIG_OFFSET + info));
4250                         lcl_reason = info + OPA_LINKDOWN_REASON_BAD_HEAD_DIST;
4251                 }
4252
4253                 /* just report this */
4254                 dd_dev_info(dd, "DCC Error: fmconfig error: %s\n", extra);
4255                 reg &= ~DCC_ERR_FLG_FMCONFIG_ERR_SMASK;
4256         }
4257
4258         if (reg & DCC_ERR_FLG_RCVPORT_ERR_SMASK) {
4259                 u8 reason_valid = 1;
4260
4261                 info = read_csr(dd, DCC_ERR_INFO_PORTRCV);
4262                 hdr0 = read_csr(dd, DCC_ERR_INFO_PORTRCV_HDR0);
4263                 hdr1 = read_csr(dd, DCC_ERR_INFO_PORTRCV_HDR1);
4264                 if (!(dd->err_info_rcvport.status_and_code &
4265                       OPA_EI_STATUS_SMASK)) {
4266                         dd->err_info_rcvport.status_and_code =
4267                                 info & OPA_EI_CODE_SMASK;
4268                         /* set status bit */
4269                         dd->err_info_rcvport.status_and_code |=
4270                                 OPA_EI_STATUS_SMASK;
4271                         /* save first 2 flits in the packet that caused
4272                          * the error */
4273                         dd->err_info_rcvport.packet_flit1 = hdr0;
4274                         dd->err_info_rcvport.packet_flit2 = hdr1;
4275                 }
4276                 switch (info) {
4277                 case 1:
4278                 case 2:
4279                 case 3:
4280                 case 4:
4281                 case 5:
4282                 case 6:
4283                 case 7:
4284                 case 9:
4285                 case 11:
4286                 case 12:
4287                         extra = port_rcv_txt[info];
4288                         break;
4289                 default:
4290                         reason_valid = 0;
4291                         snprintf(buf, sizeof(buf), "reserved%llu", info);
4292                         extra = buf;
4293                         break;
4294                 }
4295
4296                 if (reason_valid && !do_bounce) {
4297                         do_bounce = ppd->port_error_action &
4298                                         (1 << (OPA_LDR_PORTRCV_OFFSET + info));
4299                         lcl_reason = info + OPA_LINKDOWN_REASON_RCV_ERROR_0;
4300                 }
4301
4302                 /* just report this */
4303                 dd_dev_info(dd, "DCC Error: PortRcv error: %s\n", extra);
4304                 dd_dev_info(dd, "           hdr0 0x%llx, hdr1 0x%llx\n",
4305                         hdr0, hdr1);
4306
4307                 reg &= ~DCC_ERR_FLG_RCVPORT_ERR_SMASK;
4308         }
4309
4310         if (reg & DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_UC_SMASK) {
4311                 /* informative only */
4312                 dd_dev_info(dd, "8051 access to LCB blocked\n");
4313                 reg &= ~DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_UC_SMASK;
4314         }
4315         if (reg & DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_HOST_SMASK) {
4316                 /* informative only */
4317                 dd_dev_info(dd, "host access to LCB blocked\n");
4318                 reg &= ~DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_HOST_SMASK;
4319         }
4320
4321         /* report any remaining errors */
4322         if (reg)
4323                 dd_dev_info(dd, "DCC Error: %s\n",
4324                         dcc_err_string(buf, sizeof(buf), reg));
4325
4326         if (lcl_reason == 0)
4327                 lcl_reason = OPA_LINKDOWN_REASON_UNKNOWN;
4328
4329         if (do_bounce) {
4330                 dd_dev_info(dd, "%s: PortErrorAction bounce\n", __func__);
4331                 set_link_down_reason(ppd, lcl_reason, 0, lcl_reason);
4332                 queue_work(ppd->hfi1_wq, &ppd->link_bounce_work);
4333         }
4334 }
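
/*
 * Illustrative sketch, not used by the driver: how the bounce decision
 * above maps an error info code to its PortErrorAction enable bit.
 * offset is OPA_LDR_FMCONFIG_OFFSET or OPA_LDR_PORTRCV_OFFSET,
 * depending on the error class.
 */
static inline int port_error_action_bounce(u32 port_error_action, u32 offset,
                                           u64 info)
{
        return !!(port_error_action & (1 << (offset + info)));
}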
4335
4336 static void handle_lcb_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
4337 {
4338         char buf[96];
4339
4340         dd_dev_info(dd, "LCB Error: %s\n",
4341                 lcb_err_string(buf, sizeof(buf), reg));
4342 }
4343
4344 /*
4345  * CCE block DC interrupt.  Source is < 8.
4346  */
4347 static void is_dc_int(struct hfi1_devdata *dd, unsigned int source)
4348 {
4349         const struct err_reg_info *eri = &dc_errs[source];
4350
4351         if (eri->handler) {
4352                 interrupt_clear_down(dd, 0, eri);
4353         } else if (source == 3 /* dc_lbm_int */) {
4354                 /*
4355                  * This indicates that a parity error has occurred on the
4356                  * address/control lines presented to the LBM.  The error
4357                  * is a single pulse, there is no associated error flag,
4358                  * and it is non-maskable.  This is because if a parity
4359                  * error occurs on the request the request is dropped.
4360                  * This should never occur, but it is nice to know if it
4361                  * ever does.
4362                  */
4363                 dd_dev_err(dd, "Parity error in DC LBM block\n");
4364         } else {
4365                 dd_dev_err(dd, "Invalid DC interrupt %u\n", source);
4366         }
4367 }
4368
4369 /*
4370  * TX block send credit interrupt.  Source is < 160.
4371  */
4372 static void is_send_credit_int(struct hfi1_devdata *dd, unsigned int source)
4373 {
4374         sc_group_release_update(dd, source);
4375 }
4376
4377 /*
4378  * TX block SDMA interrupt.  Source is < 48.
4379  *
4380  * SDMA interrupts are grouped by type:
4381  *
4382  *       0 -  N-1 = SDma
4383  *       N - 2N-1 = SDmaProgress
4384  *      2N - 3N-1 = SDmaIdle
4385  */
4386 static void is_sdma_eng_int(struct hfi1_devdata *dd, unsigned int source)
4387 {
4388         /* what interrupt */
4389         unsigned int what  = source / TXE_NUM_SDMA_ENGINES;
4390         /* which engine */
4391         unsigned int which = source % TXE_NUM_SDMA_ENGINES;
4392
4393 #ifdef CONFIG_SDMA_VERBOSITY
4394         dd_dev_err(dd, "CONFIG SDMA(%u) %s:%d %s()\n", which,
4395                    slashstrip(__FILE__), __LINE__, __func__);
4396         sdma_dumpstate(&dd->per_sdma[which]);
4397 #endif
4398
4399         if (likely(what < 3 && which < dd->num_sdma)) {
4400                 sdma_engine_interrupt(&dd->per_sdma[which], 1ull << source);
4401         } else {
4402                 /* should not happen */
4403                 dd_dev_err(dd, "Invalid SDMA interrupt 0x%x\n", source);
4404         }
4405 }
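
/*
 * Illustrative sketch, not used by the driver: the source decode used
 * above.  With N = TXE_NUM_SDMA_ENGINES, sources 0..N-1 are SDma,
 * N..2N-1 are SDmaProgress, and 2N..3N-1 are SDmaIdle, so the
 * interrupt type and engine fall out of a divide and a modulo.
 */
static inline void decode_sdma_source(unsigned int source, unsigned int *what,
                                      unsigned int *which)
{
        *what = source / TXE_NUM_SDMA_ENGINES;  /* 0 SDma, 1 Progress, 2 Idle */
        *which = source % TXE_NUM_SDMA_ENGINES; /* engine index */
}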
4406
4407 /*
4408  * RX block receive available interrupt.  Source is < 160.
4409  */
4410 static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source)
4411 {
4412         struct hfi1_ctxtdata *rcd;
4413         const char *err_detail;
4414
4415         if (likely(source < dd->num_rcv_contexts)) {
4416                 rcd = dd->rcd[source];
4417                 if (rcd) {
4418                         if (source < dd->first_user_ctxt)
4419                                 rcd->do_interrupt(rcd);
4420                         else
4421                                 handle_user_interrupt(rcd);
4422                         return; /* OK */
4423                 }
4424                 /* received an interrupt, but no rcd */
4425                 err_detail = "dataless";
4426         } else {
4427                 /* received an interrupt, but are not using that context */
4428                 err_detail = "out of range";
4429         }
4430         dd_dev_err(dd, "unexpected %s receive available context interrupt %u\n",
4431                 err_detail, source);
4432 }
4433
4434 /*
4435  * RX block receive urgent interrupt.  Source is < 160.
4436  */
4437 static void is_rcv_urgent_int(struct hfi1_devdata *dd, unsigned int source)
4438 {
4439         struct hfi1_ctxtdata *rcd;
4440         const char *err_detail;
4441
4442         if (likely(source < dd->num_rcv_contexts)) {
4443                 rcd = dd->rcd[source];
4444                 if (rcd) {
4445                         /* only pay attention to user urgent interrupts */
4446                         if (source >= dd->first_user_ctxt)
4447                                 handle_user_interrupt(rcd);
4448                         return; /* OK */
4449                 }
4450                 /* received an interrupt, but no rcd */
4451                 err_detail = "dataless";
4452         } else {
4453                 /* received an interrupt, but are not using that context */
4454                 err_detail = "out of range";
4455         }
4456         dd_dev_err(dd, "unexpected %s receive urgent context interrupt %u\n",
4457                 err_detail, source);
4458 }
4459
4460 /*
4461  * Reserved range interrupt.  Should not be called in normal operation.
4462  */
4463 static void is_reserved_int(struct hfi1_devdata *dd, unsigned int source)
4464 {
4465         char name[64];
4466
4467         dd_dev_err(dd, "unexpected %s interrupt\n",
4468                                 is_reserved_name(name, sizeof(name), source));
4469 }
4470
4471 static const struct is_table is_table[] = {
4472 /* start                     end
4473                                 name func               interrupt func */
4474 { IS_GENERAL_ERR_START,  IS_GENERAL_ERR_END,
4475                                 is_misc_err_name,       is_misc_err_int },
4476 { IS_SDMAENG_ERR_START,  IS_SDMAENG_ERR_END,
4477                                 is_sdma_eng_err_name,   is_sdma_eng_err_int },
4478 { IS_SENDCTXT_ERR_START, IS_SENDCTXT_ERR_END,
4479                                 is_sendctxt_err_name,   is_sendctxt_err_int },
4480 { IS_SDMA_START,             IS_SDMA_END,
4481                                 is_sdma_eng_name,       is_sdma_eng_int },
4482 { IS_VARIOUS_START,          IS_VARIOUS_END,
4483                                 is_various_name,        is_various_int },
4484 { IS_DC_START,       IS_DC_END,
4485                                 is_dc_name,             is_dc_int },
4486 { IS_RCVAVAIL_START,     IS_RCVAVAIL_END,
4487                                 is_rcv_avail_name,      is_rcv_avail_int },
4488 { IS_RCVURGENT_START,    IS_RCVURGENT_END,
4489                                 is_rcv_urgent_name,     is_rcv_urgent_int },
4490 { IS_SENDCREDIT_START,   IS_SENDCREDIT_END,
4491                                 is_send_credit_name,    is_send_credit_int},
4492 { IS_RESERVED_START,     IS_RESERVED_END,
4493                                 is_reserved_name,       is_reserved_int},
4494 };
4495
4496 /*
4497  * Interrupt source interrupt - called when the given source has an interrupt.
4498  * Source is a bit index into an array of 64-bit integers.
4499  */
4500 static void is_interrupt(struct hfi1_devdata *dd, unsigned int source)
4501 {
4502         const struct is_table *entry;
4503
4504         /* avoids a double compare by walking the table in-order */
4505         for (entry = &is_table[0]; entry->is_name; entry++) {
4506                 if (source < entry->end) {
4507                         trace_hfi1_interrupt(dd, entry, source);
4508                         entry->is_int(dd, source - entry->start);
4509                         return;
4510                 }
4511         }
4512         /* fell off the end */
4513         dd_dev_err(dd, "invalid interrupt source %u\n", source);
4514 }
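
/*
 * Illustrative sketch, not used by the driver: because is_table[] is
 * ordered by range, a single "source < entry->end" compare per entry
 * is enough - reaching entry N means source was >= every earlier end,
 * so the lower bound never needs to be checked.
 */
static inline const struct is_table *find_is_entry(unsigned int source)
{
        const struct is_table *entry;

        for (entry = &is_table[0]; entry->is_name; entry++)
                if (source < entry->end)
                        return entry;   /* start <= source < end by ordering */
        return NULL;                    /* past the last range */
}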
4515
4516 /*
4517  * General interrupt handler.  This is able to correctly handle
4518  * all interrupts in case INTx is used.
4519  */
4520 static irqreturn_t general_interrupt(int irq, void *data)
4521 {
4522         struct hfi1_devdata *dd = data;
4523         u64 regs[CCE_NUM_INT_CSRS];
4524         u32 bit;
4525         int i;
4526
4527         this_cpu_inc(*dd->int_counter);
4528
4529         /* phase 1: scan and clear all handled interrupts */
4530         for (i = 0; i < CCE_NUM_INT_CSRS; i++) {
4531                 if (dd->gi_mask[i] == 0) {
4532                         regs[i] = 0;    /* used later */
4533                         continue;
4534                 }
4535                 regs[i] = read_csr(dd, CCE_INT_STATUS + (8 * i)) &
4536                                 dd->gi_mask[i];
4537                 /* only clear if anything is set */
4538                 if (regs[i])
4539                         write_csr(dd, CCE_INT_CLEAR + (8 * i), regs[i]);
4540         }
4541
4542         /* phase 2: call the appropriate handler */
4543         for_each_set_bit(bit, (unsigned long *)&regs[0],
4544                                                 CCE_NUM_INT_CSRS * 64) {
4545                 is_interrupt(dd, bit);
4546         }
4547
4548         return IRQ_HANDLED;
4549 }
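
/*
 * Illustrative sketch, not used by the driver: the bit index handed to
 * is_interrupt() above spans all of the CCE interrupt CSRs, so mapping
 * a bit back to its CSR and position is a divide and a modulo by 64,
 * mirroring how the status/clear CSRs are addressed in the scan loop.
 */
static inline void int_bit_to_csr(u32 bit, u32 *csr_index, u32 *csr_bit)
{
        *csr_index = bit / 64;  /* which CCE_INT_STATUS/CLEAR CSR */
        *csr_bit = bit % 64;    /* bit position within that CSR */
}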
4550
4551 static irqreturn_t sdma_interrupt(int irq, void *data)
4552 {
4553         struct sdma_engine *sde = data;
4554         struct hfi1_devdata *dd = sde->dd;
4555         u64 status;
4556
4557 #ifdef CONFIG_SDMA_VERBOSITY
4558         dd_dev_err(dd, "CONFIG SDMA(%u) %s:%d %s()\n", sde->this_idx,
4559                    slashstrip(__FILE__), __LINE__, __func__);
4560         sdma_dumpstate(sde);
4561 #endif
4562
4563         this_cpu_inc(*dd->int_counter);
4564
4565         /* This read_csr is really bad in the hot path */
4566         status = read_csr(dd,
4567                         CCE_INT_STATUS + (8*(IS_SDMA_START/64)))
4568                         & sde->imask;
4569         if (likely(status)) {
4570                 /* clear the interrupt(s) */
4571                 write_csr(dd,
4572                         CCE_INT_CLEAR + (8*(IS_SDMA_START/64)),
4573                         status);
4574
4575                 /* handle the interrupt(s) */
4576                 sdma_engine_interrupt(sde, status);
4577         } else
4578                 dd_dev_err(dd, "SDMA engine %u interrupt, but no status bits set\n",
4579                         sde->this_idx);
4580
4581         return IRQ_HANDLED;
4582 }
4583
4584 /*
4585  * NOTE: this routine expects to be on its own MSI-X interrupt.  If
4586  * multiple receive contexts share the same MSI-X interrupt, then this
4587  * routine must check for who received it.
4588  */
4589 static irqreturn_t receive_context_interrupt(int irq, void *data)
4590 {
4591         struct hfi1_ctxtdata *rcd = data;
4592         struct hfi1_devdata *dd = rcd->dd;
4593
4594         trace_hfi1_receive_interrupt(dd, rcd->ctxt);
4595         this_cpu_inc(*dd->int_counter);
4596
4597         /* clear the interrupt */
4598         write_csr(rcd->dd, CCE_INT_CLEAR + (8*rcd->ireg), rcd->imask);
4599
4600         /* handle the interrupt */
4601         rcd->do_interrupt(rcd);
4602
4603         return IRQ_HANDLED;
4604 }
4605
4606 /* ========================================================================= */
4607
4608 u32 read_physical_state(struct hfi1_devdata *dd)
4609 {
4610         u64 reg;
4611
4612         reg = read_csr(dd, DC_DC8051_STS_CUR_STATE);
4613         return (reg >> DC_DC8051_STS_CUR_STATE_PORT_SHIFT)
4614                                 & DC_DC8051_STS_CUR_STATE_PORT_MASK;
4615 }
4616
4617 static u32 read_logical_state(struct hfi1_devdata *dd)
4618 {
4619         u64 reg;
4620
4621         reg = read_csr(dd, DCC_CFG_PORT_CONFIG);
4622         return (reg >> DCC_CFG_PORT_CONFIG_LINK_STATE_SHIFT)
4623                                 & DCC_CFG_PORT_CONFIG_LINK_STATE_MASK;
4624 }
4625
4626 static void set_logical_state(struct hfi1_devdata *dd, u32 chip_lstate)
4627 {
4628         u64 reg;
4629
4630         reg = read_csr(dd, DCC_CFG_PORT_CONFIG);
4631         /* clear current state, set new state */
4632         reg &= ~DCC_CFG_PORT_CONFIG_LINK_STATE_SMASK;
4633         reg |= (u64)chip_lstate << DCC_CFG_PORT_CONFIG_LINK_STATE_SHIFT;
4634         write_csr(dd, DCC_CFG_PORT_CONFIG, reg);
4635 }
4636
4637 /*
4638  * Use the 8051 to read a LCB CSR.
4639  */
4640 static int read_lcb_via_8051(struct hfi1_devdata *dd, u32 addr, u64 *data)
4641 {
4642         u32 regno;
4643         int ret;
4644
4645         if (dd->icode == ICODE_FUNCTIONAL_SIMULATOR) {
4646                 if (acquire_lcb_access(dd, 0) == 0) {
4647                         *data = read_csr(dd, addr);
4648                         release_lcb_access(dd, 0);
4649                         return 0;
4650                 }
4651                 return -EBUSY;
4652         }
4653
4654         /* register is an index of LCB registers: (offset - base) / 8 */
4655         regno = (addr - DC_LCB_CFG_RUN) >> 3;
4656         ret = do_8051_command(dd, HCMD_READ_LCB_CSR, regno, data);
4657         if (ret != HCMD_SUCCESS)
4658                 return -EBUSY;
4659         return 0;
4660 }
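
/*
 * Illustrative sketch, not used by the driver: the LCB register index
 * computation used above.  LCB CSRs are 8 bytes apart and
 * DC_LCB_CFG_RUN is treated as the base of the range, so the 8051
 * command takes (offset - base) / 8.
 */
static inline u32 lcb_addr_to_regno(u32 addr)
{
        return (addr - DC_LCB_CFG_RUN) >> 3;    /* CSR stride of 8 bytes */
}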
4661
4662 /*
4663  * Read an LCB CSR.  Access may not be in host control, so check.
4664  * Return 0 on success, -EBUSY on failure.
4665  */
4666 int read_lcb_csr(struct hfi1_devdata *dd, u32 addr, u64 *data)
4667 {
4668         struct hfi1_pportdata *ppd = dd->pport;
4669
4670         /* if up, go through the 8051 for the value */
4671         if (ppd->host_link_state & HLS_UP)
4672                 return read_lcb_via_8051(dd, addr, data);
4673         /* if going up or down, no access */
4674         if (ppd->host_link_state & (HLS_GOING_UP | HLS_GOING_OFFLINE))
4675                 return -EBUSY;
4676         /* otherwise, host has access */
4677         *data = read_csr(dd, addr);
4678         return 0;
4679 }
4680
4681 /*
4682  * Use the 8051 to write a LCB CSR.
4683  */
4684 static int write_lcb_via_8051(struct hfi1_devdata *dd, u32 addr, u64 data)
4685 {
4686
4687         if (acquire_lcb_access(dd, 0) == 0) {
4688                 write_csr(dd, addr, data);
4689                 release_lcb_access(dd, 0);
4690                 return 0;
4691         }
4692         return -EBUSY;
4693 }
4694
4695 /*
4696  * Write an LCB CSR.  Access may not be in host control, so check.
4697  * Return 0 on success, -EBUSY on failure.
4698  */
4699 int write_lcb_csr(struct hfi1_devdata *dd, u32 addr, u64 data)
4700 {
4701         struct hfi1_pportdata *ppd = dd->pport;
4702
4703         /* if up, go through the 8051 for the value */
4704         if (ppd->host_link_state & HLS_UP)
4705                 return write_lcb_via_8051(dd, addr, data);
4706         /* if going up or down, no access */
4707         if (ppd->host_link_state & (HLS_GOING_UP | HLS_GOING_OFFLINE))
4708                 return -EBUSY;
4709         /* otherwise, host has access */
4710         write_csr(dd, addr, data);
4711         return 0;
4712 }
4713
4714 /*
4715  * Returns:
4716  *      < 0 = Linux error, not able to get access
4717  *      > 0 = 8051 command RETURN_CODE
4718  */
4719 static int do_8051_command(
4720         struct hfi1_devdata *dd,
4721         u32 type,
4722         u64 in_data,
4723         u64 *out_data)
4724 {
4725         u64 reg, completed;
4726         int return_code;
4727         unsigned long flags;
4728         unsigned long timeout;
4729
4730         hfi1_cdbg(DC8051, "type %d, data 0x%012llx", type, in_data);
4731
4732         /*
4733          * Alternative to holding the lock for a long time:
4734          * - keep busy wait - have other users bounce off
4735          */
4736         spin_lock_irqsave(&dd->dc8051_lock, flags);
4737
4738         /* We can't send any commands to the 8051 if it's in reset */
4739         if (dd->dc_shutdown) {
4740                 return_code = -ENODEV;
4741                 goto fail;
4742         }
4743
4744         /*
4745          * If an 8051 host command timed out previously, then the 8051 is
4746          * stuck.
4747          *
4748          * On first timeout, attempt to reset and restart the entire DC
4749          * block (including 8051). (Is this too big of a hammer?)
4750          *
4751          * If the 8051 times out a second time, the reset did not bring it
4752          * back to healthy life. In that case, fail any subsequent commands.
4753          */
4754         if (dd->dc8051_timed_out) {
4755                 if (dd->dc8051_timed_out > 1) {
4756                         dd_dev_err(dd,
4757                                    "Previous 8051 host command timed out, skipping command %u\n",
4758                                    type);
4759                         return_code = -ENXIO;
4760                         goto fail;
4761                 }
4762                 spin_unlock_irqrestore(&dd->dc8051_lock, flags);
4763                 dc_shutdown(dd);
4764                 dc_start(dd);
4765                 spin_lock_irqsave(&dd->dc8051_lock, flags);
4766         }
4767
4768         /*
4769          * If there is no timeout, then the 8051 command interface is
4770          * waiting for a command.
4771          */
4772
4773         /*
4774          * Do two writes: the first to stabilize the type and req_data, the
4775          * second to activate.
4776          */
4777         reg = ((u64)type & DC_DC8051_CFG_HOST_CMD_0_REQ_TYPE_MASK)
4778                         << DC_DC8051_CFG_HOST_CMD_0_REQ_TYPE_SHIFT
4779                 | (in_data & DC_DC8051_CFG_HOST_CMD_0_REQ_DATA_MASK)
4780                         << DC_DC8051_CFG_HOST_CMD_0_REQ_DATA_SHIFT;
4781         write_csr(dd, DC_DC8051_CFG_HOST_CMD_0, reg);
4782         reg |= DC_DC8051_CFG_HOST_CMD_0_REQ_NEW_SMASK;
4783         write_csr(dd, DC_DC8051_CFG_HOST_CMD_0, reg);
4784
4785         /* wait for completion, alternate: interrupt */
4786         timeout = jiffies + msecs_to_jiffies(DC8051_COMMAND_TIMEOUT);
4787         while (1) {
4788                 reg = read_csr(dd, DC_DC8051_CFG_HOST_CMD_1);
4789                 completed = reg & DC_DC8051_CFG_HOST_CMD_1_COMPLETED_SMASK;
4790                 if (completed)
4791                         break;
4792                 if (time_after(jiffies, timeout)) {
4793                         dd->dc8051_timed_out++;
4794                         dd_dev_err(dd, "8051 host command %u timeout\n", type);
4795                         if (out_data)
4796                                 *out_data = 0;
4797                         return_code = -ETIMEDOUT;
4798                         goto fail;
4799                 }
4800                 udelay(2);
4801         }
4802
4803         if (out_data) {
4804                 *out_data = (reg >> DC_DC8051_CFG_HOST_CMD_1_RSP_DATA_SHIFT)
4805                                 & DC_DC8051_CFG_HOST_CMD_1_RSP_DATA_MASK;
4806                 if (type == HCMD_READ_LCB_CSR) {
4807                         /* top 16 bits are in a different register */
4808                         *out_data |= (read_csr(dd, DC_DC8051_CFG_EXT_DEV_1)
4809                                 & DC_DC8051_CFG_EXT_DEV_1_REQ_DATA_SMASK)
4810                                 << (48
4811                                     - DC_DC8051_CFG_EXT_DEV_1_REQ_DATA_SHIFT);
4812                 }
4813         }
4814         return_code = (reg >> DC_DC8051_CFG_HOST_CMD_1_RETURN_CODE_SHIFT)
4815                                 & DC_DC8051_CFG_HOST_CMD_1_RETURN_CODE_MASK;
4816         dd->dc8051_timed_out = 0;
4817         /*
4818          * Clear command for next user.
4819          */
4820         write_csr(dd, DC_DC8051_CFG_HOST_CMD_0, 0);
4821
4822 fail:
4823         spin_unlock_irqrestore(&dd->dc8051_lock, flags);
4824
4825         return return_code;
4826 }
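
/*
 * Illustrative sketch, not used by the driver: do_8051_command() writes
 * HOST_CMD_0 twice - once without and once with REQ_NEW - so the type
 * and data lines are stable before the "new request" bit is raised.
 * This is the packing step, using the same field macros.
 */
static inline u64 build_host_cmd_0(u32 type, u64 in_data)
{
        return ((u64)type & DC_DC8051_CFG_HOST_CMD_0_REQ_TYPE_MASK)
                        << DC_DC8051_CFG_HOST_CMD_0_REQ_TYPE_SHIFT
                | (in_data & DC_DC8051_CFG_HOST_CMD_0_REQ_DATA_MASK)
                        << DC_DC8051_CFG_HOST_CMD_0_REQ_DATA_SHIFT;
}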
4827
4828 static int set_physical_link_state(struct hfi1_devdata *dd, u64 state)
4829 {
4830         return do_8051_command(dd, HCMD_CHANGE_PHY_STATE, state, NULL);
4831 }
4832
4833 static int load_8051_config(struct hfi1_devdata *dd, u8 field_id,
4834                             u8 lane_id, u32 config_data)
4835 {
4836         u64 data;
4837         int ret;
4838
4839         data = (u64)field_id << LOAD_DATA_FIELD_ID_SHIFT
4840                 | (u64)lane_id << LOAD_DATA_LANE_ID_SHIFT
4841                 | (u64)config_data << LOAD_DATA_DATA_SHIFT;
4842         ret = do_8051_command(dd, HCMD_LOAD_CONFIG_DATA, data, NULL);
4843         if (ret != HCMD_SUCCESS) {
4844                 dd_dev_err(dd,
4845                         "load 8051 config: field id %d, lane %d, err %d\n",
4846                         (int)field_id, (int)lane_id, ret);
4847         }
4848         return ret;
4849 }
4850
4851 /*
4852  * Read the 8051 firmware "registers".  Use the RAM directly.  Always
4853  * set the result, even on error.
4854  * Return 0 on success, -errno on failure
4855  */
4856 static int read_8051_config(struct hfi1_devdata *dd, u8 field_id, u8 lane_id,
4857                             u32 *result)
4858 {
4859         u64 big_data;
4860         u32 addr;
4861         int ret;
4862
4863         /* address start depends on the lane_id */
4864         if (lane_id < 4)
4865                 addr = (4 * NUM_GENERAL_FIELDS)
4866                         + (lane_id * 4 * NUM_LANE_FIELDS);
4867         else
4868                 addr = 0;
4869         addr += field_id * 4;
4870
4871         /* read is in 8-byte chunks, hardware will truncate the address down */
4872         ret = read_8051_data(dd, addr, 8, &big_data);
4873
4874         if (ret == 0) {
4875                 /* extract the 4 bytes we want */
4876                 if (addr & 0x4)
4877                         *result = (u32)(big_data >> 32);
4878                 else
4879                         *result = (u32)big_data;
4880         } else {
4881                 *result = 0;
4882                 dd_dev_err(dd, "%s: direct read failed, lane %d, field %d!\n",
4883                         __func__, lane_id, field_id);
4884         }
4885
4886         return ret;
4887 }
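
/*
 * Illustrative sketch, not used by the driver: the 8051 RAM address
 * computation used above.  Fields are 4 bytes wide; the general fields
 * sit at the start of RAM and each lane's block of NUM_LANE_FIELDS
 * fields follows them.  A lane_id >= 4 selects the general block.
 */
static inline u32 config_field_addr(u8 field_id, u8 lane_id)
{
        u32 addr = 0;   /* general fields start at the base of RAM */

        if (lane_id < 4)        /* per-lane fields follow the general block */
                addr = (4 * NUM_GENERAL_FIELDS)
                        + (lane_id * 4 * NUM_LANE_FIELDS);
        return addr + field_id * 4;
}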
4888
4889 static int write_vc_local_phy(struct hfi1_devdata *dd, u8 power_management,
4890                               u8 continuous)
4891 {
4892         u32 frame;
4893
4894         frame = continuous << CONTINIOUS_REMOTE_UPDATE_SUPPORT_SHIFT
4895                 | power_management << POWER_MANAGEMENT_SHIFT;
4896         return load_8051_config(dd, VERIFY_CAP_LOCAL_PHY,
4897                                 GENERAL_CONFIG, frame);
4898 }
4899
4900 static int write_vc_local_fabric(struct hfi1_devdata *dd, u8 vau, u8 z, u8 vcu,
4901                                  u16 vl15buf, u8 crc_sizes)
4902 {
4903         u32 frame;
4904
4905         frame = (u32)vau << VAU_SHIFT
4906                 | (u32)z << Z_SHIFT
4907                 | (u32)vcu << VCU_SHIFT
4908                 | (u32)vl15buf << VL15BUF_SHIFT
4909                 | (u32)crc_sizes << CRC_SIZES_SHIFT;
4910         return load_8051_config(dd, VERIFY_CAP_LOCAL_FABRIC,
4911                                 GENERAL_CONFIG, frame);
4912 }
4913
4914 static void read_vc_local_link_width(struct hfi1_devdata *dd, u8 *misc_bits,
4915                                      u8 *flag_bits, u16 *link_widths)
4916 {
4917         u32 frame;
4918
4919         read_8051_config(dd, VERIFY_CAP_LOCAL_LINK_WIDTH, GENERAL_CONFIG,
4920                                 &frame);
4921         *misc_bits = (frame >> MISC_CONFIG_BITS_SHIFT) & MISC_CONFIG_BITS_MASK;
4922         *flag_bits = (frame >> LOCAL_FLAG_BITS_SHIFT) & LOCAL_FLAG_BITS_MASK;
4923         *link_widths = (frame >> LINK_WIDTH_SHIFT) & LINK_WIDTH_MASK;
4924 }
4925
4926 static int write_vc_local_link_width(struct hfi1_devdata *dd,
4927                                      u8 misc_bits,
4928                                      u8 flag_bits,
4929                                      u16 link_widths)
4930 {
4931         u32 frame;
4932
4933         frame = (u32)misc_bits << MISC_CONFIG_BITS_SHIFT
4934                 | (u32)flag_bits << LOCAL_FLAG_BITS_SHIFT
4935                 | (u32)link_widths << LINK_WIDTH_SHIFT;
4936         return load_8051_config(dd, VERIFY_CAP_LOCAL_LINK_WIDTH, GENERAL_CONFIG,
4937                                 frame);
4938 }
4939
4940 static int write_local_device_id(struct hfi1_devdata *dd, u16 device_id,
4941                                  u8 device_rev)
4942 {
4943         u32 frame;
4944
4945         frame = ((u32)device_id << LOCAL_DEVICE_ID_SHIFT)
4946                 | ((u32)device_rev << LOCAL_DEVICE_REV_SHIFT);
4947         return load_8051_config(dd, LOCAL_DEVICE_ID, GENERAL_CONFIG, frame);
4948 }
4949
4950 static void read_remote_device_id(struct hfi1_devdata *dd, u16 *device_id,
4951                                   u8 *device_rev)
4952 {
4953         u32 frame;
4954
4955         read_8051_config(dd, REMOTE_DEVICE_ID, GENERAL_CONFIG, &frame);
4956         *device_id = (frame >> REMOTE_DEVICE_ID_SHIFT) & REMOTE_DEVICE_ID_MASK;
4957         *device_rev = (frame >> REMOTE_DEVICE_REV_SHIFT)
4958                         & REMOTE_DEVICE_REV_MASK;
4959 }
4960
4961 void read_misc_status(struct hfi1_devdata *dd, u8 *ver_a, u8 *ver_b)
4962 {
4963         u32 frame;
4964
4965         read_8051_config(dd, MISC_STATUS, GENERAL_CONFIG, &frame);
4966         *ver_a = (frame >> STS_FM_VERSION_A_SHIFT) & STS_FM_VERSION_A_MASK;
4967         *ver_b = (frame >> STS_FM_VERSION_B_SHIFT) & STS_FM_VERSION_B_MASK;
4968 }
4969
4970 static void read_vc_remote_phy(struct hfi1_devdata *dd, u8 *power_management,
4971                                u8 *continuous)
4972 {
4973         u32 frame;
4974
4975         read_8051_config(dd, VERIFY_CAP_REMOTE_PHY, GENERAL_CONFIG, &frame);
4976         *power_management = (frame >> POWER_MANAGEMENT_SHIFT)
4977                                         & POWER_MANAGEMENT_MASK;
4978         *continuous = (frame >> CONTINIOUS_REMOTE_UPDATE_SUPPORT_SHIFT)
4979                                         & CONTINIOUS_REMOTE_UPDATE_SUPPORT_MASK;
4980 }
4981
4982 static void read_vc_remote_fabric(struct hfi1_devdata *dd, u8 *vau, u8 *z,
4983                                   u8 *vcu, u16 *vl15buf, u8 *crc_sizes)
4984 {
4985         u32 frame;
4986
4987         read_8051_config(dd, VERIFY_CAP_REMOTE_FABRIC, GENERAL_CONFIG, &frame);
4988         *vau = (frame >> VAU_SHIFT) & VAU_MASK;
4989         *z = (frame >> Z_SHIFT) & Z_MASK;
4990         *vcu = (frame >> VCU_SHIFT) & VCU_MASK;
4991         *vl15buf = (frame >> VL15BUF_SHIFT) & VL15BUF_MASK;
4992         *crc_sizes = (frame >> CRC_SIZES_SHIFT) & CRC_SIZES_MASK;
4993 }
4994
4995 static void read_vc_remote_link_width(struct hfi1_devdata *dd,
4996                                       u8 *remote_tx_rate,
4997                                       u16 *link_widths)
4998 {
4999         u32 frame;
5000
5001         read_8051_config(dd, VERIFY_CAP_REMOTE_LINK_WIDTH, GENERAL_CONFIG,
5002                                 &frame);
5003         *remote_tx_rate = (frame >> REMOTE_TX_RATE_SHIFT)
5004                                 & REMOTE_TX_RATE_MASK;
5005         *link_widths = (frame >> LINK_WIDTH_SHIFT) & LINK_WIDTH_MASK;
5006 }
5007
5008 static void read_local_lni(struct hfi1_devdata *dd, u8 *enable_lane_rx)
5009 {
5010         u32 frame;
5011
5012         read_8051_config(dd, LOCAL_LNI_INFO, GENERAL_CONFIG, &frame);
5013         *enable_lane_rx = (frame >> ENABLE_LANE_RX_SHIFT) & ENABLE_LANE_RX_MASK;
5014 }
5015
5016 static void read_mgmt_allowed(struct hfi1_devdata *dd, u8 *mgmt_allowed)
5017 {
5018         u32 frame;
5019
5020         read_8051_config(dd, REMOTE_LNI_INFO, GENERAL_CONFIG, &frame);
5021         *mgmt_allowed = (frame >> MGMT_ALLOWED_SHIFT) & MGMT_ALLOWED_MASK;
5022 }
5023
5024 static void read_last_local_state(struct hfi1_devdata *dd, u32 *lls)
5025 {
5026         read_8051_config(dd, LAST_LOCAL_STATE_COMPLETE, GENERAL_CONFIG, lls);
5027 }
5028
5029 static void read_last_remote_state(struct hfi1_devdata *dd, u32 *lrs)
5030 {
5031         read_8051_config(dd, LAST_REMOTE_STATE_COMPLETE, GENERAL_CONFIG, lrs);
5032 }
5033
5034 void hfi1_read_link_quality(struct hfi1_devdata *dd, u8 *link_quality)
5035 {
5036         u32 frame;
5037         int ret;
5038
5039         *link_quality = 0;
5040         if (dd->pport->host_link_state & HLS_UP) {
5041                 ret = read_8051_config(dd, LINK_QUALITY_INFO, GENERAL_CONFIG,
5042                                         &frame);
5043                 if (ret == 0)
5044                         *link_quality = (frame >> LINK_QUALITY_SHIFT)
5045                                                 & LINK_QUALITY_MASK;
5046         }
5047 }
5048
5049 static void read_planned_down_reason_code(struct hfi1_devdata *dd, u8 *pdrrc)
5050 {
5051         u32 frame;
5052
5053         read_8051_config(dd, LINK_QUALITY_INFO, GENERAL_CONFIG, &frame);
5054         *pdrrc = (frame >> DOWN_REMOTE_REASON_SHIFT) & DOWN_REMOTE_REASON_MASK;
5055 }
5056
5057 static int read_tx_settings(struct hfi1_devdata *dd,
5058                             u8 *enable_lane_tx,
5059                             u8 *tx_polarity_inversion,
5060                             u8 *rx_polarity_inversion,
5061                             u8 *max_rate)
5062 {
5063         u32 frame;
5064         int ret;
5065
5066         ret = read_8051_config(dd, TX_SETTINGS, GENERAL_CONFIG, &frame);
5067         *enable_lane_tx = (frame >> ENABLE_LANE_TX_SHIFT)
5068                                 & ENABLE_LANE_TX_MASK;
5069         *tx_polarity_inversion = (frame >> TX_POLARITY_INVERSION_SHIFT)
5070                                 & TX_POLARITY_INVERSION_MASK;
5071         *rx_polarity_inversion = (frame >> RX_POLARITY_INVERSION_SHIFT)
5072                                 & RX_POLARITY_INVERSION_MASK;
5073         *max_rate = (frame >> MAX_RATE_SHIFT) & MAX_RATE_MASK;
5074         return ret;
5075 }
5076
5077 static int write_tx_settings(struct hfi1_devdata *dd,
5078                              u8 enable_lane_tx,
5079                              u8 tx_polarity_inversion,
5080                              u8 rx_polarity_inversion,
5081                              u8 max_rate)
5082 {
5083         u32 frame;
5084
5085         /* no need to mask, all variable sizes match field widths */
5086         frame = enable_lane_tx << ENABLE_LANE_TX_SHIFT
5087                 | tx_polarity_inversion << TX_POLARITY_INVERSION_SHIFT
5088                 | rx_polarity_inversion << RX_POLARITY_INVERSION_SHIFT
5089                 | max_rate << MAX_RATE_SHIFT;
5090         return load_8051_config(dd, TX_SETTINGS, GENERAL_CONFIG, frame);
5091 }
5092
5093 static void check_fabric_firmware_versions(struct hfi1_devdata *dd)
5094 {
5095         u32 frame, version, prod_id;
5096         int ret, lane;
5097
5098         /* 4 lanes */
5099         for (lane = 0; lane < 4; lane++) {
5100                 ret = read_8051_config(dd, SPICO_FW_VERSION, lane, &frame);
5101                 if (ret) {
5102                         dd_dev_err(
5103                                 dd,
5104                                 "Unable to read lane %d firmware details\n",
5105                                 lane);
5106                         continue;
5107                 }
5108                 version = (frame >> SPICO_ROM_VERSION_SHIFT)
5109                                         & SPICO_ROM_VERSION_MASK;
5110                 prod_id = (frame >> SPICO_ROM_PROD_ID_SHIFT)
5111                                         & SPICO_ROM_PROD_ID_MASK;
5112                 dd_dev_info(dd,
5113                         "Lane %d firmware: version 0x%04x, prod_id 0x%04x\n",
5114                         lane, version, prod_id);
5115         }
5116 }
5117
5118 /*
5119  * Read an idle LCB message.
5120  *
5121  * Returns 0 on success, -EINVAL on error
5122  */
5123 static int read_idle_message(struct hfi1_devdata *dd, u64 type, u64 *data_out)
5124 {
5125         int ret;
5126
5127         ret = do_8051_command(dd, HCMD_READ_LCB_IDLE_MSG,
5128                 type, data_out);
5129         if (ret != HCMD_SUCCESS) {
5130                 dd_dev_err(dd, "read idle message: type %d, err %d\n",
5131                         (u32)type, ret);
5132                 return -EINVAL;
5133         }
5134         dd_dev_info(dd, "%s: read idle message 0x%llx\n", __func__, *data_out);
5135         /* return only the payload as we already know the type */
5136         *data_out >>= IDLE_PAYLOAD_SHIFT;
5137         return 0;
5138 }
5139
5140 /*
5141  * Read an idle SMA message.  To be done in response to a notification from
5142  * the 8051.
5143  *
5144  * Returns 0 on success, -EINVAL on error
5145  */
5146 static int read_idle_sma(struct hfi1_devdata *dd, u64 *data)
5147 {
5148         return read_idle_message(dd,
5149                         (u64)IDLE_SMA << IDLE_MSG_TYPE_SHIFT, data);
5150 }
5151
5152 /*
5153  * Send an idle LCB message.
5154  *
5155  * Returns 0 on success, -EINVAL on error
5156  */
5157 static int send_idle_message(struct hfi1_devdata *dd, u64 data)
5158 {
5159         int ret;
5160
5161         dd_dev_info(dd, "%s: sending idle message 0x%llx\n", __func__, data);
5162         ret = do_8051_command(dd, HCMD_SEND_LCB_IDLE_MSG, data, NULL);
5163         if (ret != HCMD_SUCCESS) {
5164                 dd_dev_err(dd, "send idle message: data 0x%llx, err %d\n",
5165                         data, ret);
5166                 return -EINVAL;
5167         }
5168         return 0;
5169 }
5170
5171 /*
5172  * Send an idle SMA message.
5173  *
5174  * Returns 0 on success, -EINVAL on error
5175  */
5176 int send_idle_sma(struct hfi1_devdata *dd, u64 message)
5177 {
5178         u64 data;
5179
5180         data = ((message & IDLE_PAYLOAD_MASK) << IDLE_PAYLOAD_SHIFT)
5181                 | ((u64)IDLE_SMA << IDLE_MSG_TYPE_SHIFT);
5182         return send_idle_message(dd, data);
5183 }
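
/*
 * Illustrative sketch, not used by the driver: the idle message layout
 * used by send_idle_sma() above and read_idle_sma() - payload in the
 * low bits, message type above it.
 */
static inline u64 build_idle_sma(u64 payload)
{
        return ((payload & IDLE_PAYLOAD_MASK) << IDLE_PAYLOAD_SHIFT)
                | ((u64)IDLE_SMA << IDLE_MSG_TYPE_SHIFT);
}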
5184
5185 /*
5186  * Initialize the LCB then do a quick link up.  This may or may not be
5187  * in loopback.
5188  *
5189  * return 0 on success, -errno on error
5190  */
5191 static int do_quick_linkup(struct hfi1_devdata *dd)
5192 {
5193         u64 reg;
5194         unsigned long timeout;
5195         int ret;
5196
5197         lcb_shutdown(dd, 0);
5198
5199         if (loopback) {
5200                 /* LCB_CFG_LOOPBACK.VAL = 2 */
5201                 /* LCB_CFG_LANE_WIDTH.VAL = 0 */
5202                 write_csr(dd, DC_LCB_CFG_LOOPBACK,
5203                         IB_PACKET_TYPE << DC_LCB_CFG_LOOPBACK_VAL_SHIFT);
5204                 write_csr(dd, DC_LCB_CFG_LANE_WIDTH, 0);
5205         }
5206
5207         /* start the LCBs */
5208         /* LCB_CFG_TX_FIFOS_RESET.VAL = 0 */
5209         write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 0);
5210
5211         /* simulator only loopback steps */
5212         if (loopback && dd->icode == ICODE_FUNCTIONAL_SIMULATOR) {
5213                 /* LCB_CFG_RUN.EN = 1 */
5214                 write_csr(dd, DC_LCB_CFG_RUN,
5215                         1ull << DC_LCB_CFG_RUN_EN_SHIFT);
5216
5217                 /* watch LCB_STS_LINK_TRANSFER_ACTIVE */
5218                 timeout = jiffies + msecs_to_jiffies(10);
5219                 while (1) {
5220                         reg = read_csr(dd,
5221                                 DC_LCB_STS_LINK_TRANSFER_ACTIVE);
5222                         if (reg)
5223                                 break;
5224                         if (time_after(jiffies, timeout)) {
5225                                 dd_dev_err(dd,
5226                                         "timeout waiting for LINK_TRANSFER_ACTIVE\n");
5227                                 return -ETIMEDOUT;
5228                         }
5229                         udelay(2);
5230                 }
5231
5232                 write_csr(dd, DC_LCB_CFG_ALLOW_LINK_UP,
5233                         1ull << DC_LCB_CFG_ALLOW_LINK_UP_VAL_SHIFT);
5234         }
5235
5236         if (!loopback) {
5237                 /*
5238                  * When doing quick linkup and not in loopback, both
5239                  * sides must be done with LCB set-up before either
5240                  * starts the quick linkup.  Put a delay here so that
5241                  * both sides can be started and have a chance to
5242                  * finish LCB set-up before resuming.
5243                  */
5244                 dd_dev_err(dd,
5245                         "Pausing for peer to be finished with LCB set up\n");
5246                 msleep(5000);
5247                 dd_dev_err(dd,
5248                         "Continuing with quick linkup\n");
5249         }
5250
5251         write_csr(dd, DC_LCB_ERR_EN, 0); /* mask LCB errors */
5252         set_8051_lcb_access(dd);
5253
5254         /*
5255          * State "quick" LinkUp request sets the physical link state to
5256          * LinkUp without a verify capability sequence.
5257          * This state is in simulator v37 and later.
5258          */
5259         ret = set_physical_link_state(dd, PLS_QUICK_LINKUP);
5260         if (ret != HCMD_SUCCESS) {
5261                 dd_dev_err(dd,
5262                         "%s: set physical link state to quick LinkUp failed with return %d\n",
5263                         __func__, ret);
5264
5265                 set_host_lcb_access(dd);
5266                 write_csr(dd, DC_LCB_ERR_EN, ~0ull); /* watch LCB errors */
5267
5268                 if (ret >= 0)
5269                         ret = -EINVAL;
5270                 return ret;
5271         }
5272
5273         return 0; /* success */
5274 }
5275
5276 /*
5277  * Set the SerDes to internal loopback mode.
5278  * Returns 0 on success, -errno on error.
5279  */
5280 static int set_serdes_loopback_mode(struct hfi1_devdata *dd)
5281 {
5282         int ret;
5283
5284         ret = set_physical_link_state(dd, PLS_INTERNAL_SERDES_LOOPBACK);
5285         if (ret == HCMD_SUCCESS)
5286                 return 0;
5287         dd_dev_err(dd,
5288                 "Set physical link state to SerDes Loopback failed with return %d\n",
5289                 ret);
5290         if (ret >= 0)
5291                 ret = -EINVAL;
5292         return ret;
5293 }
5294
5295 /*
5296  * Do all special steps to set up loopback.
5297  */
5298 static int init_loopback(struct hfi1_devdata *dd)
5299 {
5300         dd_dev_info(dd, "Entering loopback mode\n");
5301
5302         /* all loopbacks should disable self GUID check */
5303         write_csr(dd, DC_DC8051_CFG_MODE,
5304                 (read_csr(dd, DC_DC8051_CFG_MODE) | DISABLE_SELF_GUID_CHECK));
5305
5306         /*
5307          * The simulator has only one loopback option - LCB.  Switch
5308          * to that option, which includes quick link up.
5309          *
5310          * Accept all valid loopback values.
5311          */
5312         if ((dd->icode == ICODE_FUNCTIONAL_SIMULATOR)
5313                 && (loopback == LOOPBACK_SERDES
5314                         || loopback == LOOPBACK_LCB
5315                         || loopback == LOOPBACK_CABLE)) {
5316                 loopback = LOOPBACK_LCB;
5317                 quick_linkup = 1;
5318                 return 0;
5319         }
5320
5321         /* handle serdes loopback */
5322         if (loopback == LOOPBACK_SERDES) {
5323         /* internal serdes loopback needs quick linkup on RTL */
5324                 if (dd->icode == ICODE_RTL_SILICON)
5325                         quick_linkup = 1;
5326                 return set_serdes_loopback_mode(dd);
5327         }
5328
5329         /* LCB loopback - handled at poll time */
5330         if (loopback == LOOPBACK_LCB) {
5331                 quick_linkup = 1; /* LCB is always quick linkup */
5332
5333                 /* not supported in emulation due to emulation RTL changes */
5334                 if (dd->icode == ICODE_FPGA_EMULATION) {
5335                         dd_dev_err(dd,
5336                                 "LCB loopback not supported in emulation\n");
5337                         return -EINVAL;
5338                 }
5339                 return 0;
5340         }
5341
5342         /* external cable loopback requires no extra steps */
5343         if (loopback == LOOPBACK_CABLE)
5344                 return 0;
5345
5346         dd_dev_err(dd, "Invalid loopback mode %d\n", loopback);
5347         return -EINVAL;
5348 }
5349
5350 /*
5351  * Translate from the OPA_LINK_WIDTH handed to us by the FM to bits
5352  * used in the Verify Capability link width attribute.
5353  */
5354 static u16 opa_to_vc_link_widths(u16 opa_widths)
5355 {
5356         int i;
5357         u16 result = 0;
5358
5359         static const struct link_bits {
5360                 u16 from;
5361                 u16 to;
5362         } opa_link_xlate[] = {
5363                 { OPA_LINK_WIDTH_1X, 1 << (1-1)  },
5364                 { OPA_LINK_WIDTH_2X, 1 << (2-1)  },
5365                 { OPA_LINK_WIDTH_3X, 1 << (3-1)  },
5366                 { OPA_LINK_WIDTH_4X, 1 << (4-1)  },
5367         };
5368
5369         for (i = 0; i < ARRAY_SIZE(opa_link_xlate); i++) {
5370                 if (opa_widths & opa_link_xlate[i].from)
5371                         result |= opa_link_xlate[i].to;
5372         }
5373         return result;
5374 }
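
/*
 * Worked example (illustrative): an opa_widths value with
 * OPA_LINK_WIDTH_1X and OPA_LINK_WIDTH_4X set translates to bits 0
 * and 3 (0x9) in the Verify Capability link width attribute.
 */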
5375
5376 /*
5377  * Set link attributes before moving to polling.
5378  */
5379 static int set_local_link_attributes(struct hfi1_pportdata *ppd)
5380 {
5381         struct hfi1_devdata *dd = ppd->dd;
5382         u8 enable_lane_tx;
5383         u8 tx_polarity_inversion;
5384         u8 rx_polarity_inversion;
5385         int ret;
5386
5387         /* reset our fabric serdes to clear any lingering problems */
5388         fabric_serdes_reset(dd);
5389
5390         /* set the local tx rate - need to read-modify-write */
5391         ret = read_tx_settings(dd, &enable_lane_tx, &tx_polarity_inversion,
5392                 &rx_polarity_inversion, &ppd->local_tx_rate);
5393         if (ret)
5394                 goto set_local_link_attributes_fail;
5395
5396         if (dd->dc8051_ver < dc8051_ver(0, 20)) {
5397                 /* set the tx rate to the fastest enabled */
5398                 if (ppd->link_speed_enabled & OPA_LINK_SPEED_25G)
5399                         ppd->local_tx_rate = 1;
5400                 else
5401                         ppd->local_tx_rate = 0;
5402         } else {
5403                 /* set the tx rate to all enabled */
5404                 ppd->local_tx_rate = 0;
5405                 if (ppd->link_speed_enabled & OPA_LINK_SPEED_25G)
5406                         ppd->local_tx_rate |= 2;
5407                 if (ppd->link_speed_enabled & OPA_LINK_SPEED_12_5G)
5408                         ppd->local_tx_rate |= 1;
5409         }
5410         ret = write_tx_settings(dd, enable_lane_tx, tx_polarity_inversion,
5411                      rx_polarity_inversion, ppd->local_tx_rate);
5412         if (ret != HCMD_SUCCESS)
5413                 goto set_local_link_attributes_fail;
5414
5415         /*
5416          * DC supports continuous updates.
5417          */
5418         ret = write_vc_local_phy(dd, 0 /* no power management */,
5419                                      1 /* continuous updates */);
5420         if (ret != HCMD_SUCCESS)
5421                 goto set_local_link_attributes_fail;
5422
5423         /* z=1 in the next call: AU of 0 is not supported by the hardware */
5424         ret = write_vc_local_fabric(dd, dd->vau, 1, dd->vcu, dd->vl15_init,
5425                                     ppd->port_crc_mode_enabled);
5426         if (ret != HCMD_SUCCESS)
5427                 goto set_local_link_attributes_fail;
5428
5429         ret = write_vc_local_link_width(dd, 0, 0,
5430                      opa_to_vc_link_widths(ppd->link_width_enabled));
5431         if (ret != HCMD_SUCCESS)
5432                 goto set_local_link_attributes_fail;
5433
5434         /* let peer know who we are */
5435         ret = write_local_device_id(dd, dd->pcidev->device, dd->minrev);
5436         if (ret == HCMD_SUCCESS)
5437                 return 0;
5438
5439 set_local_link_attributes_fail:
5440         dd_dev_err(dd,
5441                 "Failed to set local link attributes, return 0x%x\n",
5442                 ret);
5443         return ret;
5444 }
5445
5446 /*
5447  * Call this to start the link.  Returns -EAGAIN if the cable is not
5448  * present, so the caller may schedule a retry.  Returns 0 if the link
5449  * is disabled (nothing is done) or if it moved to polling.
5450  */
5451 int start_link(struct hfi1_pportdata *ppd)
5452 {
5453         if (!ppd->link_enabled) {
5454                 dd_dev_info(ppd->dd,
5455                         "%s: stopping link start because link is disabled\n",
5456                         __func__);
5457                 return 0;
5458         }
5459         if (!ppd->driver_link_ready) {
5460                 dd_dev_info(ppd->dd,
5461                         "%s: stopping link start because driver is not ready\n",
5462                         __func__);
5463                 return 0;
5464         }
5465
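        /*
         * Loopback and simulator runs have no physical cable, so skip
         * the module-present check for them.
         */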
5466         if (qsfp_mod_present(ppd) || loopback == LOOPBACK_SERDES ||
5467                         loopback == LOOPBACK_LCB ||
5468                         ppd->dd->icode == ICODE_FUNCTIONAL_SIMULATOR)
5469                 return set_link_state(ppd, HLS_DN_POLL);
5470
5471         dd_dev_info(ppd->dd,
5472                 "%s: stopping link start because no cable is present\n",
5473                 __func__);
5474         return -EAGAIN;
5475 }
5476
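/*
 * Pulse the QSFP module's active-low RESET_N pin: enable the pin's
 * output, drive it low for ~10us, then drive it high again.
 */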
5477 static void reset_qsfp(struct hfi1_pportdata *ppd)
5478 {
5479         struct hfi1_devdata *dd = ppd->dd;
5480         u64 mask, qsfp_mask;
5481
5482         mask = (u64)QSFP_HFI0_RESET_N;
5483         qsfp_mask = read_csr(dd,
5484                 dd->hfi1_id ? ASIC_QSFP2_OE : ASIC_QSFP1_OE);
5485         qsfp_mask |= mask;
5486         write_csr(dd,
5487                 dd->hfi1_id ? ASIC_QSFP2_OE : ASIC_QSFP1_OE,
5488                 qsfp_mask);
5489
5490         qsfp_mask = read_csr(dd,
5491                 dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT);
5492         qsfp_mask &= ~mask;
5493         write_csr(dd,
5494                 dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT,
5495                 qsfp_mask);
5496
5497         udelay(10);
5498
5499         qsfp_mask |= mask;
5500         write_csr(dd,
5501                 dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT,
5502                 qsfp_mask);
5503 }
5504
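/*
 * Decode and log the QSFP module's alarm/warning flags.
 * qsfp_interrupt_status[] holds the interrupt flag bytes the caller
 * read from the module starting at offset 6; each byte packs alarm
 * and warning bits for one monitored quantity.
 */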
5505 static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd,
5506                                         u8 *qsfp_interrupt_status)
5507 {
5508         struct hfi1_devdata *dd = ppd->dd;
5509
5510         if ((qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_ALARM) ||
5511                 (qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_WARNING))
5512                 dd_dev_info(dd,
5513                         "%s: QSFP cable on fire\n",
5514                         __func__);
5515
5516         if ((qsfp_interrupt_status[0] & QSFP_LOW_TEMP_ALARM) ||
5517                 (qsfp_interrupt_status[0] & QSFP_LOW_TEMP_WARNING))
5518                 dd_dev_info(dd,
5519                         "%s: QSFP cable temperature too low\n",
5520                         __func__);
5521
5522         if ((qsfp_interrupt_status[1] & QSFP_HIGH_VCC_ALARM) ||
5523                 (qsfp_interrupt_status[1] & QSFP_HIGH_VCC_WARNING))
5524                 dd_dev_info(dd,
5525                         "%s: QSFP supply voltage too high\n",
5526                         __func__);
5527
5528         if ((qsfp_interrupt_status[1] & QSFP_LOW_VCC_ALARM) ||
5529                 (qsfp_interrupt_status[1] & QSFP_LOW_VCC_WARNING))
5530                 dd_dev_info(dd,
5531                         "%s: QSFP supply voltage too low\n",
5532                         __func__);
5533
5534         /* Byte 2 is vendor specific */
5535
5536         if ((qsfp_interrupt_status[3] & QSFP_HIGH_POWER_ALARM) ||
5537                 (qsfp_interrupt_status[3] & QSFP_HIGH_POWER_WARNING))
5538                 dd_dev_info(dd,
5539                         "%s: Cable RX channel 1/2 power too high\n",
5540                         __func__);
5541
5542         if ((qsfp_interrupt_status[3] & QSFP_LOW_POWER_ALARM) ||
5543                 (qsfp_interrupt_status[3] & QSFP_LOW_POWER_WARNING))
5544                 dd_dev_info(dd,
5545                         "%s: Cable RX channel 1/2 power too low\n",
5546                         __func__);
5547
5548         if ((qsfp_interrupt_status[4] & QSFP_HIGH_POWER_ALARM) ||
5549                 (qsfp_interrupt_status[4] & QSFP_HIGH_POWER_WARNING))
5550                 dd_dev_info(dd,
5551                         "%s: Cable RX channel 3/4 power too high\n",
5552                         __func__);
5553
5554         if ((qsfp_interrupt_status[4] & QSFP_LOW_POWER_ALARM) ||
5555                 (qsfp_interrupt_status[4] & QSFP_LOW_POWER_WARNING))
5556                 dd_dev_info(dd,
5557                         "%s: Cable RX channel 3/4 power too low\n",
5558                         __func__);
5559
5560         if ((qsfp_interrupt_status[5] & QSFP_HIGH_BIAS_ALARM) ||
5561                 (qsfp_interrupt_status[5] & QSFP_HIGH_BIAS_WARNING))
5562                 dd_dev_info(dd,
5563                         "%s: Cable TX channel 1/2 bias too high\n",
5564                         __func__);
5565
5566         if ((qsfp_interrupt_status[5] & QSFP_LOW_BIAS_ALARM) ||
5567                 (qsfp_interrupt_status[5] & QSFP_LOW_BIAS_WARNING))
5568                 dd_dev_info(dd,
5569                         "%s: Cable TX channel 1/2 bias too low\n",
5570                         __func__);
5571
5572         if ((qsfp_interrupt_status[6] & QSFP_HIGH_BIAS_ALARM) ||
5573                 (qsfp_interrupt_status[6] & QSFP_HIGH_BIAS_WARNING))
5574                 dd_dev_info(dd,
5575                         "%s: Cable TX channel 3/4 bias too high\n",
5576                         __func__);
5577
5578         if ((qsfp_interrupt_status[6] & QSFP_LOW_BIAS_ALARM) ||
5579                 (qsfp_interrupt_status[6] & QSFP_LOW_BIAS_WARNING))
5580                 dd_dev_info(dd,
5581                         "%s: Cable TX channel 3/4 bias too low\n",
5582                         __func__);
5583
5584         if ((qsfp_interrupt_status[7] & QSFP_HIGH_POWER_ALARM) ||
5585                 (qsfp_interrupt_status[7] & QSFP_HIGH_POWER_WARNING))
5586                 dd_dev_info(dd,
5587                         "%s: Cable TX channel 1/2 power too high\n",
5588                         __func__);
5589
5590         if ((qsfp_interrupt_status[7] & QSFP_LOW_POWER_ALARM) ||
5591                 (qsfp_interrupt_status[7] & QSFP_LOW_POWER_WARNING))
5592                 dd_dev_info(dd,
5593                         "%s: Cable TX channel 1/2 power too low\n",
5594                         __func__);
5595
5596         if ((qsfp_interrupt_status[8] & QSFP_HIGH_POWER_ALARM) ||
5597                 (qsfp_interrupt_status[8] & QSFP_HIGH_POWER_WARNING))
5598                 dd_dev_info(dd,
5599                         "%s: Cable TX channel 3/4 power too high\n",
5600                         __func__);
5601
5602         if ((qsfp_interrupt_status[8] & QSFP_LOW_POWER_ALARM) ||
5603                 (qsfp_interrupt_status[8] & QSFP_LOW_POWER_WARNING))
5604                 dd_dev_info(dd,
5605                         "%s: Cable TX channel 3/4 power too low\n",
5606                         __func__);
5607
5608         /* Bytes 9-10 and 11-12 are reserved */
5609         /* Bytes 13-15 are vendor specific */
5610
5611         return 0;
5612 }
5613
5614 static int do_pre_lni_host_behaviors(struct hfi1_pportdata *ppd)
5615 {
5616         refresh_qsfp_cache(ppd, &ppd->qsfp_info);
5617
5618         return 0;
5619 }
5620
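/*
 * Fallback for a module whose INT_N interrupt is not functional:
 * read the status byte directly and, if the module's data is ready,
 * do the pre-LNI host work the interrupt path would have triggered.
 */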
5621 static int do_qsfp_intr_fallback(struct hfi1_pportdata *ppd)
5622 {
5623         struct hfi1_devdata *dd = ppd->dd;
5624         u8 qsfp_interrupt_status = 0;
5625
5626         if (qsfp_read(ppd, dd->hfi1_id, 2, &qsfp_interrupt_status, 1)
5627                 != 1) {
5628                 dd_dev_info(dd,
5629                         "%s: Failed to read status of QSFP module\n",
5630                         __func__);
5631                 return -EIO;
5632         }
5633
5634         /* We don't care about alarms & warnings with a non-functional INT_N */
5635         if (!(qsfp_interrupt_status & QSFP_DATA_NOT_READY))
5636                 do_pre_lni_host_behaviors(ppd);
5637
5638         return 0;
5639 }
5640
5641 /* This routine will only be scheduled if the QSFP module is present */
5642 static void qsfp_event(struct work_struct *work)
5643 {
5644         struct qsfp_data *qd;
5645         struct hfi1_pportdata *ppd;
5646         struct hfi1_devdata *dd;
5647
5648         qd = container_of(work, struct qsfp_data, qsfp_work);
5649         ppd = qd->ppd;
5650         dd = ppd->dd;
5651
5652         /* Sanity check */
5653         if (!qsfp_mod_present(ppd))
5654                 return;
5655
5656         /*
5657          * Turn DC back on after the cable has been
5658          * re-inserted.  Up until now, the DC has been in
5659          * reset to save power.
5660          */
5661         dc_start(dd);
5662
5663         if (qd->cache_refresh_required) {
5664                 msleep(3000);
5665                 reset_qsfp(ppd);
5666
5667                 /* Check for QSFP interrupt after t_init (SFF 8679)
5668                  * plus some extra time
5669                  */
5670                 msleep(3000);
5671                 if (!qd->qsfp_interrupt_functional) {
5672                         if (do_qsfp_intr_fallback(ppd) < 0)
5673                                 dd_dev_info(dd, "%s: QSFP fallback failed\n",
5674                                         __func__);
5675                         ppd->driver_link_ready = 1;
5676                         start_link(ppd);
5677                 }
5678         }
5679
5680         if (qd->check_interrupt_flags) {
5681                 u8 qsfp_interrupt_status[16] = {0,};
5682
5683                 if (qsfp_read(ppd, dd->hfi1_id, 6,
5684                               &qsfp_interrupt_status[0], 16) != 16) {
5685                         dd_dev_info(dd,
5686                                 "%s: Failed to read status of QSFP module\n",
5687                                 __func__);
5688                 } else {
5689                         unsigned long flags;
5690                         u8 data_status;
5691
5692                         spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
5693                         ppd->qsfp_info.check_interrupt_flags = 0;
5694                         spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock,
5695                                                                 flags);
5696
5697                         if (qsfp_read(ppd, dd->hfi1_id, 2, &data_status, 1)
5698                                  != 1) {
5699                                 dd_dev_info(dd,
5700                                 "%s: Failed to read status of QSFP module\n",
5701                                         __func__);
                                /* bail: data_status was never written */
                                return;
5702                         }
5703                         if (!(data_status & QSFP_DATA_NOT_READY)) {
5704                                 do_pre_lni_host_behaviors(ppd);
5705                                 start_link(ppd);
5706                         } else
5707                                 handle_qsfp_error_conditions(ppd,
5708                                                 qsfp_interrupt_status);
5709                 }
5710         }
5711 }
5712
5713 void init_qsfp(struct hfi1_pportdata *ppd)
5714 {
5715         struct hfi1_devdata *dd = ppd->dd;
5716         u64 qsfp_mask;
5717
5718         if (loopback == LOOPBACK_SERDES || loopback == LOOPBACK_LCB ||
5719                         ppd->dd->icode == ICODE_FUNCTIONAL_SIMULATOR ||
5720                         !HFI1_CAP_IS_KSET(QSFP_ENABLED)) {
5721                 ppd->driver_link_ready = 1;
5722                 return;
5723         }
5724
5725         ppd->qsfp_info.ppd = ppd;
5726         INIT_WORK(&ppd->qsfp_info.qsfp_work, qsfp_event);
5727
5728         qsfp_mask = (u64)(QSFP_HFI0_INT_N | QSFP_HFI0_MODPRST_N);
5729         /* Clear current status to avoid spurious interrupts */
5730         write_csr(dd,
5731                         dd->hfi1_id ?
5732                                 ASIC_QSFP2_CLEAR :
5733                                 ASIC_QSFP1_CLEAR,
5734                 qsfp_mask);
5735
5736         /* Handle active low nature of INT_N and MODPRST_N pins */
5737         if (qsfp_mod_present(ppd))
5738                 qsfp_mask &= ~(u64)QSFP_HFI0_MODPRST_N;
5739         write_csr(dd,
5740                   dd->hfi1_id ? ASIC_QSFP2_INVERT : ASIC_QSFP1_INVERT,
5741                   qsfp_mask);
5742
5743         /* Allow only INT_N and MODPRST_N to trigger QSFP interrupts */
5744         qsfp_mask |= (u64)QSFP_HFI0_MODPRST_N;
5745         write_csr(dd,
5746                 dd->hfi1_id ? ASIC_QSFP2_MASK : ASIC_QSFP1_MASK,
5747                 qsfp_mask);
5748
5749         if (qsfp_mod_present(ppd)) {
5750                 msleep(3000);
5751                 reset_qsfp(ppd);
5752
5753                 /* Check for QSFP interrupt after t_init (SFF 8679)
5754                  * plus some extra time
5755                  */
5756                 msleep(3000);
5757                 if (!ppd->qsfp_info.qsfp_interrupt_functional) {
5758                         if (do_qsfp_intr_fallback(ppd) < 0)
5759                                 dd_dev_info(dd,
5760                                         "%s: QSFP fallback failed\n",
5761                                         __func__);
5762                         ppd->driver_link_ready = 1;
5763                 }
5764         }
5765 }
5766
5767 int bringup_serdes(struct hfi1_pportdata *ppd)
5768 {
5769         struct hfi1_devdata *dd = ppd->dd;
5770         u64 guid;
5771         int ret;
5772
5773         if (HFI1_CAP_IS_KSET(EXTENDED_PSN))
5774                 add_rcvctrl(dd, RCV_CTRL_RCV_EXTENDED_PSN_ENABLE_SMASK);
5775
5776         guid = ppd->guid;
5777         if (!guid) {
5778                 if (dd->base_guid)
5779                         guid = dd->base_guid + ppd->port - 1;
5780                 ppd->guid = guid;
5781         }
5782
5783         /* the link defaults to enabled */
5784         ppd->link_enabled = 1;
5785         /* Set linkinit_reason on power up per OPA spec */
5786         ppd->linkinit_reason = OPA_LINKINIT_REASON_LINKUP;
5787
5788         if (loopback) {
5789                 ret = init_loopback(dd);
5790                 if (ret < 0)
5791                         return ret;
5792         }
5793
5794         return start_link(ppd);
5795 }
5796
5797 void hfi1_quiet_serdes(struct hfi1_pportdata *ppd)
5798 {
5799         struct hfi1_devdata *dd = ppd->dd;
5800
5801         /*
5802          * Shut down the link and keep it down.  First turn off the
5803          * driver's willingness to allow the link to be up
5804          * (driver_link_ready).  Then make sure the link is not
5805          * automatically restarted (link_enabled).  Cancel any pending
5806          * restart, and finally go offline.
5807          */
5808         ppd->driver_link_ready = 0;
5809         ppd->link_enabled = 0;
5810
5811         set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SMA_DISABLED, 0,
5812           OPA_LINKDOWN_REASON_SMA_DISABLED);
5813         set_link_state(ppd, HLS_DN_OFFLINE);
5814
5815         /* disable the port */
5816         clear_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
5817 }
5818
5819 static inline int init_cpu_counters(struct hfi1_devdata *dd)
5820 {
5821         struct hfi1_pportdata *ppd;
5822         int i;
5823
5824         ppd = (struct hfi1_pportdata *)(dd + 1);
5825         for (i = 0; i < dd->num_pports; i++, ppd++) {
5826                 ppd->ibport_data.rc_acks = alloc_percpu(u64);
5827                 ppd->ibport_data.rc_qacks = alloc_percpu(u64);
5830                 ppd->ibport_data.rc_delayed_comp = alloc_percpu(u64);
5831                 if ((ppd->ibport_data.rc_acks == NULL) ||
5832                     (ppd->ibport_data.rc_delayed_comp == NULL) ||
5833                     (ppd->ibport_data.rc_qacks == NULL))
5834                         return -ENOMEM;
5835         }
5836
5837         return 0;
5838 }
5839
5840 static const char * const pt_names[] = {
5841         "expected",
5842         "eager",
5843         "invalid"
5844 };
5845
5846 static const char *pt_name(u32 type)
5847 {
5848         return type >= ARRAY_SIZE(pt_names) ? "unknown" : pt_names[type];
5849 }
5850
5851 /*
5852  * Write an entry into the receive array at the given index
5853  */
5854 void hfi1_put_tid(struct hfi1_devdata *dd, u32 index,
5855                   u32 type, unsigned long pa, u16 order)
5856 {
5857         u64 reg;
5858         void __iomem *base = (dd->rcvarray_wc ? dd->rcvarray_wc :
5859                               (dd->kregbase + RCV_ARRAY));
5860
5861         if (!(dd->flags & HFI1_PRESENT))
5862                 goto done;
5863
5864         if (type == PT_INVALID) {
5865                 pa = 0;
5866         } else if (type > PT_INVALID) {
5867                 dd_dev_err(dd,
5868                         "unexpected receive array type %u for index %u, not handled\n",
5869                         type, index);
5870                 goto done;
5871         }
5872
5873         hfi1_cdbg(TID, "type %s, index 0x%x, pa 0x%lx, bsize 0x%lx",
5874                   pt_name(type), index, pa, (unsigned long)order);
5875
5876 #define RT_ADDR_SHIFT 12        /* 4KB kernel address boundary */
5877         reg = RCV_ARRAY_RT_WRITE_ENABLE_SMASK
5878                 | (u64)order << RCV_ARRAY_RT_BUF_SIZE_SHIFT
5879                 | ((pa >> RT_ADDR_SHIFT) & RCV_ARRAY_RT_ADDR_MASK)
5880                                         << RCV_ARRAY_RT_ADDR_SHIFT;
5881         writeq(reg, base + (index * 8));
5882
5883         if (type == PT_EAGER)
5884                 /*
5885                  * Eager entries are written one-by-one so we have to push them
5886                  * after we write the entry.
5887                  */
5888                 flush_wc();
5889 done:
5890         return;
5891 }
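
/*
 * Worked example (illustrative): an eager buffer at physical address
 * 0x12345000 with order 2 puts pa >> 12 == 0x12345 in the RT_ADDR
 * field and 2 in the RT_BUF_SIZE field, then flushes write-combining
 * because the entry is eager.
 */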
5892
5893 void hfi1_clear_tids(struct hfi1_ctxtdata *rcd)
5894 {
5895         struct hfi1_devdata *dd = rcd->dd;
5896         u32 i;
5897
5898         /* this could be optimized */
5899         for (i = rcd->eager_base; i < rcd->eager_base +
5900                      rcd->egrbufs.alloced; i++)
5901                 hfi1_put_tid(dd, i, PT_INVALID, 0, 0);
5902
5903         for (i = rcd->expected_base;
5904                         i < rcd->expected_base + rcd->expected_count; i++)
5905                 hfi1_put_tid(dd, i, PT_INVALID, 0, 0);
5906 }
5907
5908 int hfi1_get_base_kinfo(struct hfi1_ctxtdata *rcd,
5909                         struct hfi1_ctxt_info *kinfo)
5910 {
5911         kinfo->runtime_flags = (HFI1_MISC_GET() << HFI1_CAP_USER_SHIFT) |
5912                 HFI1_CAP_UGET(MASK) | HFI1_CAP_KGET(K2U);
5913         return 0;
5914 }
5915
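/*
 * Locate the message header for the given RHF: back up dd->rhf_offset
 * 32-bit words to the start of the receive header entry, then add the
 * header offset encoded in the RHF itself.
 */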
5916 struct hfi1_message_header *hfi1_get_msgheader(
5917                                 struct hfi1_devdata *dd, __le32 *rhf_addr)
5918 {
5919         u32 offset = rhf_hdrq_offset(rhf_to_cpu(rhf_addr));
5920
5921         return (struct hfi1_message_header *)
5922                 (rhf_addr - dd->rhf_offset + offset);
5923 }
5924
5925 static const char * const ib_cfg_name_strings[] = {
5926         "HFI1_IB_CFG_LIDLMC",
5927         "HFI1_IB_CFG_LWID_DG_ENB",
5928         "HFI1_IB_CFG_LWID_ENB",
5929         "HFI1_IB_CFG_LWID",
5930         "HFI1_IB_CFG_SPD_ENB",
5931         "HFI1_IB_CFG_SPD",
5932         "HFI1_IB_CFG_RXPOL_ENB",
5933         "HFI1_IB_CFG_LREV_ENB",
5934         "HFI1_IB_CFG_LINKLATENCY",
5935         "HFI1_IB_CFG_HRTBT",
5936         "HFI1_IB_CFG_OP_VLS",
5937         "HFI1_IB_CFG_VL_HIGH_CAP",
5938         "HFI1_IB_CFG_VL_LOW_CAP",
5939         "HFI1_IB_CFG_OVERRUN_THRESH",
5940         "HFI1_IB_CFG_PHYERR_THRESH",
5941         "HFI1_IB_CFG_LINKDEFAULT",
5942         "HFI1_IB_CFG_PKEYS",
5943         "HFI1_IB_CFG_MTU",
5944         "HFI1_IB_CFG_LSTATE",
5945         "HFI1_IB_CFG_VL_HIGH_LIMIT",
5946         "HFI1_IB_CFG_PMA_TICKS",
5947         "HFI1_IB_CFG_PORT"
5948 };
5949
5950 static const char *ib_cfg_name(int which)
5951 {
5952         if (which < 0 || which >= ARRAY_SIZE(ib_cfg_name_strings))
5953                 return "invalid";
5954         return ib_cfg_name_strings[which];
5955 }
5956
5957 int hfi1_get_ib_cfg(struct hfi1_pportdata *ppd, int which)
5958 {
5959         struct hfi1_devdata *dd = ppd->dd;
5960         int val = 0;
5961
5962         switch (which) {
5963         case HFI1_IB_CFG_LWID_ENB: /* allowed Link-width */
5964                 val = ppd->link_width_enabled;
5965                 break;
5966         case HFI1_IB_CFG_LWID: /* currently active Link-width */
5967                 val = ppd->link_width_active;
5968                 break;
5969         case HFI1_IB_CFG_SPD_ENB: /* allowed Link speeds */
5970                 val = ppd->link_speed_enabled;
5971                 break;
5972         case HFI1_IB_CFG_SPD: /* current Link speed */
5973                 val = ppd->link_speed_active;
5974                 break;
5975
5976         case HFI1_IB_CFG_RXPOL_ENB: /* Auto-RX-polarity enable */
5977         case HFI1_IB_CFG_LREV_ENB: /* Auto-Lane-reversal enable */
5978         case HFI1_IB_CFG_LINKLATENCY:
5979                 goto unimplemented;
5980
5981         case HFI1_IB_CFG_OP_VLS:
5982                 val = ppd->vls_operational;
5983                 break;
5984         case HFI1_IB_CFG_VL_HIGH_CAP: /* VL arb high priority table size */
5985                 val = VL_ARB_HIGH_PRIO_TABLE_SIZE;
5986                 break;
5987         case HFI1_IB_CFG_VL_LOW_CAP: /* VL arb low priority table size */
5988                 val = VL_ARB_LOW_PRIO_TABLE_SIZE;
5989                 break;
5990         case HFI1_IB_CFG_OVERRUN_THRESH: /* IB overrun threshold */
5991                 val = ppd->overrun_threshold;
5992                 break;
5993         case HFI1_IB_CFG_PHYERR_THRESH: /* IB PHY error threshold */
5994                 val = ppd->phy_error_threshold;
5995                 break;
5996         case HFI1_IB_CFG_LINKDEFAULT: /* IB link default (sleep/poll) */
5997                 val = dd->link_default;
5998                 break;
5999
6000         case HFI1_IB_CFG_HRTBT: /* Heartbeat off/enable/auto */
6001         case HFI1_IB_CFG_PMA_TICKS:
6002         default:
6003 unimplemented:
6004                 if (HFI1_CAP_IS_KSET(PRINT_UNIMPL))
6005                         dd_dev_info(
6006                                 dd,
6007                                 "%s: which %s: not implemented\n",
6008                                 __func__,
6009                                 ib_cfg_name(which));
6010                 break;
6011         }
6012
6013         return val;
6014 }
6015
6016 /*
6017  * The largest MAD packet size.
6018  */
6019 #define MAX_MAD_PACKET 2048
6020
6021 /*
6022  * Return the maximum header bytes that can go on the _wire_
6023  * for this device. This count includes the ICRC which is
6024  * not part of the packet held in memory but it is appended
6025  * by the HW.
6026  * This is dependent on the device's receive header entry size.
6027  * HFI allows this to be set per-receive context, but the
6028  * driver presently enforces a global value.
6029  */
6030 u32 lrh_max_header_bytes(struct hfi1_devdata *dd)
6031 {
6032         /*
6033          * The maximum non-payload (non-MTU) bytes in LRH.PktLen are
6034          * the Receive Header Entry Size minus the PBC (or RHF) size
6035          * plus one DW for the ICRC appended by HW.
6036          *
6037          * dd->rcd[0].rcvhdrqentsize is in DW.
6038          * We use rcd[0] as all context will have the same value. Also,
6039          * the first kernel context would have been allocated by now so
6040          * we are guaranteed a valid value.
6041          */
6042         return (dd->rcd[0]->rcvhdrqentsize - 2/*PBC/RHF*/ + 1/*ICRC*/) << 2;
6043 }
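
/*
 * Worked example (illustrative): with rcvhdrqentsize == 32 DWs the
 * maximum wire header is (32 - 2 + 1) * 4 == 124 bytes.
 */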
6044
6045 /*
6046  * Set Send Length
6047  * @ppd - per port data
6048  *
6049  * Set the MTU by limiting how many DWs may be sent.  The SendLenCheck*
6050  * registers compare against LRH.PktLen, so use the max bytes included
6051  * in the LRH.
6052  *
6053  * This routine changes all VL values except VL15, which it maintains at
6054  * the same value.
6055  */
6056 static void set_send_length(struct hfi1_pportdata *ppd)
6057 {
6058         struct hfi1_devdata *dd = ppd->dd;
6059         u32 max_hb = lrh_max_header_bytes(dd), maxvlmtu = 0, dcmtu;
6060         u64 len1 = 0, len2 = (((dd->vld[15].mtu + max_hb) >> 2)
6061                               & SEND_LEN_CHECK1_LEN_VL15_MASK) <<
6062                 SEND_LEN_CHECK1_LEN_VL15_SHIFT;
6063         int i;
6064
6065         for (i = 0; i < ppd->vls_supported; i++) {
6066                 if (dd->vld[i].mtu > maxvlmtu)
6067                         maxvlmtu = dd->vld[i].mtu;
6068                 if (i <= 3)
6069                         len1 |= (((dd->vld[i].mtu + max_hb) >> 2)
6070                                  & SEND_LEN_CHECK0_LEN_VL0_MASK) <<
6071                                 ((i % 4) * SEND_LEN_CHECK0_LEN_VL1_SHIFT);
6072                 else
6073                         len2 |= (((dd->vld[i].mtu + max_hb) >> 2)
6074                                  & SEND_LEN_CHECK1_LEN_VL4_MASK) <<
6075                                 ((i % 4) * SEND_LEN_CHECK1_LEN_VL5_SHIFT);
6076         }
6077         write_csr(dd, SEND_LEN_CHECK0, len1);
6078         write_csr(dd, SEND_LEN_CHECK1, len2);
6079         /* adjust kernel credit return thresholds based on new MTUs */
6080         /* all kernel receive contexts have the same hdrqentsize */
6081         for (i = 0; i < ppd->vls_supported; i++) {
6082                 sc_set_cr_threshold(dd->vld[i].sc,
6083                         sc_mtu_to_threshold(dd->vld[i].sc, dd->vld[i].mtu,
6084                                 dd->rcd[0]->rcvhdrqentsize));
6085         }
6086         sc_set_cr_threshold(dd->vld[15].sc,
6087                 sc_mtu_to_threshold(dd->vld[15].sc, dd->vld[15].mtu,
6088                         dd->rcd[0]->rcvhdrqentsize));
6089
6090         /* Adjust maximum MTU for the port in DC */
6091         dcmtu = maxvlmtu == 10240 ? DCC_CFG_PORT_MTU_CAP_10240 :
6092                 (ilog2(maxvlmtu >> 8) + 1);
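        /*
         * Worked example (illustrative): maxvlmtu == 4096 encodes as
         * ilog2(4096 >> 8) + 1 == 5; a 10240-byte MTU uses the
         * dedicated DCC_CFG_PORT_MTU_CAP_10240 encoding.
         */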
6093         len1 = read_csr(ppd->dd, DCC_CFG_PORT_CONFIG);
6094         len1 &= ~DCC_CFG_PORT_CONFIG_MTU_CAP_SMASK;
6095         len1 |= ((u64)dcmtu & DCC_CFG_PORT_CONFIG_MTU_CAP_MASK) <<
6096                 DCC_CFG_PORT_CONFIG_MTU_CAP_SHIFT;
6097         write_csr(ppd->dd, DCC_CFG_PORT_CONFIG, len1);
6098 }
6099
6100 static void set_lidlmc(struct hfi1_pportdata *ppd)
6101 {
6102         int i;
6103         u64 sreg = 0;
6104         struct hfi1_devdata *dd = ppd->dd;
6105         u32 mask = ~((1U << ppd->lmc) - 1);
6106         u64 c1 = read_csr(ppd->dd, DCC_CFG_PORT_CONFIG1);
6107
6108         if (dd->hfi1_snoop.mode_flag)
6109                 dd_dev_info(dd, "Set lid/lmc while snooping");
6110
6111         c1 &= ~(DCC_CFG_PORT_CONFIG1_TARGET_DLID_SMASK
6112                 | DCC_CFG_PORT_CONFIG1_DLID_MASK_SMASK);
6113         c1 |= ((ppd->lid & DCC_CFG_PORT_CONFIG1_TARGET_DLID_MASK)
6114                         << DCC_CFG_PORT_CONFIG1_TARGET_DLID_SHIFT) |
6115               ((mask & DCC_CFG_PORT_CONFIG1_DLID_MASK_MASK)
6116                         << DCC_CFG_PORT_CONFIG1_DLID_MASK_SHIFT);
6117         write_csr(ppd->dd, DCC_CFG_PORT_CONFIG1, c1);
6118
6119         /*
6120          * Iterate over all the send contexts and set their SLID check
6121          */
6122         sreg = ((mask & SEND_CTXT_CHECK_SLID_MASK_MASK) <<
6123                         SEND_CTXT_CHECK_SLID_MASK_SHIFT) |
6124                (((ppd->lid & mask) & SEND_CTXT_CHECK_SLID_VALUE_MASK) <<
6125                         SEND_CTXT_CHECK_SLID_VALUE_SHIFT);
6126
6127         for (i = 0; i < dd->chip_send_contexts; i++) {
6128                 hfi1_cdbg(LINKVERB, "SendContext[%d].SLID_CHECK = 0x%x",
6129                           i, (u32)sreg);
6130                 write_kctxt_csr(dd, i, SEND_CTXT_CHECK_SLID, sreg);
6131         }
6132
6133         /* Now we have to do the same thing for the sdma engines */
6134         sdma_update_lmc(dd, mask, ppd->lid);
6135 }
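
/*
 * Worked example (illustrative): with lid 0x20 and lmc 2, mask is ~0x3,
 * so the DLID match covers 0x20-0x23 and each send context's SLID
 * check accepts the same range.
 */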
6136
6137 static int wait_phy_linkstate(struct hfi1_devdata *dd, u32 state, u32 msecs)
6138 {
6139         unsigned long timeout;
6140         u32 curr_state;
6141
6142         timeout = jiffies + msecs_to_jiffies(msecs);
6143         while (1) {
6144                 curr_state = read_physical_state(dd);
6145                 if (curr_state == state)
6146                         break;
6147                 if (time_after(jiffies, timeout)) {
6148                         dd_dev_err(dd,
6149                                 "timeout waiting for phy link state 0x%x, current state is 0x%x\n",
6150                                 state, curr_state);
6151                         return -ETIMEDOUT;
6152                 }
6153                 usleep_range(1950, 2050); /* sleep 2ms-ish */
6154         }
6155
6156         return 0;
6157 }
6158
6159 /*
6160  * Helper for set_link_state().  Do not call except from that routine.
6161  * Expects ppd->hls_mutex to be held.
6162  *
6163  * @rem_reason value to be sent to the neighbor
6164  *
6165  * LinkDownReasons only set if transition succeeds.
6166  */
6167 static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
6168 {
6169         struct hfi1_devdata *dd = ppd->dd;
6170         u32 pstate, previous_state;
6171         u32 last_local_state;
6172         u32 last_remote_state;
6173         int ret;
6174         int do_transition;
6175         int do_wait;
6176
6177         previous_state = ppd->host_link_state;
6178         ppd->host_link_state = HLS_GOING_OFFLINE;
6179         pstate = read_physical_state(dd);
6180         if (pstate == PLS_OFFLINE) {
6181                 do_transition = 0;      /* in right state */
6182                 do_wait = 0;            /* ...no need to wait */
6183         } else if ((pstate & 0xff) == PLS_OFFLINE) {
6184                 do_transition = 0;      /* in an offline transient state */
6185                 do_wait = 1;            /* ...wait for it to settle */
6186         } else {
6187                 do_transition = 1;      /* need to move to offline */
6188                 do_wait = 1;            /* ...will need to wait */
6189         }
6190
6191         if (do_transition) {
6192                 ret = set_physical_link_state(dd,
6193                         PLS_OFFLINE | (rem_reason << 8));
6194
6195                 if (ret != HCMD_SUCCESS) {
6196                         dd_dev_err(dd,
6197                                 "Failed to transition to Offline link state, return %d\n",
6198                                 ret);
6199                         return -EINVAL;
6200                 }
6201                 if (ppd->offline_disabled_reason == OPA_LINKDOWN_REASON_NONE)
6202                         ppd->offline_disabled_reason =
6203                         OPA_LINKDOWN_REASON_TRANSIENT;
6204         }
6205
6206         if (do_wait) {
6207                 /* it can take a while for the link to go down */
6208                 ret = wait_phy_linkstate(dd, PLS_OFFLINE, 5000);
6209                 if (ret < 0)
6210                         return ret;
6211         }
6212
6213         /* make sure the logical state is also down */
6214         wait_logical_linkstate(ppd, IB_PORT_DOWN, 1000);
6215
6216         /*
6217          * Now in charge of LCB - must be after the physical state is
6218          * offline.quiet and before host_link_state is changed.
6219          */
6220         set_host_lcb_access(dd);
6221         write_csr(dd, DC_LCB_ERR_EN, ~0ull); /* watch LCB errors */
6222         ppd->host_link_state = HLS_LINK_COOLDOWN; /* LCB access allowed */
6223
6224         /*
6225          * The LNI has a mandatory wait time after the physical state
6226          * moves to Offline.Quiet.  The wait time may be different
6227          * depending on how the link went down.  The 8051 firmware
6228          * will observe the needed wait time and only move to ready
6229          * when that is completed.  The largest of the quiet timeouts
6230          * is 2.5s, so wait that long and then a bit more.
6231          */
6232         ret = wait_fm_ready(dd, 3000);
6233         if (ret) {
6234                 dd_dev_err(dd,
6235                         "After going offline, timed out waiting for the 8051 to become ready to accept host requests\n");
6236                 /* state is really offline, so make it so */
6237                 ppd->host_link_state = HLS_DN_OFFLINE;
6238                 return ret;
6239         }
6240
6241         /*
6242          * The state is now offline and the 8051 is ready to accept host
6243          * requests.
6244          *      - change our state
6245          *      - notify others if we were previously in a linkup state
6246          */
6247         ppd->host_link_state = HLS_DN_OFFLINE;
6248         if (previous_state & HLS_UP) {
6249                 /* went down while link was up */
6250                 handle_linkup_change(dd, 0);
6251         } else if (previous_state
6252                         & (HLS_DN_POLL | HLS_VERIFY_CAP | HLS_GOING_UP)) {
6253                 /* went down while attempting link up */
6254                 /* byte 1 of last_*_state is the failure reason */
6255                 read_last_local_state(dd, &last_local_state);
6256                 read_last_remote_state(dd, &last_remote_state);
6257                 dd_dev_err(dd,
6258                         "LNI failure last states: local 0x%08x, remote 0x%08x\n",
6259                         last_local_state, last_remote_state);
6260         }
6261
6262         /* the active link width (downgrade) is 0 on link down */
6263         ppd->link_width_active = 0;
6264         ppd->link_width_downgrade_tx_active = 0;
6265         ppd->link_width_downgrade_rx_active = 0;
6266         ppd->current_egress_rate = 0;
6267         return 0;
6268 }
6269
6270 /* return the link state name */
6271 static const char *link_state_name(u32 state)
6272 {
6273         const char *name;
6274         int n = ilog2(state);
6275         static const char * const names[] = {
6276                 [__HLS_UP_INIT_BP]       = "INIT",
6277                 [__HLS_UP_ARMED_BP]      = "ARMED",
6278                 [__HLS_UP_ACTIVE_BP]     = "ACTIVE",
6279                 [__HLS_DN_DOWNDEF_BP]    = "DOWNDEF",
6280                 [__HLS_DN_POLL_BP]       = "POLL",
6281                 [__HLS_DN_DISABLE_BP]    = "DISABLE",
6282                 [__HLS_DN_OFFLINE_BP]    = "OFFLINE",
6283                 [__HLS_VERIFY_CAP_BP]    = "VERIFY_CAP",
6284                 [__HLS_GOING_UP_BP]      = "GOING_UP",
6285                 [__HLS_GOING_OFFLINE_BP] = "GOING_OFFLINE",
6286                 [__HLS_LINK_COOLDOWN_BP] = "LINK_COOLDOWN"
6287         };
6288
6289         name = n < ARRAY_SIZE(names) ? names[n] : NULL;
6290         return name ? name : "unknown";
6291 }
6292
6293 /* return the link state reason name */
6294 static const char *link_state_reason_name(struct hfi1_pportdata *ppd, u32 state)
6295 {
6296         if (state == HLS_UP_INIT) {
6297                 switch (ppd->linkinit_reason) {
6298                 case OPA_LINKINIT_REASON_LINKUP:
6299                         return "(LINKUP)";
6300                 case OPA_LINKINIT_REASON_FLAPPING:
6301                         return "(FLAPPING)";
6302                 case OPA_LINKINIT_OUTSIDE_POLICY:
6303                         return "(OUTSIDE_POLICY)";
6304                 case OPA_LINKINIT_QUARANTINED:
6305                         return "(QUARANTINED)";
6306                 case OPA_LINKINIT_INSUFIC_CAPABILITY:
6307                         return "(INSUFIC_CAPABILITY)";
6308                 default:
6309                         break;
6310                 }
6311         }
6312         return "";
6313 }
6314
6315 /*
6316  * driver_physical_state - convert the driver's notion of a port's
6317  * state (an HLS_*) into a physical state (a {IB,OPA}_PORTPHYSSTATE_*).
6318  * Return -1 (converted to a u32) to indicate error.
6319  */
6320 u32 driver_physical_state(struct hfi1_pportdata *ppd)
6321 {
6322         switch (ppd->host_link_state) {
6323         case HLS_UP_INIT:
6324         case HLS_UP_ARMED:
6325         case HLS_UP_ACTIVE:
6326                 return IB_PORTPHYSSTATE_LINKUP;
6327         case HLS_DN_POLL:
6328                 return IB_PORTPHYSSTATE_POLLING;
6329         case HLS_DN_DISABLE:
6330                 return IB_PORTPHYSSTATE_DISABLED;
6331         case HLS_DN_OFFLINE:
6332                 return OPA_PORTPHYSSTATE_OFFLINE;
6333         case HLS_VERIFY_CAP:
6334                 return IB_PORTPHYSSTATE_POLLING;
6335         case HLS_GOING_UP:
6336                 return IB_PORTPHYSSTATE_POLLING;
6337         case HLS_GOING_OFFLINE:
6338                 return OPA_PORTPHYSSTATE_OFFLINE;
6339         case HLS_LINK_COOLDOWN:
6340                 return OPA_PORTPHYSSTATE_OFFLINE;
6341         case HLS_DN_DOWNDEF:
6342         default:
6343                 dd_dev_err(ppd->dd, "invalid host_link_state 0x%x\n",
6344                            ppd->host_link_state);
6345                 return  -1;
6346         }
6347 }
6348
6349 /*
6350  * driver_logical_state - convert the driver's notion of a port's
6351  * state (an HLS_*) into a logical state (an IB_PORT_*).  Return -1
6352  * (converted to a u32) to indicate error.
6353  */
6354 u32 driver_logical_state(struct hfi1_pportdata *ppd)
6355 {
6356         if (ppd->host_link_state && !(ppd->host_link_state & HLS_UP))
6357                 return IB_PORT_DOWN;
6358
6359         switch (ppd->host_link_state & HLS_UP) {
6360         case HLS_UP_INIT:
6361                 return IB_PORT_INIT;
6362         case HLS_UP_ARMED:
6363                 return IB_PORT_ARMED;
6364         case HLS_UP_ACTIVE:
6365                 return IB_PORT_ACTIVE;
6366         default:
6367                 dd_dev_err(ppd->dd, "invalid host_link_state 0x%x\n",
6368                            ppd->host_link_state);
6369                 return -1;
6370         }
6371 }
6372
6373 void set_link_down_reason(struct hfi1_pportdata *ppd, u8 lcl_reason,
6374                           u8 neigh_reason, u8 rem_reason)
6375 {
6376         if (ppd->local_link_down_reason.latest == 0 &&
6377             ppd->neigh_link_down_reason.latest == 0) {
6378                 ppd->local_link_down_reason.latest = lcl_reason;
6379                 ppd->neigh_link_down_reason.latest = neigh_reason;
6380                 ppd->remote_link_down_reason = rem_reason;
6381         }
6382 }
6383
6384 /*
6385  * Change the physical and/or logical link state.
6386  *
6387  * Do not call this routine while inside an interrupt.  It contains
6388  * calls to routines that can take multiple seconds to finish.
6389  *
6390  * Returns 0 on success, -errno on failure.
6391  */
6392 int set_link_state(struct hfi1_pportdata *ppd, u32 state)
6393 {
6394         struct hfi1_devdata *dd = ppd->dd;
6395         struct ib_event event = {.device = NULL};
6396         int ret1, ret = 0;
6397         int was_up, is_down;
6398         int orig_new_state, poll_bounce;
6399
6400         mutex_lock(&ppd->hls_lock);
6401
6402         orig_new_state = state;
6403         if (state == HLS_DN_DOWNDEF)
6404                 state = dd->link_default;
6405
6406         /* interpret poll -> poll as a link bounce */
6407         poll_bounce = ppd->host_link_state == HLS_DN_POLL
6408                                 && state == HLS_DN_POLL;
6409
6410         dd_dev_info(dd, "%s: current %s, new %s %s%s\n", __func__,
6411                 link_state_name(ppd->host_link_state),
6412                 link_state_name(orig_new_state),
6413                 poll_bounce ? "(bounce) " : "",
6414                 link_state_reason_name(ppd, state));
6415
6416         was_up = !!(ppd->host_link_state & HLS_UP);
6417
6418         /*
6419          * If we're going to a (HLS_*) link state that implies the logical
6420          * link state is neither of (IB_PORT_ARMED, IB_PORT_ACTIVE), then
6421          * reset is_sm_config_started to 0.
6422          */
6423         if (!(state & (HLS_UP_ARMED | HLS_UP_ACTIVE)))
6424                 ppd->is_sm_config_started = 0;
6425
6426         /*
6427          * Do nothing if the states match.  Let a poll to poll link bounce
6428          * go through.
6429          */
6430         if (ppd->host_link_state == state && !poll_bounce)
6431                 goto done;
6432
6433         switch (state) {
6434         case HLS_UP_INIT:
6435                 if (ppd->host_link_state == HLS_DN_POLL && (quick_linkup
6436                             || dd->icode == ICODE_FUNCTIONAL_SIMULATOR)) {
6437                         /*
6438                          * Quick link up jumps from polling to here.
6439                          *
6440                          * Whether in normal or loopback mode, the
6441                          * simulator jumps from polling to link up.
6442                          * Accept that here.
6443                          */
6444                         /* OK */;
6445                 } else if (ppd->host_link_state != HLS_GOING_UP) {
6446                         goto unexpected;
6447                 }
6448
6449                 ppd->host_link_state = HLS_UP_INIT;
6450                 ret = wait_logical_linkstate(ppd, IB_PORT_INIT, 1000);
6451                 if (ret) {
6452                         /* logical state didn't change, stay at going_up */
6453                         ppd->host_link_state = HLS_GOING_UP;
6454                         dd_dev_err(dd,
6455                                 "%s: logical state did not change to INIT\n",
6456                                 __func__);
6457                 } else {
6458                         /* clear old transient LINKINIT_REASON code */
6459                         if (ppd->linkinit_reason >= OPA_LINKINIT_REASON_CLEAR)
6460                                 ppd->linkinit_reason =
6461                                         OPA_LINKINIT_REASON_LINKUP;
6462
6463                         /* enable the port */
6464                         add_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
6465
6466                         handle_linkup_change(dd, 1);
6467                 }
6468                 break;
6469         case HLS_UP_ARMED:
6470                 if (ppd->host_link_state != HLS_UP_INIT)
6471                         goto unexpected;
6472
6473                 ppd->host_link_state = HLS_UP_ARMED;
6474                 set_logical_state(dd, LSTATE_ARMED);
6475                 ret = wait_logical_linkstate(ppd, IB_PORT_ARMED, 1000);
6476                 if (ret) {
6477                         /* logical state didn't change, stay at init */
6478                         ppd->host_link_state = HLS_UP_INIT;
6479                         dd_dev_err(dd,
6480                                 "%s: logical state did not change to ARMED\n",
6481                                 __func__);
6482                 }
6483                 /*
6484                  * The simulator does not currently implement SMA messages,
6485                  * so neighbor_normal is not set.  Set it here when we first
6486                  * move to Armed.
6487                  */
6488                 if (dd->icode == ICODE_FUNCTIONAL_SIMULATOR)
6489                         ppd->neighbor_normal = 1;
6490                 break;
6491         case HLS_UP_ACTIVE:
6492                 if (ppd->host_link_state != HLS_UP_ARMED)
6493                         goto unexpected;
6494
6495                 ppd->host_link_state = HLS_UP_ACTIVE;
6496                 set_logical_state(dd, LSTATE_ACTIVE);
6497                 ret = wait_logical_linkstate(ppd, IB_PORT_ACTIVE, 1000);
6498                 if (ret) {
6499                         /* logical state didn't change, stay at armed */
6500                         ppd->host_link_state = HLS_UP_ARMED;
6501                         dd_dev_err(dd,
6502                                 "%s: logical state did not change to ACTIVE\n",
6503                                 __func__);
6504                 } else {
6506                         /* tell all engines to go running */
6507                         sdma_all_running(dd);
6508
6509                         /* Signal the IB layer that the port has gone active */
6510                         event.device = &dd->verbs_dev.ibdev;
6511                         event.element.port_num = ppd->port;
6512                         event.event = IB_EVENT_PORT_ACTIVE;
6513                 }
6514                 break;
6515         case HLS_DN_POLL:
6516                 if ((ppd->host_link_state == HLS_DN_DISABLE ||
6517                      ppd->host_link_state == HLS_DN_OFFLINE) &&
6518                     dd->dc_shutdown)
6519                         dc_start(dd);
6520                 /* Hand LED control to the DC */
6521                 write_csr(dd, DCC_CFG_LED_CNTRL, 0);
6522
6523                 if (ppd->host_link_state != HLS_DN_OFFLINE) {
6524                         u8 tmp = ppd->link_enabled;
6525
6526                         ret = goto_offline(ppd, ppd->remote_link_down_reason);
6527                         if (ret) {
6528                                 ppd->link_enabled = tmp;
6529                                 break;
6530                         }
6531                         ppd->remote_link_down_reason = 0;
6532
6533                         if (ppd->driver_link_ready)
6534                                 ppd->link_enabled = 1;
6535                 }
6536
6537                 ret = set_local_link_attributes(ppd);
6538                 if (ret)
6539                         break;
6540
6541                 ppd->port_error_action = 0;
6542                 ppd->host_link_state = HLS_DN_POLL;
6543
6544                 if (quick_linkup) {
6545                         /* quick linkup does not go into polling */
6546                         ret = do_quick_linkup(dd);
6547                 } else {
6548                         ret1 = set_physical_link_state(dd, PLS_POLLING);
6549                         if (ret1 != HCMD_SUCCESS) {
6550                                 dd_dev_err(dd,
6551                                         "Failed to transition to Polling link state, return 0x%x\n",
6552                                         ret1);
6553                                 ret = -EINVAL;
6554                         }
6555                 }
6556                 ppd->offline_disabled_reason = OPA_LINKDOWN_REASON_NONE;
6557                 /*
6558                  * If an error occurred above, go back to offline.  The
6559                  * caller may reschedule another attempt.
6560                  */
6561                 if (ret)
6562                         goto_offline(ppd, 0);
6563                 break;
6564         case HLS_DN_DISABLE:
6565                 /* link is disabled */
6566                 ppd->link_enabled = 0;
6567
6568                 /* allow any state to transition to disabled */
6569
6570                 /* must transition to offline first */
6571                 if (ppd->host_link_state != HLS_DN_OFFLINE) {
6572                         ret = goto_offline(ppd, ppd->remote_link_down_reason);
6573                         if (ret)
6574                                 break;
6575                         ppd->remote_link_down_reason = 0;
6576                 }
6577
6578                 ret1 = set_physical_link_state(dd, PLS_DISABLED);
6579                 if (ret1 != HCMD_SUCCESS) {
6580                         dd_dev_err(dd,
6581                                 "Failed to transition to Disabled link state, return 0x%x\n",
6582                                 ret1);
6583                         ret = -EINVAL;
6584                         break;
6585                 }
6586                 ppd->host_link_state = HLS_DN_DISABLE;
6587                 dc_shutdown(dd);
6588                 break;
6589         case HLS_DN_OFFLINE:
6590                 if (ppd->host_link_state == HLS_DN_DISABLE)
6591                         dc_start(dd);
6592
6593                 /* allow any state to transition to offline */
6594                 ret = goto_offline(ppd, ppd->remote_link_down_reason);
6595                 if (!ret)
6596                         ppd->remote_link_down_reason = 0;
6597                 break;
6598         case HLS_VERIFY_CAP:
6599                 if (ppd->host_link_state != HLS_DN_POLL)
6600                         goto unexpected;
6601                 ppd->host_link_state = HLS_VERIFY_CAP;
6602                 break;
6603         case HLS_GOING_UP:
6604                 if (ppd->host_link_state != HLS_VERIFY_CAP)
6605                         goto unexpected;
6606
6607                 ret1 = set_physical_link_state(dd, PLS_LINKUP);
6608                 if (ret1 != HCMD_SUCCESS) {
6609                         dd_dev_err(dd,
6610                                 "Failed to transition to link up state, return 0x%x\n",
6611                                 ret1);
6612                         ret = -EINVAL;
6613                         break;
6614                 }
6615                 ppd->host_link_state = HLS_GOING_UP;
6616                 break;
6617
6618         case HLS_GOING_OFFLINE:         /* transient within goto_offline() */
6619         case HLS_LINK_COOLDOWN:         /* transient within goto_offline() */
6620         default:
6621                 dd_dev_info(dd, "%s: state 0x%x: not supported\n",
6622                         __func__, state);
6623                 ret = -EINVAL;
6624                 break;
6625         }
6626
6627         is_down = !!(ppd->host_link_state & (HLS_DN_POLL |
6628                         HLS_DN_DISABLE | HLS_DN_OFFLINE));
6629
6630         if (was_up && is_down && ppd->local_link_down_reason.sma == 0 &&
6631             ppd->neigh_link_down_reason.sma == 0) {
6632                 ppd->local_link_down_reason.sma =
6633                   ppd->local_link_down_reason.latest;
6634                 ppd->neigh_link_down_reason.sma =
6635                   ppd->neigh_link_down_reason.latest;
6636         }
6637
6638         goto done;
6639
6640 unexpected:
6641         dd_dev_err(dd, "%s: unexpected state transition from %s to %s\n",
6642                 __func__, link_state_name(ppd->host_link_state),
6643                 link_state_name(state));
6644         ret = -EINVAL;
6645
6646 done:
6647         mutex_unlock(&ppd->hls_lock);
6648
6649         if (event.device)
6650                 ib_dispatch_event(&event);
6651
6652         return ret;
6653 }
6654
6655 int hfi1_set_ib_cfg(struct hfi1_pportdata *ppd, int which, u32 val)
6656 {
6657         u64 reg;
6658         int ret = 0;
6659
6660         switch (which) {
6661         case HFI1_IB_CFG_LIDLMC:
6662                 set_lidlmc(ppd);
6663                 break;
6664         case HFI1_IB_CFG_VL_HIGH_LIMIT:
6665                 /*
6666                  * The VL Arbitrator high limit is sent in units of 4k
6667                  * bytes, while HFI stores it in units of 64 bytes.
6668                  */
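                /* e.g. a limit of 2 (2 * 4096 bytes) becomes 128 64-byte units */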
6669                 val *= 4096/64;
6670                 reg = ((u64)val & SEND_HIGH_PRIORITY_LIMIT_LIMIT_MASK)
6671                         << SEND_HIGH_PRIORITY_LIMIT_LIMIT_SHIFT;
6672                 write_csr(ppd->dd, SEND_HIGH_PRIORITY_LIMIT, reg);
6673                 break;
6674         case HFI1_IB_CFG_LINKDEFAULT: /* IB link default (sleep/poll) */
6675                 /* HFI only supports POLL as the default link down state */
6676                 if (val != HLS_DN_POLL)
6677                         ret = -EINVAL;
6678                 break;
6679         case HFI1_IB_CFG_OP_VLS:
6680                 if (ppd->vls_operational != val) {
6681                         ppd->vls_operational = val;
6682                         if (!ppd->port)
6683                                 ret = -EINVAL;
6684                         else
6685                                 ret = sdma_map_init(
6686                                         ppd->dd,
6687                                         ppd->port - 1,
6688                                         val,
6689                                         NULL);
6690                 }
6691                 break;
6692         /*
6693          * For link width, link width downgrade, and speed enable, always AND
6694          * the setting with what is actually supported.  This has two benefits.
6695          * First, enabled can't have unsupported values, no matter what the
6696          * SM or FM might want.  Second, the ALL_SUPPORTED wildcards that mean
6697          * "fill in with your supported value" have all the bits in the
6698          * field set, so simply ANDing with supported has the desired result.
6699          */
6700         case HFI1_IB_CFG_LWID_ENB: /* set allowed Link-width */
6701                 ppd->link_width_enabled = val & ppd->link_width_supported;
6702                 break;
6703         case HFI1_IB_CFG_LWID_DG_ENB: /* set allowed link width downgrade */
6704                 ppd->link_width_downgrade_enabled =
6705                                 val & ppd->link_width_downgrade_supported;
6706                 break;
6707         case HFI1_IB_CFG_SPD_ENB: /* allowed Link speeds */
6708                 ppd->link_speed_enabled = val & ppd->link_speed_supported;
6709                 break;
6710         case HFI1_IB_CFG_OVERRUN_THRESH: /* IB overrun threshold */
6711                 /*
6712                  * HFI does not follow IB specs; save this value
6713                  * so we can report it if asked.
6714                  */
6715                 ppd->overrun_threshold = val;
6716                 break;
6717         case HFI1_IB_CFG_PHYERR_THRESH: /* IB PHY error threshold */
6718                 /*
6719                  * HFI does not follow IB specs; save this value
6720                  * so we can report it if asked.
6721                  */
6722                 ppd->phy_error_threshold = val;
6723                 break;
6724
6725         case HFI1_IB_CFG_MTU:
6726                 set_send_length(ppd);
6727                 break;
6728
6729         case HFI1_IB_CFG_PKEYS:
6730                 if (HFI1_CAP_IS_KSET(PKEY_CHECK))
6731                         set_partition_keys(ppd);
6732                 break;
6733
6734         default:
6735                 if (HFI1_CAP_IS_KSET(PRINT_UNIMPL))
6736                         dd_dev_info(ppd->dd,
6737                           "%s: which %s, val 0x%x: not implemented\n",
6738                           __func__, ib_cfg_name(which), val);
6739                 break;
6740         }
6741         return ret;
6742 }
6743
6744 /* begin functions related to vl arbitration table caching */
6745 static void init_vl_arb_caches(struct hfi1_pportdata *ppd)
6746 {
6747         int i;
6748
6749         BUILD_BUG_ON(VL_ARB_TABLE_SIZE !=
6750                         VL_ARB_LOW_PRIO_TABLE_SIZE);
6751         BUILD_BUG_ON(VL_ARB_TABLE_SIZE !=
6752                         VL_ARB_HIGH_PRIO_TABLE_SIZE);
6753
6754         /*
6755          * Note that we always return values directly from the
6756          * 'vl_arb_cache' (and do no CSR reads) in response to a
6757          * 'Get(VLArbTable)'. This is obviously correct after a
6758          * 'Set(VLArbTable)', since the cache will then be up to
6759          * date. But it's also correct prior to any 'Set(VLArbTable)'
6760          * since then both the cache, and the relevant h/w registers
6761          * will be zeroed.
6762          */
6763
6764         for (i = 0; i < MAX_PRIO_TABLE; i++)
6765                 spin_lock_init(&ppd->vl_arb_cache[i].lock);
6766 }
6767
6768 /*
6769  * vl_arb_lock_cache
6770  *
6771  * All other vl_arb_* functions should be called only after locking
6772  * the cache.
6773  */
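/*
 * Typical usage, as in fm_get_table()/fm_set_table() below:
 *
 *      vlc = vl_arb_lock_cache(ppd, LO_PRIO_TABLE);
 *      vl_arb_get_cache(vlc, t);
 *      vl_arb_unlock_cache(ppd, LO_PRIO_TABLE);
 */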
6774 static inline struct vl_arb_cache *
6775 vl_arb_lock_cache(struct hfi1_pportdata *ppd, int idx)
6776 {
6777         if (idx != LO_PRIO_TABLE && idx != HI_PRIO_TABLE)
6778                 return NULL;
6779         spin_lock(&ppd->vl_arb_cache[idx].lock);
6780         return &ppd->vl_arb_cache[idx];
6781 }
6782
6783 static inline void vl_arb_unlock_cache(struct hfi1_pportdata *ppd, int idx)
6784 {
6785         spin_unlock(&ppd->vl_arb_cache[idx].lock);
6786 }
6787
6788 static void vl_arb_get_cache(struct vl_arb_cache *cache,
6789                              struct ib_vl_weight_elem *vl)
6790 {
6791         memcpy(vl, cache->table, VL_ARB_TABLE_SIZE * sizeof(*vl));
6792 }
6793
6794 static void vl_arb_set_cache(struct vl_arb_cache *cache,
6795                              struct ib_vl_weight_elem *vl)
6796 {
6797         memcpy(cache->table, vl, VL_ARB_TABLE_SIZE * sizeof(*vl));
6798 }
6799
6800 static int vl_arb_match_cache(struct vl_arb_cache *cache,
6801                               struct ib_vl_weight_elem *vl)
6802 {
6803         return !memcmp(cache->table, vl, VL_ARB_TABLE_SIZE * sizeof(*vl));
6804 }
6805 /* end functions related to vl arbitration table caching */
6806
6807 static int set_vl_weights(struct hfi1_pportdata *ppd, u32 target,
6808                           u32 size, struct ib_vl_weight_elem *vl)
6809 {
6810         struct hfi1_devdata *dd = ppd->dd;
6811         u64 reg;
6812         unsigned int i, is_up = 0;
6813         int drain, ret = 0;
6814
6815         mutex_lock(&ppd->hls_lock);
6816
6817         if (ppd->host_link_state & HLS_UP)
6818                 is_up = 1;
6819
6820         drain = !is_ax(dd) && is_up;
6821
6822         if (drain)
6823                 /*
6824                  * Before adjusting VL arbitration weights, empty per-VL
6825                  * FIFOs, otherwise a packet whose VL weight is being
6826                  * set to 0 could get stuck in a FIFO with no chance to
6827                  * egress.
6828                  */
6829                 ret = stop_drain_data_vls(dd);
6830
6831         if (ret) {
6832                 dd_dev_err(
6833                         dd,
6834                         "%s: cannot stop/drain VLs - refusing to change VL arbitration weights\n",
6835                         __func__);
6836                 goto err;
6837         }
6838
6839         for (i = 0; i < size; i++, vl++) {
6840                 /*
6841                  * NOTE: The low priority shift and mask are used here, but
6842                  * they are the same for both the low and high registers.
6843                  */
6844                 reg = (((u64)vl->vl & SEND_LOW_PRIORITY_LIST_VL_MASK)
6845                                 << SEND_LOW_PRIORITY_LIST_VL_SHIFT)
6846                       | (((u64)vl->weight
6847                                 & SEND_LOW_PRIORITY_LIST_WEIGHT_MASK)
6848                                 << SEND_LOW_PRIORITY_LIST_WEIGHT_SHIFT);
6849                 write_csr(dd, target + (i * 8), reg);
6850         }
6851         pio_send_control(dd, PSC_GLOBAL_VLARB_ENABLE);
6852
6853         if (drain)
6854                 open_fill_data_vls(dd); /* reopen all VLs */
6855
6856 err:
6857         mutex_unlock(&ppd->hls_lock);
6858
6859         return ret;
6860 }
6861
6862 /*
6863  * Read one credit merge VL register.
6864  */
6865 static void read_one_cm_vl(struct hfi1_devdata *dd, u32 csr,
6866                            struct vl_limit *vll)
6867 {
6868         u64 reg = read_csr(dd, csr);
6869
6870         vll->dedicated = cpu_to_be16(
6871                 (reg >> SEND_CM_CREDIT_VL_DEDICATED_LIMIT_VL_SHIFT)
6872                 & SEND_CM_CREDIT_VL_DEDICATED_LIMIT_VL_MASK);
6873         vll->shared = cpu_to_be16(
6874                 (reg >> SEND_CM_CREDIT_VL_SHARED_LIMIT_VL_SHIFT)
6875                 & SEND_CM_CREDIT_VL_SHARED_LIMIT_VL_MASK);
6876 }
6877
6878 /*
6879  * Read the current credit merge limits.
6880  */
6881 static int get_buffer_control(struct hfi1_devdata *dd,
6882                               struct buffer_control *bc, u16 *overall_limit)
6883 {
6884         u64 reg;
6885         int i;
6886
6887         /* not all entries are filled in */
6888         memset(bc, 0, sizeof(*bc));
6889
6890         /* OPA and HFI have a 1-1 mapping */
6891         for (i = 0; i < TXE_NUM_DATA_VL; i++)
6892                 read_one_cm_vl(dd, SEND_CM_CREDIT_VL + (8*i), &bc->vl[i]);
6893
6894         /* NOTE: assumes that VL* and VL15 CSRs are bit-wise identical */
6895         read_one_cm_vl(dd, SEND_CM_CREDIT_VL15, &bc->vl[15]);
6896
6897         reg = read_csr(dd, SEND_CM_GLOBAL_CREDIT);
6898         bc->overall_shared_limit = cpu_to_be16(
6899                 (reg >> SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SHIFT)
6900                 & SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_MASK);
6901         if (overall_limit)
6902                 *overall_limit = (reg
6903                         >> SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SHIFT)
6904                         & SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_MASK;
6905         return sizeof(struct buffer_control);
6906 }
6907
6908 static int get_sc2vlnt(struct hfi1_devdata *dd, struct sc2vlnt *dp)
6909 {
6910         u64 reg;
6911         int i;
6912
6913         /* each register contains 16 SC->VLnt mappings, 4 bits each */
6914         reg = read_csr(dd, DCC_CFG_SC_VL_TABLE_15_0);
6915         for (i = 0; i < sizeof(u64); i++) {
6916                 u8 byte = *(((u8 *)&reg) + i);
6917
6918                 dp->vlnt[2 * i] = byte & 0xf;
6919                 dp->vlnt[(2 * i) + 1] = (byte & 0xf0) >> 4;
6920         }
6921
6922         reg = read_csr(dd, DCC_CFG_SC_VL_TABLE_31_16);
6923         for (i = 0; i < sizeof(u64); i++) {
6924                 u8 byte = *(((u8 *)&reg) + i);
6925
6926                 dp->vlnt[16 + (2 * i)] = byte & 0xf;
6927                 dp->vlnt[16 + (2 * i) + 1] = (byte & 0xf0) >> 4;
6928         }
6929         return sizeof(struct sc2vlnt);
6930 }
6931
6932 static void get_vlarb_preempt(struct hfi1_devdata *dd, u32 nelems,
6933                               struct ib_vl_weight_elem *vl)
6934 {
6935         unsigned int i;
6936
6937         for (i = 0; i < nelems; i++, vl++) {
6938                 vl->vl = 0xf;
6939                 vl->weight = 0;
6940         }
6941 }
6942
6943 static void set_sc2vlnt(struct hfi1_devdata *dd, struct sc2vlnt *dp)
6944 {
6945         write_csr(dd, DCC_CFG_SC_VL_TABLE_15_0,
6946                 DC_SC_VL_VAL(15_0,
6947                 0, dp->vlnt[0] & 0xf,
6948                 1, dp->vlnt[1] & 0xf,
6949                 2, dp->vlnt[2] & 0xf,
6950                 3, dp->vlnt[3] & 0xf,
6951                 4, dp->vlnt[4] & 0xf,
6952                 5, dp->vlnt[5] & 0xf,
6953                 6, dp->vlnt[6] & 0xf,
6954                 7, dp->vlnt[7] & 0xf,
6955                 8, dp->vlnt[8] & 0xf,
6956                 9, dp->vlnt[9] & 0xf,
6957                 10, dp->vlnt[10] & 0xf,
6958                 11, dp->vlnt[11] & 0xf,
6959                 12, dp->vlnt[12] & 0xf,
6960                 13, dp->vlnt[13] & 0xf,
6961                 14, dp->vlnt[14] & 0xf,
6962                 15, dp->vlnt[15] & 0xf));
6963         write_csr(dd, DCC_CFG_SC_VL_TABLE_31_16,
6964                 DC_SC_VL_VAL(31_16,
6965                 16, dp->vlnt[16] & 0xf,
6966                 17, dp->vlnt[17] & 0xf,
6967                 18, dp->vlnt[18] & 0xf,
6968                 19, dp->vlnt[19] & 0xf,
6969                 20, dp->vlnt[20] & 0xf,
6970                 21, dp->vlnt[21] & 0xf,
6971                 22, dp->vlnt[22] & 0xf,
6972                 23, dp->vlnt[23] & 0xf,
6973                 24, dp->vlnt[24] & 0xf,
6974                 25, dp->vlnt[25] & 0xf,
6975                 26, dp->vlnt[26] & 0xf,
6976                 27, dp->vlnt[27] & 0xf,
6977                 28, dp->vlnt[28] & 0xf,
6978                 29, dp->vlnt[29] & 0xf,
6979                 30, dp->vlnt[30] & 0xf,
6980                 31, dp->vlnt[31] & 0xf));
6981 }
6982
6983 static void nonzero_msg(struct hfi1_devdata *dd, int idx, const char *what,
6984                         u16 limit)
6985 {
6986         if (limit != 0)
6987                 dd_dev_info(dd, "Invalid %s limit %d on VL %d, ignoring\n",
6988                         what, (int)limit, idx);
6989 }
6990
6991 /* change only the shared limit portion of SendCmGlobalCredit */
6992 static void set_global_shared(struct hfi1_devdata *dd, u16 limit)
6993 {
6994         u64 reg;
6995
6996         reg = read_csr(dd, SEND_CM_GLOBAL_CREDIT);
6997         reg &= ~SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SMASK;
6998         reg |= (u64)limit << SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SHIFT;
6999         write_csr(dd, SEND_CM_GLOBAL_CREDIT, reg);
7000 }
7001
7002 /* change only the total credit limit portion of SendCmGlobalCredit */
7003 static void set_global_limit(struct hfi1_devdata *dd, u16 limit)
7004 {
7005         u64 reg;
7006
7007         reg = read_csr(dd, SEND_CM_GLOBAL_CREDIT);
7008         reg &= ~SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SMASK;
7009         reg |= (u64)limit << SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SHIFT;
7010         write_csr(dd, SEND_CM_GLOBAL_CREDIT, reg);
7011 }
7012
7013 /* set the given per-VL shared limit */
7014 static void set_vl_shared(struct hfi1_devdata *dd, int vl, u16 limit)
7015 {
7016         u64 reg;
7017         u32 addr;
7018
7019         if (vl < TXE_NUM_DATA_VL)
7020                 addr = SEND_CM_CREDIT_VL + (8 * vl);
7021         else
7022                 addr = SEND_CM_CREDIT_VL15;
7023
7024         reg = read_csr(dd, addr);
7025         reg &= ~SEND_CM_CREDIT_VL_SHARED_LIMIT_VL_SMASK;
7026         reg |= (u64)limit << SEND_CM_CREDIT_VL_SHARED_LIMIT_VL_SHIFT;
7027         write_csr(dd, addr, reg);
7028 }
7029
7030 /* set the given per-VL dedicated limit */
7031 static void set_vl_dedicated(struct hfi1_devdata *dd, int vl, u16 limit)
7032 {
7033         u64 reg;
7034         u32 addr;
7035
7036         if (vl < TXE_NUM_DATA_VL)
7037                 addr = SEND_CM_CREDIT_VL + (8 * vl);
7038         else
7039                 addr = SEND_CM_CREDIT_VL15;
7040
7041         reg = read_csr(dd, addr);
7042         reg &= ~SEND_CM_CREDIT_VL_DEDICATED_LIMIT_VL_SMASK;
7043         reg |= (u64)limit << SEND_CM_CREDIT_VL_DEDICATED_LIMIT_VL_SHIFT;
7044         write_csr(dd, addr, reg);
7045 }
7046
7047 /* spin until the given per-VL status mask bits clear */
7048 static void wait_for_vl_status_clear(struct hfi1_devdata *dd, u64 mask,
7049                                      const char *which)
7050 {
7051         unsigned long timeout;
7052         u64 reg;
7053
7054         timeout = jiffies + msecs_to_jiffies(VL_STATUS_CLEAR_TIMEOUT);
7055         while (1) {
7056                 reg = read_csr(dd, SEND_CM_CREDIT_USED_STATUS) & mask;
7057
7058                 if (reg == 0)
7059                         return; /* success */
7060                 if (time_after(jiffies, timeout))
7061                         break;          /* timed out */
7062                 udelay(1);
7063         }
7064
7065         dd_dev_err(dd,
7066                 "%s credit change status not clearing after %dms, mask 0x%llx, not clear 0x%llx\n",
7067                 which, VL_STATUS_CLEAR_TIMEOUT, mask, reg);
7068         /*
7069          * If this occurs, it is likely there was a credit loss on the link.
7070          * The only recovery from that is a link bounce.
7071          */
7072         dd_dev_err(dd,
7073                 "Continuing anyway.  A credit loss may occur.  Suggest a link bounce\n");
7074 }
7075
7076 /*
7077  * The number of credits on the VLs may be changed while everything
7078  * is "live", but the following algorithm must be followed due to
7079  * how the hardware is actually implemented.  In particular,
7080  * Return_Credit_Status[] is the only correct status check.
7081  *
7082  * if (reducing Global_Shared_Credit_Limit or any shared limit changing)
7083  *     set Global_Shared_Credit_Limit = 0
7084  *     use_all_vl = 1
7085  * mask0 = all VLs that are changing either dedicated or shared limits
7086  * set Shared_Limit[mask0] = 0
7087  * spin until Return_Credit_Status[use_all_vl ? all VL : mask0] == 0
7088  * if (changing any dedicated limit)
7089  *     mask1 = all VLs that are lowering dedicated limits
7090  *     lower Dedicated_Limit[mask1]
7091  *     spin until Return_Credit_Status[mask1] == 0
7092  *     raise Dedicated_Limits
7093  * raise Shared_Limits
7094  * raise Global_Shared_Credit_Limit
7095  *
7096  * lower = if the new limit is lower, set the limit to the new value
7097  * raise = if the new limit is higher than the current value (may be changed
7098  *      earlier in the algorithm), set the new limit to the new value
7099  */
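/*
 * Illustrative walk-through (values hypothetical): lowering only VL0's
 * dedicated limit from 100 to 50 puts VL0 in both mask0 and mask1:
 * VL0's shared limit is zeroed, we spin until VL0's
 * Return_Credit_Status bit clears, the dedicated limit is lowered to
 * 50, we spin again, and finally the shared limits (and the global
 * shared limit, if it was lowered) are restored.
 */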
7100 static int set_buffer_control(struct hfi1_devdata *dd,
7101                               struct buffer_control *new_bc)
7102 {
7103         u64 changing_mask, ld_mask, stat_mask;
7104         int change_count;
7105         int i, use_all_mask;
7106         int this_shared_changing;
7107         /*
7108          * A0: the variable any_shared_limit_changing below augments the
7109          * algorithm above.  It can be removed when A0 support is removed.
7110          */
7111         int any_shared_limit_changing;
7112         struct buffer_control cur_bc;
7113         u8 changing[OPA_MAX_VLS];
7114         u8 lowering_dedicated[OPA_MAX_VLS];
7115         u16 cur_total;
7116         u32 new_total = 0;
7117         const u64 all_mask =
7118         SEND_CM_CREDIT_USED_STATUS_VL0_RETURN_CREDIT_STATUS_SMASK
7119          | SEND_CM_CREDIT_USED_STATUS_VL1_RETURN_CREDIT_STATUS_SMASK
7120          | SEND_CM_CREDIT_USED_STATUS_VL2_RETURN_CREDIT_STATUS_SMASK
7121          | SEND_CM_CREDIT_USED_STATUS_VL3_RETURN_CREDIT_STATUS_SMASK
7122          | SEND_CM_CREDIT_USED_STATUS_VL4_RETURN_CREDIT_STATUS_SMASK
7123          | SEND_CM_CREDIT_USED_STATUS_VL5_RETURN_CREDIT_STATUS_SMASK
7124          | SEND_CM_CREDIT_USED_STATUS_VL6_RETURN_CREDIT_STATUS_SMASK
7125          | SEND_CM_CREDIT_USED_STATUS_VL7_RETURN_CREDIT_STATUS_SMASK
7126          | SEND_CM_CREDIT_USED_STATUS_VL15_RETURN_CREDIT_STATUS_SMASK;
7127
7128 #define valid_vl(idx) ((idx) < TXE_NUM_DATA_VL || (idx) == 15)
7129 #define NUM_USABLE_VLS 16       /* look at VL15 and less */
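/* i.e. scan VL0-VL15; valid_vl() skips the VLs in between that are unused */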
7130
7131
7132         /* find the new total credits, do sanity check on unused VLs */
7133         for (i = 0; i < OPA_MAX_VLS; i++) {
7134                 if (valid_vl(i)) {
7135                         new_total += be16_to_cpu(new_bc->vl[i].dedicated);
7136                         continue;
7137                 }
7138                 nonzero_msg(dd, i, "dedicated",
7139                         be16_to_cpu(new_bc->vl[i].dedicated));
7140                 nonzero_msg(dd, i, "shared",
7141                         be16_to_cpu(new_bc->vl[i].shared));
7142                 new_bc->vl[i].dedicated = 0;
7143                 new_bc->vl[i].shared = 0;
7144         }
7145         new_total += be16_to_cpu(new_bc->overall_shared_limit);
7146         if (new_total > (u32)dd->link_credits)
7147                 return -EINVAL;
7148         /* fetch the current values */
7149         get_buffer_control(dd, &cur_bc, &cur_total);
7150
7151         /*
7152          * Create the masks we will use.
7153          */
7154         memset(changing, 0, sizeof(changing));
7155         memset(lowering_dedicated, 0, sizeof(lowering_dedicated));
7156         /* NOTE: Assumes that the individual VL bits are adjacent and in
7157            increasing order */
7158         stat_mask =
7159                 SEND_CM_CREDIT_USED_STATUS_VL0_RETURN_CREDIT_STATUS_SMASK;
7160         changing_mask = 0;
7161         ld_mask = 0;
7162         change_count = 0;
7163         any_shared_limit_changing = 0;
7164         for (i = 0; i < NUM_USABLE_VLS; i++, stat_mask <<= 1) {
7165                 if (!valid_vl(i))
7166                         continue;
7167                 this_shared_changing = new_bc->vl[i].shared
7168                                                 != cur_bc.vl[i].shared;
7169                 if (this_shared_changing)
7170                         any_shared_limit_changing = 1;
7171                 if (new_bc->vl[i].dedicated != cur_bc.vl[i].dedicated
7172                                 || this_shared_changing) {
7173                         changing[i] = 1;
7174                         changing_mask |= stat_mask;
7175                         change_count++;
7176                 }
7177                 if (be16_to_cpu(new_bc->vl[i].dedicated) <
7178                                         be16_to_cpu(cur_bc.vl[i].dedicated)) {
7179                         lowering_dedicated[i] = 1;
7180                         ld_mask |= stat_mask;
7181                 }
7182         }
7183
7184         /* bracket the credit change with a total adjustment */
7185         if (new_total > cur_total)
7186                 set_global_limit(dd, new_total);
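        /* raised first so the per-VL increases below stay within the total */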
7187
7188         /*
7189          * Start the credit change algorithm.
7190          */
7191         use_all_mask = 0;
7192         if ((be16_to_cpu(new_bc->overall_shared_limit) <
7193                                 be16_to_cpu(cur_bc.overall_shared_limit))
7194                         || (is_a0(dd) && any_shared_limit_changing)) {
7195                 set_global_shared(dd, 0);
7196                 cur_bc.overall_shared_limit = 0;
7197                 use_all_mask = 1;
7198         }
7199
7200         for (i = 0; i < NUM_USABLE_VLS; i++) {
7201                 if (!valid_vl(i))
7202                         continue;
7203
7204                 if (changing[i]) {
7205                         set_vl_shared(dd, i, 0);
7206                         cur_bc.vl[i].shared = 0;
7207                 }
7208         }
7209
7210         wait_for_vl_status_clear(dd, use_all_mask ? all_mask : changing_mask,
7211                 "shared");
7212
7213         if (change_count > 0) {
7214                 for (i = 0; i < NUM_USABLE_VLS; i++) {
7215                         if (!valid_vl(i))
7216                                 continue;
7217
7218                         if (lowering_dedicated[i]) {
7219                                 set_vl_dedicated(dd, i,
7220                                         be16_to_cpu(new_bc->vl[i].dedicated));
7221                                 cur_bc.vl[i].dedicated =
7222                                                 new_bc->vl[i].dedicated;
7223                         }
7224                 }
7225
7226                 wait_for_vl_status_clear(dd, ld_mask, "dedicated");
7227
7228                 /* now raise all dedicated that are going up */
7229                 for (i = 0; i < NUM_USABLE_VLS; i++) {
7230                         if (!valid_vl(i))
7231                                 continue;
7232
7233                         if (be16_to_cpu(new_bc->vl[i].dedicated) >
7234                                         be16_to_cpu(cur_bc.vl[i].dedicated))
7235                                 set_vl_dedicated(dd, i,
7236                                         be16_to_cpu(new_bc->vl[i].dedicated));
7237                 }
7238         }
7239
7240         /* next raise all shared that are going up */
7241         for (i = 0; i < NUM_USABLE_VLS; i++) {
7242                 if (!valid_vl(i))
7243                         continue;
7244
7245                 if (be16_to_cpu(new_bc->vl[i].shared) >
7246                                 be16_to_cpu(cur_bc.vl[i].shared))
7247                         set_vl_shared(dd, i, be16_to_cpu(new_bc->vl[i].shared));
7248         }
7249
7250         /* finally raise the global shared */
7251         if (be16_to_cpu(new_bc->overall_shared_limit) >
7252                         be16_to_cpu(cur_bc.overall_shared_limit))
7253                 set_global_shared(dd,
7254                         be16_to_cpu(new_bc->overall_shared_limit));
7255
7256         /* bracket the credit change with a total adjustment */
7257         if (new_total < cur_total)
7258                 set_global_limit(dd, new_total);
7259         return 0;
7260 }
7261
7262 /*
7263  * Read the given fabric manager table. Return the size of the
7264  * table (in bytes) on success, and a negative error code on
7265  * failure.
7266  */
7267 int fm_get_table(struct hfi1_pportdata *ppd, int which, void *t)
7269 {
7270         int size;
7271         struct vl_arb_cache *vlc;
7272
7273         switch (which) {
7274         case FM_TBL_VL_HIGH_ARB:
7275                 size = 256;
7276                 /*
7277                  * OPA specifies 128 elements (of 2 bytes each), though
7278                  * HFI supports only 16 elements in h/w.
7279                  */
7280                 vlc = vl_arb_lock_cache(ppd, HI_PRIO_TABLE);
7281                 vl_arb_get_cache(vlc, t);
7282                 vl_arb_unlock_cache(ppd, HI_PRIO_TABLE);
7283                 break;
7284         case FM_TBL_VL_LOW_ARB:
7285                 size = 256;
7286                 /*
7287                  * OPA specifies 128 elements (of 2 bytes each), though
7288                  * HFI supports only 16 elements in h/w.
7289                  */
7290                 vlc = vl_arb_lock_cache(ppd, LO_PRIO_TABLE);
7291                 vl_arb_get_cache(vlc, t);
7292                 vl_arb_unlock_cache(ppd, LO_PRIO_TABLE);
7293                 break;
7294         case FM_TBL_BUFFER_CONTROL:
7295                 size = get_buffer_control(ppd->dd, t, NULL);
7296                 break;
7297         case FM_TBL_SC2VLNT:
7298                 size = get_sc2vlnt(ppd->dd, t);
7299                 break;
7300         case FM_TBL_VL_PREEMPT_ELEMS:
7301                 size = 256;
7302                 /* OPA specifies 128 elements, of 2 bytes each */
7303                 get_vlarb_preempt(ppd->dd, OPA_MAX_VLS, t);
7304                 break;
7305         case FM_TBL_VL_PREEMPT_MATRIX:
7306                 size = 256;
7307                 /*
7308                  * OPA specifies that this is the same size as the VL
7309                  * arbitration tables (i.e., 256 bytes).
7310                  */
7311                 break;
7312         default:
7313                 return -EINVAL;
7314         }
7315         return size;
7316 }
7317
7318 /*
7319  * Write the given fabric manager table.
7320  */
7321 int fm_set_table(struct hfi1_pportdata *ppd, int which, void *t)
7322 {
7323         int ret = 0;
7324         struct vl_arb_cache *vlc;
7325
7326         switch (which) {
7327         case FM_TBL_VL_HIGH_ARB:
7328                 vlc = vl_arb_lock_cache(ppd, HI_PRIO_TABLE);
7329                 if (vl_arb_match_cache(vlc, t)) {
7330                         vl_arb_unlock_cache(ppd, HI_PRIO_TABLE);
7331                         break;
7332                 }
7333                 vl_arb_set_cache(vlc, t);
7334                 vl_arb_unlock_cache(ppd, HI_PRIO_TABLE);
7335                 ret = set_vl_weights(ppd, SEND_HIGH_PRIORITY_LIST,
7336                                      VL_ARB_HIGH_PRIO_TABLE_SIZE, t);
7337                 break;
7338         case FM_TBL_VL_LOW_ARB:
7339                 vlc = vl_arb_lock_cache(ppd, LO_PRIO_TABLE);
7340                 if (vl_arb_match_cache(vlc, t)) {
7341                         vl_arb_unlock_cache(ppd, LO_PRIO_TABLE);
7342                         break;
7343                 }
7344                 vl_arb_set_cache(vlc, t);
7345                 vl_arb_unlock_cache(ppd, LO_PRIO_TABLE);
7346                 ret = set_vl_weights(ppd, SEND_LOW_PRIORITY_LIST,
7347                                      VL_ARB_LOW_PRIO_TABLE_SIZE, t);
7348                 break;
7349         case FM_TBL_BUFFER_CONTROL:
7350                 ret = set_buffer_control(ppd->dd, t);
7351                 break;
7352         case FM_TBL_SC2VLNT:
7353                 set_sc2vlnt(ppd->dd, t);
7354                 break;
7355         default:
7356                 ret = -EINVAL;
7357         }
7358         return ret;
7359 }
7360
7361 /*
7362  * Disable all data VLs.
7363  *
7364  * Return 0 if disabled, non-zero if the VLs cannot be disabled.
7365  */
7366 static int disable_data_vls(struct hfi1_devdata *dd)
7367 {
7368         if (is_a0(dd))
7369                 return 1;
7370
7371         pio_send_control(dd, PSC_DATA_VL_DISABLE);
7372
7373         return 0;
7374 }
7375
7376 /*
7377  * open_fill_data_vls() - the counterpart to stop_drain_data_vls().
7378  * Just re-enables all data VLs (the "fill" part happens
7379  * automatically - the name was chosen for symmetry with
7380  * stop_drain_data_vls()).
7381  *
7382  * Return 0 if successful, non-zero if the VLs cannot be enabled.
7383  */
7384 int open_fill_data_vls(struct hfi1_devdata *dd)
7385 {
7386         if (is_a0(dd))
7387                 return 1;
7388
7389         pio_send_control(dd, PSC_DATA_VL_ENABLE);
7390
7391         return 0;
7392 }
7393
7394 /*
7395  * drain_data_vls() - assumes that disable_data_vls() has been called,
7396  * wait for occupancy (of per-VL FIFOs) for all contexts, and SDMA
7397  * engines to drop to 0.
7398  */
7399 static void drain_data_vls(struct hfi1_devdata *dd)
7400 {
7401         sc_wait(dd);
7402         sdma_wait(dd);
7403         pause_for_credit_return(dd);
7404 }
7405
7406 /*
7407  * stop_drain_data_vls() - disable, then drain all per-VL fifos.
7408  *
7409  * Use open_fill_data_vls() to resume using data VLs.  This pair is
7410  * meant to be used like this:
7411  *
7412  * stop_drain_data_vls(dd);
7413  * // do things with per-VL resources
7414  * open_fill_data_vls(dd);
7415  */
7416 int stop_drain_data_vls(struct hfi1_devdata *dd)
7417 {
7418         int ret;
7419
7420         ret = disable_data_vls(dd);
7421         if (ret == 0)
7422                 drain_data_vls(dd);
7423
7424         return ret;
7425 }
7426
7427 /*
7428  * Convert a nanosecond time to a cclock count.  No matter how slow
7429  * the cclock, a non-zero ns will always have a non-zero result.
7430  */
7431 u32 ns_to_cclock(struct hfi1_devdata *dd, u32 ns)
7432 {
7433         u32 cclocks;
7434
7435         if (dd->icode == ICODE_FPGA_EMULATION)
7436                 cclocks = (ns * 1000) / FPGA_CCLOCK_PS;
7437         else  /* simulation pretends to be ASIC */
7438                 cclocks = (ns * 1000) / ASIC_CCLOCK_PS;
7439         if (ns && !cclocks)     /* if ns nonzero, must be at least 1 */
7440                 cclocks = 1;
7441         return cclocks;
7442 }
7443
7444 /*
7445  * Convert a cclock count to nanoseconds. No matter how slow
7446  * the cclock, a non-zero cclocks will always have a non-zero result.
7447  */
7448 u32 cclock_to_ns(struct hfi1_devdata *dd, u32 cclocks)
7449 {
7450         u32 ns;
7451
7452         if (dd->icode == ICODE_FPGA_EMULATION)
7453                 ns = (cclocks * FPGA_CCLOCK_PS) / 1000;
7454         else  /* simulation pretends to be ASIC */
7455                 ns = (cclocks * ASIC_CCLOCK_PS) / 1000;
7456         if (cclocks && !ns)
7457                 ns = 1;
7458         return ns;
7459 }
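
/*
 * Example of the round-up behavior (the cclock period here is purely
 * illustrative): with a 1250ps cclock, ns_to_cclock(dd, 1) computes
 * (1 * 1000) / 1250 == 0 and is forced up to 1; cclock_to_ns rounds up
 * from zero the same way, so a non-zero request never becomes zero.
 */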
7460
7461 /*
7462  * Dynamically adjust the receive interrupt timeout for a context based on
7463  * incoming packet rate.
7464  *
7465  * NOTE: Dynamic adjustment does not allow rcv_intr_count to be zero.
7466  */
7467 static void adjust_rcv_timeout(struct hfi1_ctxtdata *rcd, u32 npkts)
7468 {
7469         struct hfi1_devdata *dd = rcd->dd;
7470         u32 timeout = rcd->rcvavail_timeout;
7471
7472         /*
7473          * This algorithm doubles or halves the timeout depending on whether
7474          * the number of packets received in this interrupt was less than or
7475          * greater than or equal to the interrupt count.
7476          *
7477          * The calculations below do not allow a steady state to be achieved.
7478          * Only at the endpoints is it possible to have an unchanging
7479          * timeout.
7480          */
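        /*
         * Example, assuming an interrupt count of 16: an interrupt that
         * covered 10 packets halves the timeout; one that covered 16 or
         * more doubles it, capped at dd->rcv_intr_timeout_csr.
         */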
7481         if (npkts < rcv_intr_count) {
7482                 /*
7483                  * Not enough packets arrived before the timeout, adjust
7484                  * timeout downward.
7485                  */
7486                 if (timeout < 2) /* already at minimum? */
7487                         return;
7488                 timeout >>= 1;
7489         } else {
7490                 /*
7491                  * More than enough packets arrived before the timeout, adjust
7492                  * timeout upward.
7493                  */
7494                 if (timeout >= dd->rcv_intr_timeout_csr) /* already at max? */
7495                         return;
7496                 timeout = min(timeout << 1, dd->rcv_intr_timeout_csr);
7497         }
7498
7499         rcd->rcvavail_timeout = timeout;
7500         /* timeout cannot be larger than rcv_intr_timeout_csr, which has
7501            already been verified to be in range */
7502         write_kctxt_csr(dd, rcd->ctxt, RCV_AVAIL_TIME_OUT,
7503                 (u64)timeout << RCV_AVAIL_TIME_OUT_TIME_OUT_RELOAD_SHIFT);
7504 }
7505
7506 void update_usrhead(struct hfi1_ctxtdata *rcd, u32 hd, u32 updegr, u32 egrhd,
7507                     u32 intr_adjust, u32 npkts)
7508 {
7509         struct hfi1_devdata *dd = rcd->dd;
7510         u64 reg;
7511         u32 ctxt = rcd->ctxt;
7512
7513         /*
7514          * Need to write timeout register before updating RcvHdrHead to ensure
7515          * that a new value is used when the HW decides to restart counting.
7516          */
7517         if (intr_adjust)
7518                 adjust_rcv_timeout(rcd, npkts);
7519         if (updegr) {
7520                 reg = (egrhd & RCV_EGR_INDEX_HEAD_HEAD_MASK)
7521                         << RCV_EGR_INDEX_HEAD_HEAD_SHIFT;
7522                 write_uctxt_csr(dd, ctxt, RCV_EGR_INDEX_HEAD, reg);
7523         }
7524         mmiowb();
7525         reg = ((u64)rcv_intr_count << RCV_HDR_HEAD_COUNTER_SHIFT) |
7526                 (((u64)hd & RCV_HDR_HEAD_HEAD_MASK)
7527                         << RCV_HDR_HEAD_HEAD_SHIFT);
7528         write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, reg);
7529         mmiowb();
7530 }
7531
7532 u32 hdrqempty(struct hfi1_ctxtdata *rcd)
7533 {
7534         u32 head, tail;
7535
7536         head = (read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_HEAD)
7537                 & RCV_HDR_HEAD_HEAD_SMASK) >> RCV_HDR_HEAD_HEAD_SHIFT;
7538
7539         if (rcd->rcvhdrtail_kvaddr)
7540                 tail = get_rcvhdrtail(rcd);
7541         else
7542                 tail = read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_TAIL);
7543
7544         return head == tail;
7545 }
7546
7547 /*
7548  * Context Control and Receive Array encoding for buffer size:
7549  *      0x0 invalid
7550  *      0x1   4 KB
7551  *      0x2   8 KB
7552  *      0x3  16 KB
7553  *      0x4  32 KB
7554  *      0x5  64 KB
7555  *      0x6 128 KB
7556  *      0x7 256 KB
7557  *      0x8 512 KB (Receive Array only)
7558  *      0x9   1 MB (Receive Array only)
7559  *      0xa   2 MB (Receive Array only)
7560  *
7561  *      0xb-0xf reserved (Receive Array only)
7562  *
7564  * This routine assumes that the value has already been sanity checked.
7565  */
7566 static u32 encoded_size(u32 size)
7567 {
7568         switch (size) {
7569         case   4*1024: return 0x1;
7570         case   8*1024: return 0x2;
7571         case  16*1024: return 0x3;
7572         case  32*1024: return 0x4;
7573         case  64*1024: return 0x5;
7574         case 128*1024: return 0x6;
7575         case 256*1024: return 0x7;
7576         case 512*1024: return 0x8;
7577         case   1*1024*1024: return 0x9;
7578         case   2*1024*1024: return 0xa;
7579         }
7580         return 0x1;     /* if invalid, go with the minimum size */
7581 }
7582
7583 void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt)
7584 {
7585         struct hfi1_ctxtdata *rcd;
7586         u64 rcvctrl, reg;
7587         int did_enable = 0;
7588
7589         rcd = dd->rcd[ctxt];
7590         if (!rcd)
7591                 return;
7592
7593         hfi1_cdbg(RCVCTRL, "ctxt %d op 0x%x", ctxt, op);
7594
7595         rcvctrl = read_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL);
7596         /* if the context is already enabled, don't do the extra steps */
7597         if ((op & HFI1_RCVCTRL_CTXT_ENB)
7598                         && !(rcvctrl & RCV_CTXT_CTRL_ENABLE_SMASK)) {
7599                 /* reset the tail and hdr addresses, and sequence count */
7600                 write_kctxt_csr(dd, ctxt, RCV_HDR_ADDR,
7601                                 rcd->rcvhdrq_phys);
7602                 if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL))
7603                         write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR,
7604                                         rcd->rcvhdrqtailaddr_phys);
7605                 rcd->seq_cnt = 1;
7606
7607                 /* reset the cached receive header queue head value */
7608                 rcd->head = 0;
7609
7610                 /*
7611                  * Zero the receive header queue so we don't get false
7612                  * positives when checking the sequence number.  The
7613                  * sequence numbers could land exactly on the same spot.
7614                  * E.g. an rcd restart before the receive header queue wrapped.
7615                  */
7616                 memset(rcd->rcvhdrq, 0, rcd->rcvhdrq_size);
7617
7618                 /* starting timeout */
7619                 rcd->rcvavail_timeout = dd->rcv_intr_timeout_csr;
7620
7621                 /* enable the context */
7622                 rcvctrl |= RCV_CTXT_CTRL_ENABLE_SMASK;
7623
7624                 /* clean the egr buffer size first */
7625                 rcvctrl &= ~RCV_CTXT_CTRL_EGR_BUF_SIZE_SMASK;
7626                 rcvctrl |= ((u64)encoded_size(rcd->egrbufs.rcvtid_size)
7627                                 & RCV_CTXT_CTRL_EGR_BUF_SIZE_MASK)
7628                                         << RCV_CTXT_CTRL_EGR_BUF_SIZE_SHIFT;
7629
7630                 /* zero RcvHdrHead - set RcvHdrHead.Counter after enable */
7631                 write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, 0);
7632                 did_enable = 1;
7633
7634                 /* zero RcvEgrIndexHead */
7635                 write_uctxt_csr(dd, ctxt, RCV_EGR_INDEX_HEAD, 0);
7636
7637                 /* set eager count and base index */
7638                 reg = (((u64)(rcd->egrbufs.alloced >> RCV_SHIFT)
7639                         & RCV_EGR_CTRL_EGR_CNT_MASK)
7640                        << RCV_EGR_CTRL_EGR_CNT_SHIFT) |
7641                         (((rcd->eager_base >> RCV_SHIFT)
7642                           & RCV_EGR_CTRL_EGR_BASE_INDEX_MASK)
7643                          << RCV_EGR_CTRL_EGR_BASE_INDEX_SHIFT);
7644                 write_kctxt_csr(dd, ctxt, RCV_EGR_CTRL, reg);
7645
7646                 /*
7647                  * Set TID (expected) count and base index.
7648                  * rcd->expected_count is set to individual RcvArray entries,
7649                  * not pairs, and the CSR takes a pair-count in groups of
7650                  * four, so divide by 8.
7651                  */
7652                 reg = (((rcd->expected_count >> RCV_SHIFT)
7653                                         & RCV_TID_CTRL_TID_PAIR_CNT_MASK)
7654                                 << RCV_TID_CTRL_TID_PAIR_CNT_SHIFT) |
7655                       (((rcd->expected_base >> RCV_SHIFT)
7656                                         & RCV_TID_CTRL_TID_BASE_INDEX_MASK)
7657                                 << RCV_TID_CTRL_TID_BASE_INDEX_SHIFT);
7658                 write_kctxt_csr(dd, ctxt, RCV_TID_CTRL, reg);
7659                 if (ctxt == VL15CTXT)
7660                         write_csr(dd, RCV_VL15, VL15CTXT);
7661         }
7662         if (op & HFI1_RCVCTRL_CTXT_DIS) {
7663                 write_csr(dd, RCV_VL15, 0);
7664                 rcvctrl &= ~RCV_CTXT_CTRL_ENABLE_SMASK;
7665         }
7666         if (op & HFI1_RCVCTRL_INTRAVAIL_ENB)
7667                 rcvctrl |= RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
7668         if (op & HFI1_RCVCTRL_INTRAVAIL_DIS)
7669                 rcvctrl &= ~RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
7670         if (op & HFI1_RCVCTRL_TAILUPD_ENB && rcd->rcvhdrqtailaddr_phys)
7671                 rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK;
7672         if (op & HFI1_RCVCTRL_TAILUPD_DIS)
7673                 rcvctrl &= ~RCV_CTXT_CTRL_TAIL_UPD_SMASK;
7674         if (op & HFI1_RCVCTRL_TIDFLOW_ENB)
7675                 rcvctrl |= RCV_CTXT_CTRL_TID_FLOW_ENABLE_SMASK;
7676         if (op & HFI1_RCVCTRL_TIDFLOW_DIS)
7677                 rcvctrl &= ~RCV_CTXT_CTRL_TID_FLOW_ENABLE_SMASK;
7678         if (op & HFI1_RCVCTRL_ONE_PKT_EGR_ENB) {
7679                 /* In one-packet-per-eager mode, the size comes from
7680                    the RcvArray entry. */
7681                 rcvctrl &= ~RCV_CTXT_CTRL_EGR_BUF_SIZE_SMASK;
7682                 rcvctrl |= RCV_CTXT_CTRL_ONE_PACKET_PER_EGR_BUFFER_SMASK;
7683         }
7684         if (op & HFI1_RCVCTRL_ONE_PKT_EGR_DIS)
7685                 rcvctrl &= ~RCV_CTXT_CTRL_ONE_PACKET_PER_EGR_BUFFER_SMASK;
7686         if (op & HFI1_RCVCTRL_NO_RHQ_DROP_ENB)
7687                 rcvctrl |= RCV_CTXT_CTRL_DONT_DROP_RHQ_FULL_SMASK;
7688         if (op & HFI1_RCVCTRL_NO_RHQ_DROP_DIS)
7689                 rcvctrl &= ~RCV_CTXT_CTRL_DONT_DROP_RHQ_FULL_SMASK;
7690         if (op & HFI1_RCVCTRL_NO_EGR_DROP_ENB)
7691                 rcvctrl |= RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK;
7692         if (op & HFI1_RCVCTRL_NO_EGR_DROP_DIS)
7693                 rcvctrl &= ~RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK;
7694         rcd->rcvctrl = rcvctrl;
7695         hfi1_cdbg(RCVCTRL, "ctxt %d rcvctrl 0x%llx\n", ctxt, rcvctrl);
7696         write_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL, rcd->rcvctrl);
7697
7698         /* work around sticky RcvCtxtStatus.BlockedRHQFull */
7699         if (did_enable
7700             && (rcvctrl & RCV_CTXT_CTRL_DONT_DROP_RHQ_FULL_SMASK)) {
7701                 reg = read_kctxt_csr(dd, ctxt, RCV_CTXT_STATUS);
7702                 if (reg != 0) {
7703                         dd_dev_info(dd, "ctxt %d status %lld (blocked)\n",
7704                                 ctxt, reg);
7705                         read_uctxt_csr(dd, ctxt, RCV_HDR_HEAD);
7706                         write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, 0x10);
7707                         write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, 0x00);
7708                         read_uctxt_csr(dd, ctxt, RCV_HDR_HEAD);
7709                         reg = read_kctxt_csr(dd, ctxt, RCV_CTXT_STATUS);
7710                         dd_dev_info(dd, "ctxt %d status %lld (%s blocked)\n",
7711                                 ctxt, reg, reg == 0 ? "not" : "still");
7712                 }
7713         }
7714
7715         if (did_enable) {
7716                 /*
7717                  * The interrupt timeout and count must be set after
7718                  * the context is enabled to take effect.
7719                  */
7720                 /* set interrupt timeout */
7721                 write_kctxt_csr(dd, ctxt, RCV_AVAIL_TIME_OUT,
7722                         (u64)rcd->rcvavail_timeout <<
7723                                 RCV_AVAIL_TIME_OUT_TIME_OUT_RELOAD_SHIFT);
7724
7725                 /* set RcvHdrHead.Counter, zero RcvHdrHead.Head (again) */
7726                 reg = (u64)rcv_intr_count << RCV_HDR_HEAD_COUNTER_SHIFT;
7727                 write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, reg);
7728         }
7729
7730         if (op & (HFI1_RCVCTRL_TAILUPD_DIS | HFI1_RCVCTRL_CTXT_DIS))
7731                 /*
7732                  * If the context has been disabled and the Tail Update has
7733                  * been cleared, clear the RCV_HDR_TAIL_ADDR CSR so
7734                  * it doesn't contain an address that is invalid.
7735                  */
7736                 write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR, 0);
7737 }
7738
7739 u32 hfi1_read_cntrs(struct hfi1_devdata *dd, loff_t pos, char **namep,
7740                     u64 **cntrp)
7741 {
7742         int ret;
7743         u64 val = 0;
7744
7745         if (namep) {
7746                 ret = dd->cntrnameslen;
7747                 if (pos != 0) {
7748                         dd_dev_err(dd, "read_cntrs does not support indexing");
7749                         return 0;
7750                 }
7751                 *namep = dd->cntrnames;
7752         } else {
7753                 const struct cntr_entry *entry;
7754                 int i, j;
7755
7756                 ret = (dd->ndevcntrs) * sizeof(u64);
7757                 if (pos != 0) {
7758                         dd_dev_err(dd, "read_cntrs does not support indexing");
7759                         return 0;
7760                 }
7761
7762                 /* Get the start of the block of counters */
7763                 *cntrp = dd->cntrs;
7764
7765                 /*
7766                  * Now go and fill in each counter in the block.
7767                  */
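                /*
                 * Per-VL counters occupy C_VL_COUNT consecutive u64 slots
                 * starting at entry->offset; scalar counters use a single
                 * slot at entry->offset.
                 */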
7768                 for (i = 0; i < DEV_CNTR_LAST; i++) {
7769                         entry = &dev_cntrs[i];
7770                         hfi1_cdbg(CNTR, "reading %s", entry->name);
7771                         if (entry->flags & CNTR_DISABLED) {
7772                                 /* Nothing */
7773                                 hfi1_cdbg(CNTR, "\tDisabled\n");
7774                         } else {
7775                                 if (entry->flags & CNTR_VL) {
7776                                         hfi1_cdbg(CNTR, "\tPer VL\n");
7777                                         for (j = 0; j < C_VL_COUNT; j++) {
7778                                                 val = entry->rw_cntr(entry,
7779                                                                   dd, j,
7780                                                                   CNTR_MODE_R,
7781                                                                   0);
7782                                                 hfi1_cdbg(
7783                                                    CNTR,
7784                                                    "\t\tRead 0x%llx for %d\n",
7785                                                    val, j);
7786                                                 dd->cntrs[entry->offset + j] =
7787                                                                             val;
7788                                         }
7789                                 } else {
7790                                         val = entry->rw_cntr(entry, dd,
7791                                                         CNTR_INVALID_VL,
7792                                                         CNTR_MODE_R, 0);
7793                                         dd->cntrs[entry->offset] = val;
7794                                         hfi1_cdbg(CNTR, "\tRead 0x%llx", val);
7795                                 }
7796                         }
7797                 }
7798         }
7799         return ret;
7800 }
7801
7802 /*
7803  * Used by sysfs to create files for hfi stats to read
7804  */
7805 u32 hfi1_read_portcntrs(struct hfi1_devdata *dd, loff_t pos, u32 port,
7806                         char **namep, u64 **cntrp)
7807 {
7808         int ret;
7809         u64 val = 0;
7810
7811         if (namep) {
7812                 ret = dd->portcntrnameslen;
7813                 if (pos != 0) {
7814                         dd_dev_err(dd, "indexing not supported");
7815                         return 0;
7816                 }
7817                 *namep = dd->portcntrnames;
7818         } else {
7819                 const struct cntr_entry *entry;
7820                 struct hfi1_pportdata *ppd;
7821                 int i, j;
7822
7823                 ret = (dd->nportcntrs) * sizeof(u64);
7824                 if (pos != 0) {
7825                         dd_dev_err(dd, "indexing not supported");
7826                         return 0;
7827                 }
7828                 ppd = (struct hfi1_pportdata *)(dd + 1 + port);
7829                 *cntrp = ppd->cntrs;
7830
7831                 for (i = 0; i < PORT_CNTR_LAST; i++) {
7832                         entry = &port_cntrs[i];
7833                         hfi1_cdbg(CNTR, "reading %s", entry->name);
7834                         if (entry->flags & CNTR_DISABLED) {
7835                                 /* Nothing */
7836                                 hfi1_cdbg(CNTR, "\tDisabled\n");
7837                                 continue;
7838                         }
7839
7840                         if (entry->flags & CNTR_VL) {
7841                                 hfi1_cdbg(CNTR, "\tPer VL");
7842                                 for (j = 0; j < C_VL_COUNT; j++) {
7843                                         val = entry->rw_cntr(entry, ppd, j,
7844                                                                CNTR_MODE_R,
7845                                                                0);
7846                                         hfi1_cdbg(
7847                                            CNTR,
7848                                            "\t\tRead 0x%llx for %d",
7849                                            val, j);
7850                                         ppd->cntrs[entry->offset + j] = val;
7851                                 }
7852                         } else {
7853                                 val = entry->rw_cntr(entry, ppd,
7854                                                        CNTR_INVALID_VL,
7855                                                        CNTR_MODE_R,
7856                                                        0);
7857                                 ppd->cntrs[entry->offset] = val;
7858                                 hfi1_cdbg(CNTR, "\tRead 0x%llx", val);
7859                         }
7860                 }
7861         }
7862         return ret;
7863 }
7864
7865 static void free_cntrs(struct hfi1_devdata *dd)
7866 {
7867         struct hfi1_pportdata *ppd;
7868         int i;
7869
7870         if (dd->synth_stats_timer.data)
7871                 del_timer_sync(&dd->synth_stats_timer);
7872         dd->synth_stats_timer.data = 0;
7873         ppd = (struct hfi1_pportdata *)(dd + 1);
7874         for (i = 0; i < dd->num_pports; i++, ppd++) {
7875                 kfree(ppd->cntrs);
7876                 kfree(ppd->scntrs);
7877                 free_percpu(ppd->ibport_data.rc_acks);
7878                 free_percpu(ppd->ibport_data.rc_qacks);
7879                 free_percpu(ppd->ibport_data.rc_delayed_comp);
7880                 ppd->cntrs = NULL;
7881                 ppd->scntrs = NULL;
7882                 ppd->ibport_data.rc_acks = NULL;
7883                 ppd->ibport_data.rc_qacks = NULL;
7884                 ppd->ibport_data.rc_delayed_comp = NULL;
7885         }
7886         kfree(dd->portcntrnames);
7887         dd->portcntrnames = NULL;
7888         kfree(dd->cntrs);
7889         dd->cntrs = NULL;
7890         kfree(dd->scntrs);
7891         dd->scntrs = NULL;
7892         kfree(dd->cntrnames);
7893         dd->cntrnames = NULL;
7894 }
7895
7896 #define CNTR_MAX 0xFFFFFFFFFFFFFFFFULL
7897 #define CNTR_32BIT_MAX 0x00000000FFFFFFFFULL
7898
7899 static u64 read_dev_port_cntr(struct hfi1_devdata *dd, struct cntr_entry *entry,
7900                               u64 *psval, void *context, int vl)
7901 {
7902         u64 val;
7903         u64 sval = *psval;
7904
7905         if (entry->flags & CNTR_DISABLED) {
7906                 dd_dev_err(dd, "Counter %s not enabled", entry->name);
7907                 return 0;
7908         }
7909
7910         hfi1_cdbg(CNTR, "cntr: %s vl %d psval 0x%llx", entry->name, vl, *psval);
7911
7912         val = entry->rw_cntr(entry, context, vl, CNTR_MODE_R, 0);
7913
7914         /* If it's a synthetic counter, there is more work we need to do */
7915         if (entry->flags & CNTR_SYNTH) {
7916                 if (sval == CNTR_MAX) {
7917                         /* No need to read already saturated */
7918                         return CNTR_MAX;
7919                 }
7920
7921                 if (entry->flags & CNTR_32BIT) {
7922                         /* 32bit counters can wrap multiple times */
7923                         u64 upper = sval >> 32;
7924                         u64 lower = (sval << 32) >> 32;
7925
7926                         if (lower > val) { /* hw wrapped */
7927                                 if (upper == CNTR_32BIT_MAX)
7928                                         val = CNTR_MAX;
7929                                 else
7930                                         upper++;
7931                         }
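                        /*
                         * Wrap example: a saved value of 0x1FFFFFFF0
                         * (upper 1, lower 0xFFFFFFF0) with a new hw read
                         * of 0x10 means the hw wrapped: upper becomes 2
                         * and the final value is 0x200000010.
                         */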
7932
7933                         if (val != CNTR_MAX)
7934                                 val = (upper << 32) | val;
7935
7936                 } else {
7937                         /* If we rolled we are saturated */
7938                         if ((val < sval) || (val > CNTR_MAX))
7939                                 val = CNTR_MAX;
7940                 }
7941         }
7942
7943         *psval = val;
7944
7945         hfi1_cdbg(CNTR, "\tNew val=0x%llx", val);
7946
7947         return val;
7948 }
7949
7950 static u64 write_dev_port_cntr(struct hfi1_devdata *dd,
7951                                struct cntr_entry *entry,
7952                                u64 *psval, void *context, int vl, u64 data)
7953 {
7954         u64 val;
7955
7956         if (entry->flags & CNTR_DISABLED) {
7957                 dd_dev_err(dd, "Counter %s not enabled", entry->name);
7958                 return 0;
7959         }
7960
7961         hfi1_cdbg(CNTR, "cntr: %s vl %d psval 0x%llx", entry->name, vl, *psval);
7962
7963         if (entry->flags & CNTR_SYNTH) {
7964                 *psval = data;
7965                 if (entry->flags & CNTR_32BIT) {
7966                         val = entry->rw_cntr(entry, context, vl, CNTR_MODE_W,
7967                                              (data << 32) >> 32);
7968                         val = data; /* return the full 64bit value */
7969                 } else {
7970                         val = entry->rw_cntr(entry, context, vl, CNTR_MODE_W,
7971                                              data);
7972                 }
7973         } else {
7974                 val = entry->rw_cntr(entry, context, vl, CNTR_MODE_W, data);
7975         }
7976
7977         *psval = val;
7978
7979         hfi1_cdbg(CNTR, "\tNew val=0x%llx", val);
7980
7981         return val;
7982 }
7983
7984 u64 read_dev_cntr(struct hfi1_devdata *dd, int index, int vl)
7985 {
7986         struct cntr_entry *entry;
7987         u64 *sval;
7988
7989         entry = &dev_cntrs[index];
7990         sval = dd->scntrs + entry->offset;
7991
7992         if (vl != CNTR_INVALID_VL)
7993                 sval += vl;
7994
7995         return read_dev_port_cntr(dd, entry, sval, dd, vl);
7996 }
7997
7998 u64 write_dev_cntr(struct hfi1_devdata *dd, int index, int vl, u64 data)
7999 {
8000         struct cntr_entry *entry;
8001         u64 *sval;
8002
8003         entry = &dev_cntrs[index];
8004         sval = dd->scntrs + entry->offset;
8005
8006         if (vl != CNTR_INVALID_VL)
8007                 sval += vl;
8008
8009         return write_dev_port_cntr(dd, entry, sval, dd, vl, data);
8010 }
8011
8012 u64 read_port_cntr(struct hfi1_pportdata *ppd, int index, int vl)
8013 {
8014         struct cntr_entry *entry;
8015         u64 *sval;
8016
8017         entry = &port_cntrs[index];
8018         sval = ppd->scntrs + entry->offset;
8019
8020         if (vl != CNTR_INVALID_VL)
8021                 sval += vl;
8022
8023         if ((index >= C_RCV_HDR_OVF_FIRST + ppd->dd->num_rcv_contexts) &&
8024             (index <= C_RCV_HDR_OVF_LAST)) {
8025                 /* We do not want to bother for disabled contexts */
8026                 return 0;
8027         }
8028
8029         return read_dev_port_cntr(ppd->dd, entry, sval, ppd, vl);
8030 }
8031
8032 u64 write_port_cntr(struct hfi1_pportdata *ppd, int index, int vl, u64 data)
8033 {
8034         struct cntr_entry *entry;
8035         u64 *sval;
8036
8037         entry = &port_cntrs[index];
8038         sval = ppd->scntrs + entry->offset;
8039
8040         if (vl != CNTR_INVALID_VL)
8041                 sval += vl;
8042
8043         if ((index >= C_RCV_HDR_OVF_FIRST + ppd->dd->num_rcv_contexts) &&
8044             (index <= C_RCV_HDR_OVF_LAST)) {
8045                 /* We do not want to bother for disabled contexts */
8046                 return 0;
8047         }
8048
8049         return write_dev_port_cntr(ppd->dd, entry, sval, ppd, vl, data);
8050 }
8051
8052 static void update_synth_timer(unsigned long opaque)
8053 {
8054         u64 cur_tx;
8055         u64 cur_rx;
8056         u64 total_flits;
8057         u8 update = 0;
8058         int i, j, vl;
8059         struct hfi1_pportdata *ppd;
8060         struct cntr_entry *entry;
8061
8062         struct hfi1_devdata *dd = (struct hfi1_devdata *)opaque;
8063
8064         /*
8065          * Rather than keep beating on the CSRs, pick a minimal set that we
8066          * can check to watch for potential rollover: the number of flits
8067          * sent and received. If the total flit delta exceeds 32 bits then
8068          * we have to iterate over all the counters and update them.
8069          */
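        /*
         * Rough scale, assuming a 100 Gb/s link and 64-bit flits: a
         * 32-bit flit count can accumulate in about
         * 2^32 flits * 8 bytes / 12.5 GB/s, i.e. roughly 2.7 seconds,
         * so this check must run well within that window.
         */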
8070         entry = &dev_cntrs[C_DC_RCV_FLITS];
8071         cur_rx = entry->rw_cntr(entry, dd, CNTR_INVALID_VL, CNTR_MODE_R, 0);
8072
8073         entry = &dev_cntrs[C_DC_XMIT_FLITS];
8074         cur_tx = entry->rw_cntr(entry, dd, CNTR_INVALID_VL, CNTR_MODE_R, 0);
8075
8076         hfi1_cdbg(
8077             CNTR,
8078             "[%d] curr tx=0x%llx rx=0x%llx :: last tx=0x%llx rx=0x%llx\n",
8079             dd->unit, cur_tx, cur_rx, dd->last_tx, dd->last_rx);
8080
8081         if ((cur_tx < dd->last_tx) || (cur_rx < dd->last_rx)) {
8082                 /*
8083                  * May not be strictly necessary to update but it won't hurt and
8084                  * simplifies the logic here.
8085                  */
8086                 update = 1;
8087                 hfi1_cdbg(CNTR, "[%d] Tripwire counter rolled, updating",
8088                           dd->unit);
8089         } else {
8090                 total_flits = (cur_tx - dd->last_tx) + (cur_rx - dd->last_rx);
8091                 hfi1_cdbg(CNTR,
8092                           "[%d] total flits 0x%llx limit 0x%llx\n", dd->unit,
8093                           total_flits, (u64)CNTR_32BIT_MAX);
8094                 if (total_flits >= CNTR_32BIT_MAX) {
8095                         hfi1_cdbg(CNTR, "[%d] 32bit limit hit, updating",
8096                                   dd->unit);
8097                         update = 1;
8098                 }
8099         }
8100
8101         if (update) {
8102                 hfi1_cdbg(CNTR, "[%d] Updating dd and ppd counters", dd->unit);
8103                 for (i = 0; i < DEV_CNTR_LAST; i++) {
8104                         entry = &dev_cntrs[i];
8105                         if (entry->flags & CNTR_VL) {
8106                                 for (vl = 0; vl < C_VL_COUNT; vl++)
8107                                         read_dev_cntr(dd, i, vl);
8108                         } else {
8109                                 read_dev_cntr(dd, i, CNTR_INVALID_VL);
8110                         }
8111                 }
8112                 ppd = (struct hfi1_pportdata *)(dd + 1);
8113                 for (i = 0; i < dd->num_pports; i++, ppd++) {
8114                         for (j = 0; j < PORT_CNTR_LAST; j++) {
8115                                 entry = &port_cntrs[j];
8116                                 if (entry->flags & CNTR_VL) {
8117                                         for (vl = 0; vl < C_VL_COUNT; vl++)
8118                                                 read_port_cntr(ppd, j, vl);
8119                                 } else {
8120                                         read_port_cntr(ppd, j, CNTR_INVALID_VL);
8121                                 }
8122                         }
8123                 }
8124
8125                 /*
8126                  * We want the value in the register. The goal is to keep track
8127                  * of the number of "ticks" not the counter value. In other
8128                  * words if the register rolls we want to notice it and go ahead
8129                  * and force an update.
8130                  */
8131                 entry = &dev_cntrs[C_DC_XMIT_FLITS];
8132                 dd->last_tx = entry->rw_cntr(entry, dd, CNTR_INVALID_VL,
8133                                                 CNTR_MODE_R, 0);
8134
8135                 entry = &dev_cntrs[C_DC_RCV_FLITS];
8136                 dd->last_rx = entry->rw_cntr(entry, dd, CNTR_INVALID_VL,
8137                                                 CNTR_MODE_R, 0);
8138
8139                 hfi1_cdbg(CNTR, "[%d] setting last tx/rx to 0x%llx 0x%llx",
8140                           dd->unit, dd->last_tx, dd->last_rx);
8141
8142         } else {
8143                 hfi1_cdbg(CNTR, "[%d] No update necessary", dd->unit);
8144         }
8145
8146         mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME);
8147 }
8148
8149 #define C_MAX_NAME 13 /* 12 chars + one for \0 */
8150 static int init_cntrs(struct hfi1_devdata *dd)
8151 {
8152         int i, rcv_ctxts, index, j;
8153         size_t sz;
8154         char *p;
8155         char name[C_MAX_NAME];
8156         struct hfi1_pportdata *ppd;
8157
8158         /* set up the stats timer; the add_timer is done at the end */
8159         init_timer(&dd->synth_stats_timer);
8160         dd->synth_stats_timer.function = update_synth_timer;
8161         dd->synth_stats_timer.data = (unsigned long) dd;
8162
8163         /***********************/
8164         /* per device counters */
8165         /***********************/
8166
8167         /* size names and determine how many we have */
8168         dd->ndevcntrs = 0;
8169         sz = 0;
8170         index = 0;
8171
8172         for (i = 0; i < DEV_CNTR_LAST; i++) {
8173                 hfi1_dbg_early("Init cntr %s\n", dev_cntrs[i].name);
8174                 if (dev_cntrs[i].flags & CNTR_DISABLED) {
8175                         hfi1_dbg_early("\tSkipping %s\n", dev_cntrs[i].name);
8176                         continue;
8177                 }
8178
8179                 if (dev_cntrs[i].flags & CNTR_VL) {
8180                         hfi1_dbg_early("\tProcessing VL cntr\n");
8181                         dev_cntrs[i].offset = index;
8182                         for (j = 0; j < C_VL_COUNT; j++) {
8183                                 memset(name, '\0', C_MAX_NAME);
8184                                 snprintf(name, C_MAX_NAME, "%s%d",
8185                                         dev_cntrs[i].name,
8186                                         vl_from_idx(j));
8187                                 sz += strlen(name);
8188                                 sz++;
8189                                 hfi1_dbg_early("\t\t%s\n", name);
8190                                 dd->ndevcntrs++;
8191                                 index++;
8192                         }
8193                 } else {
8194                         /* +1 for newline  */
8195                         sz += strlen(dev_cntrs[i].name) + 1;
8196                         dd->ndevcntrs++;
8197                         dev_cntrs[i].offset = index;
8198                         index++;
8199                         hfi1_dbg_early("\tAdding %s\n", dev_cntrs[i].name);
8200                 }
8201         }
8202
8203         /* allocate space for the counter values */
8204         dd->cntrs = kcalloc(index, sizeof(u64), GFP_KERNEL);
8205         if (!dd->cntrs)
8206                 goto bail;
8207
8208         dd->scntrs = kcalloc(index, sizeof(u64), GFP_KERNEL);
8209         if (!dd->scntrs)
8210                 goto bail;
8211
8213         /* allocate space for the counter names */
8214         dd->cntrnameslen = sz;
8215         dd->cntrnames = kmalloc(sz, GFP_KERNEL);
8216         if (!dd->cntrnames)
8217                 goto bail;
8218
8219         /* fill in the names */
8220         for (p = dd->cntrnames, i = 0, index = 0; i < DEV_CNTR_LAST; i++) {
8221                 if (dev_cntrs[i].flags & CNTR_DISABLED) {
8222                         /* Nothing */
8223                 } else {
8224                         if (dev_cntrs[i].flags & CNTR_VL) {
8225                                 for (j = 0; j < C_VL_COUNT; j++) {
8226                                         memset(name, '\0', C_MAX_NAME);
8227                                         snprintf(name, C_MAX_NAME, "%s%d",
8228                                                 dev_cntrs[i].name,
8229                                                 vl_from_idx(j));
8230                                         memcpy(p, name, strlen(name));
8231                                         p += strlen(name);
8232                                         *p++ = '\n';
8233                                 }
8234                         } else {
8235                                 memcpy(p, dev_cntrs[i].name,
8236                                        strlen(dev_cntrs[i].name));
8237                                 p += strlen(dev_cntrs[i].name);
8238                                 *p++ = '\n';
8239                         }
8240                         index++;
8241                 }
8242         }
8243
8244         /*********************/
8245         /* per port counters */
8246         /*********************/
8247
8248         /*
8249          * Go through the counters for the overflows and disable the ones we
8250          * don't need. This varies based on platform so we need to do it
8251          * dynamically here.
8252          */
8253         rcv_ctxts = dd->num_rcv_contexts;
8254         for (i = C_RCV_HDR_OVF_FIRST + rcv_ctxts;
8255              i <= C_RCV_HDR_OVF_LAST; i++) {
8256                 port_cntrs[i].flags |= CNTR_DISABLED;
8257         }
8258
8259         /* size port counter names and determine how many we have */
8260         sz = 0;
8261         dd->nportcntrs = 0;
8262         for (i = 0; i < PORT_CNTR_LAST; i++) {
8263                 hfi1_dbg_early("Init pcntr %s\n", port_cntrs[i].name);
8264                 if (port_cntrs[i].flags & CNTR_DISABLED) {
8265                         hfi1_dbg_early("\tSkipping %s\n", port_cntrs[i].name);
8266                         continue;
8267                 }
8268
8269                 if (port_cntrs[i].flags & CNTR_VL) {
8270                         hfi1_dbg_early("\tProcessing VL cntr\n");
8271                         port_cntrs[i].offset = dd->nportcntrs;
8272                         for (j = 0; j < C_VL_COUNT; j++) {
8273                                 memset(name, '\0', C_MAX_NAME);
8274                                 snprintf(name, C_MAX_NAME, "%s%d",
8275                                         port_cntrs[i].name,
8276                                         vl_from_idx(j));
8277                                 sz += strlen(name);
8278                                 sz++;
8279                                 hfi1_dbg_early("\t\t%s\n", name);
8280                                 dd->nportcntrs++;
8281                         }
8282                 } else {
8283                         /* +1 for newline  */
8284                         sz += strlen(port_cntrs[i].name) + 1;
8285                         port_cntrs[i].offset = dd->nportcntrs;
8286                         dd->nportcntrs++;
8287                         hfi1_dbg_early("\tAdding %s\n", port_cntrs[i].name);
8288                 }
8289         }
8290
8291         /* allocate space for the counter names */
8292         dd->portcntrnameslen = sz;
8293         dd->portcntrnames = kmalloc(sz, GFP_KERNEL);
8294         if (!dd->portcntrnames)
8295                 goto bail;
8296
8297         /* fill in port cntr names */
8298         for (p = dd->portcntrnames, i = 0; i < PORT_CNTR_LAST; i++) {
8299                 if (port_cntrs[i].flags & CNTR_DISABLED)
8300                         continue;
8301
8302                 if (port_cntrs[i].flags & CNTR_VL) {
8303                         for (j = 0; j < C_VL_COUNT; j++) {
8304                                 memset(name, '\0', C_MAX_NAME);
8305                                 snprintf(name, C_MAX_NAME, "%s%d",
8306                                         port_cntrs[i].name,
8307                                         vl_from_idx(j));
8308                                 memcpy(p, name, strlen(name));
8309                                 p += strlen(name);
8310                                 *p++ = '\n';
8311                         }
8312                 } else {
8313                         memcpy(p, port_cntrs[i].name,
8314                                strlen(port_cntrs[i].name));
8315                         p += strlen(port_cntrs[i].name);
8316                         *p++ = '\n';
8317                 }
8318         }
8319
8320         /* allocate per port storage for counter values */
8321         ppd = (struct hfi1_pportdata *)(dd + 1);
8322         for (i = 0; i < dd->num_pports; i++, ppd++) {
8323                 ppd->cntrs = kcalloc(dd->nportcntrs, sizeof(u64), GFP_KERNEL);
8324                 if (!ppd->cntrs)
8325                         goto bail;
8326
8327                 ppd->scntrs = kcalloc(dd->nportcntrs, sizeof(u64), GFP_KERNEL);
8328                 if (!ppd->scntrs)
8329                         goto bail;
8330         }
8331
8332         /* CPU counters need to be allocated and zeroed */
8333         if (init_cpu_counters(dd))
8334                 goto bail;
8335
8336         mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME);
8337         return 0;
8338 bail:
8339         free_cntrs(dd);
8340         return -ENOMEM;
8341 }
8342
8344 static u32 chip_to_opa_lstate(struct hfi1_devdata *dd, u32 chip_lstate)
8345 {
8346         switch (chip_lstate) {
8347         default:
8348                 dd_dev_err(dd,
8349                          "Unknown logical state 0x%x, reporting IB_PORT_DOWN\n",
8350                          chip_lstate);
8351                 /* fall through */
8352         case LSTATE_DOWN:
8353                 return IB_PORT_DOWN;
8354         case LSTATE_INIT:
8355                 return IB_PORT_INIT;
8356         case LSTATE_ARMED:
8357                 return IB_PORT_ARMED;
8358         case LSTATE_ACTIVE:
8359                 return IB_PORT_ACTIVE;
8360         }
8361 }
8362
8363 u32 chip_to_opa_pstate(struct hfi1_devdata *dd, u32 chip_pstate)
8364 {
8365         /* look at the HFI meta-states only */
8366         switch (chip_pstate & 0xf0) {
8367         default:
8368                 dd_dev_err(dd, "Unexpected chip physical state of 0x%x\n",
8369                         chip_pstate);
8370                 /* fall through */
8371         case PLS_DISABLED:
8372                 return IB_PORTPHYSSTATE_DISABLED;
8373         case PLS_OFFLINE:
8374                 return OPA_PORTPHYSSTATE_OFFLINE;
8375         case PLS_POLLING:
8376                 return IB_PORTPHYSSTATE_POLLING;
8377         case PLS_CONFIGPHY:
8378                 return IB_PORTPHYSSTATE_TRAINING;
8379         case PLS_LINKUP:
8380                 return IB_PORTPHYSSTATE_LINKUP;
8381         case PLS_PHYTEST:
8382                 return IB_PORTPHYSSTATE_PHY_TEST;
8383         }
8384 }
8385
8386 /* return the OPA port logical state name */
8387 const char *opa_lstate_name(u32 lstate)
8388 {
8389         static const char * const port_logical_names[] = {
8390                 "PORT_NOP",
8391                 "PORT_DOWN",
8392                 "PORT_INIT",
8393                 "PORT_ARMED",
8394                 "PORT_ACTIVE",
8395                 "PORT_ACTIVE_DEFER",
8396         };
8397         if (lstate < ARRAY_SIZE(port_logical_names))
8398                 return port_logical_names[lstate];
8399         return "unknown";
8400 }
8401
8402 /* return the OPA port physical state name */
8403 const char *opa_pstate_name(u32 pstate)
8404 {
8405         static const char * const port_physical_names[] = {
8406                 "PHYS_NOP",
8407                 "reserved1",
8408                 "PHYS_POLL",
8409                 "PHYS_DISABLED",
8410                 "PHYS_TRAINING",
8411                 "PHYS_LINKUP",
8412                 "PHYS_LINK_ERR_RECOVER",
8413                 "PHYS_PHY_TEST",
8414                 "reserved8",
8415                 "PHYS_OFFLINE",
8416                 "PHYS_GANGED",
8417                 "PHYS_TEST",
8418         };
8419         if (pstate < ARRAY_SIZE(port_physical_names))
8420                 return port_physical_names[pstate];
8421         return "unknown";
8422 }
8423
8424 /*
8425  * Read the hardware link state and set the driver's cached value of it.
8426  * Return the (new) current value.
8427  */
8428 u32 get_logical_state(struct hfi1_pportdata *ppd)
8429 {
8430         u32 new_state;
8431
8432         new_state = chip_to_opa_lstate(ppd->dd, read_logical_state(ppd->dd));
8433         if (new_state != ppd->lstate) {
8434                 dd_dev_info(ppd->dd, "logical state changed to %s (0x%x)\n",
8435                         opa_lstate_name(new_state), new_state);
8436                 ppd->lstate = new_state;
8437         }
8438         /*
8439          * Set port status flags in the page mapped into userspace
8440          * memory. Do it here to ensure a reliable state - this is
8441          * the only function called by all state handling code.
8442          * Always set the flags, because the cached value
8443          * might have been changed explicitly outside of this
8444          * function.
8445          */
8446         if (ppd->statusp) {
8447                 switch (ppd->lstate) {
8448                 case IB_PORT_DOWN:
8449                 case IB_PORT_INIT:
8450                         *ppd->statusp &= ~(HFI1_STATUS_IB_CONF |
8451                                            HFI1_STATUS_IB_READY);
8452                         break;
8453                 case IB_PORT_ARMED:
8454                         *ppd->statusp |= HFI1_STATUS_IB_CONF;
8455                         break;
8456                 case IB_PORT_ACTIVE:
8457                         *ppd->statusp |= HFI1_STATUS_IB_READY;
8458                         break;
8459                 }
8460         }
8461         return ppd->lstate;
8462 }
8463
8464 /**
8465  * wait_logical_linkstate - wait for an IB link state change to occur
8466  * @ppd: port device
8467  * @state: the state to wait for
8468  * @msecs: the number of milliseconds to wait
8469  *
8470  * Wait up to msecs milliseconds for IB link state change to occur.
8471  * For now, take the easy polling route.
8472  * Returns 0 if state reached, otherwise -ETIMEDOUT.
8473  */
8474 static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state,
8475                                   int msecs)
8476 {
8477         unsigned long timeout;
8478
8479         timeout = jiffies + msecs_to_jiffies(msecs);
8480         while (1) {
8481                 if (get_logical_state(ppd) == state)
8482                         return 0;
8483                 if (time_after(jiffies, timeout))
8484                         break;
8485                 msleep(20);
8486         }
8487         dd_dev_err(ppd->dd, "timeout waiting for link state 0x%x\n", state);
8488
8489         return -ETIMEDOUT;
8490 }
8491
8492 u8 hfi1_ibphys_portstate(struct hfi1_pportdata *ppd)
8493 {
8494         static u32 remembered_state = 0xff;
8495         u32 pstate;
8496         u32 ib_pstate;
8497
8498         pstate = read_physical_state(ppd->dd);
8499         ib_pstate = chip_to_opa_pstate(ppd->dd, pstate);
8500         if (remembered_state != ib_pstate) {
8501                 dd_dev_info(ppd->dd,
8502                         "%s: physical state changed to %s (0x%x), phy 0x%x\n",
8503                         __func__, opa_pstate_name(ib_pstate), ib_pstate,
8504                         pstate);
8505                 remembered_state = ib_pstate;
8506         }
8507         return ib_pstate;
8508 }
8509
8510 /*
8511  * Read/modify/write ASIC_QSFP register bits as selected by mask.
8512  * data: bit values to drive in the selected positions
8513  * dir: per-bit direction, 0 for read (input), 1 for write (output)
8514  * mask: bits to act on:
8515  *      I2CCLK  (bit 0)
8516  *      I2CDATA (bit 1)
8517  */
8518 u64 hfi1_gpio_mod(struct hfi1_devdata *dd, u32 target, u32 data, u32 dir,
8519                   u32 mask)
8520 {
8521         u64 qsfp_oe, target_oe;
8522
8523         target_oe = target ? ASIC_QSFP2_OE : ASIC_QSFP1_OE;
8524         if (mask) {
8525                 /* restrict direction and data to the selected bits */
8526                 dir &= mask;
8527                 data &= mask;
8528
8529                 qsfp_oe = read_csr(dd, target_oe);
8530                 qsfp_oe = (qsfp_oe & ~(u64)mask) | (u64)dir;
8531                 write_csr(dd, target_oe, qsfp_oe);
8532         }
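        /*
         * Note: only the OE register is modified here. Assuming the OUT
         * register stays low (it is reset to 0 elsewhere), enabling a
         * pin's output drives it low and disabling it lets the bus pull
         * it high - i.e. open-drain style I2C bit-banging.
         */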
8533         /* We are exclusively reading bits here, but it is unlikely
8534          * we'll get valid data when we set the direction of the pin
8535          * in the same call, so callers that change a pin's direction
8536          * should call this function again to get valid data.
8537          */
8538         return read_csr(dd, target ? ASIC_QSFP2_IN : ASIC_QSFP1_IN);
8539 }
8540
8541 #define CLEAR_STATIC_RATE_CONTROL_SMASK(r) \
8542 (r &= ~SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK)
8543
8544 #define SET_STATIC_RATE_CONTROL_SMASK(r) \
8545 (r |= SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK)
8546
8547 int hfi1_init_ctxt(struct send_context *sc)
8548 {
8549         if (sc != NULL) {
8550                 struct hfi1_devdata *dd = sc->dd;
8551                 u64 reg;
8552                 u8 set = (sc->type == SC_USER ?
8553                           HFI1_CAP_IS_USET(STATIC_RATE_CTRL) :
8554                           HFI1_CAP_IS_KSET(STATIC_RATE_CTRL));
8555                 reg = read_kctxt_csr(dd, sc->hw_context,
8556                                      SEND_CTXT_CHECK_ENABLE);
8557                 if (set)
8558                         CLEAR_STATIC_RATE_CONTROL_SMASK(reg);
8559                 else
8560                         SET_STATIC_RATE_CONTROL_SMASK(reg);
8561                 write_kctxt_csr(dd, sc->hw_context,
8562                                 SEND_CTXT_CHECK_ENABLE, reg);
8563         }
8564         return 0;
8565 }
8566
8567 int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp)
8568 {
8569         int ret = 0;
8570         u64 reg;
8571
8572         if (dd->icode != ICODE_RTL_SILICON) {
8573                 if (HFI1_CAP_IS_KSET(PRINT_UNIMPL))
8574                         dd_dev_info(dd, "%s: tempsense not supported by HW\n",
8575                                     __func__);
8576                 return -EINVAL;
8577         }
8578         reg = read_csr(dd, ASIC_STS_THERM);
8579         temp->curr = ((reg >> ASIC_STS_THERM_CURR_TEMP_SHIFT) &
8580                       ASIC_STS_THERM_CURR_TEMP_MASK);
8581         temp->lo_lim = ((reg >> ASIC_STS_THERM_LO_TEMP_SHIFT) &
8582                         ASIC_STS_THERM_LO_TEMP_MASK);
8583         temp->hi_lim = ((reg >> ASIC_STS_THERM_HI_TEMP_SHIFT) &
8584                         ASIC_STS_THERM_HI_TEMP_MASK);
8585         temp->crit_lim = ((reg >> ASIC_STS_THERM_CRIT_TEMP_SHIFT) &
8586                           ASIC_STS_THERM_CRIT_TEMP_MASK);
8587         /* triggers is a 3-bit value - 1 bit per trigger. */
8588         temp->triggers = (u8)((reg >> ASIC_STS_THERM_LOW_SHIFT) & 0x7);
8589
8590         return ret;
8591 }
8592
8593 /* ========================================================================= */
8594
8595 /*
8596  * Enable/disable chip from delivering interrupts.
8597  */
8598 void set_intr_state(struct hfi1_devdata *dd, u32 enable)
8599 {
8600         int i;
8601
8602         /*
8603          * In HFI, the mask needs to be 1 to allow interrupts.
8604          */
8605         if (enable) {
8606                 u64 cce_int_mask;
8607                 const int qsfp1_int_smask = QSFP1_INT % 64;
8608                 const int qsfp2_int_smask = QSFP2_INT % 64;
8609
8610                 /* enable all interrupts */
8611                 for (i = 0; i < CCE_NUM_INT_CSRS; i++)
8612                         write_csr(dd, CCE_INT_MASK + (8*i), ~(u64)0);
8613
8614                 /*
8615                  * disable QSFP1 interrupts for HFI1, QSFP2 interrupts for HFI0
8616                  * Qsfp1Int and Qsfp2Int are adjacent bits in the same CSR,
8617                  * therefore just one of QSFP1_INT/QSFP2_INT can be used to find
8618                  * the index of the appropriate CSR in the CCEIntMask CSR array
8619                  */
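                /*
                 * Illustrative only: if QSFP1_INT were chip source 45,
                 * QSFP2_INT would be source 46; both fall in CCE_INT_MASK
                 * CSR 45/64 == 0, at bit positions 45 and 46.
                 */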
8620                 cce_int_mask = read_csr(dd, CCE_INT_MASK +
8621                                                 (8*(QSFP1_INT/64)));
8622                 if (dd->hfi1_id) {
8623                         cce_int_mask &= ~((u64)1 << qsfp1_int_smask);
8624                         write_csr(dd, CCE_INT_MASK + (8*(QSFP1_INT/64)),
8625                                         cce_int_mask);
8626                 } else {
8627                         cce_int_mask &= ~((u64)1 << qsfp2_int_smask);
8628                         write_csr(dd, CCE_INT_MASK + (8*(QSFP2_INT/64)),
8629                                         cce_int_mask);
8630                 }
8631         } else {
8632                 for (i = 0; i < CCE_NUM_INT_CSRS; i++)
8633                         write_csr(dd, CCE_INT_MASK + (8*i), 0ull);
8634         }
8635 }
8636
8637 /*
8638  * Clear all interrupt sources on the chip.
8639  */
8640 static void clear_all_interrupts(struct hfi1_devdata *dd)
8641 {
8642         int i;
8643
8644         for (i = 0; i < CCE_NUM_INT_CSRS; i++)
8645                 write_csr(dd, CCE_INT_CLEAR + (8*i), ~(u64)0);
8646
8647         write_csr(dd, CCE_ERR_CLEAR, ~(u64)0);
8648         write_csr(dd, MISC_ERR_CLEAR, ~(u64)0);
8649         write_csr(dd, RCV_ERR_CLEAR, ~(u64)0);
8650         write_csr(dd, SEND_ERR_CLEAR, ~(u64)0);
8651         write_csr(dd, SEND_PIO_ERR_CLEAR, ~(u64)0);
8652         write_csr(dd, SEND_DMA_ERR_CLEAR, ~(u64)0);
8653         write_csr(dd, SEND_EGRESS_ERR_CLEAR, ~(u64)0);
8654         for (i = 0; i < dd->chip_send_contexts; i++)
8655                 write_kctxt_csr(dd, i, SEND_CTXT_ERR_CLEAR, ~(u64)0);
8656         for (i = 0; i < dd->chip_sdma_engines; i++)
8657                 write_kctxt_csr(dd, i, SEND_DMA_ENG_ERR_CLEAR, ~(u64)0);
8658
8659         write_csr(dd, DCC_ERR_FLG_CLR, ~(u64)0);
8660         write_csr(dd, DC_LCB_ERR_CLR, ~(u64)0);
8661         write_csr(dd, DC_DC8051_ERR_CLR, ~(u64)0);
8662 }
8663
8664 /* Move to pcie.c? */
8665 static void disable_intx(struct pci_dev *pdev)
8666 {
8667         pci_intx(pdev, 0);
8668 }
8669
8670 static void clean_up_interrupts(struct hfi1_devdata *dd)
8671 {
8672         int i;
8673
8674         /* remove irqs - must happen before disabling/turning off */
8675         if (dd->num_msix_entries) {
8676                 /* MSI-X */
8677                 struct hfi1_msix_entry *me = dd->msix_entries;
8678
8679                 for (i = 0; i < dd->num_msix_entries; i++, me++) {
8680                         if (me->arg == NULL) /* => no irq, no affinity */
8681                                 break;
8682                         irq_set_affinity_hint(dd->msix_entries[i].msix.vector,
8683                                         NULL);
8684                         free_irq(me->msix.vector, me->arg);
8685                 }
8686         } else {
8687                 /* INTx */
8688                 if (dd->requested_intx_irq) {
8689                         free_irq(dd->pcidev->irq, dd);
8690                         dd->requested_intx_irq = 0;
8691                 }
8692         }
8693
8694         /* turn off interrupts */
8695         if (dd->num_msix_entries) {
8696                 /* MSI-X */
8697                 hfi1_nomsix(dd);
8698         } else {
8699                 /* INTx */
8700                 disable_intx(dd->pcidev);
8701         }
8702
8703         /* clean structures */
8704         for (i = 0; i < dd->num_msix_entries; i++)
8705                 free_cpumask_var(dd->msix_entries[i].mask);
8706         kfree(dd->msix_entries);
8707         dd->msix_entries = NULL;
8708         dd->num_msix_entries = 0;
8709 }
8710
8711 /*
8712  * Remap the interrupt source from the general handler to the given MSI-X
8713  * interrupt.
8714  */
8715 static void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr)
8716 {
8717         u64 reg;
8718         int m, n;
8719
8720         /* clear from the handled mask of the general interrupt */
8721         m = isrc / 64;
8722         n = isrc % 64;
8723         dd->gi_mask[m] &= ~((u64)1 << n);
8724
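        /*
         * Worked example (illustrative): isrc = 75 clears bit 75 % 64 = 11
         * in gi_mask[75 / 64 = 1] above, and below sets byte 75 % 8 = 3 of
         * CCE_INT_MAP CSR 75 / 8 = 9 to the MSI-X vector number.
         */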
8725         /* direct the chip source to the given MSI-X interrupt */
8726         m = isrc / 8;
8727         n = isrc % 8;
8728         reg = read_csr(dd, CCE_INT_MAP + (8*m));
8729         reg &= ~((u64)0xff << (8*n));
8730         reg |= ((u64)msix_intr & 0xff) << (8*n);
8731         write_csr(dd, CCE_INT_MAP + (8*m), reg);
8732 }
8733
8734 static void remap_sdma_interrupts(struct hfi1_devdata *dd,
8735                                   int engine, int msix_intr)
8736 {
8737         /*
8738          * SDMA engine interrupt sources grouped by type, rather than
8739          * engine.  Per-engine interrupts are as follows:
8740          *      SDMA
8741          *      SDMAProgress
8742          *      SDMAIdle
8743          */
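        /*
         * E.g. assuming TXE_NUM_SDMA_ENGINES == 16, engine 2 routes chip
         * sources IS_SDMA_START + 2, + 18, and + 34 (SDma, SDmaProgress,
         * SDmaIdle) to the same MSI-X vector.
         */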
8744         remap_intr(dd, IS_SDMA_START + 0*TXE_NUM_SDMA_ENGINES + engine,
8745                 msix_intr);
8746         remap_intr(dd, IS_SDMA_START + 1*TXE_NUM_SDMA_ENGINES + engine,
8747                 msix_intr);
8748         remap_intr(dd, IS_SDMA_START + 2*TXE_NUM_SDMA_ENGINES + engine,
8749                 msix_intr);
8750 }
8751
8752 static void remap_receive_available_interrupt(struct hfi1_devdata *dd,
8753                                               int rx, int msix_intr)
8754 {
8755         remap_intr(dd, IS_RCVAVAIL_START + rx, msix_intr);
8756 }
8757
8758 static int request_intx_irq(struct hfi1_devdata *dd)
8759 {
8760         int ret;
8761
8762         snprintf(dd->intx_name, sizeof(dd->intx_name), DRIVER_NAME"_%d",
8763                 dd->unit);
8764         ret = request_irq(dd->pcidev->irq, general_interrupt,
8765                                   IRQF_SHARED, dd->intx_name, dd);
8766         if (ret)
8767                 dd_dev_err(dd, "unable to request INTx interrupt, err %d\n",
8768                                 ret);
8769         else
8770                 dd->requested_intx_irq = 1;
8771         return ret;
8772 }
8773
8774 static int request_msix_irqs(struct hfi1_devdata *dd)
8775 {
8776         const struct cpumask *local_mask;
8777         cpumask_var_t def, rcv;
8778         bool def_ret, rcv_ret;
8779         int first_general, last_general;
8780         int first_sdma, last_sdma;
8781         int first_rx, last_rx;
8782         int first_cpu, restart_cpu, curr_cpu;
8783         int rcv_cpu, sdma_cpu;
8784         int i, ret = 0, possible;
8785         int ht;
8786
8787         /* calculate the ranges we are going to use */
8788         first_general = 0;
8789         first_sdma = last_general = first_general + 1;
8790         first_rx = last_sdma = first_sdma + dd->num_sdma;
8791         last_rx = first_rx + dd->n_krcv_queues;
8792
8793         /*
8794          * Interrupt affinity.
8795          *
8796          * Non-rcv-avail interrupts get a default mask that starts as
8797          * the possible CPUs with hyperthread siblings cleared and each
8798          * rcv-avail CPU cleared.
8799          *
8800          * Rcv-avail interrupts are assigned starting at node-relative
8801          * CPU 1, wrapping back to node-relative CPU 1 as necessary.
8802          *
8803          */
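        /*
         * Illustrative example: on a node with CPUs 0-15 where 8-15 are
         * the hyperthread siblings, def starts as CPUs 0-7; with
         * n_krcv_queues == 3, CPUs 1-3 move to rcv, leaving 0 and 4-7
         * for the general and SDMA interrupts.
         */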
8804         local_mask = cpumask_of_pcibus(dd->pcidev->bus);
8805         /* if first cpu is invalid, use NUMA 0 */
8806         if (cpumask_first(local_mask) >= nr_cpu_ids)
8807                 local_mask = topology_core_cpumask(0);
8808
8809         def_ret = zalloc_cpumask_var(&def, GFP_KERNEL);
8810         rcv_ret = zalloc_cpumask_var(&rcv, GFP_KERNEL);
8811         if (!def_ret || !rcv_ret)
8812                 goto bail;
8813         /* use local mask as default */
8814         cpumask_copy(def, local_mask);
8815         possible = cpumask_weight(def);
8816         /* disarm threads from default */
8817         ht = cpumask_weight(
8818                         topology_sibling_cpumask(cpumask_first(local_mask)));
8819         for (i = possible/ht; i < possible; i++)
8820                 cpumask_clear_cpu(i, def);
8821         /* reset possible */
8822         possible = cpumask_weight(def);
8823         /* def now has full cores on chosen node*/
8824         first_cpu = cpumask_first(def);
8825         if (nr_cpu_ids >= first_cpu)
8826                 first_cpu++;
8827         restart_cpu = first_cpu;
8828         curr_cpu = restart_cpu;
8829
8830         for (i = first_cpu; i < dd->n_krcv_queues + first_cpu; i++) {
8831                 cpumask_clear_cpu(curr_cpu, def);
8832                 cpumask_set_cpu(curr_cpu, rcv);
8833                 if (curr_cpu >= possible)
8834                         curr_cpu = restart_cpu;
8835                 else
8836                         curr_cpu++;
8837         }
8838         /* def mask has non-rcv, rcv has recv mask */
8839         rcv_cpu = cpumask_first(rcv);
8840         sdma_cpu = cpumask_first(def);
8841
8842         /*
8843          * Sanity check - the code expects all SDMA chip source
8844          * interrupts to be in the same CSR, starting at bit 0.  Verify
8845          * that this is true by checking the bit location of the start.
8846          */
8847         BUILD_BUG_ON(IS_SDMA_START % 64);
8848
8849         for (i = 0; i < dd->num_msix_entries; i++) {
8850                 struct hfi1_msix_entry *me = &dd->msix_entries[i];
8851                 const char *err_info;
8852                 irq_handler_t handler;
8853                 void *arg;
8854                 int idx;
8855                 struct hfi1_ctxtdata *rcd = NULL;
8856                 struct sdma_engine *sde = NULL;
8857
8858                 /* obtain the arguments to request_irq */
8859                 if (first_general <= i && i < last_general) {
8860                         idx = i - first_general;
8861                         handler = general_interrupt;
8862                         arg = dd;
8863                         snprintf(me->name, sizeof(me->name),
8864                                 DRIVER_NAME"_%d", dd->unit);
8865                         err_info = "general";
8866                 } else if (first_sdma <= i && i < last_sdma) {
8867                         idx = i - first_sdma;
8868                         sde = &dd->per_sdma[idx];
8869                         handler = sdma_interrupt;
8870                         arg = sde;
8871                         snprintf(me->name, sizeof(me->name),
8872                                 DRIVER_NAME"_%d sdma%d", dd->unit, idx);
8873                         err_info = "sdma";
8874                         remap_sdma_interrupts(dd, idx, i);
8875                 } else if (first_rx <= i && i < last_rx) {
8876                         idx = i - first_rx;
8877                         rcd = dd->rcd[idx];
8878                         /* no interrupt if no rcd */
8879                         if (!rcd)
8880                                 continue;
8881                         /*
8882                          * Set the interrupt register and mask for this
8883                          * context's interrupt.
8884                          */
8885                         rcd->ireg = (IS_RCVAVAIL_START+idx) / 64;
8886                         rcd->imask = ((u64)1) <<
8887                                         ((IS_RCVAVAIL_START+idx) % 64);
8888                         handler = receive_context_interrupt;
8889                         arg = rcd;
8890                         snprintf(me->name, sizeof(me->name),
8891                                 DRIVER_NAME"_%d kctxt%d", dd->unit, idx);
8892                         err_info = "receive context";
8893                         remap_receive_available_interrupt(dd, idx, i);
8894                 } else {
8895                         /* not in our expected range - complain, then
8896                            ignore it */
8897                         dd_dev_err(dd,
8898                                 "Unexpected extra MSI-X interrupt %d\n", i);
8899                         continue;
8900                 }
8901                 /* no argument, no interrupt */
8902                 if (arg == NULL)
8903                         continue;
8904                 /* make sure the name is terminated */
8905                 me->name[sizeof(me->name)-1] = 0;
8906
8907                 ret = request_irq(me->msix.vector, handler, 0, me->name, arg);
8908                 if (ret) {
8909                         dd_dev_err(dd,
8910                                 "unable to allocate %s interrupt, vector %d, index %d, err %d\n",
8911                                  err_info, me->msix.vector, idx, ret);
8912                         return ret;
8913                 }
8914                 /*
8915                  * assign arg after request_irq call, so it will be
8916                  * cleaned up
8917                  */
8918                 me->arg = arg;
8919
8920                 if (!zalloc_cpumask_var(
8921                         &dd->msix_entries[i].mask,
8922                         GFP_KERNEL))
8923                         goto bail;
8924                 if (handler == sdma_interrupt) {
8925                         dd_dev_info(dd, "sdma engine %d cpu %d\n",
8926                                 sde->this_idx, sdma_cpu);
8927                         cpumask_set_cpu(sdma_cpu, dd->msix_entries[i].mask);
8928                         sdma_cpu = cpumask_next(sdma_cpu, def);
8929                         if (sdma_cpu >= nr_cpu_ids)
8930                                 sdma_cpu = cpumask_first(def);
8931                 } else if (handler == receive_context_interrupt) {
8932                         dd_dev_info(dd, "rcv ctxt %d cpu %d\n",
8933                                 rcd->ctxt, rcv_cpu);
8934                         cpumask_set_cpu(rcv_cpu, dd->msix_entries[i].mask);
8935                         rcv_cpu = cpumask_next(rcv_cpu, rcv);
8936                         if (rcv_cpu >= nr_cpu_ids)
8937                                 rcv_cpu = cpumask_first(rcv);
8938                 } else {
8939                         /* otherwise first def */
8940                         dd_dev_info(dd, "%s cpu %d\n",
8941                                 err_info, cpumask_first(def));
8942                         cpumask_set_cpu(
8943                                 cpumask_first(def), dd->msix_entries[i].mask);
8944                 }
8945                 irq_set_affinity_hint(
8946                         dd->msix_entries[i].msix.vector,
8947                         dd->msix_entries[i].mask);
8948         }
8949
8950 out:
8951         free_cpumask_var(def);
8952         free_cpumask_var(rcv);
8953         return ret;
8954 bail:
8955         ret = -ENOMEM;
8956         goto  out;
8957 }
8958
8959 /*
8960  * Set the general handler to accept all interrupts, remap all
8961  * chip interrupts back to MSI-X 0.
8962  */
8963 static void reset_interrupts(struct hfi1_devdata *dd)
8964 {
8965         int i;
8966
8967         /* all interrupts handled by the general handler */
8968         for (i = 0; i < CCE_NUM_INT_CSRS; i++)
8969                 dd->gi_mask[i] = ~(u64)0;
8970
8971         /* all chip interrupts map to MSI-X 0 */
8972         for (i = 0; i < CCE_NUM_INT_MAP_CSRS; i++)
8973                 write_csr(dd, CCE_INT_MAP + (8*i), 0);
8974 }
8975
8976 static int set_up_interrupts(struct hfi1_devdata *dd)
8977 {
8978         struct hfi1_msix_entry *entries;
8979         u32 total, request;
8980         int i, ret;
8981         int single_interrupt = 0; /* we expect to have all the interrupts */
8982
8983         /*
8984          * Interrupt count:
8985          *      1 general, "slow path" interrupt (includes the SDMA engines
8986          *              slow source, SDMACleanupDone)
8987          *      N interrupts - one per used SDMA engine
8988          *      M interrupts - one per kernel receive context
8989          */
8990         total = 1 + dd->num_sdma + dd->n_krcv_queues;
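        /*
         * E.g. 16 SDMA engines and 8 kernel receive contexts would need
         * total = 1 + 16 + 8 = 25 vectors (illustrative counts).
         */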
8991
8992         entries = kcalloc(total, sizeof(*entries), GFP_KERNEL);
8993         if (!entries) {
8994                 dd_dev_err(dd, "cannot allocate msix table\n");
8995                 ret = -ENOMEM;
8996                 goto fail;
8997         }
8998         /* 1-1 MSI-X entry assignment */
8999         for (i = 0; i < total; i++)
9000                 entries[i].msix.entry = i;
9001
9002         /* ask for MSI-X interrupts */
9003         request = total;
9004         request_msix(dd, &request, entries);
9005
9006         if (request == 0) {
9007                 /* using INTx */
9008                 /* dd->num_msix_entries already zero */
9009                 kfree(entries);
9010                 single_interrupt = 1;
9011                 dd_dev_err(dd, "MSI-X failed, using INTx interrupts\n");
9012         } else {
9013                 /* using MSI-X */
9014                 dd->num_msix_entries = request;
9015                 dd->msix_entries = entries;
9016
9017                 if (request != total) {
9018                         /* using MSI-X, with reduced interrupts */
9019                         dd_dev_err(
9020                                 dd,
9021                                 "cannot handle reduced interrupt case, want %u, got %u\n",
9022                                 total, request);
9023                         ret = -EINVAL;
9024                         goto fail;
9025                 }
9026                 dd_dev_info(dd, "%u MSI-X interrupts allocated\n", total);
9027         }
9028
9029         /* mask all interrupts */
9030         set_intr_state(dd, 0);
9031         /* clear all pending interrupts */
9032         clear_all_interrupts(dd);
9033
9034         /* reset general handler mask, chip MSI-X mappings */
9035         reset_interrupts(dd);
9036
9037         if (single_interrupt)
9038                 ret = request_intx_irq(dd);
9039         else
9040                 ret = request_msix_irqs(dd);
9041         if (ret)
9042                 goto fail;
9043
9044         return 0;
9045
9046 fail:
9047         clean_up_interrupts(dd);
9048         return ret;
9049 }
9050
9051 /*
9052  * Set up context values in dd.  Sets:
9053  *
9054  *      num_rcv_contexts - number of contexts being used
9055  *      n_krcv_queues - number of kernel contexts
9056  *      first_user_ctxt - first non-kernel context in array of contexts
9057  *      freectxts  - number of free user contexts
9058  *      num_send_contexts - number of PIO send contexts being used
9059  */
9060 static int set_up_context_variables(struct hfi1_devdata *dd)
9061 {
9062         int num_kernel_contexts;
9063         int num_user_contexts;
9064         int total_contexts;
9065         int ret;
9066         unsigned ngroups;
9067
9068         /*
9069          * Kernel contexts: (to be fixed later):
9070          * - minimum of 2, or 1 context per NUMA node, whichever is larger
9071          * - Context 0 - default/errors
9072          * - Context 1 - VL15
9073          */
9074         if (n_krcvqs)
9075                 num_kernel_contexts = n_krcvqs + MIN_KERNEL_KCTXTS;
9076         else
9077                 num_kernel_contexts = num_online_nodes();
9078         num_kernel_contexts =
9079                 max_t(int, MIN_KERNEL_KCTXTS, num_kernel_contexts);
9080         /*
9081          * Every kernel receive context needs an ACK send context.
9082          * One send context is allocated for each VL{0-7} and VL15.
9083          */
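        /*
         * E.g. with 160 chip send contexts and num_vls == 8 (illustrative
         * values), at most 160 - 8 - 1 = 151 kernel receive contexts fit.
         */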
9084         if (num_kernel_contexts > (dd->chip_send_contexts - num_vls - 1)) {
9085                 dd_dev_err(dd,
9086                            "Reducing # kernel rcv contexts to: %d, from %d\n",
9087                            (int)(dd->chip_send_contexts - num_vls - 1),
9088                            (int)num_kernel_contexts);
9089                 num_kernel_contexts = dd->chip_send_contexts - num_vls - 1;
9090         }
9091         /*
9092          * User contexts: (to be fixed later)
9093          *      - set to num_rcv_contexts if non-zero
9094          *      - default to 1 user context per CPU
9095          */
9096         if (num_rcv_contexts)
9097                 num_user_contexts = num_rcv_contexts;
9098         else
9099                 num_user_contexts = num_online_cpus();
9100
9101         total_contexts = num_kernel_contexts + num_user_contexts;
9102
9103         /*
9104          * Adjust the counts given a global max.
9105          */
9106         if (total_contexts > dd->chip_rcv_contexts) {
9107                 dd_dev_err(dd,
9108                            "Reducing # user receive contexts to: %d, from %d\n",
9109                            (int)(dd->chip_rcv_contexts - num_kernel_contexts),
9110                            (int)num_user_contexts);
9111                 num_user_contexts = dd->chip_rcv_contexts - num_kernel_contexts;
9112                 /* recalculate */
9113                 total_contexts = num_kernel_contexts + num_user_contexts;
9114         }
9115
9116         /* the first N are kernel contexts, the rest are user contexts */
9117         dd->num_rcv_contexts = total_contexts;
9118         dd->n_krcv_queues = num_kernel_contexts;
9119         dd->first_user_ctxt = num_kernel_contexts;
9120         dd->freectxts = num_user_contexts;
9121         dd_dev_info(dd,
9122                 "rcv contexts: chip %d, used %d (kernel %d, user %d)\n",
9123                 (int)dd->chip_rcv_contexts,
9124                 (int)dd->num_rcv_contexts,
9125                 (int)dd->n_krcv_queues,
9126                 (int)dd->num_rcv_contexts - dd->n_krcv_queues);
9127
9128         /*
9129          * Receive array allocation:
9130          *   All RcvArray entries are divided into groups of 8. This
9131          *   is required by the hardware and will speed up writes to
9132          *   consecutive entries by using write-combining of the entire
9133          *   cacheline.
9134          *
9135          *   The groups are divided evenly among all contexts;
9136          *   any leftover groups are given to the first N user
9137          *   contexts.
9138          */
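        /*
         * Worked example (illustrative numbers): 32768 RcvArray entries
         * in groups of 8 gives 4096 groups; with 40 contexts each context
         * gets 4096 / 40 = 102 groups and the 16 left over go one each to
         * the first 16 user contexts.
         */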
9139         dd->rcv_entries.group_size = RCV_INCREMENT;
9140         ngroups = dd->chip_rcv_array_count / dd->rcv_entries.group_size;
9141         dd->rcv_entries.ngroups = ngroups / dd->num_rcv_contexts;
9142         dd->rcv_entries.nctxt_extra = ngroups -
9143                 (dd->num_rcv_contexts * dd->rcv_entries.ngroups);
9144         dd_dev_info(dd, "RcvArray groups %u, ctxts extra %u\n",
9145                     dd->rcv_entries.ngroups,
9146                     dd->rcv_entries.nctxt_extra);
9147         if (dd->rcv_entries.ngroups * dd->rcv_entries.group_size >
9148             MAX_EAGER_ENTRIES * 2) {
9149                 dd->rcv_entries.ngroups = (MAX_EAGER_ENTRIES * 2) /
9150                         dd->rcv_entries.group_size;
9151                 dd_dev_info(dd,
9152                    "RcvArray group count too high, change to %u\n",
9153                    dd->rcv_entries.ngroups);
9154                 dd->rcv_entries.nctxt_extra = 0;
9155         }
9156         /*
9157          * PIO send contexts
9158          */
9159         ret = init_sc_pools_and_sizes(dd);
9160         if (ret >= 0) { /* success */
9161                 dd->num_send_contexts = ret;
9162                 dd_dev_info(
9163                         dd,
9164                         "send contexts: chip %d, used %d (kernel %d, ack %d, user %d)\n",
9165                         dd->chip_send_contexts,
9166                         dd->num_send_contexts,
9167                         dd->sc_sizes[SC_KERNEL].count,
9168                         dd->sc_sizes[SC_ACK].count,
9169                         dd->sc_sizes[SC_USER].count);
9170                 ret = 0;        /* success */
9171         }
9172
9173         return ret;
9174 }
9175
9176 /*
9177  * Set the device/port partition key table. The MAD code
9178  * will ensure that, at least, the partial management
9179  * partition key is present in the table.
9180  */
9181 static void set_partition_keys(struct hfi1_pportdata *ppd)
9182 {
9183         struct hfi1_devdata *dd = ppd->dd;
9184         u64 reg = 0;
9185         int i;
9186
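        /*
         * Pkeys are 16 bits wide; four are packed per 64-bit
         * RCV_PARTITION_KEY CSR (assuming the _B_SHIFT above is 16), so
         * pkeys[0..3] land in bits 15:0, 31:16, 47:32 and 63:48 of the
         * first register.
         */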
9187         dd_dev_info(dd, "Setting partition keys\n");
9188         for (i = 0; i < hfi1_get_npkeys(dd); i++) {
9189                 reg |= (ppd->pkeys[i] &
9190                         RCV_PARTITION_KEY_PARTITION_KEY_A_MASK) <<
9191                         ((i % 4) *
9192                          RCV_PARTITION_KEY_PARTITION_KEY_B_SHIFT);
9193                 /* Each register holds 4 PKey values. */
9194                 if ((i % 4) == 3) {
9195                         write_csr(dd, RCV_PARTITION_KEY +
9196                                   ((i - 3) * 2), reg);
9197                         reg = 0;
9198                 }
9199         }
9200
9201         /* Always enable HW pkeys check when pkeys table is set */
9202         add_rcvctrl(dd, RCV_CTRL_RCV_PARTITION_KEY_ENABLE_SMASK);
9203 }
9204
9205 /*
9206  * These CSRs and memories are uninitialized on reset and must be
9207  * written before reading to set the ECC/parity bits.
9208  *
9209  * NOTE: All user context CSRs that are not mmaped write-only
9210  * (e.g. the TID flows) must be initialized even if the driver never
9211  * reads them.
9212  */
9213 static void write_uninitialized_csrs_and_memories(struct hfi1_devdata *dd)
9214 {
9215         int i, j;
9216
9217         /* CceIntMap */
9218         for (i = 0; i < CCE_NUM_INT_MAP_CSRS; i++)
9219                 write_csr(dd, CCE_INT_MAP+(8*i), 0);
9220
9221         /* SendCtxtCreditReturnAddr */
9222         for (i = 0; i < dd->chip_send_contexts; i++)
9223                 write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_RETURN_ADDR, 0);
9224
9225         /* PIO Send buffers */
9226         /* SDMA Send buffers */
9227         /* These are not normally read, and (presently) have no method
9228            to be read, so are not pre-initialized */
9229
9230         /* RcvHdrAddr */
9231         /* RcvHdrTailAddr */
9232         /* RcvTidFlowTable */
9233         for (i = 0; i < dd->chip_rcv_contexts; i++) {
9234                 write_kctxt_csr(dd, i, RCV_HDR_ADDR, 0);
9235                 write_kctxt_csr(dd, i, RCV_HDR_TAIL_ADDR, 0);
9236                 for (j = 0; j < RXE_NUM_TID_FLOWS; j++)
9237                         write_uctxt_csr(dd, i, RCV_TID_FLOW_TABLE+(8*j), 0);
9238         }
9239
9240         /* RcvArray */
9241         for (i = 0; i < dd->chip_rcv_array_count; i++)
9242                 write_csr(dd, RCV_ARRAY + (8*i),
9243                                         RCV_ARRAY_RT_WRITE_ENABLE_SMASK);
9244
9245         /* RcvQPMapTable */
9246         for (i = 0; i < 32; i++)
9247                 write_csr(dd, RCV_QP_MAP_TABLE + (8 * i), 0);
9248 }
9249
9250 /*
9251  * Use the ctrl_bits in CceCtrl to clear the status_bits in CceStatus.
9252  */
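/*
 * E.g. clear_cce_status(dd, ALL_FROZE, CCE_CTRL_SPC_UNFREEZE_SMASK), as
 * used in reset_cce_csrs() below, requests an unfreeze and polls until
 * the freeze status bits drop.
 */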
9253 static void clear_cce_status(struct hfi1_devdata *dd, u64 status_bits,
9254                              u64 ctrl_bits)
9255 {
9256         unsigned long timeout;
9257         u64 reg;
9258
9259         /* is the condition present? */
9260         reg = read_csr(dd, CCE_STATUS);
9261         if ((reg & status_bits) == 0)
9262                 return;
9263
9264         /* clear the condition */
9265         write_csr(dd, CCE_CTRL, ctrl_bits);
9266
9267         /* wait for the condition to clear */
9268         timeout = jiffies + msecs_to_jiffies(CCE_STATUS_TIMEOUT);
9269         while (1) {
9270                 reg = read_csr(dd, CCE_STATUS);
9271                 if ((reg & status_bits) == 0)
9272                         return;
9273                 if (time_after(jiffies, timeout)) {
9274                         dd_dev_err(dd,
9275                                 "Timeout waiting for CceStatus to clear bits 0x%llx, remaining 0x%llx\n",
9276                                 status_bits, reg & status_bits);
9277                         return;
9278                 }
9279                 udelay(1);
9280         }
9281 }
9282
9283 /* set CCE CSRs to chip reset defaults */
9284 static void reset_cce_csrs(struct hfi1_devdata *dd)
9285 {
9286         int i;
9287
9288         /* CCE_REVISION read-only */
9289         /* CCE_REVISION2 read-only */
9290         /* CCE_CTRL - bits clear automatically */
9291         /* CCE_STATUS read-only, use CceCtrl to clear */
9292         clear_cce_status(dd, ALL_FROZE, CCE_CTRL_SPC_UNFREEZE_SMASK);
9293         clear_cce_status(dd, ALL_TXE_PAUSE, CCE_CTRL_TXE_RESUME_SMASK);
9294         clear_cce_status(dd, ALL_RXE_PAUSE, CCE_CTRL_RXE_RESUME_SMASK);
9295         for (i = 0; i < CCE_NUM_SCRATCH; i++)
9296                 write_csr(dd, CCE_SCRATCH + (8 * i), 0);
9297         /* CCE_ERR_STATUS read-only */
9298         write_csr(dd, CCE_ERR_MASK, 0);
9299         write_csr(dd, CCE_ERR_CLEAR, ~0ull);
9300         /* CCE_ERR_FORCE leave alone */
9301         for (i = 0; i < CCE_NUM_32_BIT_COUNTERS; i++)
9302                 write_csr(dd, CCE_COUNTER_ARRAY32 + (8 * i), 0);
9303         write_csr(dd, CCE_DC_CTRL, CCE_DC_CTRL_RESETCSR);
9304         /* CCE_PCIE_CTRL leave alone */
9305         for (i = 0; i < CCE_NUM_MSIX_VECTORS; i++) {
9306                 write_csr(dd, CCE_MSIX_TABLE_LOWER + (8 * i), 0);
9307                 write_csr(dd, CCE_MSIX_TABLE_UPPER + (8 * i),
9308                                         CCE_MSIX_TABLE_UPPER_RESETCSR);
9309         }
9310         for (i = 0; i < CCE_NUM_MSIX_PBAS; i++) {
9311                 /* CCE_MSIX_PBA read-only */
9312                 write_csr(dd, CCE_MSIX_INT_GRANTED, ~0ull);
9313                 write_csr(dd, CCE_MSIX_VEC_CLR_WITHOUT_INT, ~0ull);
9314         }
9315         for (i = 0; i < CCE_NUM_INT_MAP_CSRS; i++)
9316                 write_csr(dd, CCE_INT_MAP + (8 * i), 0);
9317         for (i = 0; i < CCE_NUM_INT_CSRS; i++) {
9318                 /* CCE_INT_STATUS read-only */
9319                 write_csr(dd, CCE_INT_MASK + (8 * i), 0);
9320                 write_csr(dd, CCE_INT_CLEAR + (8 * i), ~0ull);
9321                 /* CCE_INT_FORCE leave alone */
9322                 /* CCE_INT_BLOCKED read-only */
9323         }
9324         for (i = 0; i < CCE_NUM_32_BIT_INT_COUNTERS; i++)
9325                 write_csr(dd, CCE_INT_COUNTER_ARRAY32 + (8 * i), 0);
9326 }
9327
9328 /* set ASIC CSRs to chip reset defaults */
9329 static void reset_asic_csrs(struct hfi1_devdata *dd)
9330 {
9331         static DEFINE_MUTEX(asic_mutex);
9332         static int called;
9333         int i;
9334
9335         /*
9336          * If the HFIs are shared between separate nodes or VMs,
9337          * then more will need to be done here.  One idea is a module
9338          * parameter that returns early, letting the first power-on or
9339          * a known first load do the reset and blocking all others.
9340          */
9341
9342         /*
9343          * These CSRs should only be reset once - the first one here will
9344          * do the work.  Use a mutex so that a non-first caller waits until
9345          * the first is finished before it can proceed.
9346          */
9347         mutex_lock(&asic_mutex);
9348         if (called)
9349                 goto done;
9350         called = 1;
9351
9352         if (dd->icode != ICODE_FPGA_EMULATION) {
9353                 /* emulation does not have an SBus - leave these alone */
9354                 /*
9355                  * All writes to ASIC_CFG_SBUS_REQUEST do something.
9356                  * Notes:
9357                  * o The reset is not zero if aimed at the core.  See the
9358                  *   SBus documentation for details.
9359                  * o If the SBus firmware has been updated (e.g. by the BIOS),
9360                  *   will the reset revert that?
9361                  */
9362                 /* ASIC_CFG_SBUS_REQUEST leave alone */
9363                 write_csr(dd, ASIC_CFG_SBUS_EXECUTE, 0);
9364         }
9365         /* ASIC_SBUS_RESULT read-only */
9366         write_csr(dd, ASIC_STS_SBUS_COUNTERS, 0);
9367         for (i = 0; i < ASIC_NUM_SCRATCH; i++)
9368                 write_csr(dd, ASIC_CFG_SCRATCH + (8 * i), 0);
9369         write_csr(dd, ASIC_CFG_MUTEX, 0);       /* this will clear it */
9370         write_csr(dd, ASIC_CFG_DRV_STR, 0);
9371         write_csr(dd, ASIC_CFG_THERM_POLL_EN, 0);
9372         /* ASIC_STS_THERM read-only */
9373         /* ASIC_CFG_RESET leave alone */
9374
9375         write_csr(dd, ASIC_PCIE_SD_HOST_CMD, 0);
9376         /* ASIC_PCIE_SD_HOST_STATUS read-only */
9377         write_csr(dd, ASIC_PCIE_SD_INTRPT_DATA_CODE, 0);
9378         write_csr(dd, ASIC_PCIE_SD_INTRPT_ENABLE, 0);
9379         /* ASIC_PCIE_SD_INTRPT_PROGRESS read-only */
9380         write_csr(dd, ASIC_PCIE_SD_INTRPT_STATUS, ~0ull); /* clear */
9381         /* ASIC_HFI0_PCIE_SD_INTRPT_RSPD_DATA read-only */
9382         /* ASIC_HFI1_PCIE_SD_INTRPT_RSPD_DATA read-only */
9383         for (i = 0; i < 16; i++)
9384                 write_csr(dd, ASIC_PCIE_SD_INTRPT_LIST + (8 * i), 0);
9385
9386         /* ASIC_GPIO_IN read-only */
9387         write_csr(dd, ASIC_GPIO_OE, 0);
9388         write_csr(dd, ASIC_GPIO_INVERT, 0);
9389         write_csr(dd, ASIC_GPIO_OUT, 0);
9390         write_csr(dd, ASIC_GPIO_MASK, 0);
9391         /* ASIC_GPIO_STATUS read-only */
9392         write_csr(dd, ASIC_GPIO_CLEAR, ~0ull);
9393         /* ASIC_GPIO_FORCE leave alone */
9394
9395         /* ASIC_QSFP1_IN read-only */
9396         write_csr(dd, ASIC_QSFP1_OE, 0);
9397         write_csr(dd, ASIC_QSFP1_INVERT, 0);
9398         write_csr(dd, ASIC_QSFP1_OUT, 0);
9399         write_csr(dd, ASIC_QSFP1_MASK, 0);
9400         /* ASIC_QSFP1_STATUS read-only */
9401         write_csr(dd, ASIC_QSFP1_CLEAR, ~0ull);
9402         /* ASIC_QSFP1_FORCE leave alone */
9403
9404         /* ASIC_QSFP2_IN read-only */
9405         write_csr(dd, ASIC_QSFP2_OE, 0);
9406         write_csr(dd, ASIC_QSFP2_INVERT, 0);
9407         write_csr(dd, ASIC_QSFP2_OUT, 0);
9408         write_csr(dd, ASIC_QSFP2_MASK, 0);
9409         /* ASIC_QSFP2_STATUS read-only */
9410         write_csr(dd, ASIC_QSFP2_CLEAR, ~0ull);
9411         /* ASIC_QSFP2_FORCE leave alone */
9412
9413         write_csr(dd, ASIC_EEP_CTL_STAT, ASIC_EEP_CTL_STAT_RESETCSR);
9414         /* this also writes a NOP command, clearing paging mode */
9415         write_csr(dd, ASIC_EEP_ADDR_CMD, 0);
9416         write_csr(dd, ASIC_EEP_DATA, 0);
9417
9418 done:
9419         mutex_unlock(&asic_mutex);
9420 }
9421
9422 /* set MISC CSRs to chip reset defaults */
9423 static void reset_misc_csrs(struct hfi1_devdata *dd)
9424 {
9425         int i;
9426
9427         for (i = 0; i < 32; i++) {
9428                 write_csr(dd, MISC_CFG_RSA_R2 + (8 * i), 0);
9429                 write_csr(dd, MISC_CFG_RSA_SIGNATURE + (8 * i), 0);
9430                 write_csr(dd, MISC_CFG_RSA_MODULUS + (8 * i), 0);
9431         }
        /*
         * MISC_CFG_SHA_PRELOAD leave alone - always reads 0 and can
         * only be written in 128-byte chunks
         */
9434         /* init RSA engine to clear lingering errors */
9435         write_csr(dd, MISC_CFG_RSA_CMD, 1);
9436         write_csr(dd, MISC_CFG_RSA_MU, 0);
9437         write_csr(dd, MISC_CFG_FW_CTRL, 0);
9438         /* MISC_STS_8051_DIGEST read-only */
9439         /* MISC_STS_SBM_DIGEST read-only */
9440         /* MISC_STS_PCIE_DIGEST read-only */
9441         /* MISC_STS_FAB_DIGEST read-only */
9442         /* MISC_ERR_STATUS read-only */
9443         write_csr(dd, MISC_ERR_MASK, 0);
9444         write_csr(dd, MISC_ERR_CLEAR, ~0ull);
9445         /* MISC_ERR_FORCE leave alone */
9446 }
9447
9448 /* set TXE CSRs to chip reset defaults */
9449 static void reset_txe_csrs(struct hfi1_devdata *dd)
9450 {
9451         int i;
9452
9453         /*
9454          * TXE Kernel CSRs
9455          */
9456         write_csr(dd, SEND_CTRL, 0);
9457         __cm_reset(dd, 0);      /* reset CM internal state */
9458         /* SEND_CONTEXTS read-only */
9459         /* SEND_DMA_ENGINES read-only */
9460         /* SEND_PIO_MEM_SIZE read-only */
9461         /* SEND_DMA_MEM_SIZE read-only */
9462         write_csr(dd, SEND_HIGH_PRIORITY_LIMIT, 0);
9463         pio_reset_all(dd);      /* SEND_PIO_INIT_CTXT */
9464         /* SEND_PIO_ERR_STATUS read-only */
9465         write_csr(dd, SEND_PIO_ERR_MASK, 0);
9466         write_csr(dd, SEND_PIO_ERR_CLEAR, ~0ull);
9467         /* SEND_PIO_ERR_FORCE leave alone */
9468         /* SEND_DMA_ERR_STATUS read-only */
9469         write_csr(dd, SEND_DMA_ERR_MASK, 0);
9470         write_csr(dd, SEND_DMA_ERR_CLEAR, ~0ull);
9471         /* SEND_DMA_ERR_FORCE leave alone */
9472         /* SEND_EGRESS_ERR_STATUS read-only */
9473         write_csr(dd, SEND_EGRESS_ERR_MASK, 0);
9474         write_csr(dd, SEND_EGRESS_ERR_CLEAR, ~0ull);
9475         /* SEND_EGRESS_ERR_FORCE leave alone */
9476         write_csr(dd, SEND_BTH_QP, 0);
9477         write_csr(dd, SEND_STATIC_RATE_CONTROL, 0);
9478         write_csr(dd, SEND_SC2VLT0, 0);
9479         write_csr(dd, SEND_SC2VLT1, 0);
9480         write_csr(dd, SEND_SC2VLT2, 0);
9481         write_csr(dd, SEND_SC2VLT3, 0);
9482         write_csr(dd, SEND_LEN_CHECK0, 0);
9483         write_csr(dd, SEND_LEN_CHECK1, 0);
9484         /* SEND_ERR_STATUS read-only */
9485         write_csr(dd, SEND_ERR_MASK, 0);
9486         write_csr(dd, SEND_ERR_CLEAR, ~0ull);
9487         /* SEND_ERR_FORCE read-only */
9488         for (i = 0; i < VL_ARB_LOW_PRIO_TABLE_SIZE; i++)
9489                 write_csr(dd, SEND_LOW_PRIORITY_LIST + (8*i), 0);
9490         for (i = 0; i < VL_ARB_HIGH_PRIO_TABLE_SIZE; i++)
9491                 write_csr(dd, SEND_HIGH_PRIORITY_LIST + (8*i), 0);
9492         for (i = 0; i < dd->chip_send_contexts/NUM_CONTEXTS_PER_SET; i++)
9493                 write_csr(dd, SEND_CONTEXT_SET_CTRL + (8*i), 0);
9494         for (i = 0; i < TXE_NUM_32_BIT_COUNTER; i++)
9495                 write_csr(dd, SEND_COUNTER_ARRAY32 + (8*i), 0);
9496         for (i = 0; i < TXE_NUM_64_BIT_COUNTER; i++)
9497                 write_csr(dd, SEND_COUNTER_ARRAY64 + (8*i), 0);
9498         write_csr(dd, SEND_CM_CTRL, SEND_CM_CTRL_RESETCSR);
9499         write_csr(dd, SEND_CM_GLOBAL_CREDIT,
9500                                         SEND_CM_GLOBAL_CREDIT_RESETCSR);
9501         /* SEND_CM_CREDIT_USED_STATUS read-only */
9502         write_csr(dd, SEND_CM_TIMER_CTRL, 0);
9503         write_csr(dd, SEND_CM_LOCAL_AU_TABLE0_TO3, 0);
9504         write_csr(dd, SEND_CM_LOCAL_AU_TABLE4_TO7, 0);
9505         write_csr(dd, SEND_CM_REMOTE_AU_TABLE0_TO3, 0);
9506         write_csr(dd, SEND_CM_REMOTE_AU_TABLE4_TO7, 0);
9507         for (i = 0; i < TXE_NUM_DATA_VL; i++)
9508                 write_csr(dd, SEND_CM_CREDIT_VL + (8*i), 0);
9509         write_csr(dd, SEND_CM_CREDIT_VL15, 0);
9510         /* SEND_CM_CREDIT_USED_VL read-only */
9511         /* SEND_CM_CREDIT_USED_VL15 read-only */
9512         /* SEND_EGRESS_CTXT_STATUS read-only */
9513         /* SEND_EGRESS_SEND_DMA_STATUS read-only */
9514         write_csr(dd, SEND_EGRESS_ERR_INFO, ~0ull);
9515         /* SEND_EGRESS_ERR_INFO read-only */
9516         /* SEND_EGRESS_ERR_SOURCE read-only */
9517
9518         /*
9519          * TXE Per-Context CSRs
9520          */
9521         for (i = 0; i < dd->chip_send_contexts; i++) {
9522                 write_kctxt_csr(dd, i, SEND_CTXT_CTRL, 0);
9523                 write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_CTRL, 0);
9524                 write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_RETURN_ADDR, 0);
9525                 write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_FORCE, 0);
9526                 write_kctxt_csr(dd, i, SEND_CTXT_ERR_MASK, 0);
9527                 write_kctxt_csr(dd, i, SEND_CTXT_ERR_CLEAR, ~0ull);
9528                 write_kctxt_csr(dd, i, SEND_CTXT_CHECK_ENABLE, 0);
9529                 write_kctxt_csr(dd, i, SEND_CTXT_CHECK_VL, 0);
9530                 write_kctxt_csr(dd, i, SEND_CTXT_CHECK_JOB_KEY, 0);
9531                 write_kctxt_csr(dd, i, SEND_CTXT_CHECK_PARTITION_KEY, 0);
9532                 write_kctxt_csr(dd, i, SEND_CTXT_CHECK_SLID, 0);
9533                 write_kctxt_csr(dd, i, SEND_CTXT_CHECK_OPCODE, 0);
9534         }
9535
9536         /*
9537          * TXE Per-SDMA CSRs
9538          */
9539         for (i = 0; i < dd->chip_sdma_engines; i++) {
9540                 write_kctxt_csr(dd, i, SEND_DMA_CTRL, 0);
9541                 /* SEND_DMA_STATUS read-only */
9542                 write_kctxt_csr(dd, i, SEND_DMA_BASE_ADDR, 0);
9543                 write_kctxt_csr(dd, i, SEND_DMA_LEN_GEN, 0);
9544                 write_kctxt_csr(dd, i, SEND_DMA_TAIL, 0);
9545                 /* SEND_DMA_HEAD read-only */
9546                 write_kctxt_csr(dd, i, SEND_DMA_HEAD_ADDR, 0);
9547                 write_kctxt_csr(dd, i, SEND_DMA_PRIORITY_THLD, 0);
9548                 /* SEND_DMA_IDLE_CNT read-only */
9549                 write_kctxt_csr(dd, i, SEND_DMA_RELOAD_CNT, 0);
9550                 write_kctxt_csr(dd, i, SEND_DMA_DESC_CNT, 0);
9551                 /* SEND_DMA_DESC_FETCHED_CNT read-only */
9552                 /* SEND_DMA_ENG_ERR_STATUS read-only */
9553                 write_kctxt_csr(dd, i, SEND_DMA_ENG_ERR_MASK, 0);
9554                 write_kctxt_csr(dd, i, SEND_DMA_ENG_ERR_CLEAR, ~0ull);
9555                 /* SEND_DMA_ENG_ERR_FORCE leave alone */
9556                 write_kctxt_csr(dd, i, SEND_DMA_CHECK_ENABLE, 0);
9557                 write_kctxt_csr(dd, i, SEND_DMA_CHECK_VL, 0);
9558                 write_kctxt_csr(dd, i, SEND_DMA_CHECK_JOB_KEY, 0);
9559                 write_kctxt_csr(dd, i, SEND_DMA_CHECK_PARTITION_KEY, 0);
9560                 write_kctxt_csr(dd, i, SEND_DMA_CHECK_SLID, 0);
9561                 write_kctxt_csr(dd, i, SEND_DMA_CHECK_OPCODE, 0);
9562                 write_kctxt_csr(dd, i, SEND_DMA_MEMORY, 0);
9563         }
9564 }
9565
9566 /*
9567  * Expect on entry:
9568  * o Packet ingress is disabled, i.e. RcvCtrl.RcvPortEnable == 0
9569  */
9570 static void init_rbufs(struct hfi1_devdata *dd)
9571 {
9572         u64 reg;
9573         int count;
9574
9575         /*
9576          * Wait for DMA to stop: RxRbufPktPending and RxPktInProgress are
9577          * clear.
9578          */
9579         count = 0;
9580         while (1) {
9581                 reg = read_csr(dd, RCV_STATUS);
9582                 if ((reg & (RCV_STATUS_RX_RBUF_PKT_PENDING_SMASK
9583                             | RCV_STATUS_RX_PKT_IN_PROGRESS_SMASK)) == 0)
9584                         break;
9585                 /*
9586                  * Give up after 1ms - maximum wait time.
9587                  *
9588                  * RBuf size is 148KiB.  Slowest possible is PCIe Gen1 x1 at
                 * 250MB/s bandwidth.  Derate to 66% for overhead to get:
                 *      148 KiB / (66% * 250MB/s) = 920us
9591                  */
9592                 if (count++ > 500) {
9593                         dd_dev_err(dd,
9594                                 "%s: in-progress DMA not clearing: RcvStatus 0x%llx, continuing\n",
9595                                 __func__, reg);
9596                         break;
9597                 }
9598                 udelay(2); /* do not busy-wait the CSR */
9599         }
9600
9601         /* start the init - expect RcvCtrl to be 0 */
9602         write_csr(dd, RCV_CTRL, RCV_CTRL_RX_RBUF_INIT_SMASK);
9603
9604         /*
         * Read to force the write of RcvCtrl.RxRbufInit.  There is a brief
         * period after the write before RcvStatus.RxRbufInitDone is valid.
         * The delay in the first run through the loop below is sufficient and
         * required before the first read of RcvStatus.RxRbufInitDone.
9609          */
9610         read_csr(dd, RCV_CTRL);
9611
9612         /* wait for the init to finish */
9613         count = 0;
9614         while (1) {
9615                 /* delay is required first time through - see above */
9616                 udelay(2); /* do not busy-wait the CSR */
9617                 reg = read_csr(dd, RCV_STATUS);
9618                 if (reg & (RCV_STATUS_RX_RBUF_INIT_DONE_SMASK))
9619                         break;
9620
9621                 /* give up after 100us - slowest possible at 33MHz is 73us */
9622                 if (count++ > 50) {
9623                         dd_dev_err(dd,
                                "%s: RcvStatus.RxRbufInitDone not set, continuing\n",
9625                                 __func__);
9626                         break;
9627                 }
9628         }
9629 }
9630
9631 /* set RXE CSRs to chip reset defaults */
9632 static void reset_rxe_csrs(struct hfi1_devdata *dd)
9633 {
9634         int i, j;
9635
9636         /*
9637          * RXE Kernel CSRs
9638          */
9639         write_csr(dd, RCV_CTRL, 0);
9640         init_rbufs(dd);
9641         /* RCV_STATUS read-only */
9642         /* RCV_CONTEXTS read-only */
9643         /* RCV_ARRAY_CNT read-only */
9644         /* RCV_BUF_SIZE read-only */
9645         write_csr(dd, RCV_BTH_QP, 0);
9646         write_csr(dd, RCV_MULTICAST, 0);
9647         write_csr(dd, RCV_BYPASS, 0);
9648         write_csr(dd, RCV_VL15, 0);
9649         /* this is a clear-down */
9650         write_csr(dd, RCV_ERR_INFO,
9651                         RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SMASK);
9652         /* RCV_ERR_STATUS read-only */
9653         write_csr(dd, RCV_ERR_MASK, 0);
9654         write_csr(dd, RCV_ERR_CLEAR, ~0ull);
9655         /* RCV_ERR_FORCE leave alone */
9656         for (i = 0; i < 32; i++)
9657                 write_csr(dd, RCV_QP_MAP_TABLE + (8 * i), 0);
9658         for (i = 0; i < 4; i++)
9659                 write_csr(dd, RCV_PARTITION_KEY + (8 * i), 0);
9660         for (i = 0; i < RXE_NUM_32_BIT_COUNTERS; i++)
9661                 write_csr(dd, RCV_COUNTER_ARRAY32 + (8 * i), 0);
9662         for (i = 0; i < RXE_NUM_64_BIT_COUNTERS; i++)
9663                 write_csr(dd, RCV_COUNTER_ARRAY64 + (8 * i), 0);
9664         for (i = 0; i < RXE_NUM_RSM_INSTANCES; i++) {
9665                 write_csr(dd, RCV_RSM_CFG + (8 * i), 0);
9666                 write_csr(dd, RCV_RSM_SELECT + (8 * i), 0);
9667                 write_csr(dd, RCV_RSM_MATCH + (8 * i), 0);
9668         }
9669         for (i = 0; i < 32; i++)
9670                 write_csr(dd, RCV_RSM_MAP_TABLE + (8 * i), 0);
9671
9672         /*
9673          * RXE Kernel and User Per-Context CSRs
9674          */
9675         for (i = 0; i < dd->chip_rcv_contexts; i++) {
9676                 /* kernel */
9677                 write_kctxt_csr(dd, i, RCV_CTXT_CTRL, 0);
9678                 /* RCV_CTXT_STATUS read-only */
9679                 write_kctxt_csr(dd, i, RCV_EGR_CTRL, 0);
9680                 write_kctxt_csr(dd, i, RCV_TID_CTRL, 0);
9681                 write_kctxt_csr(dd, i, RCV_KEY_CTRL, 0);
9682                 write_kctxt_csr(dd, i, RCV_HDR_ADDR, 0);
9683                 write_kctxt_csr(dd, i, RCV_HDR_CNT, 0);
9684                 write_kctxt_csr(dd, i, RCV_HDR_ENT_SIZE, 0);
9685                 write_kctxt_csr(dd, i, RCV_HDR_SIZE, 0);
9686                 write_kctxt_csr(dd, i, RCV_HDR_TAIL_ADDR, 0);
9687                 write_kctxt_csr(dd, i, RCV_AVAIL_TIME_OUT, 0);
9688                 write_kctxt_csr(dd, i, RCV_HDR_OVFL_CNT, 0);
9689
9690                 /* user */
9691                 /* RCV_HDR_TAIL read-only */
9692                 write_uctxt_csr(dd, i, RCV_HDR_HEAD, 0);
9693                 /* RCV_EGR_INDEX_TAIL read-only */
9694                 write_uctxt_csr(dd, i, RCV_EGR_INDEX_HEAD, 0);
9695                 /* RCV_EGR_OFFSET_TAIL read-only */
9696                 for (j = 0; j < RXE_NUM_TID_FLOWS; j++) {
9697                         write_uctxt_csr(dd, i, RCV_TID_FLOW_TABLE + (8 * j),
9698                                 0);
9699                 }
9700         }
9701 }
9702
9703 /*
9704  * Set sc2vl tables.
9705  *
9706  * They power on to zeros, so to avoid send context errors
9707  * they need to be set:
9708  *
9709  * SC 0-7 -> VL 0-7 (respectively)
9710  * SC 15  -> VL 15
9711  * otherwise
9712  *        -> VL 0
9713  */
9714 static void init_sc2vl_tables(struct hfi1_devdata *dd)
9715 {
9716         int i;
9717         /* init per architecture spec, constrained by hardware capability */
9718
9719         /* HFI maps sent packets */
9720         write_csr(dd, SEND_SC2VLT0, SC2VL_VAL(
9721                 0,
9722                 0, 0, 1, 1,
9723                 2, 2, 3, 3,
9724                 4, 4, 5, 5,
9725                 6, 6, 7, 7));
9726         write_csr(dd, SEND_SC2VLT1, SC2VL_VAL(
9727                 1,
9728                 8, 0, 9, 0,
9729                 10, 0, 11, 0,
9730                 12, 0, 13, 0,
9731                 14, 0, 15, 15));
9732         write_csr(dd, SEND_SC2VLT2, SC2VL_VAL(
9733                 2,
9734                 16, 0, 17, 0,
9735                 18, 0, 19, 0,
9736                 20, 0, 21, 0,
9737                 22, 0, 23, 0));
9738         write_csr(dd, SEND_SC2VLT3, SC2VL_VAL(
9739                 3,
9740                 24, 0, 25, 0,
9741                 26, 0, 27, 0,
9742                 28, 0, 29, 0,
9743                 30, 0, 31, 0));
9744
9745         /* DC maps received packets */
9746         write_csr(dd, DCC_CFG_SC_VL_TABLE_15_0, DC_SC_VL_VAL(
9747                 15_0,
9748                 0, 0, 1, 1,  2, 2,  3, 3,  4, 4,  5, 5,  6, 6,  7,  7,
9749                 8, 0, 9, 0, 10, 0, 11, 0, 12, 0, 13, 0, 14, 0, 15, 15));
9750         write_csr(dd, DCC_CFG_SC_VL_TABLE_31_16, DC_SC_VL_VAL(
9751                 31_16,
9752                 16, 0, 17, 0, 18, 0, 19, 0, 20, 0, 21, 0, 22, 0, 23, 0,
9753                 24, 0, 25, 0, 26, 0, 27, 0, 28, 0, 29, 0, 30, 0, 31, 0));
9754
9755         /* initialize the cached sc2vl values consistently with h/w */
9756         for (i = 0; i < 32; i++) {
9757                 if (i < 8 || i == 15)
9758                         *((u8 *)(dd->sc2vl) + i) = (u8)i;
9759                 else
9760                         *((u8 *)(dd->sc2vl) + i) = 0;
9761         }
9762 }
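
/*
 * Illustrative sketch, not driver code: with the tables above in place,
 * the cached dd->sc2vl bytes give a direct SC -> VL lookup.  Assuming
 * an SC in the range 0-31:
 *
 *	static u8 sc_to_vl(struct hfi1_devdata *dd, u8 sc)
 *	{
 *		return *((u8 *)(dd->sc2vl) + sc);
 *	}
 *
 * (SC0-7 -> VL0-7, SC15 -> VL15, everything else -> VL0)
 */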
9763
9764 /*
9765  * Read chip sizes and then reset parts to sane, disabled, values.  We cannot
9766  * depend on the chip going through a power-on reset - a driver may be loaded
9767  * and unloaded many times.
9768  *
9769  * Do not write any CSR values to the chip in this routine - there may be
9770  * a reset following the (possible) FLR in this routine.
9771  *
9772  */
9773 static void init_chip(struct hfi1_devdata *dd)
9774 {
9775         int i;
9776
9777         /*
9778          * Put the HFI CSRs in a known state.
9779          * Combine this with a DC reset.
9780          *
9781          * Stop the device from doing anything while we do a
9782          * reset.  We know there are no other active users of
9783          * the device since we are now in charge.  Turn off
         * all outbound and inbound traffic and make sure
9785          * the device does not generate any interrupts.
9786          */
9787
9788         /* disable send contexts and SDMA engines */
9789         write_csr(dd, SEND_CTRL, 0);
9790         for (i = 0; i < dd->chip_send_contexts; i++)
9791                 write_kctxt_csr(dd, i, SEND_CTXT_CTRL, 0);
9792         for (i = 0; i < dd->chip_sdma_engines; i++)
9793                 write_kctxt_csr(dd, i, SEND_DMA_CTRL, 0);
9794         /* disable port (turn off RXE inbound traffic) and contexts */
9795         write_csr(dd, RCV_CTRL, 0);
9796         for (i = 0; i < dd->chip_rcv_contexts; i++)
                write_kctxt_csr(dd, i, RCV_CTXT_CTRL, 0);
9798         /* mask all interrupt sources */
9799         for (i = 0; i < CCE_NUM_INT_CSRS; i++)
9800                 write_csr(dd, CCE_INT_MASK + (8*i), 0ull);
9801
9802         /*
9803          * DC Reset: do a full DC reset before the register clear.
9804          * A recommended length of time to hold is one CSR read,
9805          * so reread the CceDcCtrl.  Then, hold the DC in reset
9806          * across the clear.
9807          */
9808         write_csr(dd, CCE_DC_CTRL, CCE_DC_CTRL_DC_RESET_SMASK);
9809         (void) read_csr(dd, CCE_DC_CTRL);
9810
9811         if (use_flr) {
9812                 /*
9813                  * A FLR will reset the SPC core and part of the PCIe.
9814                  * The parts that need to be restored have already been
9815                  * saved.
9816                  */
9817                 dd_dev_info(dd, "Resetting CSRs with FLR\n");
9818
9819                 /* do the FLR, the DC reset will remain */
9820                 hfi1_pcie_flr(dd);
9821
9822                 /* restore command and BARs */
9823                 restore_pci_variables(dd);
9824
9825                 if (is_a0(dd)) {
9826                         dd_dev_info(dd, "Resetting CSRs with FLR\n");
9827                         hfi1_pcie_flr(dd);
9828                         restore_pci_variables(dd);
9829                 }
9830
9831         } else {
9832                 dd_dev_info(dd, "Resetting CSRs with writes\n");
9833                 reset_cce_csrs(dd);
9834                 reset_txe_csrs(dd);
9835                 reset_rxe_csrs(dd);
9836                 reset_asic_csrs(dd);
9837                 reset_misc_csrs(dd);
9838         }
9839         /* clear the DC reset */
9840         write_csr(dd, CCE_DC_CTRL, 0);
9841         /* Set the LED off */
9842         if (is_a0(dd))
9843                 setextled(dd, 0);
9844         /*
9845          * Clear the QSFP reset.
9846          * A0 leaves the out lines floating on power on, then on an FLR
9847          * enforces a 0 on all out pins.  The driver does not touch
9848          * ASIC_QSFPn_OUT otherwise.  This leaves RESET_N low and
         * holds anything plugged in constantly in reset, if it pays attention
9850          * to RESET_N.
9851          * A prime example of this is SiPh. For now, set all pins high.
9852          * I2CCLK and I2CDAT will change per direction, and INT_N and
9853          * MODPRS_N are input only and their value is ignored.
9854          */
9855         if (is_a0(dd)) {
9856                 write_csr(dd, ASIC_QSFP1_OUT, 0x1f);
9857                 write_csr(dd, ASIC_QSFP2_OUT, 0x1f);
9858         }
9859 }
9860
9861 static void init_early_variables(struct hfi1_devdata *dd)
9862 {
9863         int i;
9864
9865         /* assign link credit variables */
9866         dd->vau = CM_VAU;
9867         dd->link_credits = CM_GLOBAL_CREDITS;
9868         if (is_a0(dd))
9869                 dd->link_credits--;
9870         dd->vcu = cu_to_vcu(hfi1_cu);
9871         /* enough room for 8 MAD packets plus header - 17K */
9872         dd->vl15_init = (8 * (2048 + 128)) / vau_to_au(dd->vau);
9873         if (dd->vl15_init > dd->link_credits)
9874                 dd->vl15_init = dd->link_credits;
9875
9876         write_uninitialized_csrs_and_memories(dd);
9877
9878         if (HFI1_CAP_IS_KSET(PKEY_CHECK))
9879                 for (i = 0; i < dd->num_pports; i++) {
9880                         struct hfi1_pportdata *ppd = &dd->pport[i];
9881
9882                         set_partition_keys(ppd);
9883                 }
9884         init_sc2vl_tables(dd);
9885 }
9886
9887 static void init_kdeth_qp(struct hfi1_devdata *dd)
9888 {
9889         /* user changed the KDETH_QP */
9890         if (kdeth_qp != 0 && kdeth_qp >= 0xff) {
9891                 /* out of range or illegal value */
9892                 dd_dev_err(dd, "Invalid KDETH queue pair prefix, ignoring");
9893                 kdeth_qp = 0;
9894         }
9895         if (kdeth_qp == 0)      /* not set, or failed range check */
9896                 kdeth_qp = DEFAULT_KDETH_QP;
9897
9898         write_csr(dd, SEND_BTH_QP,
9899                         (kdeth_qp & SEND_BTH_QP_KDETH_QP_MASK)
9900                                 << SEND_BTH_QP_KDETH_QP_SHIFT);
9901
9902         write_csr(dd, RCV_BTH_QP,
9903                         (kdeth_qp & RCV_BTH_QP_KDETH_QP_MASK)
9904                                 << RCV_BTH_QP_KDETH_QP_SHIFT);
9905 }
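
/*
 * Illustrative note, inferred from the mask/shift above rather than
 * taken from a spec: the 8-bit prefix occupies bits 23:16 of the BTH
 * QPN, so classifying a QPN as KDETH is conceptually
 *
 *	is_kdeth = (((qpn >> 16) & 0xff) == kdeth_qp);
 */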
9906
9907 /**
9908  * init_qpmap_table
9909  * @dd - device data
9910  * @first_ctxt - first context
 * @last_ctxt - last context
9912  *
 * This routine sets the qpn mapping table that
9914  * is indexed by qpn[8:1].
9915  *
9916  * The routine will round robin the 256 settings
9917  * from first_ctxt to last_ctxt.
9918  *
9919  * The first/last looks ahead to having specialized
9920  * receive contexts for mgmt and bypass.  Normal
 * verbs traffic is assumed to be on a range
9922  * of receive contexts.
9923  */
9924 static void init_qpmap_table(struct hfi1_devdata *dd,
9925                              u32 first_ctxt,
9926                              u32 last_ctxt)
9927 {
9928         u64 reg = 0;
9929         u64 regno = RCV_QP_MAP_TABLE;
9930         int i;
9931         u64 ctxt = first_ctxt;
9932
9933         for (i = 0; i < 256;) {
9934                 if (ctxt == VL15CTXT) {
9935                         ctxt++;
9936                         if (ctxt > last_ctxt)
9937                                 ctxt = first_ctxt;
9938                         continue;
9939                 }
9940                 reg |= ctxt << (8 * (i % 8));
9941                 i++;
9942                 ctxt++;
9943                 if (ctxt > last_ctxt)
9944                         ctxt = first_ctxt;
9945                 if (i % 8 == 0) {
9946                         write_csr(dd, regno, reg);
9947                         reg = 0;
9948                         regno += 8;
9949                 }
9950         }
9951         if (i % 8)
9952                 write_csr(dd, regno, reg);
9953
9954         add_rcvctrl(dd, RCV_CTRL_RCV_QP_MAP_ENABLE_SMASK
9955                         | RCV_CTRL_RCV_BYPASS_ENABLE_SMASK);
9956 }
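
/*
 * Worked example, illustrative only: with first_ctxt = 1 and
 * last_ctxt = 3 (and VL15CTXT skipped as above), the 256 byte-wide
 * entries are filled 1, 2, 3, 1, 2, 3, ... eight entries per 64-bit
 * CSR.  A QPN is then steered by table lookup on qpn[8:1]:
 *
 *	idx  = (qpn >> 1) & 0xff;
 *	ctxt = (map[idx / 8] >> (8 * (idx % 8))) & 0xff;
 */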
9957
9958 /**
9959  * init_qos - init RX qos
9960  * @dd - device data
 * @first_ctxt - first receive context to use
9962  *
9963  * This routine initializes Rule 0 and the
9964  * RSM map table to implement qos.
9965  *
9966  * If all of the limit tests succeed,
9967  * qos is applied based on the array
9968  * interpretation of krcvqs where
9969  * entry 0 is VL0.
9970  *
9971  * The number of vl bits (n) and the number of qpn
9972  * bits (m) are computed to feed both the RSM map table
9973  * and the single rule.
9974  *
9975  */
9976 static void init_qos(struct hfi1_devdata *dd, u32 first_ctxt)
9977 {
9978         u8 max_by_vl = 0;
9979         unsigned qpns_per_vl, ctxt, i, qpn, n = 1, m;
9980         u64 *rsmmap;
9981         u64 reg;
9982         u8  rxcontext = is_a0(dd) ? 0 : 0xff;  /* 0 is default if a0 ver. */
9983
9984         /* validate */
9985         if (dd->n_krcv_queues <= MIN_KERNEL_KCTXTS ||
9986             num_vls == 1 ||
9987             krcvqsset <= 1)
9988                 goto bail;
9989         for (i = 0; i < min_t(unsigned, num_vls, krcvqsset); i++)
9990                 if (krcvqs[i] > max_by_vl)
9991                         max_by_vl = krcvqs[i];
9992         if (max_by_vl > 32)
9993                 goto bail;
9994         qpns_per_vl = __roundup_pow_of_two(max_by_vl);
        /* determine bits for vl */
9996         n = ilog2(num_vls);
9997         /* determine bits for qpn */
9998         m = ilog2(qpns_per_vl);
9999         if ((m + n) > 7)
10000                 goto bail;
10001         if (num_vls * qpns_per_vl > dd->chip_rcv_contexts)
10002                 goto bail;
        rsmmap = kmalloc_array(NUM_MAP_REGS, sizeof(u64), GFP_KERNEL);
        if (!rsmmap)
                goto bail;
        memset(rsmmap, rxcontext, NUM_MAP_REGS * sizeof(u64));
10005         /* init the local copy of the table */
10006         for (i = 0, ctxt = first_ctxt; i < num_vls; i++) {
10007                 unsigned tctxt;
10008
10009                 for (qpn = 0, tctxt = ctxt;
10010                      krcvqs[i] && qpn < qpns_per_vl; qpn++) {
10011                         unsigned idx, regoff, regidx;
10012
                        /* generate an index < 128 */
10014                         idx = (qpn << n) ^ i;
10015                         regoff = (idx % 8) * 8;
10016                         regidx = idx / 8;
10017                         reg = rsmmap[regidx];
10018                         /* replace 0xff with context number */
10019                         reg &= ~(RCV_RSM_MAP_TABLE_RCV_CONTEXT_A_MASK
10020                                 << regoff);
10021                         reg |= (u64)(tctxt++) << regoff;
10022                         rsmmap[regidx] = reg;
10023                         if (tctxt == ctxt + krcvqs[i])
10024                                 tctxt = ctxt;
10025                 }
10026                 ctxt += krcvqs[i];
10027         }
10028         /* flush cached copies to chip */
10029         for (i = 0; i < NUM_MAP_REGS; i++)
10030                 write_csr(dd, RCV_RSM_MAP_TABLE + (8 * i), rsmmap[i]);
10031         /* add rule0 */
10032         write_csr(dd, RCV_RSM_CFG /* + (8 * 0) */,
10033                 RCV_RSM_CFG_ENABLE_OR_CHAIN_RSM0_MASK
10034                         << RCV_RSM_CFG_ENABLE_OR_CHAIN_RSM0_SHIFT |
10035                 2ull << RCV_RSM_CFG_PACKET_TYPE_SHIFT);
10036         write_csr(dd, RCV_RSM_SELECT /* + (8 * 0) */,
10037                 LRH_BTH_MATCH_OFFSET
10038                         << RCV_RSM_SELECT_FIELD1_OFFSET_SHIFT |
10039                 LRH_SC_MATCH_OFFSET << RCV_RSM_SELECT_FIELD2_OFFSET_SHIFT |
10040                 LRH_SC_SELECT_OFFSET << RCV_RSM_SELECT_INDEX1_OFFSET_SHIFT |
10041                 ((u64)n) << RCV_RSM_SELECT_INDEX1_WIDTH_SHIFT |
10042                 QPN_SELECT_OFFSET << RCV_RSM_SELECT_INDEX2_OFFSET_SHIFT |
10043                 ((u64)m + (u64)n) << RCV_RSM_SELECT_INDEX2_WIDTH_SHIFT);
10044         write_csr(dd, RCV_RSM_MATCH /* + (8 * 0) */,
10045                 LRH_BTH_MASK << RCV_RSM_MATCH_MASK1_SHIFT |
10046                 LRH_BTH_VALUE << RCV_RSM_MATCH_VALUE1_SHIFT |
10047                 LRH_SC_MASK << RCV_RSM_MATCH_MASK2_SHIFT |
10048                 LRH_SC_VALUE << RCV_RSM_MATCH_VALUE2_SHIFT);
10049         /* Enable RSM */
10050         add_rcvctrl(dd, RCV_CTRL_RCV_RSM_ENABLE_SMASK);
10051         kfree(rsmmap);
10052         /* map everything else (non-VL15) to context 0 */
        init_qpmap_table(dd, 0, 0);
10057         dd->qos_shift = n + 1;
10058         return;
10059 bail:
10060         dd->qos_shift = 1;
        init_qpmap_table(dd,
                dd->n_krcv_queues > MIN_KERNEL_KCTXTS ? MIN_KERNEL_KCTXTS : 0,
                dd->n_krcv_queues - 1);
10065 }
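
/*
 * Worked example, illustrative only: with num_vls = 4 and
 * krcvqs = {2, 2, 2, 2}, max_by_vl = 2 and qpns_per_vl = 2, so
 * n = ilog2(4) = 2 and m = ilog2(2) = 1.  Each map index is then
 * idx = (qpn << n) ^ i; for VL2 (i = 2) and qpn = 1 that is
 * (1 << 2) ^ 2 = 6.  The rule consumes m + n = 3 index bits (within
 * the 7-bit limit) and qos_shift becomes n + 1 = 3.
 */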
10066
10067 static void init_rxe(struct hfi1_devdata *dd)
10068 {
10069         /* enable all receive errors */
10070         write_csr(dd, RCV_ERR_MASK, ~0ull);
10071         /* setup QPN map table - start where VL15 context leaves off */
        init_qos(dd,
                 dd->n_krcv_queues > MIN_KERNEL_KCTXTS ? MIN_KERNEL_KCTXTS : 0);
10075         /*
10076          * make sure RcvCtrl.RcvWcb <= PCIe Device Control
10077          * Register Max_Payload_Size (PCI_EXP_DEVCTL in Linux PCIe config
10078          * space, PciCfgCap2.MaxPayloadSize in HFI).  There is only one
10079          * invalid configuration: RcvCtrl.RcvWcb set to its max of 256 and
10080          * Max_PayLoad_Size set to its minimum of 128.
10081          *
10082          * Presently, RcvCtrl.RcvWcb is not modified from its default of 0
10083          * (64 bytes).  Max_Payload_Size is possibly modified upward in
10084          * tune_pcie_caps() which is called after this routine.
10085          */
10086 }
10087
10088 static void init_other(struct hfi1_devdata *dd)
10089 {
10090         /* enable all CCE errors */
10091         write_csr(dd, CCE_ERR_MASK, ~0ull);
10092         /* enable *some* Misc errors */
10093         write_csr(dd, MISC_ERR_MASK, DRIVER_MISC_MASK);
10094         /* enable all DC errors, except LCB */
10095         write_csr(dd, DCC_ERR_FLG_EN, ~0ull);
10096         write_csr(dd, DC_DC8051_ERR_EN, ~0ull);
10097 }
10098
10099 /*
 * Fill out the given AU table using the given CU.  A CU is defined in
 * terms of AUs.  The table is an encoding: given the index, how many
 * AUs does that index represent?
10103  *
10104  * NOTE: Assumes that the register layout is the same for the
10105  * local and remote tables.
10106  */
10107 static void assign_cm_au_table(struct hfi1_devdata *dd, u32 cu,
10108                                u32 csr0to3, u32 csr4to7)
10109 {
10110         write_csr(dd, csr0to3,
10111                    0ull <<
10112                         SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE0_SHIFT
10113                 |  1ull <<
10114                         SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE1_SHIFT
10115                 |  2ull * cu <<
10116                         SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE2_SHIFT
10117                 |  4ull * cu <<
10118                         SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE3_SHIFT);
10119         write_csr(dd, csr4to7,
10120                    8ull * cu <<
10121                         SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE4_SHIFT
10122                 | 16ull * cu <<
10123                         SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE5_SHIFT
10124                 | 32ull * cu <<
10125                         SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE6_SHIFT
10126                 | 64ull * cu <<
10127                         SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE7_SHIFT);
10129 }
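
/*
 * Worked example, illustrative only: entry 0 encodes 0 AUs, entry 1
 * encodes 1 AU, and entry i >= 2 encodes (1 << (i - 1)) * cu AUs.
 * With cu = 1 the table reads 0, 1, 2, 4, 8, 16, 32, 64.
 */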
10130
10131 static void assign_local_cm_au_table(struct hfi1_devdata *dd, u8 vcu)
10132 {
10133         assign_cm_au_table(dd, vcu_to_cu(vcu), SEND_CM_LOCAL_AU_TABLE0_TO3,
10134                                         SEND_CM_LOCAL_AU_TABLE4_TO7);
10135 }
10136
10137 void assign_remote_cm_au_table(struct hfi1_devdata *dd, u8 vcu)
10138 {
10139         assign_cm_au_table(dd, vcu_to_cu(vcu), SEND_CM_REMOTE_AU_TABLE0_TO3,
10140                                         SEND_CM_REMOTE_AU_TABLE4_TO7);
10141 }
10142
10143 static void init_txe(struct hfi1_devdata *dd)
10144 {
10145         int i;
10146
10147         /* enable all PIO, SDMA, general, and Egress errors */
10148         write_csr(dd, SEND_PIO_ERR_MASK, ~0ull);
10149         write_csr(dd, SEND_DMA_ERR_MASK, ~0ull);
10150         write_csr(dd, SEND_ERR_MASK, ~0ull);
10151         write_csr(dd, SEND_EGRESS_ERR_MASK, ~0ull);
10152
10153         /* enable all per-context and per-SDMA engine errors */
10154         for (i = 0; i < dd->chip_send_contexts; i++)
10155                 write_kctxt_csr(dd, i, SEND_CTXT_ERR_MASK, ~0ull);
10156         for (i = 0; i < dd->chip_sdma_engines; i++)
10157                 write_kctxt_csr(dd, i, SEND_DMA_ENG_ERR_MASK, ~0ull);
10158
10159         /* set the local CU to AU mapping */
10160         assign_local_cm_au_table(dd, dd->vcu);
10161
10162         /*
10163          * Set reasonable default for Credit Return Timer
10164          * Don't set on Simulator - causes it to choke.
10165          */
10166         if (dd->icode != ICODE_FUNCTIONAL_SIMULATOR)
10167                 write_csr(dd, SEND_CM_TIMER_CTRL, HFI1_CREDIT_RETURN_RATE);
10168 }
10169
10170 int hfi1_set_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt, u16 jkey)
10171 {
10172         struct hfi1_ctxtdata *rcd = dd->rcd[ctxt];
10173         unsigned sctxt;
10174         int ret = 0;
10175         u64 reg;
10176
10177         if (!rcd || !rcd->sc) {
10178                 ret = -EINVAL;
10179                 goto done;
10180         }
10181         sctxt = rcd->sc->hw_context;
10182         reg = SEND_CTXT_CHECK_JOB_KEY_MASK_SMASK | /* mask is always 1's */
10183                 ((jkey & SEND_CTXT_CHECK_JOB_KEY_VALUE_MASK) <<
10184                  SEND_CTXT_CHECK_JOB_KEY_VALUE_SHIFT);
10185         /* JOB_KEY_ALLOW_PERMISSIVE is not allowed by default */
10186         if (HFI1_CAP_KGET_MASK(rcd->flags, ALLOW_PERM_JKEY))
10187                 reg |= SEND_CTXT_CHECK_JOB_KEY_ALLOW_PERMISSIVE_SMASK;
10188         write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_JOB_KEY, reg);
10189         /*
10190          * Enable send-side J_KEY integrity check, unless this is A0 h/w
10191          * (due to A0 erratum).
10192          */
10193         if (!is_a0(dd)) {
10194                 reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE);
10195                 reg |= SEND_CTXT_CHECK_ENABLE_CHECK_JOB_KEY_SMASK;
10196                 write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg);
10197         }
10198
10199         /* Enable J_KEY check on receive context. */
10200         reg = RCV_KEY_CTRL_JOB_KEY_ENABLE_SMASK |
10201                 ((jkey & RCV_KEY_CTRL_JOB_KEY_VALUE_MASK) <<
10202                  RCV_KEY_CTRL_JOB_KEY_VALUE_SHIFT);
10203         write_kctxt_csr(dd, ctxt, RCV_KEY_CTRL, reg);
10204 done:
10205         return ret;
10206 }
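
/*
 * Illustrative note: since the JOB_KEY mask above is all 1's, the
 * hardware send-side check reduces to an exact match, conceptually
 *
 *	allow = ((pkt_jkey & mask) == (jkey & mask));	(with mask = ~0)
 *
 * plus the permissive-J_KEY escape for contexts carrying
 * ALLOW_PERM_JKEY.
 */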
10207
10208 int hfi1_clear_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt)
10209 {
10210         struct hfi1_ctxtdata *rcd = dd->rcd[ctxt];
10211         unsigned sctxt;
10212         int ret = 0;
10213         u64 reg;
10214
10215         if (!rcd || !rcd->sc) {
10216                 ret = -EINVAL;
10217                 goto done;
10218         }
10219         sctxt = rcd->sc->hw_context;
10220         write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_JOB_KEY, 0);
10221         /*
10222          * Disable send-side J_KEY integrity check, unless this is A0 h/w.
10223          * This check would not have been enabled for A0 h/w, see
10224          * set_ctxt_jkey().
10225          */
10226         if (!is_a0(dd)) {
10227                 reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE);
10228                 reg &= ~SEND_CTXT_CHECK_ENABLE_CHECK_JOB_KEY_SMASK;
10229                 write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg);
10230         }
10231         /* Turn off the J_KEY on the receive side */
10232         write_kctxt_csr(dd, ctxt, RCV_KEY_CTRL, 0);
10233 done:
10234         return ret;
10235 }
10236
10237 int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt, u16 pkey)
10238 {
10239         struct hfi1_ctxtdata *rcd;
10240         unsigned sctxt;
10241         int ret = 0;
10242         u64 reg;
10243
        if (ctxt < dd->num_rcv_contexts) {
                rcd = dd->rcd[ctxt];
        } else {
                ret = -EINVAL;
                goto done;
        }
10250         if (!rcd || !rcd->sc) {
10251                 ret = -EINVAL;
10252                 goto done;
10253         }
10254         sctxt = rcd->sc->hw_context;
10255         reg = ((u64)pkey & SEND_CTXT_CHECK_PARTITION_KEY_VALUE_MASK) <<
10256                 SEND_CTXT_CHECK_PARTITION_KEY_VALUE_SHIFT;
10257         write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_PARTITION_KEY, reg);
10258         reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE);
10259         reg |= SEND_CTXT_CHECK_ENABLE_CHECK_PARTITION_KEY_SMASK;
10260         write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg);
10261 done:
10262         return ret;
10263 }
10264
10265 int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt)
10266 {
10267         struct hfi1_ctxtdata *rcd;
10268         unsigned sctxt;
10269         int ret = 0;
10270         u64 reg;
10271
        if (ctxt < dd->num_rcv_contexts) {
                rcd = dd->rcd[ctxt];
        } else {
                ret = -EINVAL;
                goto done;
        }
10278         if (!rcd || !rcd->sc) {
10279                 ret = -EINVAL;
10280                 goto done;
10281         }
10282         sctxt = rcd->sc->hw_context;
10283         reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE);
10284         reg &= ~SEND_CTXT_CHECK_ENABLE_CHECK_PARTITION_KEY_SMASK;
10285         write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg);
10286         write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_PARTITION_KEY, 0);
10287 done:
10288         return ret;
10289 }
10290
10291 /*
 * Start the clean up of the chip.  Our clean up happens in multiple
10293  * stages and this is just the first.
10294  */
10295 void hfi1_start_cleanup(struct hfi1_devdata *dd)
10296 {
10297         free_cntrs(dd);
10298         free_rcverr(dd);
10299         clean_up_interrupts(dd);
10300 }
10301
10302 #define HFI_BASE_GUID(dev) \
10303         ((dev)->base_guid & ~(1ULL << GUID_HFI_INDEX_SHIFT))
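
/*
 * Illustrative note, inferred from the macro above: the two HFIs on
 * one ASIC differ only in the GUID bit selected by
 * GUID_HFI_INDEX_SHIFT, so two devices are peers exactly when
 * HFI_BASE_GUID(dd) == HFI_BASE_GUID(tmp) - the test used in
 * asic_should_init() below.
 */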
10304
10305 /*
10306  * Certain chip functions need to be initialized only once per asic
10307  * instead of per-device. This function finds the peer device and
10308  * checks whether that chip initialization needs to be done by this
10309  * device.
10310  */
10311 static void asic_should_init(struct hfi1_devdata *dd)
10312 {
10313         unsigned long flags;
10314         struct hfi1_devdata *tmp, *peer = NULL;
10315
10316         spin_lock_irqsave(&hfi1_devs_lock, flags);
10317         /* Find our peer device */
10318         list_for_each_entry(tmp, &hfi1_dev_list, list) {
10319                 if ((HFI_BASE_GUID(dd) == HFI_BASE_GUID(tmp)) &&
10320                     dd->unit != tmp->unit) {
10321                         peer = tmp;
10322                         break;
10323                 }
10324         }
10325
10326         /*
10327          * "Claim" the ASIC for initialization if it hasn't been
         * "claimed" yet.
10329          */
10330         if (!peer || !(peer->flags & HFI1_DO_INIT_ASIC))
10331                 dd->flags |= HFI1_DO_INIT_ASIC;
10332         spin_unlock_irqrestore(&hfi1_devs_lock, flags);
10333 }
10334
10335 /**
 * Allocate and initialize the device structure for the hfi.
10337  * @dev: the pci_dev for hfi1_ib device
10338  * @ent: pci_device_id struct for this dev
10339  *
10340  * Also allocates, initializes, and returns the devdata struct for this
10341  * device instance
10342  *
10343  * This is global, and is called directly at init to set up the
10344  * chip-specific function pointers for later use.
10345  */
10346 struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
10347                                   const struct pci_device_id *ent)
10348 {
10349         struct hfi1_devdata *dd;
10350         struct hfi1_pportdata *ppd;
10351         u64 reg;
10352         int i, ret;
10353         static const char * const inames[] = { /* implementation names */
10354                 "RTL silicon",
10355                 "RTL VCS simulation",
10356                 "RTL FPGA emulation",
10357                 "Functional simulator"
10358         };
10359
10360         dd = hfi1_alloc_devdata(pdev,
10361                 NUM_IB_PORTS * sizeof(struct hfi1_pportdata));
10362         if (IS_ERR(dd))
10363                 goto bail;
10364         ppd = dd->pport;
10365         for (i = 0; i < dd->num_pports; i++, ppd++) {
10366                 int vl;
10367                 /* init common fields */
10368                 hfi1_init_pportdata(pdev, ppd, dd, 0, 1);
10369                 /* DC supports 4 link widths */
10370                 ppd->link_width_supported =
10371                         OPA_LINK_WIDTH_1X | OPA_LINK_WIDTH_2X |
10372                         OPA_LINK_WIDTH_3X | OPA_LINK_WIDTH_4X;
10373                 ppd->link_width_downgrade_supported =
10374                         ppd->link_width_supported;
10375                 /* start out enabling only 4X */
10376                 ppd->link_width_enabled = OPA_LINK_WIDTH_4X;
10377                 ppd->link_width_downgrade_enabled =
10378                                         ppd->link_width_downgrade_supported;
10379                 /* link width active is 0 when link is down */
10380                 /* link width downgrade active is 0 when link is down */
10381
10382                 if (num_vls < HFI1_MIN_VLS_SUPPORTED
10383                         || num_vls > HFI1_MAX_VLS_SUPPORTED) {
10384                         hfi1_early_err(&pdev->dev,
10385                                        "Invalid num_vls %u, using %u VLs\n",
10386                                     num_vls, HFI1_MAX_VLS_SUPPORTED);
10387                         num_vls = HFI1_MAX_VLS_SUPPORTED;
10388                 }
10389                 ppd->vls_supported = num_vls;
10390                 ppd->vls_operational = ppd->vls_supported;
10391                 /* Set the default MTU. */
10392                 for (vl = 0; vl < num_vls; vl++)
10393                         dd->vld[vl].mtu = hfi1_max_mtu;
10394                 dd->vld[15].mtu = MAX_MAD_PACKET;
10395                 /*
10396                  * Set the initial values to reasonable default, will be set
10397                  * for real when link is up.
10398                  */
10399                 ppd->lstate = IB_PORT_DOWN;
10400                 ppd->overrun_threshold = 0x4;
10401                 ppd->phy_error_threshold = 0xf;
10402                 ppd->port_crc_mode_enabled = link_crc_mask;
10403                 /* initialize supported LTP CRC mode */
10404                 ppd->port_ltp_crc_mode = cap_to_port_ltp(link_crc_mask) << 8;
10405                 /* initialize enabled LTP CRC mode */
10406                 ppd->port_ltp_crc_mode |= cap_to_port_ltp(link_crc_mask) << 4;
10407                 /* start in offline */
10408                 ppd->host_link_state = HLS_DN_OFFLINE;
10409                 init_vl_arb_caches(ppd);
10410         }
10411
10412         dd->link_default = HLS_DN_POLL;
10413
10414         /*
10415          * Do remaining PCIe setup and save PCIe values in dd.
10416          * Any error printing is already done by the init code.
10417          * On return, we have the chip mapped.
10418          */
10419         ret = hfi1_pcie_ddinit(dd, pdev, ent);
10420         if (ret < 0)
10421                 goto bail_free;
10422
10423         /* verify that reads actually work, save revision for reset check */
10424         dd->revision = read_csr(dd, CCE_REVISION);
10425         if (dd->revision == ~(u64)0) {
10426                 dd_dev_err(dd, "cannot read chip CSRs\n");
10427                 ret = -EINVAL;
10428                 goto bail_cleanup;
10429         }
10430         dd->majrev = (dd->revision >> CCE_REVISION_CHIP_REV_MAJOR_SHIFT)
10431                         & CCE_REVISION_CHIP_REV_MAJOR_MASK;
10432         dd->minrev = (dd->revision >> CCE_REVISION_CHIP_REV_MINOR_SHIFT)
10433                         & CCE_REVISION_CHIP_REV_MINOR_MASK;
10434
        /*
         * Obtain the hardware ID - NOT related to unit, which is a
         * software enumeration.
         */
10437         reg = read_csr(dd, CCE_REVISION2);
10438         dd->hfi1_id = (reg >> CCE_REVISION2_HFI_ID_SHIFT)
10439                                         & CCE_REVISION2_HFI_ID_MASK;
10440         /* the variable size will remove unwanted bits */
10441         dd->icode = reg >> CCE_REVISION2_IMPL_CODE_SHIFT;
10442         dd->irev = reg >> CCE_REVISION2_IMPL_REVISION_SHIFT;
10443         dd_dev_info(dd, "Implementation: %s, revision 0x%x\n",
10444                 dd->icode < ARRAY_SIZE(inames) ? inames[dd->icode] : "unknown",
10445                 (int)dd->irev);
10446
10447         /* speeds the hardware can support */
10448         dd->pport->link_speed_supported = OPA_LINK_SPEED_25G;
10449         /* speeds allowed to run at */
10450         dd->pport->link_speed_enabled = dd->pport->link_speed_supported;
10451         /* give a reasonable active value, will be set on link up */
10452         dd->pport->link_speed_active = OPA_LINK_SPEED_25G;
10453
10454         dd->chip_rcv_contexts = read_csr(dd, RCV_CONTEXTS);
10455         dd->chip_send_contexts = read_csr(dd, SEND_CONTEXTS);
10456         dd->chip_sdma_engines = read_csr(dd, SEND_DMA_ENGINES);
10457         dd->chip_pio_mem_size = read_csr(dd, SEND_PIO_MEM_SIZE);
10458         dd->chip_sdma_mem_size = read_csr(dd, SEND_DMA_MEM_SIZE);
10459         /* fix up link widths for emulation _p */
10460         ppd = dd->pport;
10461         if (dd->icode == ICODE_FPGA_EMULATION && is_emulator_p(dd)) {
10462                 ppd->link_width_supported =
10463                         ppd->link_width_enabled =
10464                         ppd->link_width_downgrade_supported =
10465                         ppd->link_width_downgrade_enabled =
10466                                 OPA_LINK_WIDTH_1X;
10467         }
        /* ensure num_vls isn't larger than number of sdma engines */
        if (HFI1_CAP_IS_KSET(SDMA) && num_vls > dd->chip_sdma_engines) {
                dd_dev_err(dd, "num_vls %u too large, using %u VLs\n",
                                num_vls, dd->chip_sdma_engines);
                ppd->vls_supported = num_vls = dd->chip_sdma_engines;
10473                 ppd->vls_operational = ppd->vls_supported;
10474         }
10475
10476         /*
10477          * Convert the ns parameter to the 64 * cclocks used in the CSR.
10478          * Limit the max if larger than the field holds.  If timeout is
10479          * non-zero, then the calculated field will be at least 1.
10480          *
10481          * Must be after icode is set up - the cclock rate depends
10482          * on knowing the hardware being used.
10483          */
10484         dd->rcv_intr_timeout_csr = ns_to_cclock(dd, rcv_intr_timeout) / 64;
10485         if (dd->rcv_intr_timeout_csr >
10486                         RCV_AVAIL_TIME_OUT_TIME_OUT_RELOAD_MASK)
10487                 dd->rcv_intr_timeout_csr =
10488                         RCV_AVAIL_TIME_OUT_TIME_OUT_RELOAD_MASK;
10489         else if (dd->rcv_intr_timeout_csr == 0 && rcv_intr_timeout)
10490                 dd->rcv_intr_timeout_csr = 1;
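        /*
         * Example, illustrative only: any non-zero timeout that converts
         * to fewer than 64 cclocks truncates to 0 in the division above,
         * which is why it is bumped back up to the minimum reload value
         * of 1 here.
         */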
10491
10492         /* obtain chip sizes, reset chip CSRs */
10493         init_chip(dd);
10494
10495         /* read in the PCIe link speed information */
10496         ret = pcie_speeds(dd);
10497         if (ret)
10498                 goto bail_cleanup;
10499
10500         /* needs to be done before we look for the peer device */
10501         read_guid(dd);
10502
10503         asic_should_init(dd);
10504
10505         /* read in firmware */
10506         ret = hfi1_firmware_init(dd);
10507         if (ret)
10508                 goto bail_cleanup;
10509
10510         /*
10511          * In general, the PCIe Gen3 transition must occur after the
10512          * chip has been idled (so it won't initiate any PCIe transactions
10513          * e.g. an interrupt) and before the driver changes any registers
10514          * (the transition will reset the registers).
10515          *
10516          * In particular, place this call after:
10517          * - init_chip()     - the chip will not initiate any PCIe transactions
10518          * - pcie_speeds()   - reads the current link speed
10519          * - hfi1_firmware_init() - the needed firmware is ready to be
10520          *                          downloaded
10521          */
10522         ret = do_pcie_gen3_transition(dd);
10523         if (ret)
10524                 goto bail_cleanup;
10525
10526         /* start setting dd values and adjusting CSRs */
10527         init_early_variables(dd);
10528
10529         parse_platform_config(dd);
10530
10531         /* add board names as they are defined */
10532         dd->boardname = kmalloc(64, GFP_KERNEL);
        if (!dd->boardname) {
                ret = -ENOMEM;
                goto bail_cleanup;
        }
10535         snprintf(dd->boardname, 64, "Board ID 0x%llx",
10536                  dd->revision >> CCE_REVISION_BOARD_ID_LOWER_NIBBLE_SHIFT
10537                     & CCE_REVISION_BOARD_ID_LOWER_NIBBLE_MASK);
10538
10539         snprintf(dd->boardversion, BOARD_VERS_MAX,
10540                  "ChipABI %u.%u, %s, ChipRev %u.%u, SW Compat %llu\n",
10541                  HFI1_CHIP_VERS_MAJ, HFI1_CHIP_VERS_MIN,
10542                  dd->boardname,
10543                  (u32)dd->majrev,
10544                  (u32)dd->minrev,
10545                  (dd->revision >> CCE_REVISION_SW_SHIFT)
10546                     & CCE_REVISION_SW_MASK);
10547
10548         ret = set_up_context_variables(dd);
10549         if (ret)
10550                 goto bail_cleanup;
10551
10552         /* set initial RXE CSRs */
10553         init_rxe(dd);
10554         /* set initial TXE CSRs */
10555         init_txe(dd);
10556         /* set initial non-RXE, non-TXE CSRs */
10557         init_other(dd);
10558         /* set up KDETH QP prefix in both RX and TX CSRs */
10559         init_kdeth_qp(dd);
10560
10561         /* send contexts must be set up before receive contexts */
10562         ret = init_send_contexts(dd);
10563         if (ret)
10564                 goto bail_cleanup;
10565
10566         ret = hfi1_create_ctxts(dd);
10567         if (ret)
10568                 goto bail_cleanup;
10569
10570         dd->rcvhdrsize = DEFAULT_RCVHDRSIZE;
10571         /*
10572          * rcd[0] is guaranteed to be valid by this point. Also, all
         * contexts are using the same value, as per the module parameter.
10574          */
10575         dd->rhf_offset = dd->rcd[0]->rcvhdrqentsize - sizeof(u64) / sizeof(u32);
10576
10577         ret = init_pervl_scs(dd);
10578         if (ret)
10579                 goto bail_cleanup;
10580
10581         /* sdma init */
10582         for (i = 0; i < dd->num_pports; ++i) {
10583                 ret = sdma_init(dd, i);
10584                 if (ret)
10585                         goto bail_cleanup;
10586         }
10587
10588         /* use contexts created by hfi1_create_ctxts */
10589         ret = set_up_interrupts(dd);
10590         if (ret)
10591                 goto bail_cleanup;
10592
10593         /* set up LCB access - must be after set_up_interrupts() */
10594         init_lcb_access(dd);
10595
10596         snprintf(dd->serial, SERIAL_MAX, "0x%08llx\n",
10597                  dd->base_guid & 0xFFFFFF);
10598
10599         dd->oui1 = dd->base_guid >> 56 & 0xFF;
10600         dd->oui2 = dd->base_guid >> 48 & 0xFF;
10601         dd->oui3 = dd->base_guid >> 40 & 0xFF;
10602
10603         ret = load_firmware(dd); /* asymmetric with dispose_firmware() */
10604         if (ret)
10605                 goto bail_clear_intr;
10606         check_fabric_firmware_versions(dd);
10607
10608         thermal_init(dd);
10609
10610         ret = init_cntrs(dd);
10611         if (ret)
10612                 goto bail_clear_intr;
10613
10614         ret = init_rcverr(dd);
10615         if (ret)
10616                 goto bail_free_cntrs;
10617
10618         ret = eprom_init(dd);
10619         if (ret)
10620                 goto bail_free_rcverr;
10621
10622         goto bail;
10623
10624 bail_free_rcverr:
10625         free_rcverr(dd);
10626 bail_free_cntrs:
10627         free_cntrs(dd);
10628 bail_clear_intr:
10629         clean_up_interrupts(dd);
10630 bail_cleanup:
10631         hfi1_pcie_ddcleanup(dd);
10632 bail_free:
10633         hfi1_free_devdata(dd);
10634         dd = ERR_PTR(ret);
10635 bail:
10636         return dd;
10637 }
10638
10639 static u16 delay_cycles(struct hfi1_pportdata *ppd, u32 desired_egress_rate,
10640                         u32 dw_len)
10641 {
10642         u32 delta_cycles;
10643         u32 current_egress_rate = ppd->current_egress_rate;
10644         /* rates here are in units of 10^6 bits/sec */
10645
10646         if (desired_egress_rate == -1)
10647                 return 0; /* shouldn't happen */
10648
10649         if (desired_egress_rate >= current_egress_rate)
10650                 return 0; /* we can't help go faster, only slower */
10651
10652         delta_cycles = egress_cycles(dw_len * 4, desired_egress_rate) -
10653                         egress_cycles(dw_len * 4, current_egress_rate);
10654
10655         return (u16)delta_cycles;
10656 }
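
/*
 * Worked example, illustrative only, with rates in 10^6 bits/sec as
 * noted above: throttling a 1024-dword packet (dw_len * 4 = 4096
 * bytes) from a current egress rate of 100000 (100Gb/s) down to a
 * desired 25000 (25Gb/s) returns
 *
 *	egress_cycles(4096, 25000) - egress_cycles(4096, 100000)
 *
 * i.e. the difference between the slow and fast wire times for the
 * same bytes, expressed in fabric cycles.
 */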
10657
10659 /**
10660  * create_pbc - build a pbc for transmission
 * @ppd: port data
 * @flags: special case flags or-ed in built pbc
 * @srate_mbs: static rate, in Mb/s
 * @vl: vl
 * @dw_len: dword length (header words + data words + pbc words)
10665  *
10666  * Create a PBC with the given flags, rate, VL, and length.
10667  *
10668  * NOTE: The PBC created will not insert any HCRC - all callers but one are
10669  * for verbs, which does not use this PSM feature.  The lone other caller
10670  * is for the diagnostic interface which calls this if the user does not
10671  * supply their own PBC.
10672  */
u64 create_pbc(struct hfi1_pportdata *ppd, u64 flags, int srate_mbs, u32 vl,
               u32 dw_len)
{
        u64 pbc, delay = 0;

        if (unlikely(srate_mbs))
                delay = delay_cycles(ppd, srate_mbs, dw_len);

        pbc = flags
                | (delay << PBC_STATIC_RATE_CONTROL_COUNT_SHIFT)
                | ((u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT)
                | (vl & PBC_VL_MASK) << PBC_VL_SHIFT
                | (dw_len & PBC_LENGTH_DWS_MASK)
                        << PBC_LENGTH_DWS_SHIFT;

        return pbc;
}

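/*
 * Illustrative use of create_pbc() (values assumed for the example, not
 * taken from a real caller): a 4112-byte send on VL 0 with no static
 * rate limit is 1028 dwords including the PBC, so:
 *
 *	pbc = create_pbc(ppd, 0, 0, 0, 1028);
 *
 * With srate_mbs == 0, no egress delay is inserted.
 */

/*
 * SBus receiver address for the thermal sensor block, and the data
 * value selecting its temperature monitor mode (see the requests in
 * thermal_init() below).
 */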
#define SBUS_THERMAL    0x4f
#define SBUS_THERM_MONITOR_MODE 0x1

#define THERM_FAILURE(dev, ret, reason) \
        dd_dev_err((dev),                                               \
                   "Thermal sensor initialization failed: %s (%d)\n",  \
                   (reason), (ret))

/*
 * Initialize the Avago thermal sensor.
 *
 * After initialization, enable polling of the thermal sensor through
 * the SBus interface. For this to work, the SBus Master firmware has
 * to be loaded, because the hardware polling logic uses SBus
 * interrupts, which the default firmware does not support. Otherwise,
 * no data will be returned through the ASIC_STS_THERM CSR.
 */
static int thermal_init(struct hfi1_devdata *dd)
{
        int ret = 0;

        if (dd->icode != ICODE_RTL_SILICON ||
            !(dd->flags & HFI1_DO_INIT_ASIC))
                return ret;

        acquire_hw_mutex(dd);
        dd_dev_info(dd, "Initializing thermal sensor\n");
        /* Thermal Sensor Initialization */
        /*    Step 1: Reset the Thermal SBus Receiver */
        ret = sbus_request_slow(dd, SBUS_THERMAL, 0x0,
                                RESET_SBUS_RECEIVER, 0);
        if (ret) {
                THERM_FAILURE(dd, ret, "Bus Reset");
                goto done;
        }
        /*    Step 2: Set Reset bit in Thermal block */
        ret = sbus_request_slow(dd, SBUS_THERMAL, 0x0,
                                WRITE_SBUS_RECEIVER, 0x1);
        if (ret) {
                THERM_FAILURE(dd, ret, "Therm Block Reset");
                goto done;
        }
        /*    Step 3: Write clock divider value (100MHz -> 2MHz) */
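        /*    (divider 0x32 = 50 decimal; 100 MHz / 50 = 2 MHz) */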
        ret = sbus_request_slow(dd, SBUS_THERMAL, 0x1,
                                WRITE_SBUS_RECEIVER, 0x32);
        if (ret) {
                THERM_FAILURE(dd, ret, "Write Clock Div");
                goto done;
        }
        /*    Step 4: Select temperature mode */
        ret = sbus_request_slow(dd, SBUS_THERMAL, 0x3,
                                WRITE_SBUS_RECEIVER,
                                SBUS_THERM_MONITOR_MODE);
        if (ret) {
                THERM_FAILURE(dd, ret, "Write Mode Sel");
                goto done;
        }
        /*    Step 5: De-assert block reset and start conversion */
        ret = sbus_request_slow(dd, SBUS_THERMAL, 0x0,
                                WRITE_SBUS_RECEIVER, 0x2);
        if (ret) {
                THERM_FAILURE(dd, ret, "Write Reset Deassert");
                goto done;
        }
        /*    Step 5.1: Wait for first conversion (21.5ms per spec) */
        msleep(22);

        /* Enable polling of thermal readings */
        write_csr(dd, ASIC_CFG_THERM_POLL_EN, 0x1);
done:
        release_hw_mutex(dd);
        return ret;
}

static void handle_temp_err(struct hfi1_devdata *dd)
{
        struct hfi1_pportdata *ppd = &dd->pport[0];
        /*
         * Thermal Critical Interrupt
         * Put the device into forced freeze mode, take the link down to
         * offline, and put the DC into reset.
         */
        dd_dev_emerg(dd,
                     "Critical temperature reached! Forcing device into freeze mode!\n");
        dd->flags |= HFI1_FORCED_FREEZE;
        start_freeze_handling(ppd, FREEZE_SELF | FREEZE_ABORT);
        /*
         * Shut the DC down as much and as quickly as possible.
         *
         * Step 1: Take the link down to OFFLINE. This will cause the
         *         8051 to put the SerDes in reset. However, we don't want
         *         to go through the entire link state machine since we
         *         want to shut down ASAP. Furthermore, this is not a
         *         graceful shutdown but rather an attempt to save the
         *         chip. The code below is almost the same as
         *         quiet_serdes() but avoids all the extra work and the
         *         sleeps.
         */
        ppd->driver_link_ready = 0;
        ppd->link_enabled = 0;
        set_physical_link_state(dd, PLS_OFFLINE |
                                (OPA_LINKDOWN_REASON_SMA_DISABLED << 8));
        /*
         * Step 2: Shut down the LCB and the 8051.
         *         After shutdown, do not restore the DC_CFG_RESET value.
         */
        dc_shutdown(dd);
}