Merge tag 'omapdrm-4.2-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tomba...
[linux-drm-fsl-dcu.git] / drivers / infiniband / hw / ipath / ipath_verbs.c
1 /*
2  * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
3  * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
4  *
5  * This software is available to you under a choice of one of two
6  * licenses.  You may choose to be licensed under the terms of the GNU
7  * General Public License (GPL) Version 2, available from the file
8  * COPYING in the main directory of this source tree, or the
9  * OpenIB.org BSD license below:
10  *
11  *     Redistribution and use in source and binary forms, with or
12  *     without modification, are permitted provided that the following
13  *     conditions are met:
14  *
15  *      - Redistributions of source code must retain the above
16  *        copyright notice, this list of conditions and the following
17  *        disclaimer.
18  *
19  *      - Redistributions in binary form must reproduce the above
20  *        copyright notice, this list of conditions and the following
21  *        disclaimer in the documentation and/or other materials
22  *        provided with the distribution.
23  *
24  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31  * SOFTWARE.
32  */
33
34 #include <rdma/ib_mad.h>
35 #include <rdma/ib_user_verbs.h>
36 #include <linux/io.h>
37 #include <linux/slab.h>
38 #include <linux/module.h>
39 #include <linux/utsname.h>
40 #include <linux/rculist.h>
41
42 #include "ipath_kernel.h"
43 #include "ipath_verbs.h"
44 #include "ipath_common.h"
45
46 static unsigned int ib_ipath_qp_table_size = 251;
47 module_param_named(qp_table_size, ib_ipath_qp_table_size, uint, S_IRUGO);
48 MODULE_PARM_DESC(qp_table_size, "QP table size");
49
50 unsigned int ib_ipath_lkey_table_size = 12;
51 module_param_named(lkey_table_size, ib_ipath_lkey_table_size, uint,
52                    S_IRUGO);
53 MODULE_PARM_DESC(lkey_table_size,
54                  "LKEY table size in bits (2^n, 1 <= n <= 23)");
55
56 static unsigned int ib_ipath_max_pds = 0xFFFF;
57 module_param_named(max_pds, ib_ipath_max_pds, uint, S_IWUSR | S_IRUGO);
58 MODULE_PARM_DESC(max_pds,
59                  "Maximum number of protection domains to support");
60
61 static unsigned int ib_ipath_max_ahs = 0xFFFF;
62 module_param_named(max_ahs, ib_ipath_max_ahs, uint, S_IWUSR | S_IRUGO);
63 MODULE_PARM_DESC(max_ahs, "Maximum number of address handles to support");
64
65 unsigned int ib_ipath_max_cqes = 0x2FFFF;
66 module_param_named(max_cqes, ib_ipath_max_cqes, uint, S_IWUSR | S_IRUGO);
67 MODULE_PARM_DESC(max_cqes,
68                  "Maximum number of completion queue entries to support");
69
70 unsigned int ib_ipath_max_cqs = 0x1FFFF;
71 module_param_named(max_cqs, ib_ipath_max_cqs, uint, S_IWUSR | S_IRUGO);
72 MODULE_PARM_DESC(max_cqs, "Maximum number of completion queues to support");
73
74 unsigned int ib_ipath_max_qp_wrs = 0x3FFF;
75 module_param_named(max_qp_wrs, ib_ipath_max_qp_wrs, uint,
76                    S_IWUSR | S_IRUGO);
77 MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support");
78
79 unsigned int ib_ipath_max_qps = 16384;
80 module_param_named(max_qps, ib_ipath_max_qps, uint, S_IWUSR | S_IRUGO);
81 MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support");
82
83 unsigned int ib_ipath_max_sges = 0x60;
84 module_param_named(max_sges, ib_ipath_max_sges, uint, S_IWUSR | S_IRUGO);
85 MODULE_PARM_DESC(max_sges, "Maximum number of SGEs to support");
86
87 unsigned int ib_ipath_max_mcast_grps = 16384;
88 module_param_named(max_mcast_grps, ib_ipath_max_mcast_grps, uint,
89                    S_IWUSR | S_IRUGO);
90 MODULE_PARM_DESC(max_mcast_grps,
91                  "Maximum number of multicast groups to support");
92
93 unsigned int ib_ipath_max_mcast_qp_attached = 16;
94 module_param_named(max_mcast_qp_attached, ib_ipath_max_mcast_qp_attached,
95                    uint, S_IWUSR | S_IRUGO);
96 MODULE_PARM_DESC(max_mcast_qp_attached,
97                  "Maximum number of attached QPs to support");
98
99 unsigned int ib_ipath_max_srqs = 1024;
100 module_param_named(max_srqs, ib_ipath_max_srqs, uint, S_IWUSR | S_IRUGO);
101 MODULE_PARM_DESC(max_srqs, "Maximum number of SRQs to support");
102
103 unsigned int ib_ipath_max_srq_sges = 128;
104 module_param_named(max_srq_sges, ib_ipath_max_srq_sges,
105                    uint, S_IWUSR | S_IRUGO);
106 MODULE_PARM_DESC(max_srq_sges, "Maximum number of SRQ SGEs to support");
107
108 unsigned int ib_ipath_max_srq_wrs = 0x1FFFF;
109 module_param_named(max_srq_wrs, ib_ipath_max_srq_wrs,
110                    uint, S_IWUSR | S_IRUGO);
111 MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs support");
112
113 static unsigned int ib_ipath_disable_sma;
114 module_param_named(disable_sma, ib_ipath_disable_sma, uint, S_IWUSR | S_IRUGO);
115 MODULE_PARM_DESC(disable_sma, "Disable the SMA");
116
117 /*
118  * Note that it is OK to post send work requests in the SQE and ERR
119  * states; ipath_do_send() will process them and generate error
120  * completions as per IB 1.2 C10-96.
121  */
122 const int ib_ipath_state_ops[IB_QPS_ERR + 1] = {
123         [IB_QPS_RESET] = 0,
124         [IB_QPS_INIT] = IPATH_POST_RECV_OK,
125         [IB_QPS_RTR] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK,
126         [IB_QPS_RTS] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
127             IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK |
128             IPATH_PROCESS_NEXT_SEND_OK,
129         [IB_QPS_SQD] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
130             IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK,
131         [IB_QPS_SQE] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
132             IPATH_POST_SEND_OK | IPATH_FLUSH_SEND,
133         [IB_QPS_ERR] = IPATH_POST_RECV_OK | IPATH_FLUSH_RECV |
134             IPATH_POST_SEND_OK | IPATH_FLUSH_SEND,
135 };
136
137 struct ipath_ucontext {
138         struct ib_ucontext ibucontext;
139 };
140
141 static inline struct ipath_ucontext *to_iucontext(struct ib_ucontext
142                                                   *ibucontext)
143 {
144         return container_of(ibucontext, struct ipath_ucontext, ibucontext);
145 }
146
147 /*
148  * Translate ib_wr_opcode into ib_wc_opcode.
149  */
150 const enum ib_wc_opcode ib_ipath_wc_opcode[] = {
151         [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
152         [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
153         [IB_WR_SEND] = IB_WC_SEND,
154         [IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
155         [IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
156         [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
157         [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD
158 };
159
160 /*
161  * System image GUID.
162  */
163 static __be64 sys_image_guid;
164
165 /**
166  * ipath_copy_sge - copy data to SGE memory
167  * @ss: the SGE state
168  * @data: the data to copy
169  * @length: the length of the data
170  */
171 void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length)
172 {
173         struct ipath_sge *sge = &ss->sge;
174
175         while (length) {
176                 u32 len = sge->length;
177
178                 if (len > length)
179                         len = length;
180                 if (len > sge->sge_length)
181                         len = sge->sge_length;
182                 BUG_ON(len == 0);
183                 memcpy(sge->vaddr, data, len);
184                 sge->vaddr += len;
185                 sge->length -= len;
186                 sge->sge_length -= len;
187                 if (sge->sge_length == 0) {
188                         if (--ss->num_sge)
189                                 *sge = *ss->sg_list++;
190                 } else if (sge->length == 0 && sge->mr != NULL) {
191                         if (++sge->n >= IPATH_SEGSZ) {
192                                 if (++sge->m >= sge->mr->mapsz)
193                                         break;
194                                 sge->n = 0;
195                         }
196                         sge->vaddr =
197                                 sge->mr->map[sge->m]->segs[sge->n].vaddr;
198                         sge->length =
199                                 sge->mr->map[sge->m]->segs[sge->n].length;
200                 }
201                 data += len;
202                 length -= len;
203         }
204 }
205
206 /**
207  * ipath_skip_sge - skip over SGE memory - XXX almost dup of prev func
208  * @ss: the SGE state
209  * @length: the number of bytes to skip
210  */
211 void ipath_skip_sge(struct ipath_sge_state *ss, u32 length)
212 {
213         struct ipath_sge *sge = &ss->sge;
214
215         while (length) {
216                 u32 len = sge->length;
217
218                 if (len > length)
219                         len = length;
220                 if (len > sge->sge_length)
221                         len = sge->sge_length;
222                 BUG_ON(len == 0);
223                 sge->vaddr += len;
224                 sge->length -= len;
225                 sge->sge_length -= len;
226                 if (sge->sge_length == 0) {
227                         if (--ss->num_sge)
228                                 *sge = *ss->sg_list++;
229                 } else if (sge->length == 0 && sge->mr != NULL) {
230                         if (++sge->n >= IPATH_SEGSZ) {
231                                 if (++sge->m >= sge->mr->mapsz)
232                                         break;
233                                 sge->n = 0;
234                         }
235                         sge->vaddr =
236                                 sge->mr->map[sge->m]->segs[sge->n].vaddr;
237                         sge->length =
238                                 sge->mr->map[sge->m]->segs[sge->n].length;
239                 }
240                 length -= len;
241         }
242 }
243
244 /*
245  * Count the number of DMA descriptors needed to send length bytes of data.
246  * Don't modify the ipath_sge_state to get the count.
247  * Return zero if any of the segments is not aligned.
248  */
249 static u32 ipath_count_sge(struct ipath_sge_state *ss, u32 length)
250 {
251         struct ipath_sge *sg_list = ss->sg_list;
252         struct ipath_sge sge = ss->sge;
253         u8 num_sge = ss->num_sge;
254         u32 ndesc = 1;  /* count the header */
255
256         while (length) {
257                 u32 len = sge.length;
258
259                 if (len > length)
260                         len = length;
261                 if (len > sge.sge_length)
262                         len = sge.sge_length;
263                 BUG_ON(len == 0);
264                 if (((long) sge.vaddr & (sizeof(u32) - 1)) ||
265                     (len != length && (len & (sizeof(u32) - 1)))) {
266                         ndesc = 0;
267                         break;
268                 }
269                 ndesc++;
270                 sge.vaddr += len;
271                 sge.length -= len;
272                 sge.sge_length -= len;
273                 if (sge.sge_length == 0) {
274                         if (--num_sge)
275                                 sge = *sg_list++;
276                 } else if (sge.length == 0 && sge.mr != NULL) {
277                         if (++sge.n >= IPATH_SEGSZ) {
278                                 if (++sge.m >= sge.mr->mapsz)
279                                         break;
280                                 sge.n = 0;
281                         }
282                         sge.vaddr =
283                                 sge.mr->map[sge.m]->segs[sge.n].vaddr;
284                         sge.length =
285                                 sge.mr->map[sge.m]->segs[sge.n].length;
286                 }
287                 length -= len;
288         }
289         return ndesc;
290 }
291
292 /*
293  * Copy from the SGEs to the data buffer.
294  */
295 static void ipath_copy_from_sge(void *data, struct ipath_sge_state *ss,
296                                 u32 length)
297 {
298         struct ipath_sge *sge = &ss->sge;
299
300         while (length) {
301                 u32 len = sge->length;
302
303                 if (len > length)
304                         len = length;
305                 if (len > sge->sge_length)
306                         len = sge->sge_length;
307                 BUG_ON(len == 0);
308                 memcpy(data, sge->vaddr, len);
309                 sge->vaddr += len;
310                 sge->length -= len;
311                 sge->sge_length -= len;
312                 if (sge->sge_length == 0) {
313                         if (--ss->num_sge)
314                                 *sge = *ss->sg_list++;
315                 } else if (sge->length == 0 && sge->mr != NULL) {
316                         if (++sge->n >= IPATH_SEGSZ) {
317                                 if (++sge->m >= sge->mr->mapsz)
318                                         break;
319                                 sge->n = 0;
320                         }
321                         sge->vaddr =
322                                 sge->mr->map[sge->m]->segs[sge->n].vaddr;
323                         sge->length =
324                                 sge->mr->map[sge->m]->segs[sge->n].length;
325                 }
326                 data += len;
327                 length -= len;
328         }
329 }
330
331 /**
332  * ipath_post_one_send - post one RC, UC, or UD send work request
333  * @qp: the QP to post on
334  * @wr: the work request to send
335  */
336 static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr)
337 {
338         struct ipath_swqe *wqe;
339         u32 next;
340         int i;
341         int j;
342         int acc;
343         int ret;
344         unsigned long flags;
345         struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;
346
347         spin_lock_irqsave(&qp->s_lock, flags);
348
349         if (qp->ibqp.qp_type != IB_QPT_SMI &&
350             !(dd->ipath_flags & IPATH_LINKACTIVE)) {
351                 ret = -ENETDOWN;
352                 goto bail;
353         }
354
355         /* Check that state is OK to post send. */
356         if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK)))
357                 goto bail_inval;
358
359         /* IB spec says that num_sge == 0 is OK. */
360         if (wr->num_sge > qp->s_max_sge)
361                 goto bail_inval;
362
363         /*
364          * Don't allow RDMA reads or atomic operations on UC or
365          * undefined operations.
366          * Make sure buffer is large enough to hold the result for atomics.
367          */
368         if (qp->ibqp.qp_type == IB_QPT_UC) {
369                 if ((unsigned) wr->opcode >= IB_WR_RDMA_READ)
370                         goto bail_inval;
371         } else if (qp->ibqp.qp_type == IB_QPT_UD) {
372                 /* Check UD opcode */
373                 if (wr->opcode != IB_WR_SEND &&
374                     wr->opcode != IB_WR_SEND_WITH_IMM)
375                         goto bail_inval;
376                 /* Check UD destination address PD */
377                 if (qp->ibqp.pd != wr->wr.ud.ah->pd)
378                         goto bail_inval;
379         } else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD)
380                 goto bail_inval;
381         else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP &&
382                    (wr->num_sge == 0 ||
383                     wr->sg_list[0].length < sizeof(u64) ||
384                     wr->sg_list[0].addr & (sizeof(u64) - 1)))
385                 goto bail_inval;
386         else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic)
387                 goto bail_inval;
388
389         next = qp->s_head + 1;
390         if (next >= qp->s_size)
391                 next = 0;
392         if (next == qp->s_last) {
393                 ret = -ENOMEM;
394                 goto bail;
395         }
396
397         wqe = get_swqe_ptr(qp, qp->s_head);
398         wqe->wr = *wr;
399         wqe->length = 0;
400         if (wr->num_sge) {
401                 acc = wr->opcode >= IB_WR_RDMA_READ ?
402                         IB_ACCESS_LOCAL_WRITE : 0;
403                 for (i = 0, j = 0; i < wr->num_sge; i++) {
404                         u32 length = wr->sg_list[i].length;
405                         int ok;
406
407                         if (length == 0)
408                                 continue;
409                         ok = ipath_lkey_ok(qp, &wqe->sg_list[j],
410                                            &wr->sg_list[i], acc);
411                         if (!ok)
412                                 goto bail_inval;
413                         wqe->length += length;
414                         j++;
415                 }
416                 wqe->wr.num_sge = j;
417         }
418         if (qp->ibqp.qp_type == IB_QPT_UC ||
419             qp->ibqp.qp_type == IB_QPT_RC) {
420                 if (wqe->length > 0x80000000U)
421                         goto bail_inval;
422         } else if (wqe->length > to_idev(qp->ibqp.device)->dd->ipath_ibmtu)
423                 goto bail_inval;
424         wqe->ssn = qp->s_ssn++;
425         qp->s_head = next;
426
427         ret = 0;
428         goto bail;
429
430 bail_inval:
431         ret = -EINVAL;
432 bail:
433         spin_unlock_irqrestore(&qp->s_lock, flags);
434         return ret;
435 }
436
437 /**
438  * ipath_post_send - post a send on a QP
439  * @ibqp: the QP to post the send on
440  * @wr: the list of work requests to post
441  * @bad_wr: the first bad WR is put here
442  *
443  * This may be called from interrupt context.
444  */
445 static int ipath_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
446                            struct ib_send_wr **bad_wr)
447 {
448         struct ipath_qp *qp = to_iqp(ibqp);
449         int err = 0;
450
451         for (; wr; wr = wr->next) {
452                 err = ipath_post_one_send(qp, wr);
453                 if (err) {
454                         *bad_wr = wr;
455                         goto bail;
456                 }
457         }
458
459         /* Try to do the send work in the caller's context. */
460         ipath_do_send((unsigned long) qp);
461
462 bail:
463         return err;
464 }
465
466 /**
467  * ipath_post_receive - post a receive on a QP
468  * @ibqp: the QP to post the receive on
469  * @wr: the WR to post
470  * @bad_wr: the first bad WR is put here
471  *
472  * This may be called from interrupt context.
473  */
474 static int ipath_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
475                               struct ib_recv_wr **bad_wr)
476 {
477         struct ipath_qp *qp = to_iqp(ibqp);
478         struct ipath_rwq *wq = qp->r_rq.wq;
479         unsigned long flags;
480         int ret;
481
482         /* Check that state is OK to post receive. */
483         if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_RECV_OK) || !wq) {
484                 *bad_wr = wr;
485                 ret = -EINVAL;
486                 goto bail;
487         }
488
489         for (; wr; wr = wr->next) {
490                 struct ipath_rwqe *wqe;
491                 u32 next;
492                 int i;
493
494                 if ((unsigned) wr->num_sge > qp->r_rq.max_sge) {
495                         *bad_wr = wr;
496                         ret = -EINVAL;
497                         goto bail;
498                 }
499
500                 spin_lock_irqsave(&qp->r_rq.lock, flags);
501                 next = wq->head + 1;
502                 if (next >= qp->r_rq.size)
503                         next = 0;
504                 if (next == wq->tail) {
505                         spin_unlock_irqrestore(&qp->r_rq.lock, flags);
506                         *bad_wr = wr;
507                         ret = -ENOMEM;
508                         goto bail;
509                 }
510
511                 wqe = get_rwqe_ptr(&qp->r_rq, wq->head);
512                 wqe->wr_id = wr->wr_id;
513                 wqe->num_sge = wr->num_sge;
514                 for (i = 0; i < wr->num_sge; i++)
515                         wqe->sg_list[i] = wr->sg_list[i];
516                 /* Make sure queue entry is written before the head index. */
517                 smp_wmb();
518                 wq->head = next;
519                 spin_unlock_irqrestore(&qp->r_rq.lock, flags);
520         }
521         ret = 0;
522
523 bail:
524         return ret;
525 }
526
527 /**
528  * ipath_qp_rcv - processing an incoming packet on a QP
529  * @dev: the device the packet came on
530  * @hdr: the packet header
531  * @has_grh: true if the packet has a GRH
532  * @data: the packet data
533  * @tlen: the packet length
534  * @qp: the QP the packet came on
535  *
536  * This is called from ipath_ib_rcv() to process an incoming packet
537  * for the given QP.
538  * Called at interrupt level.
539  */
540 static void ipath_qp_rcv(struct ipath_ibdev *dev,
541                          struct ipath_ib_header *hdr, int has_grh,
542                          void *data, u32 tlen, struct ipath_qp *qp)
543 {
544         /* Check for valid receive state. */
545         if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
546                 dev->n_pkt_drops++;
547                 return;
548         }
549
550         switch (qp->ibqp.qp_type) {
551         case IB_QPT_SMI:
552         case IB_QPT_GSI:
553                 if (ib_ipath_disable_sma)
554                         break;
555                 /* FALLTHROUGH */
556         case IB_QPT_UD:
557                 ipath_ud_rcv(dev, hdr, has_grh, data, tlen, qp);
558                 break;
559
560         case IB_QPT_RC:
561                 ipath_rc_rcv(dev, hdr, has_grh, data, tlen, qp);
562                 break;
563
564         case IB_QPT_UC:
565                 ipath_uc_rcv(dev, hdr, has_grh, data, tlen, qp);
566                 break;
567
568         default:
569                 break;
570         }
571 }
572
573 /**
574  * ipath_ib_rcv - process an incoming packet
575  * @arg: the device pointer
576  * @rhdr: the header of the packet
577  * @data: the packet data
578  * @tlen: the packet length
579  *
580  * This is called from ipath_kreceive() to process an incoming packet at
581  * interrupt level. Tlen is the length of the header + data + CRC in bytes.
582  */
583 void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data,
584                   u32 tlen)
585 {
586         struct ipath_ib_header *hdr = rhdr;
587         struct ipath_other_headers *ohdr;
588         struct ipath_qp *qp;
589         u32 qp_num;
590         int lnh;
591         u8 opcode;
592         u16 lid;
593
594         if (unlikely(dev == NULL))
595                 goto bail;
596
597         if (unlikely(tlen < 24)) {      /* LRH+BTH+CRC */
598                 dev->rcv_errors++;
599                 goto bail;
600         }
601
602         /* Check for a valid destination LID (see ch. 7.11.1). */
603         lid = be16_to_cpu(hdr->lrh[1]);
604         if (lid < IPATH_MULTICAST_LID_BASE) {
605                 lid &= ~((1 << dev->dd->ipath_lmc) - 1);
606                 if (unlikely(lid != dev->dd->ipath_lid)) {
607                         dev->rcv_errors++;
608                         goto bail;
609                 }
610         }
611
612         /* Check for GRH */
613         lnh = be16_to_cpu(hdr->lrh[0]) & 3;
614         if (lnh == IPATH_LRH_BTH)
615                 ohdr = &hdr->u.oth;
616         else if (lnh == IPATH_LRH_GRH)
617                 ohdr = &hdr->u.l.oth;
618         else {
619                 dev->rcv_errors++;
620                 goto bail;
621         }
622
623         opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0x7f;
624         dev->opstats[opcode].n_bytes += tlen;
625         dev->opstats[opcode].n_packets++;
626
627         /* Get the destination QP number. */
628         qp_num = be32_to_cpu(ohdr->bth[1]) & IPATH_QPN_MASK;
629         if (qp_num == IPATH_MULTICAST_QPN) {
630                 struct ipath_mcast *mcast;
631                 struct ipath_mcast_qp *p;
632
633                 if (lnh != IPATH_LRH_GRH) {
634                         dev->n_pkt_drops++;
635                         goto bail;
636                 }
637                 mcast = ipath_mcast_find(&hdr->u.l.grh.dgid);
638                 if (mcast == NULL) {
639                         dev->n_pkt_drops++;
640                         goto bail;
641                 }
642                 dev->n_multicast_rcv++;
643                 list_for_each_entry_rcu(p, &mcast->qp_list, list)
644                         ipath_qp_rcv(dev, hdr, 1, data, tlen, p->qp);
645                 /*
646                  * Notify ipath_multicast_detach() if it is waiting for us
647                  * to finish.
648                  */
649                 if (atomic_dec_return(&mcast->refcount) <= 1)
650                         wake_up(&mcast->wait);
651         } else {
652                 qp = ipath_lookup_qpn(&dev->qp_table, qp_num);
653                 if (qp) {
654                         dev->n_unicast_rcv++;
655                         ipath_qp_rcv(dev, hdr, lnh == IPATH_LRH_GRH, data,
656                                      tlen, qp);
657                         /*
658                          * Notify ipath_destroy_qp() if it is waiting
659                          * for us to finish.
660                          */
661                         if (atomic_dec_and_test(&qp->refcount))
662                                 wake_up(&qp->wait);
663                 } else
664                         dev->n_pkt_drops++;
665         }
666
667 bail:;
668 }
669
670 /**
671  * ipath_ib_timer - verbs timer
672  * @arg: the device pointer
673  *
674  * This is called from ipath_do_rcv_timer() at interrupt level to check for
675  * QPs which need retransmits and to collect performance numbers.
676  */
677 static void ipath_ib_timer(struct ipath_ibdev *dev)
678 {
679         struct ipath_qp *resend = NULL;
680         struct ipath_qp *rnr = NULL;
681         struct list_head *last;
682         struct ipath_qp *qp;
683         unsigned long flags;
684
685         if (dev == NULL)
686                 return;
687
688         spin_lock_irqsave(&dev->pending_lock, flags);
689         /* Start filling the next pending queue. */
690         if (++dev->pending_index >= ARRAY_SIZE(dev->pending))
691                 dev->pending_index = 0;
692         /* Save any requests still in the new queue, they have timed out. */
693         last = &dev->pending[dev->pending_index];
694         while (!list_empty(last)) {
695                 qp = list_entry(last->next, struct ipath_qp, timerwait);
696                 list_del_init(&qp->timerwait);
697                 qp->timer_next = resend;
698                 resend = qp;
699                 atomic_inc(&qp->refcount);
700         }
701         last = &dev->rnrwait;
702         if (!list_empty(last)) {
703                 qp = list_entry(last->next, struct ipath_qp, timerwait);
704                 if (--qp->s_rnr_timeout == 0) {
705                         do {
706                                 list_del_init(&qp->timerwait);
707                                 qp->timer_next = rnr;
708                                 rnr = qp;
709                                 atomic_inc(&qp->refcount);
710                                 if (list_empty(last))
711                                         break;
712                                 qp = list_entry(last->next, struct ipath_qp,
713                                                 timerwait);
714                         } while (qp->s_rnr_timeout == 0);
715                 }
716         }
717         /*
718          * We should only be in the started state if pma_sample_start != 0
719          */
720         if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_STARTED &&
721             --dev->pma_sample_start == 0) {
722                 dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_RUNNING;
723                 ipath_snapshot_counters(dev->dd, &dev->ipath_sword,
724                                         &dev->ipath_rword,
725                                         &dev->ipath_spkts,
726                                         &dev->ipath_rpkts,
727                                         &dev->ipath_xmit_wait);
728         }
729         if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_RUNNING) {
730                 if (dev->pma_sample_interval == 0) {
731                         u64 ta, tb, tc, td, te;
732
733                         dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_DONE;
734                         ipath_snapshot_counters(dev->dd, &ta, &tb,
735                                                 &tc, &td, &te);
736
737                         dev->ipath_sword = ta - dev->ipath_sword;
738                         dev->ipath_rword = tb - dev->ipath_rword;
739                         dev->ipath_spkts = tc - dev->ipath_spkts;
740                         dev->ipath_rpkts = td - dev->ipath_rpkts;
741                         dev->ipath_xmit_wait = te - dev->ipath_xmit_wait;
742                 }
743                 else
744                         dev->pma_sample_interval--;
745         }
746         spin_unlock_irqrestore(&dev->pending_lock, flags);
747
748         /* XXX What if timer fires again while this is running? */
749         while (resend != NULL) {
750                 qp = resend;
751                 resend = qp->timer_next;
752
753                 spin_lock_irqsave(&qp->s_lock, flags);
754                 if (qp->s_last != qp->s_tail &&
755                     ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) {
756                         dev->n_timeouts++;
757                         ipath_restart_rc(qp, qp->s_last_psn + 1);
758                 }
759                 spin_unlock_irqrestore(&qp->s_lock, flags);
760
761                 /* Notify ipath_destroy_qp() if it is waiting. */
762                 if (atomic_dec_and_test(&qp->refcount))
763                         wake_up(&qp->wait);
764         }
765         while (rnr != NULL) {
766                 qp = rnr;
767                 rnr = qp->timer_next;
768
769                 spin_lock_irqsave(&qp->s_lock, flags);
770                 if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)
771                         ipath_schedule_send(qp);
772                 spin_unlock_irqrestore(&qp->s_lock, flags);
773
774                 /* Notify ipath_destroy_qp() if it is waiting. */
775                 if (atomic_dec_and_test(&qp->refcount))
776                         wake_up(&qp->wait);
777         }
778 }
779
780 static void update_sge(struct ipath_sge_state *ss, u32 length)
781 {
782         struct ipath_sge *sge = &ss->sge;
783
784         sge->vaddr += length;
785         sge->length -= length;
786         sge->sge_length -= length;
787         if (sge->sge_length == 0) {
788                 if (--ss->num_sge)
789                         *sge = *ss->sg_list++;
790         } else if (sge->length == 0 && sge->mr != NULL) {
791                 if (++sge->n >= IPATH_SEGSZ) {
792                         if (++sge->m >= sge->mr->mapsz)
793                                 return;
794                         sge->n = 0;
795                 }
796                 sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr;
797                 sge->length = sge->mr->map[sge->m]->segs[sge->n].length;
798         }
799 }
800
801 #ifdef __LITTLE_ENDIAN
802 static inline u32 get_upper_bits(u32 data, u32 shift)
803 {
804         return data >> shift;
805 }
806
807 static inline u32 set_upper_bits(u32 data, u32 shift)
808 {
809         return data << shift;
810 }
811
812 static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
813 {
814         data <<= ((sizeof(u32) - n) * BITS_PER_BYTE);
815         data >>= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
816         return data;
817 }
818 #else
819 static inline u32 get_upper_bits(u32 data, u32 shift)
820 {
821         return data << shift;
822 }
823
824 static inline u32 set_upper_bits(u32 data, u32 shift)
825 {
826         return data >> shift;
827 }
828
829 static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
830 {
831         data >>= ((sizeof(u32) - n) * BITS_PER_BYTE);
832         data <<= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
833         return data;
834 }
835 #endif
836
837 static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss,
838                     u32 length, unsigned flush_wc)
839 {
840         u32 extra = 0;
841         u32 data = 0;
842         u32 last;
843
844         while (1) {
845                 u32 len = ss->sge.length;
846                 u32 off;
847
848                 if (len > length)
849                         len = length;
850                 if (len > ss->sge.sge_length)
851                         len = ss->sge.sge_length;
852                 BUG_ON(len == 0);
853                 /* If the source address is not aligned, try to align it. */
854                 off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1);
855                 if (off) {
856                         u32 *addr = (u32 *)((unsigned long)ss->sge.vaddr &
857                                             ~(sizeof(u32) - 1));
858                         u32 v = get_upper_bits(*addr, off * BITS_PER_BYTE);
859                         u32 y;
860
861                         y = sizeof(u32) - off;
862                         if (len > y)
863                                 len = y;
864                         if (len + extra >= sizeof(u32)) {
865                                 data |= set_upper_bits(v, extra *
866                                                        BITS_PER_BYTE);
867                                 len = sizeof(u32) - extra;
868                                 if (len == length) {
869                                         last = data;
870                                         break;
871                                 }
872                                 __raw_writel(data, piobuf);
873                                 piobuf++;
874                                 extra = 0;
875                                 data = 0;
876                         } else {
877                                 /* Clear unused upper bytes */
878                                 data |= clear_upper_bytes(v, len, extra);
879                                 if (len == length) {
880                                         last = data;
881                                         break;
882                                 }
883                                 extra += len;
884                         }
885                 } else if (extra) {
886                         /* Source address is aligned. */
887                         u32 *addr = (u32 *) ss->sge.vaddr;
888                         int shift = extra * BITS_PER_BYTE;
889                         int ushift = 32 - shift;
890                         u32 l = len;
891
892                         while (l >= sizeof(u32)) {
893                                 u32 v = *addr;
894
895                                 data |= set_upper_bits(v, shift);
896                                 __raw_writel(data, piobuf);
897                                 data = get_upper_bits(v, ushift);
898                                 piobuf++;
899                                 addr++;
900                                 l -= sizeof(u32);
901                         }
902                         /*
903                          * We still have 'extra' number of bytes leftover.
904                          */
905                         if (l) {
906                                 u32 v = *addr;
907
908                                 if (l + extra >= sizeof(u32)) {
909                                         data |= set_upper_bits(v, shift);
910                                         len -= l + extra - sizeof(u32);
911                                         if (len == length) {
912                                                 last = data;
913                                                 break;
914                                         }
915                                         __raw_writel(data, piobuf);
916                                         piobuf++;
917                                         extra = 0;
918                                         data = 0;
919                                 } else {
920                                         /* Clear unused upper bytes */
921                                         data |= clear_upper_bytes(v, l,
922                                                                   extra);
923                                         if (len == length) {
924                                                 last = data;
925                                                 break;
926                                         }
927                                         extra += l;
928                                 }
929                         } else if (len == length) {
930                                 last = data;
931                                 break;
932                         }
933                 } else if (len == length) {
934                         u32 w;
935
936                         /*
937                          * Need to round up for the last dword in the
938                          * packet.
939                          */
940                         w = (len + 3) >> 2;
941                         __iowrite32_copy(piobuf, ss->sge.vaddr, w - 1);
942                         piobuf += w - 1;
943                         last = ((u32 *) ss->sge.vaddr)[w - 1];
944                         break;
945                 } else {
946                         u32 w = len >> 2;
947
948                         __iowrite32_copy(piobuf, ss->sge.vaddr, w);
949                         piobuf += w;
950
951                         extra = len & (sizeof(u32) - 1);
952                         if (extra) {
953                                 u32 v = ((u32 *) ss->sge.vaddr)[w];
954
955                                 /* Clear unused upper bytes */
956                                 data = clear_upper_bytes(v, extra, 0);
957                         }
958                 }
959                 update_sge(ss, len);
960                 length -= len;
961         }
962         /* Update address before sending packet. */
963         update_sge(ss, length);
964         if (flush_wc) {
965                 /* must flush early everything before trigger word */
966                 ipath_flush_wc();
967                 __raw_writel(last, piobuf);
968                 /* be sure trigger word is written */
969                 ipath_flush_wc();
970         } else
971                 __raw_writel(last, piobuf);
972 }
973
974 /*
975  * Convert IB rate to delay multiplier.
976  */
977 unsigned ipath_ib_rate_to_mult(enum ib_rate rate)
978 {
979         switch (rate) {
980         case IB_RATE_2_5_GBPS: return 8;
981         case IB_RATE_5_GBPS:   return 4;
982         case IB_RATE_10_GBPS:  return 2;
983         case IB_RATE_20_GBPS:  return 1;
984         default:               return 0;
985         }
986 }
987
988 /*
989  * Convert delay multiplier to IB rate
990  */
991 static enum ib_rate ipath_mult_to_ib_rate(unsigned mult)
992 {
993         switch (mult) {
994         case 8:  return IB_RATE_2_5_GBPS;
995         case 4:  return IB_RATE_5_GBPS;
996         case 2:  return IB_RATE_10_GBPS;
997         case 1:  return IB_RATE_20_GBPS;
998         default: return IB_RATE_PORT_CURRENT;
999         }
1000 }
1001
1002 static inline struct ipath_verbs_txreq *get_txreq(struct ipath_ibdev *dev)
1003 {
1004         struct ipath_verbs_txreq *tx = NULL;
1005         unsigned long flags;
1006
1007         spin_lock_irqsave(&dev->pending_lock, flags);
1008         if (!list_empty(&dev->txreq_free)) {
1009                 struct list_head *l = dev->txreq_free.next;
1010
1011                 list_del(l);
1012                 tx = list_entry(l, struct ipath_verbs_txreq, txreq.list);
1013         }
1014         spin_unlock_irqrestore(&dev->pending_lock, flags);
1015         return tx;
1016 }
1017
1018 static inline void put_txreq(struct ipath_ibdev *dev,
1019                              struct ipath_verbs_txreq *tx)
1020 {
1021         unsigned long flags;
1022
1023         spin_lock_irqsave(&dev->pending_lock, flags);
1024         list_add(&tx->txreq.list, &dev->txreq_free);
1025         spin_unlock_irqrestore(&dev->pending_lock, flags);
1026 }
1027
1028 static void sdma_complete(void *cookie, int status)
1029 {
1030         struct ipath_verbs_txreq *tx = cookie;
1031         struct ipath_qp *qp = tx->qp;
1032         struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
1033         unsigned long flags;
1034         enum ib_wc_status ibs = status == IPATH_SDMA_TXREQ_S_OK ?
1035                 IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR;
1036
1037         if (atomic_dec_and_test(&qp->s_dma_busy)) {
1038                 spin_lock_irqsave(&qp->s_lock, flags);
1039                 if (tx->wqe)
1040                         ipath_send_complete(qp, tx->wqe, ibs);
1041                 if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND &&
1042                      qp->s_last != qp->s_head) ||
1043                     (qp->s_flags & IPATH_S_WAIT_DMA))
1044                         ipath_schedule_send(qp);
1045                 spin_unlock_irqrestore(&qp->s_lock, flags);
1046                 wake_up(&qp->wait_dma);
1047         } else if (tx->wqe) {
1048                 spin_lock_irqsave(&qp->s_lock, flags);
1049                 ipath_send_complete(qp, tx->wqe, ibs);
1050                 spin_unlock_irqrestore(&qp->s_lock, flags);
1051         }
1052
1053         if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF)
1054                 kfree(tx->txreq.map_addr);
1055         put_txreq(dev, tx);
1056
1057         if (atomic_dec_and_test(&qp->refcount))
1058                 wake_up(&qp->wait);
1059 }
1060
1061 static void decrement_dma_busy(struct ipath_qp *qp)
1062 {
1063         unsigned long flags;
1064
1065         if (atomic_dec_and_test(&qp->s_dma_busy)) {
1066                 spin_lock_irqsave(&qp->s_lock, flags);
1067                 if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND &&
1068                      qp->s_last != qp->s_head) ||
1069                     (qp->s_flags & IPATH_S_WAIT_DMA))
1070                         ipath_schedule_send(qp);
1071                 spin_unlock_irqrestore(&qp->s_lock, flags);
1072                 wake_up(&qp->wait_dma);
1073         }
1074 }
1075
1076 /*
1077  * Compute the number of clock cycles of delay before sending the next packet.
1078  * The multipliers reflect the number of clocks for the fastest rate so
1079  * one tick at 4xDDR is 8 ticks at 1xSDR.
1080  * If the destination port will take longer to receive a packet than
1081  * the outgoing link can send it, we need to delay sending the next packet
1082  * by the difference in time it takes the receiver to receive and the sender
1083  * to send this packet.
1084  * Note that this delay is always correct for UC and RC but not always
1085  * optimal for UD. For UD, the destination HCA can be different for each
1086  * packet, in which case, we could send packets to a different destination
1087  * while "waiting" for the delay. The overhead for doing this without
1088  * HW support is more than just paying the cost of delaying some packets
1089  * unnecessarily.
1090  */
1091 static inline unsigned ipath_pkt_delay(u32 plen, u8 snd_mult, u8 rcv_mult)
1092 {
1093         return (rcv_mult > snd_mult) ?
1094                 (plen * (rcv_mult - snd_mult) + 1) >> 1 : 0;
1095 }
1096
1097 static int ipath_verbs_send_dma(struct ipath_qp *qp,
1098                                 struct ipath_ib_header *hdr, u32 hdrwords,
1099                                 struct ipath_sge_state *ss, u32 len,
1100                                 u32 plen, u32 dwords)
1101 {
1102         struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
1103         struct ipath_devdata *dd = dev->dd;
1104         struct ipath_verbs_txreq *tx;
1105         u32 *piobuf;
1106         u32 control;
1107         u32 ndesc;
1108         int ret;
1109
1110         tx = qp->s_tx;
1111         if (tx) {
1112                 qp->s_tx = NULL;
1113                 /* resend previously constructed packet */
1114                 atomic_inc(&qp->s_dma_busy);
1115                 ret = ipath_sdma_verbs_send(dd, tx->ss, tx->len, tx);
1116                 if (ret) {
1117                         qp->s_tx = tx;
1118                         decrement_dma_busy(qp);
1119                 }
1120                 goto bail;
1121         }
1122
1123         tx = get_txreq(dev);
1124         if (!tx) {
1125                 ret = -EBUSY;
1126                 goto bail;
1127         }
1128
1129         /*
1130          * Get the saved delay count we computed for the previous packet
1131          * and save the delay count for this packet to be used next time
1132          * we get here.
1133          */
1134         control = qp->s_pkt_delay;
1135         qp->s_pkt_delay = ipath_pkt_delay(plen, dd->delay_mult, qp->s_dmult);
1136
1137         tx->qp = qp;
1138         atomic_inc(&qp->refcount);
1139         tx->wqe = qp->s_wqe;
1140         tx->txreq.callback = sdma_complete;
1141         tx->txreq.callback_cookie = tx;
1142         tx->txreq.flags = IPATH_SDMA_TXREQ_F_HEADTOHOST |
1143                 IPATH_SDMA_TXREQ_F_INTREQ | IPATH_SDMA_TXREQ_F_FREEDESC;
1144         if (plen + 1 >= IPATH_SMALLBUF_DWORDS)
1145                 tx->txreq.flags |= IPATH_SDMA_TXREQ_F_USELARGEBUF;
1146
1147         /* VL15 packets bypass credit check */
1148         if ((be16_to_cpu(hdr->lrh[0]) >> 12) == 15) {
1149                 control |= 1ULL << 31;
1150                 tx->txreq.flags |= IPATH_SDMA_TXREQ_F_VL15;
1151         }
1152
1153         if (len) {
1154                 /*
1155                  * Don't try to DMA if it takes more descriptors than
1156                  * the queue holds.
1157                  */
1158                 ndesc = ipath_count_sge(ss, len);
1159                 if (ndesc >= dd->ipath_sdma_descq_cnt)
1160                         ndesc = 0;
1161         } else
1162                 ndesc = 1;
1163         if (ndesc) {
1164                 tx->hdr.pbc[0] = cpu_to_le32(plen);
1165                 tx->hdr.pbc[1] = cpu_to_le32(control);
1166                 memcpy(&tx->hdr.hdr, hdr, hdrwords << 2);
1167                 tx->txreq.sg_count = ndesc;
1168                 tx->map_len = (hdrwords + 2) << 2;
1169                 tx->txreq.map_addr = &tx->hdr;
1170                 atomic_inc(&qp->s_dma_busy);
1171                 ret = ipath_sdma_verbs_send(dd, ss, dwords, tx);
1172                 if (ret) {
1173                         /* save ss and length in dwords */
1174                         tx->ss = ss;
1175                         tx->len = dwords;
1176                         qp->s_tx = tx;
1177                         decrement_dma_busy(qp);
1178                 }
1179                 goto bail;
1180         }
1181
1182         /* Allocate a buffer and copy the header and payload to it. */
1183         tx->map_len = (plen + 1) << 2;
1184         piobuf = kmalloc(tx->map_len, GFP_ATOMIC);
1185         if (unlikely(piobuf == NULL)) {
1186                 ret = -EBUSY;
1187                 goto err_tx;
1188         }
1189         tx->txreq.map_addr = piobuf;
1190         tx->txreq.flags |= IPATH_SDMA_TXREQ_F_FREEBUF;
1191         tx->txreq.sg_count = 1;
1192
1193         *piobuf++ = (__force u32) cpu_to_le32(plen);
1194         *piobuf++ = (__force u32) cpu_to_le32(control);
1195         memcpy(piobuf, hdr, hdrwords << 2);
1196         ipath_copy_from_sge(piobuf + hdrwords, ss, len);
1197
1198         atomic_inc(&qp->s_dma_busy);
1199         ret = ipath_sdma_verbs_send(dd, NULL, 0, tx);
1200         /*
1201          * If we couldn't queue the DMA request, save the info
1202          * and try again later rather than destroying the
1203          * buffer and undoing the side effects of the copy.
1204          */
1205         if (ret) {
1206                 tx->ss = NULL;
1207                 tx->len = 0;
1208                 qp->s_tx = tx;
1209                 decrement_dma_busy(qp);
1210         }
1211         dev->n_unaligned++;
1212         goto bail;
1213
1214 err_tx:
1215         if (atomic_dec_and_test(&qp->refcount))
1216                 wake_up(&qp->wait);
1217         put_txreq(dev, tx);
1218 bail:
1219         return ret;
1220 }
1221
1222 static int ipath_verbs_send_pio(struct ipath_qp *qp,
1223                                 struct ipath_ib_header *ibhdr, u32 hdrwords,
1224                                 struct ipath_sge_state *ss, u32 len,
1225                                 u32 plen, u32 dwords)
1226 {
1227         struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;
1228         u32 *hdr = (u32 *) ibhdr;
1229         u32 __iomem *piobuf;
1230         unsigned flush_wc;
1231         u32 control;
1232         int ret;
1233         unsigned long flags;
1234
1235         piobuf = ipath_getpiobuf(dd, plen, NULL);
1236         if (unlikely(piobuf == NULL)) {
1237                 ret = -EBUSY;
1238                 goto bail;
1239         }
1240
1241         /*
1242          * Get the saved delay count we computed for the previous packet
1243          * and save the delay count for this packet to be used next time
1244          * we get here.
1245          */
1246         control = qp->s_pkt_delay;
1247         qp->s_pkt_delay = ipath_pkt_delay(plen, dd->delay_mult, qp->s_dmult);
1248
1249         /* VL15 packets bypass credit check */
1250         if ((be16_to_cpu(ibhdr->lrh[0]) >> 12) == 15)
1251                 control |= 1ULL << 31;
1252
1253         /*
1254          * Write the length to the control qword plus any needed flags.
1255          * We have to flush after the PBC for correctness on some cpus
1256          * or WC buffer can be written out of order.
1257          */
1258         writeq(((u64) control << 32) | plen, piobuf);
1259         piobuf += 2;
1260
1261         flush_wc = dd->ipath_flags & IPATH_PIO_FLUSH_WC;
1262         if (len == 0) {
1263                 /*
1264                  * If there is just the header portion, must flush before
1265                  * writing last word of header for correctness, and after
1266                  * the last header word (trigger word).
1267                  */
1268                 if (flush_wc) {
1269                         ipath_flush_wc();
1270                         __iowrite32_copy(piobuf, hdr, hdrwords - 1);
1271                         ipath_flush_wc();
1272                         __raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1);
1273                         ipath_flush_wc();
1274                 } else
1275                         __iowrite32_copy(piobuf, hdr, hdrwords);
1276                 goto done;
1277         }
1278
1279         if (flush_wc)
1280                 ipath_flush_wc();
1281         __iowrite32_copy(piobuf, hdr, hdrwords);
1282         piobuf += hdrwords;
1283
1284         /* The common case is aligned and contained in one segment. */
1285         if (likely(ss->num_sge == 1 && len <= ss->sge.length &&
1286                    !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) {
1287                 u32 *addr = (u32 *) ss->sge.vaddr;
1288
1289                 /* Update address before sending packet. */
1290                 update_sge(ss, len);
1291                 if (flush_wc) {
1292                         __iowrite32_copy(piobuf, addr, dwords - 1);
1293                         /* must flush early everything before trigger word */
1294                         ipath_flush_wc();
1295                         __raw_writel(addr[dwords - 1], piobuf + dwords - 1);
1296                         /* be sure trigger word is written */
1297                         ipath_flush_wc();
1298                 } else
1299                         __iowrite32_copy(piobuf, addr, dwords);
1300                 goto done;
1301         }
1302         copy_io(piobuf, ss, len, flush_wc);
1303 done:
1304         if (qp->s_wqe) {
1305                 spin_lock_irqsave(&qp->s_lock, flags);
1306                 ipath_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS);
1307                 spin_unlock_irqrestore(&qp->s_lock, flags);
1308         }
1309         ret = 0;
1310 bail:
1311         return ret;
1312 }
1313
1314 /**
1315  * ipath_verbs_send - send a packet
1316  * @qp: the QP to send on
1317  * @hdr: the packet header
1318  * @hdrwords: the number of 32-bit words in the header
1319  * @ss: the SGE to send
1320  * @len: the length of the packet in bytes
1321  */
1322 int ipath_verbs_send(struct ipath_qp *qp, struct ipath_ib_header *hdr,
1323                      u32 hdrwords, struct ipath_sge_state *ss, u32 len)
1324 {
1325         struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;
1326         u32 plen;
1327         int ret;
1328         u32 dwords = (len + 3) >> 2;
1329
1330         /*
1331          * Calculate the send buffer trigger address.
1332          * The +1 counts for the pbc control dword following the pbc length.
1333          */
1334         plen = hdrwords + dwords + 1;
1335
1336         /*
1337          * VL15 packets (IB_QPT_SMI) will always use PIO, so we
1338          * can defer SDMA restart until link goes ACTIVE without
1339          * worrying about just how we got there.
1340          */
1341         if (qp->ibqp.qp_type == IB_QPT_SMI ||
1342             !(dd->ipath_flags & IPATH_HAS_SEND_DMA))
1343                 ret = ipath_verbs_send_pio(qp, hdr, hdrwords, ss, len,
1344                                            plen, dwords);
1345         else
1346                 ret = ipath_verbs_send_dma(qp, hdr, hdrwords, ss, len,
1347                                            plen, dwords);
1348
1349         return ret;
1350 }
1351
1352 int ipath_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
1353                             u64 *rwords, u64 *spkts, u64 *rpkts,
1354                             u64 *xmit_wait)
1355 {
1356         int ret;
1357
1358         if (!(dd->ipath_flags & IPATH_INITTED)) {
1359                 /* no hardware, freeze, etc. */
1360                 ret = -EINVAL;
1361                 goto bail;
1362         }
1363         *swords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
1364         *rwords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
1365         *spkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
1366         *rpkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
1367         *xmit_wait = ipath_snap_cntr(dd, dd->ipath_cregs->cr_sendstallcnt);
1368
1369         ret = 0;
1370
1371 bail:
1372         return ret;
1373 }
1374
1375 /**
1376  * ipath_get_counters - get various chip counters
1377  * @dd: the infinipath device
1378  * @cntrs: counters are placed here
1379  *
1380  * Return the counters needed by recv_pma_get_portcounters().
1381  */
1382 int ipath_get_counters(struct ipath_devdata *dd,
1383                        struct ipath_verbs_counters *cntrs)
1384 {
1385         struct ipath_cregs const *crp = dd->ipath_cregs;
1386         int ret;
1387
1388         if (!(dd->ipath_flags & IPATH_INITTED)) {
1389                 /* no hardware, freeze, etc. */
1390                 ret = -EINVAL;
1391                 goto bail;
1392         }
1393         cntrs->symbol_error_counter =
1394                 ipath_snap_cntr(dd, crp->cr_ibsymbolerrcnt);
1395         cntrs->link_error_recovery_counter =
1396                 ipath_snap_cntr(dd, crp->cr_iblinkerrrecovcnt);
1397         /*
1398          * The link downed counter counts when the other side downs the
1399          * connection.  We add in the number of times we downed the link
1400          * due to local link integrity errors to compensate.
1401          */
1402         cntrs->link_downed_counter =
1403                 ipath_snap_cntr(dd, crp->cr_iblinkdowncnt);
1404         cntrs->port_rcv_errors =
1405                 ipath_snap_cntr(dd, crp->cr_rxdroppktcnt) +
1406                 ipath_snap_cntr(dd, crp->cr_rcvovflcnt) +
1407                 ipath_snap_cntr(dd, crp->cr_portovflcnt) +
1408                 ipath_snap_cntr(dd, crp->cr_err_rlencnt) +
1409                 ipath_snap_cntr(dd, crp->cr_invalidrlencnt) +
1410                 ipath_snap_cntr(dd, crp->cr_errlinkcnt) +
1411                 ipath_snap_cntr(dd, crp->cr_erricrccnt) +
1412                 ipath_snap_cntr(dd, crp->cr_errvcrccnt) +
1413                 ipath_snap_cntr(dd, crp->cr_errlpcrccnt) +
1414                 ipath_snap_cntr(dd, crp->cr_badformatcnt) +
1415                 dd->ipath_rxfc_unsupvl_errs;
1416         if (crp->cr_rxotherlocalphyerrcnt)
1417                 cntrs->port_rcv_errors +=
1418                         ipath_snap_cntr(dd, crp->cr_rxotherlocalphyerrcnt);
1419         if (crp->cr_rxvlerrcnt)
1420                 cntrs->port_rcv_errors +=
1421                         ipath_snap_cntr(dd, crp->cr_rxvlerrcnt);
1422         cntrs->port_rcv_remphys_errors =
1423                 ipath_snap_cntr(dd, crp->cr_rcvebpcnt);
1424         cntrs->port_xmit_discards = ipath_snap_cntr(dd, crp->cr_unsupvlcnt);
1425         cntrs->port_xmit_data = ipath_snap_cntr(dd, crp->cr_wordsendcnt);
1426         cntrs->port_rcv_data = ipath_snap_cntr(dd, crp->cr_wordrcvcnt);
1427         cntrs->port_xmit_packets = ipath_snap_cntr(dd, crp->cr_pktsendcnt);
1428         cntrs->port_rcv_packets = ipath_snap_cntr(dd, crp->cr_pktrcvcnt);
1429         cntrs->local_link_integrity_errors =
1430                 crp->cr_locallinkintegrityerrcnt ?
1431                 ipath_snap_cntr(dd, crp->cr_locallinkintegrityerrcnt) :
1432                 ((dd->ipath_flags & IPATH_GPIO_ERRINTRS) ?
1433                  dd->ipath_lli_errs : dd->ipath_lli_errors);
1434         cntrs->excessive_buffer_overrun_errors =
1435                 crp->cr_excessbufferovflcnt ?
1436                 ipath_snap_cntr(dd, crp->cr_excessbufferovflcnt) :
1437                 dd->ipath_overrun_thresh_errs;
1438         cntrs->vl15_dropped = crp->cr_vl15droppedpktcnt ?
1439                 ipath_snap_cntr(dd, crp->cr_vl15droppedpktcnt) : 0;
1440
1441         ret = 0;
1442
1443 bail:
1444         return ret;
1445 }
1446
1447 /**
1448  * ipath_ib_piobufavail - callback when a PIO buffer is available
1449  * @arg: the device pointer
1450  *
1451  * This is called from ipath_intr() at interrupt level when a PIO buffer is
1452  * available after ipath_verbs_send() returned an error that no buffers were
1453  * available.  Return 1 if we consumed all the PIO buffers and we still have
1454  * QPs waiting for buffers (for now, just restart the send tasklet and
1455  * return zero).
1456  */
1457 int ipath_ib_piobufavail(struct ipath_ibdev *dev)
1458 {
1459         struct list_head *list;
1460         struct ipath_qp *qplist;
1461         struct ipath_qp *qp;
1462         unsigned long flags;
1463
1464         if (dev == NULL)
1465                 goto bail;
1466
1467         list = &dev->piowait;
1468         qplist = NULL;
1469
1470         spin_lock_irqsave(&dev->pending_lock, flags);
1471         while (!list_empty(list)) {
1472                 qp = list_entry(list->next, struct ipath_qp, piowait);
1473                 list_del_init(&qp->piowait);
1474                 qp->pio_next = qplist;
1475                 qplist = qp;
1476                 atomic_inc(&qp->refcount);
1477         }
1478         spin_unlock_irqrestore(&dev->pending_lock, flags);
1479
1480         while (qplist != NULL) {
1481                 qp = qplist;
1482                 qplist = qp->pio_next;
1483
1484                 spin_lock_irqsave(&qp->s_lock, flags);
1485                 if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)
1486                         ipath_schedule_send(qp);
1487                 spin_unlock_irqrestore(&qp->s_lock, flags);
1488
1489                 /* Notify ipath_destroy_qp() if it is waiting. */
1490                 if (atomic_dec_and_test(&qp->refcount))
1491                         wake_up(&qp->wait);
1492         }
1493
1494 bail:
1495         return 0;
1496 }
1497
1498 static int ipath_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
1499                               struct ib_udata *uhw)
1500 {
1501         struct ipath_ibdev *dev = to_idev(ibdev);
1502
1503         if (uhw->inlen || uhw->outlen)
1504                 return -EINVAL;
1505
1506         memset(props, 0, sizeof(*props));
1507
1508         props->device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
1509                 IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
1510                 IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
1511                 IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE;
1512         props->page_size_cap = PAGE_SIZE;
1513         props->vendor_id =
1514                 IPATH_SRC_OUI_1 << 16 | IPATH_SRC_OUI_2 << 8 | IPATH_SRC_OUI_3;
1515         props->vendor_part_id = dev->dd->ipath_deviceid;
1516         props->hw_ver = dev->dd->ipath_pcirev;
1517
1518         props->sys_image_guid = dev->sys_image_guid;
1519
1520         props->max_mr_size = ~0ull;
1521         props->max_qp = ib_ipath_max_qps;
1522         props->max_qp_wr = ib_ipath_max_qp_wrs;
1523         props->max_sge = ib_ipath_max_sges;
1524         props->max_cq = ib_ipath_max_cqs;
1525         props->max_ah = ib_ipath_max_ahs;
1526         props->max_cqe = ib_ipath_max_cqes;
1527         props->max_mr = dev->lk_table.max;
1528         props->max_fmr = dev->lk_table.max;
1529         props->max_map_per_fmr = 32767;
1530         props->max_pd = ib_ipath_max_pds;
1531         props->max_qp_rd_atom = IPATH_MAX_RDMA_ATOMIC;
1532         props->max_qp_init_rd_atom = 255;
1533         /* props->max_res_rd_atom */
1534         props->max_srq = ib_ipath_max_srqs;
1535         props->max_srq_wr = ib_ipath_max_srq_wrs;
1536         props->max_srq_sge = ib_ipath_max_srq_sges;
1537         /* props->local_ca_ack_delay */
1538         props->atomic_cap = IB_ATOMIC_GLOB;
1539         props->max_pkeys = ipath_get_npkeys(dev->dd);
1540         props->max_mcast_grp = ib_ipath_max_mcast_grps;
1541         props->max_mcast_qp_attach = ib_ipath_max_mcast_qp_attached;
1542         props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
1543                 props->max_mcast_grp;
1544
1545         return 0;
1546 }
1547
1548 const u8 ipath_cvt_physportstate[32] = {
1549         [INFINIPATH_IBCS_LT_STATE_DISABLED] = IB_PHYSPORTSTATE_DISABLED,
1550         [INFINIPATH_IBCS_LT_STATE_LINKUP] = IB_PHYSPORTSTATE_LINKUP,
1551         [INFINIPATH_IBCS_LT_STATE_POLLACTIVE] = IB_PHYSPORTSTATE_POLL,
1552         [INFINIPATH_IBCS_LT_STATE_POLLQUIET] = IB_PHYSPORTSTATE_POLL,
1553         [INFINIPATH_IBCS_LT_STATE_SLEEPDELAY] = IB_PHYSPORTSTATE_SLEEP,
1554         [INFINIPATH_IBCS_LT_STATE_SLEEPQUIET] = IB_PHYSPORTSTATE_SLEEP,
1555         [INFINIPATH_IBCS_LT_STATE_CFGDEBOUNCE] =
1556                 IB_PHYSPORTSTATE_CFG_TRAIN,
1557         [INFINIPATH_IBCS_LT_STATE_CFGRCVFCFG] =
1558                 IB_PHYSPORTSTATE_CFG_TRAIN,
1559         [INFINIPATH_IBCS_LT_STATE_CFGWAITRMT] =
1560                 IB_PHYSPORTSTATE_CFG_TRAIN,
1561         [INFINIPATH_IBCS_LT_STATE_CFGIDLE] = IB_PHYSPORTSTATE_CFG_TRAIN,
1562         [INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN] =
1563                 IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
1564         [INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT] =
1565                 IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
1566         [INFINIPATH_IBCS_LT_STATE_RECOVERIDLE] =
1567                 IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
1568         [0x10] = IB_PHYSPORTSTATE_CFG_TRAIN,
1569         [0x11] = IB_PHYSPORTSTATE_CFG_TRAIN,
1570         [0x12] = IB_PHYSPORTSTATE_CFG_TRAIN,
1571         [0x13] = IB_PHYSPORTSTATE_CFG_TRAIN,
1572         [0x14] = IB_PHYSPORTSTATE_CFG_TRAIN,
1573         [0x15] = IB_PHYSPORTSTATE_CFG_TRAIN,
1574         [0x16] = IB_PHYSPORTSTATE_CFG_TRAIN,
1575         [0x17] = IB_PHYSPORTSTATE_CFG_TRAIN
1576 };
1577
1578 u32 ipath_get_cr_errpkey(struct ipath_devdata *dd)
1579 {
1580         return ipath_read_creg32(dd, dd->ipath_cregs->cr_errpkey);
1581 }
1582
1583 static int ipath_query_port(struct ib_device *ibdev,
1584                             u8 port, struct ib_port_attr *props)
1585 {
1586         struct ipath_ibdev *dev = to_idev(ibdev);
1587         struct ipath_devdata *dd = dev->dd;
1588         enum ib_mtu mtu;
1589         u16 lid = dd->ipath_lid;
1590         u64 ibcstat;
1591
1592         memset(props, 0, sizeof(*props));
1593         props->lid = lid ? lid : be16_to_cpu(IB_LID_PERMISSIVE);
1594         props->lmc = dd->ipath_lmc;
1595         props->sm_lid = dev->sm_lid;
1596         props->sm_sl = dev->sm_sl;
1597         ibcstat = dd->ipath_lastibcstat;
1598         /* map LinkState to IB portinfo values.  */
1599         props->state = ipath_ib_linkstate(dd, ibcstat) + 1;
1600
1601         /* See phys_state_show() */
1602         props->phys_state = /* MEA: assumes shift == 0 */
1603                 ipath_cvt_physportstate[dd->ipath_lastibcstat &
1604                 dd->ibcs_lts_mask];
1605         props->port_cap_flags = dev->port_cap_flags;
1606         props->gid_tbl_len = 1;
1607         props->max_msg_sz = 0x80000000;
1608         props->pkey_tbl_len = ipath_get_npkeys(dd);
1609         props->bad_pkey_cntr = ipath_get_cr_errpkey(dd) -
1610                 dev->z_pkey_violations;
1611         props->qkey_viol_cntr = dev->qkey_violations;
1612         props->active_width = dd->ipath_link_width_active;
1613         /* See rate_show() */
1614         props->active_speed = dd->ipath_link_speed_active;
1615         props->max_vl_num = 1;          /* VLCap = VL0 */
1616         props->init_type_reply = 0;
1617
1618         props->max_mtu = ipath_mtu4096 ? IB_MTU_4096 : IB_MTU_2048;
1619         switch (dd->ipath_ibmtu) {
1620         case 4096:
1621                 mtu = IB_MTU_4096;
1622                 break;
1623         case 2048:
1624                 mtu = IB_MTU_2048;
1625                 break;
1626         case 1024:
1627                 mtu = IB_MTU_1024;
1628                 break;
1629         case 512:
1630                 mtu = IB_MTU_512;
1631                 break;
1632         case 256:
1633                 mtu = IB_MTU_256;
1634                 break;
1635         default:
1636                 mtu = IB_MTU_2048;
1637         }
1638         props->active_mtu = mtu;
1639         props->subnet_timeout = dev->subnet_timeout;
1640
1641         return 0;
1642 }
1643
1644 static int ipath_modify_device(struct ib_device *device,
1645                                int device_modify_mask,
1646                                struct ib_device_modify *device_modify)
1647 {
1648         int ret;
1649
1650         if (device_modify_mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID |
1651                                    IB_DEVICE_MODIFY_NODE_DESC)) {
1652                 ret = -EOPNOTSUPP;
1653                 goto bail;
1654         }
1655
1656         if (device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC)
1657                 memcpy(device->node_desc, device_modify->node_desc, 64);
1658
1659         if (device_modify_mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID)
1660                 to_idev(device)->sys_image_guid =
1661                         cpu_to_be64(device_modify->sys_image_guid);
1662
1663         ret = 0;
1664
1665 bail:
1666         return ret;
1667 }
1668
1669 static int ipath_modify_port(struct ib_device *ibdev,
1670                              u8 port, int port_modify_mask,
1671                              struct ib_port_modify *props)
1672 {
1673         struct ipath_ibdev *dev = to_idev(ibdev);
1674
1675         dev->port_cap_flags |= props->set_port_cap_mask;
1676         dev->port_cap_flags &= ~props->clr_port_cap_mask;
1677         if (port_modify_mask & IB_PORT_SHUTDOWN)
1678                 ipath_set_linkstate(dev->dd, IPATH_IB_LINKDOWN);
1679         if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR)
1680                 dev->qkey_violations = 0;
1681         return 0;
1682 }
1683
1684 static int ipath_query_gid(struct ib_device *ibdev, u8 port,
1685                            int index, union ib_gid *gid)
1686 {
1687         struct ipath_ibdev *dev = to_idev(ibdev);
1688         int ret;
1689
1690         if (index >= 1) {
1691                 ret = -EINVAL;
1692                 goto bail;
1693         }
1694         gid->global.subnet_prefix = dev->gid_prefix;
1695         gid->global.interface_id = dev->dd->ipath_guid;
1696
1697         ret = 0;
1698
1699 bail:
1700         return ret;
1701 }
1702
1703 static struct ib_pd *ipath_alloc_pd(struct ib_device *ibdev,
1704                                     struct ib_ucontext *context,
1705                                     struct ib_udata *udata)
1706 {
1707         struct ipath_ibdev *dev = to_idev(ibdev);
1708         struct ipath_pd *pd;
1709         struct ib_pd *ret;
1710
1711         /*
1712          * This is actually totally arbitrary.  Some correctness tests
1713          * assume there's a maximum number of PDs that can be allocated.
1714          * We don't actually have this limit, but we fail the test if
1715          * we allow allocations of more than we report for this value.
1716          */
1717
1718         pd = kmalloc(sizeof *pd, GFP_KERNEL);
1719         if (!pd) {
1720                 ret = ERR_PTR(-ENOMEM);
1721                 goto bail;
1722         }
1723
1724         spin_lock(&dev->n_pds_lock);
1725         if (dev->n_pds_allocated == ib_ipath_max_pds) {
1726                 spin_unlock(&dev->n_pds_lock);
1727                 kfree(pd);
1728                 ret = ERR_PTR(-ENOMEM);
1729                 goto bail;
1730         }
1731
1732         dev->n_pds_allocated++;
1733         spin_unlock(&dev->n_pds_lock);
1734
1735         /* ib_alloc_pd() will initialize pd->ibpd. */
1736         pd->user = udata != NULL;
1737
1738         ret = &pd->ibpd;
1739
1740 bail:
1741         return ret;
1742 }
1743
1744 static int ipath_dealloc_pd(struct ib_pd *ibpd)
1745 {
1746         struct ipath_pd *pd = to_ipd(ibpd);
1747         struct ipath_ibdev *dev = to_idev(ibpd->device);
1748
1749         spin_lock(&dev->n_pds_lock);
1750         dev->n_pds_allocated--;
1751         spin_unlock(&dev->n_pds_lock);
1752
1753         kfree(pd);
1754
1755         return 0;
1756 }
1757
1758 /**
1759  * ipath_create_ah - create an address handle
1760  * @pd: the protection domain
1761  * @ah_attr: the attributes of the AH
1762  *
1763  * This may be called from interrupt context.
1764  */
1765 static struct ib_ah *ipath_create_ah(struct ib_pd *pd,
1766                                      struct ib_ah_attr *ah_attr)
1767 {
1768         struct ipath_ah *ah;
1769         struct ib_ah *ret;
1770         struct ipath_ibdev *dev = to_idev(pd->device);
1771         unsigned long flags;
1772
1773         /* A multicast address requires a GRH (see ch. 8.4.1). */
1774         if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE &&
1775             ah_attr->dlid != IPATH_PERMISSIVE_LID &&
1776             !(ah_attr->ah_flags & IB_AH_GRH)) {
1777                 ret = ERR_PTR(-EINVAL);
1778                 goto bail;
1779         }
1780
1781         if (ah_attr->dlid == 0) {
1782                 ret = ERR_PTR(-EINVAL);
1783                 goto bail;
1784         }
1785
1786         if (ah_attr->port_num < 1 ||
1787             ah_attr->port_num > pd->device->phys_port_cnt) {
1788                 ret = ERR_PTR(-EINVAL);
1789                 goto bail;
1790         }
1791
1792         ah = kmalloc(sizeof *ah, GFP_ATOMIC);
1793         if (!ah) {
1794                 ret = ERR_PTR(-ENOMEM);
1795                 goto bail;
1796         }
1797
1798         spin_lock_irqsave(&dev->n_ahs_lock, flags);
1799         if (dev->n_ahs_allocated == ib_ipath_max_ahs) {
1800                 spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
1801                 kfree(ah);
1802                 ret = ERR_PTR(-ENOMEM);
1803                 goto bail;
1804         }
1805
1806         dev->n_ahs_allocated++;
1807         spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
1808
1809         /* ib_create_ah() will initialize ah->ibah. */
1810         ah->attr = *ah_attr;
1811         ah->attr.static_rate = ipath_ib_rate_to_mult(ah_attr->static_rate);
1812
1813         ret = &ah->ibah;
1814
1815 bail:
1816         return ret;
1817 }
1818
1819 /**
1820  * ipath_destroy_ah - destroy an address handle
1821  * @ibah: the AH to destroy
1822  *
1823  * This may be called from interrupt context.
1824  */
1825 static int ipath_destroy_ah(struct ib_ah *ibah)
1826 {
1827         struct ipath_ibdev *dev = to_idev(ibah->device);
1828         struct ipath_ah *ah = to_iah(ibah);
1829         unsigned long flags;
1830
1831         spin_lock_irqsave(&dev->n_ahs_lock, flags);
1832         dev->n_ahs_allocated--;
1833         spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
1834
1835         kfree(ah);
1836
1837         return 0;
1838 }
1839
1840 static int ipath_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
1841 {
1842         struct ipath_ah *ah = to_iah(ibah);
1843
1844         *ah_attr = ah->attr;
1845         ah_attr->static_rate = ipath_mult_to_ib_rate(ah->attr.static_rate);
1846
1847         return 0;
1848 }
1849
1850 /**
1851  * ipath_get_npkeys - return the size of the PKEY table for port 0
1852  * @dd: the infinipath device
1853  */
1854 unsigned ipath_get_npkeys(struct ipath_devdata *dd)
1855 {
1856         return ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys);
1857 }
1858
1859 /**
1860  * ipath_get_pkey - return the indexed PKEY from the port PKEY table
1861  * @dd: the infinipath device
1862  * @index: the PKEY index
1863  */
1864 unsigned ipath_get_pkey(struct ipath_devdata *dd, unsigned index)
1865 {
1866         unsigned ret;
1867
1868         /* always a kernel port, no locking needed */
1869         if (index >= ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys))
1870                 ret = 0;
1871         else
1872                 ret = dd->ipath_pd[0]->port_pkeys[index];
1873
1874         return ret;
1875 }
1876
1877 static int ipath_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
1878                             u16 *pkey)
1879 {
1880         struct ipath_ibdev *dev = to_idev(ibdev);
1881         int ret;
1882
1883         if (index >= ipath_get_npkeys(dev->dd)) {
1884                 ret = -EINVAL;
1885                 goto bail;
1886         }
1887
1888         *pkey = ipath_get_pkey(dev->dd, index);
1889         ret = 0;
1890
1891 bail:
1892         return ret;
1893 }
1894
1895 /**
1896  * ipath_alloc_ucontext - allocate a ucontest
1897  * @ibdev: the infiniband device
1898  * @udata: not used by the InfiniPath driver
1899  */
1900
1901 static struct ib_ucontext *ipath_alloc_ucontext(struct ib_device *ibdev,
1902                                                 struct ib_udata *udata)
1903 {
1904         struct ipath_ucontext *context;
1905         struct ib_ucontext *ret;
1906
1907         context = kmalloc(sizeof *context, GFP_KERNEL);
1908         if (!context) {
1909                 ret = ERR_PTR(-ENOMEM);
1910                 goto bail;
1911         }
1912
1913         ret = &context->ibucontext;
1914
1915 bail:
1916         return ret;
1917 }
1918
/**
 * ipath_dealloc_ucontext - free a ucontext
 * @context: the ib_ucontext embedded in the ipath_ucontext to free
 *
 * Always returns 0.
 */
static int ipath_dealloc_ucontext(struct ib_ucontext *context)
{
	struct ipath_ucontext *uctx = to_iucontext(context);

	kfree(uctx);

	return 0;
}
1924
/*
 * Forward declaration: the definition is further down in this file, but
 * ipath_register_ib_device() calls it before then.
 */
static int ipath_verbs_register_sysfs(struct ib_device *dev);
1926
1927 static void __verbs_timer(unsigned long arg)
1928 {
1929         struct ipath_devdata *dd = (struct ipath_devdata *) arg;
1930
1931         /* Handle verbs layer timeouts. */
1932         ipath_ib_timer(dd->verbs_dev);
1933
1934         mod_timer(&dd->verbs_timer, jiffies + 1);
1935 }
1936
1937 static int enable_timer(struct ipath_devdata *dd)
1938 {
1939         /*
1940          * Early chips had a design flaw where the chip and kernel idea
1941          * of the tail register don't always agree, and therefore we won't
1942          * get an interrupt on the next packet received.
1943          * If the board supports per packet receive interrupts, use it.
1944          * Otherwise, the timer function periodically checks for packets
1945          * to cover this case.
1946          * Either way, the timer is needed for verbs layer related
1947          * processing.
1948          */
1949         if (dd->ipath_flags & IPATH_GPIO_INTR) {
1950                 ipath_write_kreg(dd, dd->ipath_kregs->kr_debugportselect,
1951                                  0x2074076542310ULL);
1952                 /* Enable GPIO bit 2 interrupt */
1953                 dd->ipath_gpio_mask |= (u64) (1 << IPATH_GPIO_PORT0_BIT);
1954                 ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
1955                                  dd->ipath_gpio_mask);
1956         }
1957
1958         init_timer(&dd->verbs_timer);
1959         dd->verbs_timer.function = __verbs_timer;
1960         dd->verbs_timer.data = (unsigned long)dd;
1961         dd->verbs_timer.expires = jiffies + 1;
1962         add_timer(&dd->verbs_timer);
1963
1964         return 0;
1965 }
1966
1967 static int disable_timer(struct ipath_devdata *dd)
1968 {
1969         /* Disable GPIO bit 2 interrupt */
1970         if (dd->ipath_flags & IPATH_GPIO_INTR) {
1971                 /* Disable GPIO bit 2 interrupt */
1972                 dd->ipath_gpio_mask &= ~((u64) (1 << IPATH_GPIO_PORT0_BIT));
1973                 ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
1974                                  dd->ipath_gpio_mask);
1975                 /*
1976                  * We might want to undo changes to debugportselect,
1977                  * but how?
1978                  */
1979         }
1980
1981         del_timer_sync(&dd->verbs_timer);
1982
1983         return 0;
1984 }
1985
1986 static int ipath_port_immutable(struct ib_device *ibdev, u8 port_num,
1987                                 struct ib_port_immutable *immutable)
1988 {
1989         struct ib_port_attr attr;
1990         int err;
1991
1992         err = ipath_query_port(ibdev, port_num, &attr);
1993         if (err)
1994                 return err;
1995
1996         immutable->pkey_tbl_len = attr.pkey_tbl_len;
1997         immutable->gid_tbl_len = attr.gid_tbl_len;
1998         immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
1999         immutable->max_mad_size = IB_MGMT_MAD_SIZE;
2000
2001         return 0;
2002 }
2003
2004 /**
2005  * ipath_register_ib_device - register our device with the infiniband core
2006  * @dd: the device data structure
2007  * Return the allocated ipath_ibdev pointer or NULL on error.
2008  */
2009 int ipath_register_ib_device(struct ipath_devdata *dd)
2010 {
2011         struct ipath_verbs_counters cntrs;
2012         struct ipath_ibdev *idev;
2013         struct ib_device *dev;
2014         struct ipath_verbs_txreq *tx;
2015         unsigned i;
2016         int ret;
2017
2018         idev = (struct ipath_ibdev *)ib_alloc_device(sizeof *idev);
2019         if (idev == NULL) {
2020                 ret = -ENOMEM;
2021                 goto bail;
2022         }
2023
2024         dev = &idev->ibdev;
2025
2026         if (dd->ipath_sdma_descq_cnt) {
2027                 tx = kmalloc(dd->ipath_sdma_descq_cnt * sizeof *tx,
2028                              GFP_KERNEL);
2029                 if (tx == NULL) {
2030                         ret = -ENOMEM;
2031                         goto err_tx;
2032                 }
2033         } else
2034                 tx = NULL;
2035         idev->txreq_bufs = tx;
2036
2037         /* Only need to initialize non-zero fields. */
2038         spin_lock_init(&idev->n_pds_lock);
2039         spin_lock_init(&idev->n_ahs_lock);
2040         spin_lock_init(&idev->n_cqs_lock);
2041         spin_lock_init(&idev->n_qps_lock);
2042         spin_lock_init(&idev->n_srqs_lock);
2043         spin_lock_init(&idev->n_mcast_grps_lock);
2044
2045         spin_lock_init(&idev->qp_table.lock);
2046         spin_lock_init(&idev->lk_table.lock);
2047         idev->sm_lid = __constant_be16_to_cpu(IB_LID_PERMISSIVE);
2048         /* Set the prefix to the default value (see ch. 4.1.1) */
2049         idev->gid_prefix = __constant_cpu_to_be64(0xfe80000000000000ULL);
2050
2051         ret = ipath_init_qp_table(idev, ib_ipath_qp_table_size);
2052         if (ret)
2053                 goto err_qp;
2054
2055         /*
2056          * The top ib_ipath_lkey_table_size bits are used to index the
2057          * table.  The lower 8 bits can be owned by the user (copied from
2058          * the LKEY).  The remaining bits act as a generation number or tag.
2059          */
2060         idev->lk_table.max = 1 << ib_ipath_lkey_table_size;
2061         idev->lk_table.table = kzalloc(idev->lk_table.max *
2062                                        sizeof(*idev->lk_table.table),
2063                                        GFP_KERNEL);
2064         if (idev->lk_table.table == NULL) {
2065                 ret = -ENOMEM;
2066                 goto err_lk;
2067         }
2068         INIT_LIST_HEAD(&idev->pending_mmaps);
2069         spin_lock_init(&idev->pending_lock);
2070         idev->mmap_offset = PAGE_SIZE;
2071         spin_lock_init(&idev->mmap_offset_lock);
2072         INIT_LIST_HEAD(&idev->pending[0]);
2073         INIT_LIST_HEAD(&idev->pending[1]);
2074         INIT_LIST_HEAD(&idev->pending[2]);
2075         INIT_LIST_HEAD(&idev->piowait);
2076         INIT_LIST_HEAD(&idev->rnrwait);
2077         INIT_LIST_HEAD(&idev->txreq_free);
2078         idev->pending_index = 0;
2079         idev->port_cap_flags =
2080                 IB_PORT_SYS_IMAGE_GUID_SUP | IB_PORT_CLIENT_REG_SUP;
2081         if (dd->ipath_flags & IPATH_HAS_LINK_LATENCY)
2082                 idev->port_cap_flags |= IB_PORT_LINK_LATENCY_SUP;
2083         idev->pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA;
2084         idev->pma_counter_select[1] = IB_PMA_PORT_RCV_DATA;
2085         idev->pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS;
2086         idev->pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS;
2087         idev->pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT;
2088
2089         /* Snapshot current HW counters to "clear" them. */
2090         ipath_get_counters(dd, &cntrs);
2091         idev->z_symbol_error_counter = cntrs.symbol_error_counter;
2092         idev->z_link_error_recovery_counter =
2093                 cntrs.link_error_recovery_counter;
2094         idev->z_link_downed_counter = cntrs.link_downed_counter;
2095         idev->z_port_rcv_errors = cntrs.port_rcv_errors;
2096         idev->z_port_rcv_remphys_errors =
2097                 cntrs.port_rcv_remphys_errors;
2098         idev->z_port_xmit_discards = cntrs.port_xmit_discards;
2099         idev->z_port_xmit_data = cntrs.port_xmit_data;
2100         idev->z_port_rcv_data = cntrs.port_rcv_data;
2101         idev->z_port_xmit_packets = cntrs.port_xmit_packets;
2102         idev->z_port_rcv_packets = cntrs.port_rcv_packets;
2103         idev->z_local_link_integrity_errors =
2104                 cntrs.local_link_integrity_errors;
2105         idev->z_excessive_buffer_overrun_errors =
2106                 cntrs.excessive_buffer_overrun_errors;
2107         idev->z_vl15_dropped = cntrs.vl15_dropped;
2108
2109         for (i = 0; i < dd->ipath_sdma_descq_cnt; i++, tx++)
2110                 list_add(&tx->txreq.list, &idev->txreq_free);
2111
2112         /*
2113          * The system image GUID is supposed to be the same for all
2114          * IB HCAs in a single system but since there can be other
2115          * device types in the system, we can't be sure this is unique.
2116          */
2117         if (!sys_image_guid)
2118                 sys_image_guid = dd->ipath_guid;
2119         idev->sys_image_guid = sys_image_guid;
2120         idev->ib_unit = dd->ipath_unit;
2121         idev->dd = dd;
2122
2123         strlcpy(dev->name, "ipath%d", IB_DEVICE_NAME_MAX);
2124         dev->owner = THIS_MODULE;
2125         dev->node_guid = dd->ipath_guid;
2126         dev->uverbs_abi_ver = IPATH_UVERBS_ABI_VERSION;
2127         dev->uverbs_cmd_mask =
2128                 (1ull << IB_USER_VERBS_CMD_GET_CONTEXT)         |
2129                 (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)        |
2130                 (1ull << IB_USER_VERBS_CMD_QUERY_PORT)          |
2131                 (1ull << IB_USER_VERBS_CMD_ALLOC_PD)            |
2132                 (1ull << IB_USER_VERBS_CMD_DEALLOC_PD)          |
2133                 (1ull << IB_USER_VERBS_CMD_CREATE_AH)           |
2134                 (1ull << IB_USER_VERBS_CMD_DESTROY_AH)          |
2135                 (1ull << IB_USER_VERBS_CMD_QUERY_AH)            |
2136                 (1ull << IB_USER_VERBS_CMD_REG_MR)              |
2137                 (1ull << IB_USER_VERBS_CMD_DEREG_MR)            |
2138                 (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
2139                 (1ull << IB_USER_VERBS_CMD_CREATE_CQ)           |
2140                 (1ull << IB_USER_VERBS_CMD_RESIZE_CQ)           |
2141                 (1ull << IB_USER_VERBS_CMD_DESTROY_CQ)          |
2142                 (1ull << IB_USER_VERBS_CMD_POLL_CQ)             |
2143                 (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ)       |
2144                 (1ull << IB_USER_VERBS_CMD_CREATE_QP)           |
2145                 (1ull << IB_USER_VERBS_CMD_QUERY_QP)            |
2146                 (1ull << IB_USER_VERBS_CMD_MODIFY_QP)           |
2147                 (1ull << IB_USER_VERBS_CMD_DESTROY_QP)          |
2148                 (1ull << IB_USER_VERBS_CMD_POST_SEND)           |
2149                 (1ull << IB_USER_VERBS_CMD_POST_RECV)           |
2150                 (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)        |
2151                 (1ull << IB_USER_VERBS_CMD_DETACH_MCAST)        |
2152                 (1ull << IB_USER_VERBS_CMD_CREATE_SRQ)          |
2153                 (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)          |
2154                 (1ull << IB_USER_VERBS_CMD_QUERY_SRQ)           |
2155                 (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ)         |
2156                 (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV);
2157         dev->node_type = RDMA_NODE_IB_CA;
2158         dev->phys_port_cnt = 1;
2159         dev->num_comp_vectors = 1;
2160         dev->dma_device = &dd->pcidev->dev;
2161         dev->query_device = ipath_query_device;
2162         dev->modify_device = ipath_modify_device;
2163         dev->query_port = ipath_query_port;
2164         dev->modify_port = ipath_modify_port;
2165         dev->query_pkey = ipath_query_pkey;
2166         dev->query_gid = ipath_query_gid;
2167         dev->alloc_ucontext = ipath_alloc_ucontext;
2168         dev->dealloc_ucontext = ipath_dealloc_ucontext;
2169         dev->alloc_pd = ipath_alloc_pd;
2170         dev->dealloc_pd = ipath_dealloc_pd;
2171         dev->create_ah = ipath_create_ah;
2172         dev->destroy_ah = ipath_destroy_ah;
2173         dev->query_ah = ipath_query_ah;
2174         dev->create_srq = ipath_create_srq;
2175         dev->modify_srq = ipath_modify_srq;
2176         dev->query_srq = ipath_query_srq;
2177         dev->destroy_srq = ipath_destroy_srq;
2178         dev->create_qp = ipath_create_qp;
2179         dev->modify_qp = ipath_modify_qp;
2180         dev->query_qp = ipath_query_qp;
2181         dev->destroy_qp = ipath_destroy_qp;
2182         dev->post_send = ipath_post_send;
2183         dev->post_recv = ipath_post_receive;
2184         dev->post_srq_recv = ipath_post_srq_receive;
2185         dev->create_cq = ipath_create_cq;
2186         dev->destroy_cq = ipath_destroy_cq;
2187         dev->resize_cq = ipath_resize_cq;
2188         dev->poll_cq = ipath_poll_cq;
2189         dev->req_notify_cq = ipath_req_notify_cq;
2190         dev->get_dma_mr = ipath_get_dma_mr;
2191         dev->reg_phys_mr = ipath_reg_phys_mr;
2192         dev->reg_user_mr = ipath_reg_user_mr;
2193         dev->dereg_mr = ipath_dereg_mr;
2194         dev->alloc_fmr = ipath_alloc_fmr;
2195         dev->map_phys_fmr = ipath_map_phys_fmr;
2196         dev->unmap_fmr = ipath_unmap_fmr;
2197         dev->dealloc_fmr = ipath_dealloc_fmr;
2198         dev->attach_mcast = ipath_multicast_attach;
2199         dev->detach_mcast = ipath_multicast_detach;
2200         dev->process_mad = ipath_process_mad;
2201         dev->mmap = ipath_mmap;
2202         dev->dma_ops = &ipath_dma_mapping_ops;
2203         dev->get_port_immutable = ipath_port_immutable;
2204
2205         snprintf(dev->node_desc, sizeof(dev->node_desc),
2206                  IPATH_IDSTR " %s", init_utsname()->nodename);
2207
2208         ret = ib_register_device(dev, NULL);
2209         if (ret)
2210                 goto err_reg;
2211
2212         ret = ipath_verbs_register_sysfs(dev);
2213         if (ret)
2214                 goto err_class;
2215
2216         enable_timer(dd);
2217
2218         goto bail;
2219
2220 err_class:
2221         ib_unregister_device(dev);
2222 err_reg:
2223         kfree(idev->lk_table.table);
2224 err_lk:
2225         kfree(idev->qp_table.table);
2226 err_qp:
2227         kfree(idev->txreq_bufs);
2228 err_tx:
2229         ib_dealloc_device(dev);
2230         ipath_dev_err(dd, "cannot register verbs: %d!\n", -ret);
2231         idev = NULL;
2232
2233 bail:
2234         dd->verbs_dev = idev;
2235         return ret;
2236 }
2237
2238 void ipath_unregister_ib_device(struct ipath_ibdev *dev)
2239 {
2240         struct ib_device *ibdev = &dev->ibdev;
2241         u32 qps_inuse;
2242
2243         ib_unregister_device(ibdev);
2244
2245         disable_timer(dev->dd);
2246
2247         if (!list_empty(&dev->pending[0]) ||
2248             !list_empty(&dev->pending[1]) ||
2249             !list_empty(&dev->pending[2]))
2250                 ipath_dev_err(dev->dd, "pending list not empty!\n");
2251         if (!list_empty(&dev->piowait))
2252                 ipath_dev_err(dev->dd, "piowait list not empty!\n");
2253         if (!list_empty(&dev->rnrwait))
2254                 ipath_dev_err(dev->dd, "rnrwait list not empty!\n");
2255         if (!ipath_mcast_tree_empty())
2256                 ipath_dev_err(dev->dd, "multicast table memory leak!\n");
2257         /*
2258          * Note that ipath_unregister_ib_device() can be called before all
2259          * the QPs are destroyed!
2260          */
2261         qps_inuse = ipath_free_all_qps(&dev->qp_table);
2262         if (qps_inuse)
2263                 ipath_dev_err(dev->dd, "QP memory leak! %u still in use\n",
2264                         qps_inuse);
2265         kfree(dev->qp_table.table);
2266         kfree(dev->lk_table.table);
2267         kfree(dev->txreq_bufs);
2268         ib_dealloc_device(ibdev);
2269 }
2270
2271 static ssize_t show_rev(struct device *device, struct device_attribute *attr,
2272                         char *buf)
2273 {
2274         struct ipath_ibdev *dev =
2275                 container_of(device, struct ipath_ibdev, ibdev.dev);
2276
2277         return sprintf(buf, "%x\n", dev->dd->ipath_pcirev);
2278 }
2279
2280 static ssize_t show_hca(struct device *device, struct device_attribute *attr,
2281                         char *buf)
2282 {
2283         struct ipath_ibdev *dev =
2284                 container_of(device, struct ipath_ibdev, ibdev.dev);
2285         int ret;
2286
2287         ret = dev->dd->ipath_f_get_boardname(dev->dd, buf, 128);
2288         if (ret < 0)
2289                 goto bail;
2290         strcat(buf, "\n");
2291         ret = strlen(buf);
2292
2293 bail:
2294         return ret;
2295 }
2296
2297 static ssize_t show_stats(struct device *device, struct device_attribute *attr,
2298                           char *buf)
2299 {
2300         struct ipath_ibdev *dev =
2301                 container_of(device, struct ipath_ibdev, ibdev.dev);
2302         int i;
2303         int len;
2304
2305         len = sprintf(buf,
2306                       "RC resends  %d\n"
2307                       "RC no QACK  %d\n"
2308                       "RC ACKs     %d\n"
2309                       "RC SEQ NAKs %d\n"
2310                       "RC RDMA seq %d\n"
2311                       "RC RNR NAKs %d\n"
2312                       "RC OTH NAKs %d\n"
2313                       "RC timeouts %d\n"
2314                       "RC RDMA dup %d\n"
2315                       "piobuf wait %d\n"
2316                       "unaligned   %d\n"
2317                       "PKT drops   %d\n"
2318                       "WQE errs    %d\n",
2319                       dev->n_rc_resends, dev->n_rc_qacks, dev->n_rc_acks,
2320                       dev->n_seq_naks, dev->n_rdma_seq, dev->n_rnr_naks,
2321                       dev->n_other_naks, dev->n_timeouts,
2322                       dev->n_rdma_dup_busy, dev->n_piowait, dev->n_unaligned,
2323                       dev->n_pkt_drops, dev->n_wqe_errs);
2324         for (i = 0; i < ARRAY_SIZE(dev->opstats); i++) {
2325                 const struct ipath_opcode_stats *si = &dev->opstats[i];
2326
2327                 if (!si->n_packets && !si->n_bytes)
2328                         continue;
2329                 len += sprintf(buf + len, "%02x %llu/%llu\n", i,
2330                                (unsigned long long) si->n_packets,
2331                                (unsigned long long) si->n_bytes);
2332         }
2333         return len;
2334 }
2335
2336 static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
2337 static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
2338 static DEVICE_ATTR(board_id, S_IRUGO, show_hca, NULL);
2339 static DEVICE_ATTR(stats, S_IRUGO, show_stats, NULL);
2340
2341 static struct device_attribute *ipath_class_attributes[] = {
2342         &dev_attr_hw_rev,
2343         &dev_attr_hca_type,
2344         &dev_attr_board_id,
2345         &dev_attr_stats
2346 };
2347
2348 static int ipath_verbs_register_sysfs(struct ib_device *dev)
2349 {
2350         int i;
2351         int ret;
2352
2353         for (i = 0; i < ARRAY_SIZE(ipath_class_attributes); ++i) {
2354                 ret = device_create_file(&dev->dev,
2355                                        ipath_class_attributes[i]);
2356                 if (ret)
2357                         goto bail;
2358         }
2359         return 0;
2360 bail:
2361         for (i = 0; i < ARRAY_SIZE(ipath_class_attributes); ++i)
2362                 device_remove_file(&dev->dev, ipath_class_attributes[i]);
2363         return ret;
2364 }