Merge git://oss.sgi.com:8090/xfs/xfs-2.6
[linux-drm-fsl-dcu.git] / net / sunrpc / svcsock.c
index ff1f8bf680aab43e741249912409b09013ba6fad..2fd0ba2b20dfd5e149d680e0e0abda687fc6229e 100644 (file)
  *     providing that certain rules are followed:
  *
  *     SK_CONN, SK_DATA, can be set or cleared at any time.
- *             after a set, svc_sock_enqueue must be called.   
+ *             after a set, svc_sock_enqueue must be called.
  *             after a clear, the socket must be read/accepted
  *              if this succeeds, it must be set again.
  *     SK_CLOSE can set at any time. It is never cleared.
+ *      sk_inuse contains a bias of '1' until SK_DEAD is set.
+ *             so when sk_inuse hits zero, we know the socket is dead
+ *             and no-one is using it.
+ *      SK_DEAD can only be set while SK_BUSY is held which ensures
+ *             no other thread will be using the socket or will try to
+ *            set SK_DEAD.
  *
  */
 
@@ -70,6 +76,7 @@
 
 static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *,
                                         int *errp, int pmap_reg);
+static void            svc_delete_socket(struct svc_sock *svsk);
 static void            svc_udp_data_ready(struct sock *, int);
 static int             svc_udp_recvfrom(struct svc_rqst *);
 static int             svc_udp_sendto(struct svc_rqst *);
@@ -245,7 +252,7 @@ svc_sock_enqueue(struct svc_sock *svsk)
                        svsk->sk_sk, rqstp);
                svc_thread_dequeue(pool, rqstp);
                if (rqstp->rq_sock)
-                       printk(KERN_ERR 
+                       printk(KERN_ERR
                                "svc_sock_enqueue: server %p, rq_sock=%p!\n",
                                rqstp, rqstp->rq_sock);
                rqstp->rq_sock = svsk;
@@ -329,8 +336,9 @@ void svc_reserve(struct svc_rqst *rqstp, int space)
 static inline void
 svc_sock_put(struct svc_sock *svsk)
 {
-       if (atomic_dec_and_test(&svsk->sk_inuse) &&
-                       test_bit(SK_DEAD, &svsk->sk_flags)) {
+       if (atomic_dec_and_test(&svsk->sk_inuse)) {
+               BUG_ON(! test_bit(SK_DEAD, &svsk->sk_flags));
+
                dprintk("svc: releasing dead socket\n");
                if (svsk->sk_sock->file)
                        sockfd_put(svsk->sk_sock);
@@ -476,7 +484,7 @@ svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr)
        if (xdr->tail[0].iov_len) {
                result = kernel_sendpage(sock, rqstp->rq_respages[0],
                                             ((unsigned long)xdr->tail[0].iov_base)
-                                               & (PAGE_SIZE-1),
+                                               & (PAGE_SIZE-1),
                                             xdr->tail[0].iov_len, 0);
 
                if (result > 0)
@@ -520,7 +528,7 @@ svc_sock_names(char *buf, struct svc_serv *serv, char *toclose)
 
        if (!serv)
                return 0;
-       spin_lock(&serv->sv_lock);
+       spin_lock_bh(&serv->sv_lock);
        list_for_each_entry(svsk, &serv->sv_permsocks, sk_list) {
                int onelen = one_sock_name(buf+len, svsk);
                if (toclose && strcmp(toclose, buf+len) == 0)
@@ -528,12 +536,12 @@ svc_sock_names(char *buf, struct svc_serv *serv, char *toclose)
                else
                        len += onelen;
        }
-       spin_unlock(&serv->sv_lock);
+       spin_unlock_bh(&serv->sv_lock);
        if (closesk)
                /* Should unregister with portmap, but you cannot
                 * unregister just one protocol...
                 */
-               svc_delete_socket(closesk);
+               svc_close_socket(closesk);
        else if (toclose)
                return -ENOENT;
        return len;
@@ -683,6 +691,11 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
                return svc_deferred_recv(rqstp);
        }
 
+       if (test_bit(SK_CLOSE, &svsk->sk_flags)) {
+               svc_delete_socket(svsk);
+               return 0;
+       }
+
        clear_bit(SK_DATA, &svsk->sk_flags);
        while ((skb = skb_recv_datagram(svsk->sk_sk, 0, 1, &err)) == NULL) {
                if (err == -EAGAIN) {
@@ -698,7 +711,7 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
                tv.tv_sec = xtime.tv_sec;
                tv.tv_usec = xtime.tv_nsec / NSEC_PER_USEC;
                skb_set_timestamp(skb, &tv);
-               /* Don't enable netstamp, sunrpc doesn't 
+               /* Don't enable netstamp, sunrpc doesn't
                   need that much accuracy */
        }
        skb_get_timestamp(skb, &svsk->sk_sk->sk_stamp);
@@ -730,7 +743,7 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
                        return 0;
                }
                local_bh_enable();
-               skb_free_datagram(svsk->sk_sk, skb); 
+               skb_free_datagram(svsk->sk_sk, skb);
        } else {
                /* we can use it in-place */
                rqstp->rq_arg.head[0].iov_base = skb->data + sizeof(struct udphdr);
@@ -781,7 +794,7 @@ svc_udp_init(struct svc_sock *svsk)
        svsk->sk_sendto = svc_udp_sendto;
 
        /* initialise setting must have enough space to
-        * receive and respond to one request.  
+        * receive and respond to one request.
         * svc_udp_recvfrom will re-adjust if necessary
         */
        svc_sock_setbufsize(svsk->sk_sock,
@@ -910,7 +923,7 @@ svc_tcp_accept(struct svc_sock *svsk)
        if (ntohs(sin.sin_port) >= 1024) {
                dprintk(KERN_WARNING
                        "%s: connect from unprivileged port: %u.%u.%u.%u:%d\n",
-                       serv->sv_name, 
+                       serv->sv_name,
                        NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
        }
 
@@ -1025,7 +1038,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
                 * on the number of threads which will access the socket.
                 *
                 * rcvbuf just needs to be able to hold a few requests.
-                * Normally they will be removed from the queue 
+                * Normally they will be removed from the queue
                 * as soon a a complete request arrives.
                 */
                svc_sock_setbufsize(svsk->sk_sock,
@@ -1050,7 +1063,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
 
                if (len < want) {
                        dprintk("svc: short recvfrom while reading record length (%d of %lu)\n",
-                               len, want);
+                               len, want);
                        svc_sock_received(svsk);
                        return -EAGAIN; /* record header not complete */
                }
@@ -1176,7 +1189,8 @@ svc_tcp_sendto(struct svc_rqst *rqstp)
                       rqstp->rq_sock->sk_server->sv_name,
                       (sent<0)?"got error":"sent only",
                       sent, xbufp->len);
-               svc_delete_socket(rqstp->rq_sock);
+               set_bit(SK_CLOSE, &rqstp->rq_sock->sk_flags);
+               svc_sock_enqueue(rqstp->rq_sock);
                sent = -EAGAIN;
        }
        return sent;
@@ -1207,7 +1221,7 @@ svc_tcp_init(struct svc_sock *svsk)
                tp->nonagle = 1;        /* disable Nagle's algorithm */
 
                /* initialise setting must have enough space to
-                * receive and respond to one request.  
+                * receive and respond to one request.
                 * svc_tcp_recvfrom will re-adjust if necessary
                 */
                svc_sock_setbufsize(svsk->sk_sock,
@@ -1216,7 +1230,7 @@ svc_tcp_init(struct svc_sock *svsk)
 
                set_bit(SK_CHNGBUF, &svsk->sk_flags);
                set_bit(SK_DATA, &svsk->sk_flags);
-               if (sk->sk_state != TCP_ESTABLISHED) 
+               if (sk->sk_state != TCP_ESTABLISHED)
                        set_bit(SK_CLOSE, &svsk->sk_flags);
        }
 }
@@ -1232,7 +1246,7 @@ svc_sock_update_bufs(struct svc_serv *serv)
 
        spin_lock_bh(&serv->sv_lock);
        list_for_each(le, &serv->sv_permsocks) {
-               struct svc_sock *svsk = 
+               struct svc_sock *svsk =
                        list_entry(le, struct svc_sock, sk_list);
                set_bit(SK_CHNGBUF, &svsk->sk_flags);
        }
@@ -1264,11 +1278,11 @@ svc_recv(struct svc_rqst *rqstp, long timeout)
                rqstp, timeout);
 
        if (rqstp->rq_sock)
-               printk(KERN_ERR 
+               printk(KERN_ERR
                        "svc_recv: service %p, socket not NULL!\n",
                         rqstp);
        if (waitqueue_active(&rqstp->rq_wait))
-               printk(KERN_ERR 
+               printk(KERN_ERR
                        "svc_recv: service %p, wait queue active!\n",
                         rqstp);
 
@@ -1357,7 +1371,7 @@ svc_recv(struct svc_rqst *rqstp, long timeout)
        return len;
 }
 
-/* 
+/*
  * Drop request
  */
 void
@@ -1495,7 +1509,7 @@ svc_setup_socket(struct svc_serv *serv, struct socket *sock,
        svsk->sk_odata = inet->sk_data_ready;
        svsk->sk_owspace = inet->sk_write_space;
        svsk->sk_server = serv;
-       atomic_set(&svsk->sk_inuse, 0);
+       atomic_set(&svsk->sk_inuse, 1);
        svsk->sk_lastrecv = get_seconds();
        spin_lock_init(&svsk->sk_defer_lock);
        INIT_LIST_HEAD(&svsk->sk_deferred);
@@ -1618,7 +1632,7 @@ bummer:
 /*
  * Remove a dead socket
  */
-void
+static void
 svc_delete_socket(struct svc_sock *svsk)
 {
        struct svc_serv *serv;
@@ -1637,23 +1651,33 @@ svc_delete_socket(struct svc_sock *svsk)
 
        if (!test_and_set_bit(SK_DETACHED, &svsk->sk_flags))
                list_del_init(&svsk->sk_list);
-       /*
+       /*
         * We used to delete the svc_sock from whichever list
         * it's sk_ready node was on, but we don't actually
         * need to.  This is because the only time we're called
         * while still attached to a queue, the queue itself
         * is about to be destroyed (in svc_destroy).
         */
-       if (!test_and_set_bit(SK_DEAD, &svsk->sk_flags))
+       if (!test_and_set_bit(SK_DEAD, &svsk->sk_flags)) {
+               BUG_ON(atomic_read(&svsk->sk_inuse)<2);
+               atomic_dec(&svsk->sk_inuse);
                if (test_bit(SK_TEMP, &svsk->sk_flags))
                        serv->sv_tmpcnt--;
+       }
 
-       /* This atomic_inc should be needed - svc_delete_socket
-        * should have the semantic of dropping a reference.
-        * But it doesn't yet....
-        */
-       atomic_inc(&svsk->sk_inuse);
        spin_unlock_bh(&serv->sv_lock);
+}
+
+void svc_close_socket(struct svc_sock *svsk)
+{
+       set_bit(SK_CLOSE, &svsk->sk_flags);
+       if (test_and_set_bit(SK_BUSY, &svsk->sk_flags))
+               /* someone else will have to effect the close */
+               return;
+
+       atomic_inc(&svsk->sk_inuse);
+       svc_delete_socket(svsk);
+       clear_bit(SK_BUSY, &svsk->sk_flags);
        svc_sock_put(svsk);
 }
 
@@ -1673,7 +1697,7 @@ svc_makesock(struct svc_serv *serv, int protocol, unsigned short port)
 }
 
 /*
- * Handle defer and revisit of requests 
+ * Handle defer and revisit of requests
  */
 
 static void svc_revisit(struct cache_deferred_req *dreq, int too_many)
@@ -1752,7 +1776,7 @@ static int svc_deferred_recv(struct svc_rqst *rqstp)
 static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk)
 {
        struct svc_deferred_req *dr = NULL;
-       
+
        if (!test_bit(SK_DEFERRED, &svsk->sk_flags))
                return NULL;
        spin_lock_bh(&svsk->sk_defer_lock);