[PATCH] knfsd: ratelimit some nfsd messages that are triggered by external events
[linux-drm-fsl-dcu.git] / net / sunrpc / svcsock.c
index cba85d195222e9603be9c29c68628b113fd5582a..ff1f8bf680aab43e741249912409b09013ba6fad 100644 (file)
@@ -32,6 +32,7 @@
 #include <linux/netdevice.h>
 #include <linux/skbuff.h>
 #include <linux/file.h>
+#include <linux/freezer.h>
 #include <net/sock.h>
 #include <net/checksum.h>
 #include <net/ip.h>
@@ -84,6 +85,35 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req);
  */
 static int svc_conn_age_period = 6*60;
 
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+static struct lock_class_key svc_key[2];
+static struct lock_class_key svc_slock_key[2];
+
+static inline void svc_reclassify_socket(struct socket *sock)
+{
+       struct sock *sk = sock->sk;
+       BUG_ON(sk->sk_lock.owner != NULL);
+       switch (sk->sk_family) {
+       case AF_INET:
+               sock_lock_init_class_and_name(sk, "slock-AF_INET-NFSD",
+                   &svc_slock_key[0], "sk_lock-AF_INET-NFSD", &svc_key[0]);
+               break;
+
+       case AF_INET6:
+               sock_lock_init_class_and_name(sk, "slock-AF_INET6-NFSD",
+                   &svc_slock_key[1], "sk_lock-AF_INET6-NFSD", &svc_key[1]);
+               break;
+
+       default:
+               BUG();
+       }
+}
+#else
+static inline void svc_reclassify_socket(struct socket *sock)
+{
+}
+#endif
+
 /*
  * Queue up an idle server thread.  Must have pool->sp_lock held.
  * Note: this is really a stack rather than a queue, so that we only
@@ -192,13 +222,13 @@ svc_sock_enqueue(struct svc_sock *svsk)
        svsk->sk_pool = pool;
 
        set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
-       if (((atomic_read(&svsk->sk_reserved) + serv->sv_bufsz)*2
+       if (((atomic_read(&svsk->sk_reserved) + serv->sv_max_mesg)*2
             > svc_sock_wspace(svsk))
            && !test_bit(SK_CLOSE, &svsk->sk_flags)
            && !test_bit(SK_CONN, &svsk->sk_flags)) {
                /* Don't enqueue while not enough space for reply */
                dprintk("svc: socket %p  no space, %d*2 > %ld, not enqueued\n",
-                       svsk->sk_sk, atomic_read(&svsk->sk_reserved)+serv->sv_bufsz,
+                       svsk->sk_sk, atomic_read(&svsk->sk_reserved)+serv->sv_max_mesg,
                        svc_sock_wspace(svsk));
                svsk->sk_pool = NULL;
                clear_bit(SK_BUSY, &svsk->sk_flags);
@@ -220,7 +250,7 @@ svc_sock_enqueue(struct svc_sock *svsk)
                                rqstp, rqstp->rq_sock);
                rqstp->rq_sock = svsk;
                atomic_inc(&svsk->sk_inuse);
-               rqstp->rq_reserved = serv->sv_bufsz;
+               rqstp->rq_reserved = serv->sv_max_mesg;
                atomic_add(rqstp->rq_reserved, &svsk->sk_reserved);
                BUG_ON(svsk->sk_pool != pool);
                wake_up(&rqstp->rq_wait);
@@ -299,9 +329,15 @@ void svc_reserve(struct svc_rqst *rqstp, int space)
 static inline void
 svc_sock_put(struct svc_sock *svsk)
 {
-       if (atomic_dec_and_test(&svsk->sk_inuse) && test_bit(SK_DEAD, &svsk->sk_flags)) {
+       if (atomic_dec_and_test(&svsk->sk_inuse) &&
+                       test_bit(SK_DEAD, &svsk->sk_flags)) {
                dprintk("svc: releasing dead socket\n");
-               sock_release(svsk->sk_sock);
+               if (svsk->sk_sock->file)
+                       sockfd_put(svsk->sk_sock);
+               else
+                       sock_release(svsk->sk_sock);
+               if (svsk->sk_info_authunix != NULL)
+                       svcauth_unix_info_release(svsk->sk_info_authunix);
                kfree(svsk);
        }
 }
@@ -313,7 +349,7 @@ svc_sock_release(struct svc_rqst *rqstp)
 
        svc_release_skb(rqstp);
 
-       svc_free_allpages(rqstp);
+       svc_free_res_pages(rqstp);
        rqstp->rq_res.page_len = 0;
        rqstp->rq_res.page_base = 0;
 
@@ -412,7 +448,8 @@ svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr)
        /* send head */
        if (slen == xdr->head[0].iov_len)
                flags = 0;
-       len = kernel_sendpage(sock, rqstp->rq_respages[0], 0, xdr->head[0].iov_len, flags);
+       len = kernel_sendpage(sock, rqstp->rq_respages[0], 0,
+                                 xdr->head[0].iov_len, flags);
        if (len != xdr->head[0].iov_len)
                goto out;
        slen -= xdr->head[0].iov_len;
@@ -437,8 +474,9 @@ svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr)
        }
        /* send tail */
        if (xdr->tail[0].iov_len) {
-               result = kernel_sendpage(sock, rqstp->rq_respages[rqstp->rq_restailpage],
-                                            ((unsigned long)xdr->tail[0].iov_base)& (PAGE_SIZE-1),
+               result = kernel_sendpage(sock, rqstp->rq_respages[0],
+                                            ((unsigned long)xdr->tail[0].iov_base)
+                                               & (PAGE_SIZE-1),
                                             xdr->tail[0].iov_len, 0);
 
                if (result > 0)
@@ -492,7 +530,12 @@ svc_sock_names(char *buf, struct svc_serv *serv, char *toclose)
        }
        spin_unlock(&serv->sv_lock);
        if (closesk)
+               /* Should unregister with portmap, but you cannot
+                * unregister just one protocol...
+                */
                svc_delete_socket(closesk);
+       else if (toclose)
+               return -ENOENT;
        return len;
 }
 EXPORT_SYMBOL(svc_sock_names);
@@ -632,8 +675,8 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
             * which will access the socket.
             */
            svc_sock_setbufsize(svsk->sk_sock,
-                               (serv->sv_nrthreads+3) * serv->sv_bufsz,
-                               (serv->sv_nrthreads+3) * serv->sv_bufsz);
+                               (serv->sv_nrthreads+3) * serv->sv_max_mesg,
+                               (serv->sv_nrthreads+3) * serv->sv_max_mesg);
 
        if ((rqstp->rq_deferred = svc_deferred_dequeue(svsk))) {
                svc_sock_received(svsk);
@@ -703,9 +746,11 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
        if (len <= rqstp->rq_arg.head[0].iov_len) {
                rqstp->rq_arg.head[0].iov_len = len;
                rqstp->rq_arg.page_len = 0;
+               rqstp->rq_respages = rqstp->rq_pages+1;
        } else {
                rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len;
-               rqstp->rq_argused += (rqstp->rq_arg.page_len + PAGE_SIZE - 1)/ PAGE_SIZE;
+               rqstp->rq_respages = rqstp->rq_pages + 1 +
+                       (rqstp->rq_arg.page_len + PAGE_SIZE - 1)/ PAGE_SIZE;
        }
 
        if (serv->sv_stats)
@@ -740,8 +785,8 @@ svc_udp_init(struct svc_sock *svsk)
         * svc_udp_recvfrom will re-adjust if necessary
         */
        svc_sock_setbufsize(svsk->sk_sock,
-                           3 * svsk->sk_server->sv_bufsz,
-                           3 * svsk->sk_server->sv_bufsz);
+                           3 * svsk->sk_server->sv_max_mesg,
+                           3 * svsk->sk_server->sv_max_mesg);
 
        set_bit(SK_DATA, &svsk->sk_flags); /* might have come in before data_ready set up */
        set_bit(SK_CHNGBUF, &svsk->sk_flags);
@@ -946,7 +991,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
        struct svc_sock *svsk = rqstp->rq_sock;
        struct svc_serv *serv = svsk->sk_server;
        int             len;
-       struct kvec vec[RPCSVC_MAXPAGES];
+       struct kvec *vec;
        int pnum, vlen;
 
        dprintk("svc: tcp_recv %p data %d conn %d close %d\n",
@@ -964,7 +1009,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
                return 0;
        }
 
-       if (test_bit(SK_CONN, &svsk->sk_flags)) {
+       if (svsk->sk_sk->sk_state == TCP_LISTEN) {
                svc_tcp_accept(svsk);
                svc_sock_received(svsk);
                return 0;
@@ -984,8 +1029,8 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
                 * as soon a a complete request arrives.
                 */
                svc_sock_setbufsize(svsk->sk_sock,
-                                   (serv->sv_nrthreads+3) * serv->sv_bufsz,
-                                   3 * serv->sv_bufsz);
+                                   (serv->sv_nrthreads+3) * serv->sv_max_mesg,
+                                   3 * serv->sv_max_mesg);
 
        clear_bit(SK_DATA, &svsk->sk_flags);
 
@@ -1017,15 +1062,19 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
                         *  bit set in the fragment length header.
                         *  But apparently no known nfs clients send fragmented
                         *  records. */
-                       printk(KERN_NOTICE "RPC: bad TCP reclen 0x%08lx (non-terminal)\n",
-                              (unsigned long) svsk->sk_reclen);
+                       if (net_ratelimit())
+                               printk(KERN_NOTICE "RPC: bad TCP reclen 0x%08lx"
+                                      " (non-terminal)\n",
+                                      (unsigned long) svsk->sk_reclen);
                        goto err_delete;
                }
                svsk->sk_reclen &= 0x7fffffff;
                dprintk("svc: TCP record, %d bytes\n", svsk->sk_reclen);
-               if (svsk->sk_reclen > serv->sv_bufsz) {
-                       printk(KERN_NOTICE "RPC: bad TCP reclen 0x%08lx (large)\n",
-                              (unsigned long) svsk->sk_reclen);
+               if (svsk->sk_reclen > serv->sv_max_mesg) {
+                       if (net_ratelimit())
+                               printk(KERN_NOTICE "RPC: bad TCP reclen 0x%08lx"
+                                      " (large)\n",
+                                      (unsigned long) svsk->sk_reclen);
                        goto err_delete;
                }
        }
@@ -1044,15 +1093,17 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
        len = svsk->sk_reclen;
        set_bit(SK_DATA, &svsk->sk_flags);
 
+       vec = rqstp->rq_vec;
        vec[0] = rqstp->rq_arg.head[0];
        vlen = PAGE_SIZE;
        pnum = 1;
        while (vlen < len) {
-               vec[pnum].iov_base = page_address(rqstp->rq_argpages[rqstp->rq_argused++]);
+               vec[pnum].iov_base = page_address(rqstp->rq_pages[pnum]);
                vec[pnum].iov_len = PAGE_SIZE;
                pnum++;
                vlen += PAGE_SIZE;
        }
+       rqstp->rq_respages = &rqstp->rq_pages[pnum];
 
        /* Now receive data */
        len = svc_recvfrom(rqstp, vec, pnum, len);
@@ -1160,8 +1211,8 @@ svc_tcp_init(struct svc_sock *svsk)
                 * svc_tcp_recvfrom will re-adjust if necessary
                 */
                svc_sock_setbufsize(svsk->sk_sock,
-                                   3 * svsk->sk_server->sv_bufsz,
-                                   3 * svsk->sk_server->sv_bufsz);
+                                   3 * svsk->sk_server->sv_max_mesg,
+                                   3 * svsk->sk_server->sv_max_mesg);
 
                set_bit(SK_CHNGBUF, &svsk->sk_flags);
                set_bit(SK_DATA, &svsk->sk_flags);
@@ -1204,7 +1255,7 @@ svc_recv(struct svc_rqst *rqstp, long timeout)
        struct svc_sock         *svsk =NULL;
        struct svc_serv         *serv = rqstp->rq_server;
        struct svc_pool         *pool = rqstp->rq_pool;
-       int                     len;
+       int                     len, i;
        int                     pages;
        struct xdr_buf          *arg;
        DECLARE_WAITQUEUE(wait, current);
@@ -1221,27 +1272,24 @@ svc_recv(struct svc_rqst *rqstp, long timeout)
                        "svc_recv: service %p, wait queue active!\n",
                         rqstp);
 
-       /* Initialize the buffers */
-       /* first reclaim pages that were moved to response list */
-       svc_pushback_allpages(rqstp);
 
        /* now allocate needed pages.  If we get a failure, sleep briefly */
-       pages = 2 + (serv->sv_bufsz + PAGE_SIZE -1) / PAGE_SIZE;
-       while (rqstp->rq_arghi < pages) {
-               struct page *p = alloc_page(GFP_KERNEL);
-               if (!p) {
-                       schedule_timeout_uninterruptible(msecs_to_jiffies(500));
-                       continue;
+       pages = (serv->sv_max_mesg + PAGE_SIZE) / PAGE_SIZE;
+       for (i=0; i < pages ; i++)
+               while (rqstp->rq_pages[i] == NULL) {
+                       struct page *p = alloc_page(GFP_KERNEL);
+                       if (!p)
+                               schedule_timeout_uninterruptible(msecs_to_jiffies(500));
+                       rqstp->rq_pages[i] = p;
                }
-               rqstp->rq_argpages[rqstp->rq_arghi++] = p;
-       }
+       rqstp->rq_pages[i++] = NULL; /* this might be seen in nfs_read_actor */
+       BUG_ON(pages >= RPCSVC_MAXPAGES);
 
        /* Make arg->head point to first page and arg->pages point to rest */
        arg = &rqstp->rq_arg;
-       arg->head[0].iov_base = page_address(rqstp->rq_argpages[0]);
+       arg->head[0].iov_base = page_address(rqstp->rq_pages[0]);
        arg->head[0].iov_len = PAGE_SIZE;
-       rqstp->rq_argused = 1;
-       arg->pages = rqstp->rq_argpages + 1;
+       arg->pages = rqstp->rq_pages + 1;
        arg->page_base = 0;
        /* save at least one page for response */
        arg->page_len = (pages-2)*PAGE_SIZE;
@@ -1257,7 +1305,7 @@ svc_recv(struct svc_rqst *rqstp, long timeout)
        if ((svsk = svc_sock_dequeue(pool)) != NULL) {
                rqstp->rq_sock = svsk;
                atomic_inc(&svsk->sk_inuse);
-               rqstp->rq_reserved = serv->sv_bufsz;    
+               rqstp->rq_reserved = serv->sv_max_mesg;
                atomic_add(rqstp->rq_reserved, &svsk->sk_reserved);
        } else {
                /* No data pending. Go to sleep */
@@ -1544,6 +1592,8 @@ svc_create_socket(struct svc_serv *serv, int protocol, struct sockaddr_in *sin)
        if ((error = sock_create_kern(PF_INET, type, protocol, &sock)) < 0)
                return error;
 
+       svc_reclassify_socket(sock);
+
        if (type == SOCK_STREAM)
                sock->sk->sk_reuse = 1; /* allow address reuse */
        error = kernel_bind(sock, (struct sockaddr *) sin,
@@ -1598,18 +1648,13 @@ svc_delete_socket(struct svc_sock *svsk)
                if (test_bit(SK_TEMP, &svsk->sk_flags))
                        serv->sv_tmpcnt--;
 
-       if (!atomic_read(&svsk->sk_inuse)) {
-               spin_unlock_bh(&serv->sv_lock);
-               if (svsk->sk_sock->file)
-                       sockfd_put(svsk->sk_sock);
-               else
-                       sock_release(svsk->sk_sock);
-               kfree(svsk);
-       } else {
-               spin_unlock_bh(&serv->sv_lock);
-               dprintk(KERN_NOTICE "svc: server socket destroy delayed\n");
-               /* svsk->sk_server = NULL; */
-       }
+       /* This atomic_inc should be needed - svc_delete_socket
+        * should have the semantic of dropping a reference.
+        * But it doesn't yet....
+        */
+       atomic_inc(&svsk->sk_inuse);
+       spin_unlock_bh(&serv->sv_lock);
+       svc_sock_put(svsk);
 }
 
 /*
@@ -1699,6 +1744,7 @@ static int svc_deferred_recv(struct svc_rqst *rqstp)
        rqstp->rq_prot        = dr->prot;
        rqstp->rq_addr        = dr->addr;
        rqstp->rq_daddr       = dr->daddr;
+       rqstp->rq_respages    = rqstp->rq_pages;
        return dr->argslen<<2;
 }