packet: fix bitfield update race
authorWillem de Bruijn <willemb@google.com>
Mon, 23 Apr 2018 21:37:03 +0000 (17:37 -0400)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 29 Apr 2018 05:45:29 +0000 (07:45 +0200)
[ Upstream commit a6361f0ca4b25460f2cdf3235ebe8115f622901e ]

Updates to the bitfields in struct packet_sock are not atomic.
Serialize these read-modify-write cycles.

Move po->running into a separate variable. Its writes are protected by
po->bind_lock (except for one startup case at packet_create). Also
replace a textual precondition warning with lockdep annotation.

All others are set only in packet_setsockopt. Serialize these
updates by holding the socket lock. Analogous to other field updates,
also hold the lock when testing whether a ring is active (pg_vec).

Fixes: 8dc419447415 ("[PACKET]: Add optional checksum computation for recvmsg")
Reported-by: DaeRyong Jeong <threeearcat@gmail.com>
Reported-by: Byoungyoung Lee <byoungyoung@purdue.edu>
Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
net/packet/af_packet.c
net/packet/internal.h

index 87488f2721fbe031225e03f6cd339cb837628cfc..ef2f8ebc5e3ab361606f9e487e7db0617ddcaf52 100644 (file)
@@ -323,11 +323,11 @@ static void packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb)
        skb_set_queue_mapping(skb, queue_index);
 }
 
-/* register_prot_hook must be invoked with the po->bind_lock held,
+/* __register_prot_hook must be invoked through register_prot_hook
  * or from a context in which asynchronous accesses to the packet
  * socket is not possible (packet_create()).
  */
-static void register_prot_hook(struct sock *sk)
+static void __register_prot_hook(struct sock *sk)
 {
        struct packet_sock *po = pkt_sk(sk);
 
@@ -342,8 +342,13 @@ static void register_prot_hook(struct sock *sk)
        }
 }
 
-/* {,__}unregister_prot_hook() must be invoked with the po->bind_lock
- * held.   If the sync parameter is true, we will temporarily drop
+static void register_prot_hook(struct sock *sk)
+{
+       lockdep_assert_held_once(&pkt_sk(sk)->bind_lock);
+       __register_prot_hook(sk);
+}
+
+/* If the sync parameter is true, we will temporarily drop
  * the po->bind_lock and do a synchronize_net to make sure no
  * asynchronous packet processing paths still refer to the elements
  * of po->prot_hook.  If the sync parameter is false, it is the
@@ -353,6 +358,8 @@ static void __unregister_prot_hook(struct sock *sk, bool sync)
 {
        struct packet_sock *po = pkt_sk(sk);
 
+       lockdep_assert_held_once(&po->bind_lock);
+
        po->running = 0;
 
        if (po->fanout)
@@ -2861,7 +2868,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
 
        if (proto) {
                po->prot_hook.type = proto;
-               register_prot_hook(sk);
+               __register_prot_hook(sk);
        }
 
        mutex_lock(&net->packet.sklist_lock);
@@ -3352,12 +3359,18 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
 
                if (optlen != sizeof(val))
                        return -EINVAL;
-               if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
-                       return -EBUSY;
                if (copy_from_user(&val, optval, sizeof(val)))
                        return -EFAULT;
-               po->tp_loss = !!val;
-               return 0;
+
+               lock_sock(sk);
+               if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
+                       ret = -EBUSY;
+               } else {
+                       po->tp_loss = !!val;
+                       ret = 0;
+               }
+               release_sock(sk);
+               return ret;
        }
        case PACKET_AUXDATA:
        {
@@ -3368,7 +3381,9 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
                if (copy_from_user(&val, optval, sizeof(val)))
                        return -EFAULT;
 
+               lock_sock(sk);
                po->auxdata = !!val;
+               release_sock(sk);
                return 0;
        }
        case PACKET_ORIGDEV:
@@ -3380,7 +3395,9 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
                if (copy_from_user(&val, optval, sizeof(val)))
                        return -EFAULT;
 
+               lock_sock(sk);
                po->origdev = !!val;
+               release_sock(sk);
                return 0;
        }
        case PACKET_VNET_HDR:
@@ -3389,15 +3406,20 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
 
                if (sock->type != SOCK_RAW)
                        return -EINVAL;
-               if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
-                       return -EBUSY;
                if (optlen < sizeof(val))
                        return -EINVAL;
                if (copy_from_user(&val, optval, sizeof(val)))
                        return -EFAULT;
 
-               po->has_vnet_hdr = !!val;
-               return 0;
+               lock_sock(sk);
+               if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
+                       ret = -EBUSY;
+               } else {
+                       po->has_vnet_hdr = !!val;
+                       ret = 0;
+               }
+               release_sock(sk);
+               return ret;
        }
        case PACKET_TIMESTAMP:
        {
@@ -3428,11 +3450,17 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
 
                if (optlen != sizeof(val))
                        return -EINVAL;
-               if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
-                       return -EBUSY;
                if (copy_from_user(&val, optval, sizeof(val)))
                        return -EFAULT;
-               po->tp_tx_has_off = !!val;
+
+               lock_sock(sk);
+               if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
+                       ret = -EBUSY;
+               } else {
+                       po->tp_tx_has_off = !!val;
+                       ret = 0;
+               }
+               release_sock(sk);
                return 0;
        }
        case PACKET_QDISC_BYPASS:
index cdddf6a303996b1a512a7a3896362acc31635f0e..68aa14802768eb4b35f972f6f86e5ed10a81f8fd 100644 (file)
@@ -100,10 +100,12 @@ struct packet_sock {
        int                     copy_thresh;
        spinlock_t              bind_lock;
        struct mutex            pg_vec_lock;
-       unsigned int            running:1,      /* prot_hook is attached*/
-                               auxdata:1,
+       unsigned int            running;        /* bind_lock must be held */
+       unsigned int            auxdata:1,      /* writer must hold sock lock */
                                origdev:1,
-                               has_vnet_hdr:1;
+                               has_vnet_hdr:1,
+                               tp_loss:1,
+                               tp_tx_has_off:1;
        int                     ifindex;        /* bound device         */
        __be16                  num;
        struct packet_mclist    *mclist;
@@ -111,8 +113,6 @@ struct packet_sock {
        enum tpacket_versions   tp_version;
        unsigned int            tp_hdrlen;
        unsigned int            tp_reserve;
-       unsigned int            tp_loss:1;
-       unsigned int            tp_tx_has_off:1;
        unsigned int            tp_tstamp;
        struct net_device __rcu *cached_dev;
        int                     (*xmit)(struct sk_buff *skb);