[NET]: Fix the race between sk_filter_(de|at)tach and sk_clone()
author Pavel Emelyanov <xemul@openvz.org>
Thu, 18 Oct 2007 04:22:42 +0000 (21:22 -0700)
committer David S. Miller <davem@davemloft.net>
Thu, 18 Oct 2007 04:22:42 +0000 (21:22 -0700)
The proposed fix is to delay the reference counter decrement
until an RCU quiescent state has passed. This gives sk_clone() a
chance to take its own reference on the cloned filter.

The regular sk_filter_uncharge() can only be called from sk_free(),
so there is no need to delay the put there - the socket is dead
anyway and is about to be released itself.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
include/net/sock.h
net/core/filter.c

index b9cfe125c9e68e56ed4768552280e71cb894ba16..43fc3fa50d6237227e5bec82bc0b4e8e0d4ce8b7 100644 (file)
@@ -904,16 +904,6 @@ static inline int sk_filter(struct sock *sk, struct sk_buff *skb)
        return err;
 }
 
-/**
- *     sk_filter_rcu_free: Free a socket filter
- *     @rcu: rcu_head that contains the sk_filter to free
- */
-static inline void sk_filter_rcu_free(struct rcu_head *rcu)
-{
-       struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);
-       kfree(fp);
-}
-
 /**
  *     sk_filter_release: Release a socket filter
  *     @sk: socket
@@ -925,7 +915,7 @@ static inline void sk_filter_rcu_free(struct rcu_head *rcu)
 static inline void sk_filter_release(struct sk_filter *fp)
 {
        if (atomic_dec_and_test(&fp->refcnt))
-               call_rcu_bh(&fp->rcu, sk_filter_rcu_free);
+               kfree(fp);
 }
 
 static inline void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
index 54dddc92452dc226f2ba6232cdc43f4a89a9cb9b..1f0068eae5018948ec187aa31af5a7ce5ed44fde 100644 (file)
@@ -386,6 +386,25 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
        return (BPF_CLASS(filter[flen - 1].code) == BPF_RET) ? 0 : -EINVAL;
 }
 
+/**
+ *     sk_filter_rcu_release: Release a socket filter by rcu_head
+ *     @rcu: rcu_head that contains the sk_filter to free
+ */
+static void sk_filter_rcu_release(struct rcu_head *rcu)
+{
+       struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);
+
+       sk_filter_release(fp);
+}
+
+static void sk_filter_delayed_uncharge(struct sock *sk, struct sk_filter *fp)
+{
+       unsigned int size = sk_filter_len(fp);
+
+       atomic_sub(size, &sk->sk_omem_alloc);
+       call_rcu_bh(&fp->rcu, sk_filter_rcu_release);
+}
+
 /**
  *     sk_attach_filter - attach a socket filter
  *     @fprog: the filter program
@@ -428,7 +447,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
        rcu_assign_pointer(sk->sk_filter, fp);
        rcu_read_unlock_bh();
 
-       sk_filter_uncharge(sk, old_fp);
+       sk_filter_delayed_uncharge(sk, old_fp);
        return 0;
 }
 
@@ -441,7 +460,7 @@ int sk_detach_filter(struct sock *sk)
        filter = rcu_dereference(sk->sk_filter);
        if (filter) {
                rcu_assign_pointer(sk->sk_filter, NULL);
-               sk_filter_uncharge(sk, filter);
+               sk_filter_delayed_uncharge(sk, filter);
                ret = 0;
        }
        rcu_read_unlock_bh();