ocfs2: avoid blocking in ocfs2_mark_lockres_freeing() in downconvert thread
author Jan Kara <jack@suse.cz>
Thu, 3 Apr 2014 21:46:57 +0000 (14:46 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 3 Apr 2014 23:20:55 +0000 (16:20 -0700)
If we are dropping the last inode reference from the downconvert thread, we
will end up calling ocfs2_mark_lockres_freeing(), which can block if the lock
we are freeing is queued, thus creating an A-A deadlock.  Luckily, since we
are the downconvert thread, we can immediately dequeue the lock and thus
avoid waiting in this case.
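
To illustrate the shape of the fix outside of the ocfs2 locking machinery,
below is a minimal user-space pthreads sketch of the same pattern, under the
assumption of a single worker thread and a simple queue; struct item,
mark_item_freeing(), worker_fn() and q_lock are hypothetical stand-ins, not
ocfs2 or kernel interfaces.  A freeing path normally waits for the worker to
dequeue the item, but when it runs in the worker thread itself it unlinks the
item directly, just as the patch does for osb->dc_task and l_blocked_list:

/*
 * Illustrative sketch only -- names are stand-ins, not ocfs2/kernel APIs.
 * The worker thread plays the role of the downconvert thread and 'queued'
 * stands in for OCFS2_LOCK_QUEUED.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct item {
	bool queued;			/* still on the worker's queue? */
	struct item *next;
};

static pthread_mutex_t q_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t q_cond = PTHREAD_COND_INITIALIZER;
static struct item *q_head;
static pthread_t worker;		/* plays the role of osb->dc_task */

/*
 * Normally block until the worker has dequeued 'it'.  If we *are* the
 * worker, waiting would deadlock (only we clear 'queued'), so unlink the
 * item from the queue directly -- the same idea as the
 * current == osb->dc_task branch added to ocfs2_mark_lockres_freeing().
 */
static void mark_item_freeing(struct item *it)
{
	pthread_mutex_lock(&q_lock);
	if (it->queued && pthread_equal(pthread_self(), worker)) {
		struct item **pp;

		for (pp = &q_head; *pp && *pp != it; pp = &(*pp)->next)
			;
		if (*pp)
			*pp = it->next;
		it->queued = false;
		pthread_mutex_unlock(&q_lock);
		return;
	}
	while (it->queued)
		pthread_cond_wait(&q_cond, &q_lock);
	pthread_mutex_unlock(&q_lock);
}

static void *worker_fn(void *arg)
{
	struct item *to_free = arg;	/* freed from inside the worker */

	pthread_mutex_lock(&q_lock);
	while (q_head) {
		struct item *it = q_head;

		q_head = it->next;
		it->queued = false;	/* "downconvert" of this item done */
		pthread_cond_broadcast(&q_cond);
		pthread_mutex_unlock(&q_lock);
		/*
		 * Dropping a last reference here reaches mark_item_freeing()
		 * from the worker itself, mirroring iput() ->
		 * ocfs2_evict_inode() -> ocfs2_clear_inode() in the changelog.
		 * With only the wait path below, this would hang forever.
		 */
		mark_item_freeing(to_free);
		pthread_mutex_lock(&q_lock);
	}
	pthread_mutex_unlock(&q_lock);
	return NULL;
}

int main(void)
{
	struct item a = { .queued = true }, b = { .queued = true };

	a.next = &b;
	pthread_mutex_lock(&q_lock);	/* publish 'worker' before it runs */
	q_head = &a;
	pthread_create(&worker, NULL, worker_fn, &b);
	pthread_mutex_unlock(&q_lock);
	pthread_join(worker, NULL);
	printf("no deadlock: item freed by the worker itself\n");
	return 0;
}

The immediate dequeue is safe for the same reason as in the patch: the worker
processes one item at a time, so an item it is not currently handling can
only be sitting on the queue, never mid-processing.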

Signed-off-by: Jan Kara <jack@suse.cz>
Reviewed-by: Mark Fasheh <mfasheh@suse.de>
Reviewed-by: Srinivas Eeda <srinivas.eeda@oracle.com>
Cc: Joel Becker <jlbec@evilplan.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
fs/ocfs2/dlmglue.c
fs/ocfs2/dlmglue.h
fs/ocfs2/inode.c

index 19986959d14948bd1660e116321533efd027bbc1..6bd690b5a0613b8dc65143bdbd036968b592c8f3 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -3144,22 +3144,60 @@ out:
        return 0;
 }
 
+static void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
+                                      struct ocfs2_lock_res *lockres);
+
 /* Mark the lockres as being dropped. It will no longer be
  * queued if blocking, but we still may have to wait on it
  * being dequeued from the downconvert thread before we can consider
  * it safe to drop.
  *
  * You can *not* attempt to call cluster_lock on this lockres anymore. */
-void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres)
+void ocfs2_mark_lockres_freeing(struct ocfs2_super *osb,
+                               struct ocfs2_lock_res *lockres)
 {
        int status;
        struct ocfs2_mask_waiter mw;
-       unsigned long flags;
+       unsigned long flags, flags2;
 
        ocfs2_init_mask_waiter(&mw);
 
        spin_lock_irqsave(&lockres->l_lock, flags);
        lockres->l_flags |= OCFS2_LOCK_FREEING;
+       if (lockres->l_flags & OCFS2_LOCK_QUEUED && current == osb->dc_task) {
+               /*
+                * We know the downconvert is queued but not in progress
+                * because we are the downconvert thread and processing a
+                * different lock. So we can just remove the lock from the
+                * queue. This is not only an optimization but also a way
+                * to avoid the following deadlock:
+                *   ocfs2_dentry_post_unlock()
+                *     ocfs2_dentry_lock_put()
+                *       ocfs2_drop_dentry_lock()
+                *         iput()
+                *           ocfs2_evict_inode()
+                *             ocfs2_clear_inode()
+                *               ocfs2_mark_lockres_freeing()
+                *                 ... blocks waiting for OCFS2_LOCK_QUEUED
+                *                 since we are the downconvert thread which
+                *                 should clear the flag.
+                */
+               spin_unlock_irqrestore(&lockres->l_lock, flags);
+               spin_lock_irqsave(&osb->dc_task_lock, flags2);
+               list_del_init(&lockres->l_blocked_list);
+               osb->blocked_lock_count--;
+               spin_unlock_irqrestore(&osb->dc_task_lock, flags2);
+               /*
+                * Warn if we recurse into another post_unlock call.  Strictly
+                * speaking it isn't a problem but we need to be careful if
+                * that happens (stack overflow, deadlocks, ...) so warn if
+                * ocfs2 grows a path for which this can happen.
+                */
+               WARN_ON_ONCE(lockres->l_ops->post_unlock);
+               /* Since the lock is freeing we don't do much in the fn below */
+               ocfs2_process_blocked_lock(osb, lockres);
+               return;
+       }
        while (lockres->l_flags & OCFS2_LOCK_QUEUED) {
                lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_QUEUED, 0);
                spin_unlock_irqrestore(&lockres->l_lock, flags);
@@ -3180,7 +3218,7 @@ void ocfs2_simple_drop_lockres(struct ocfs2_super *osb,
 {
        int ret;
 
-       ocfs2_mark_lockres_freeing(lockres);
+       ocfs2_mark_lockres_freeing(osb, lockres);
        ret = ocfs2_drop_lock(osb, lockres);
        if (ret)
                mlog_errno(ret);
index 1d596d8c4a4a55dfd185ecaff4d7d9900bf10b24..d293a22c32c5447e3595f1a61deba545f74df7e5 100644
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -157,7 +157,8 @@ int ocfs2_refcount_lock(struct ocfs2_refcount_tree *ref_tree, int ex);
 void ocfs2_refcount_unlock(struct ocfs2_refcount_tree *ref_tree, int ex);
 
 
-void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres);
+void ocfs2_mark_lockres_freeing(struct ocfs2_super *osb,
+                               struct ocfs2_lock_res *lockres);
 void ocfs2_simple_drop_lockres(struct ocfs2_super *osb,
                               struct ocfs2_lock_res *lockres);
 
index 809b5d57a6b89e0f8ae2959f424667d6d97bda4e..d437f3ba90b0760b788db1664120941e9e9891a3 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -1080,6 +1080,7 @@ static void ocfs2_clear_inode(struct inode *inode)
 {
        int status;
        struct ocfs2_inode_info *oi = OCFS2_I(inode);
+       struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 
        clear_inode(inode);
        trace_ocfs2_clear_inode((unsigned long long)oi->ip_blkno,
@@ -1096,9 +1097,9 @@ static void ocfs2_clear_inode(struct inode *inode)
 
        /* Do these before all the other work so that we don't bounce
         * the downconvert thread while waiting to destroy the locks. */
-       ocfs2_mark_lockres_freeing(&oi->ip_rw_lockres);
-       ocfs2_mark_lockres_freeing(&oi->ip_inode_lockres);
-       ocfs2_mark_lockres_freeing(&oi->ip_open_lockres);
+       ocfs2_mark_lockres_freeing(osb, &oi->ip_rw_lockres);
+       ocfs2_mark_lockres_freeing(osb, &oi->ip_inode_lockres);
+       ocfs2_mark_lockres_freeing(osb, &oi->ip_open_lockres);
 
        ocfs2_resv_discard(&OCFS2_SB(inode->i_sb)->osb_la_resmap,
                           &oi->ip_la_data_resv);