futex: update documentation for ordering guarantees
[linux.git] / fs / locks.c
index b49e853a9c7b7c2d40105d83a392d114473de4e0..13fc7a6d380ae6648945c8956cc53901de2d0ccc 100644 (file)
 #define IS_POSIX(fl)   (fl->fl_flags & FL_POSIX)
 #define IS_FLOCK(fl)   (fl->fl_flags & FL_FLOCK)
 #define IS_LEASE(fl)   (fl->fl_flags & (FL_LEASE|FL_DELEG))
+#define IS_FILE_PVT(fl)        (fl->fl_flags & FL_FILE_PVT)
 
 static bool lease_breaking(struct file_lock *fl)
 {
@@ -388,6 +389,18 @@ static int flock64_to_posix_lock(struct file *filp, struct file_lock *fl,
        fl->fl_ops = NULL;
        fl->fl_lmops = NULL;
 
+       /* Ensure that fl->fl_filp has compatible f_mode */
+       switch (l->l_type) {
+       case F_RDLCK:
+               if (!(filp->f_mode & FMODE_READ))
+                       return -EBADF;
+               break;
+       case F_WRLCK:
+               if (!(filp->f_mode & FMODE_WRITE))
+                       return -EBADF;
+               break;
+       }
+
        return assign_type(fl, l->l_type);
 }
 
@@ -551,7 +564,7 @@ static void __locks_insert_block(struct file_lock *blocker,
        BUG_ON(!list_empty(&waiter->fl_block));
        waiter->fl_next = blocker;
        list_add_tail(&waiter->fl_block, &blocker->fl_block);
-       if (IS_POSIX(blocker))
+       if (IS_POSIX(blocker) && !IS_FILE_PVT(blocker))
                locks_insert_global_blocked(waiter);
 }
 
@@ -744,8 +757,16 @@ EXPORT_SYMBOL(posix_test_lock);
  * Note: the above assumption may not be true when handling lock
  * requests from a broken NFS client. It may also fail in the presence
  * of tasks (such as posix threads) sharing the same open file table.
- *
  * To handle those cases, we just bail out after a few iterations.
+ *
+ * For FL_FILE_PVT locks, the owner is the filp, not the files_struct.
+ * Because the owner is not even nominally tied to a thread of
+ * execution, the deadlock detection below can't reasonably work well. Just
+ * skip it for those.
+ *
+ * In principle, we could do a more limited deadlock detection on FL_FILE_PVT
+ * locks that just checks for the case where two tasks are attempting to
+ * upgrade from read to write locks on the same inode.
  */
 
 #define MAX_DEADLK_ITERATIONS 10
@@ -768,6 +789,13 @@ static int posix_locks_deadlock(struct file_lock *caller_fl,
 {
        int i = 0;
 
+       /*
+        * This deadlock detector can't reasonably detect deadlocks with
+        * FL_FILE_PVT locks, since they aren't owned by a process, per se.
+        */
+       if (IS_FILE_PVT(caller_fl))
+               return 0;
+
        while ((block_fl = what_owner_is_waiting_for(block_fl))) {
                if (i++ > MAX_DEADLK_ITERATIONS)
                        return 0;
@@ -1127,13 +1155,14 @@ EXPORT_SYMBOL(posix_lock_file_wait);
 
 /**
  * locks_mandatory_locked - Check for an active lock
- * @inode: the file to check
+ * @file: the file to check
  *
  * Searches the inode's list of locks to find any POSIX locks which conflict.
  * This function is called from locks_verify_locked() only.
  */
-int locks_mandatory_locked(struct inode *inode)
+int locks_mandatory_locked(struct file *file)
 {
+       struct inode *inode = file_inode(file);
        fl_owner_t owner = current->files;
        struct file_lock *fl;
 
@@ -1144,7 +1173,7 @@ int locks_mandatory_locked(struct inode *inode)
        for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
                if (!IS_POSIX(fl))
                        continue;
-               if (fl->fl_owner != owner)
+               if (fl->fl_owner != owner && fl->fl_owner != (fl_owner_t)file)
                        break;
        }
        spin_unlock(&inode->i_lock);
@@ -1170,19 +1199,30 @@ int locks_mandatory_area(int read_write, struct inode *inode,
 {
        struct file_lock fl;
        int error;
+       bool sleep = false;
 
        locks_init_lock(&fl);
-       fl.fl_owner = current->files;
        fl.fl_pid = current->tgid;
        fl.fl_file = filp;
        fl.fl_flags = FL_POSIX | FL_ACCESS;
        if (filp && !(filp->f_flags & O_NONBLOCK))
-               fl.fl_flags |= FL_SLEEP;
+               sleep = true;
        fl.fl_type = (read_write == FLOCK_VERIFY_WRITE) ? F_WRLCK : F_RDLCK;
        fl.fl_start = offset;
        fl.fl_end = offset + count - 1;
 
        for (;;) {
+               if (filp) {
+                       fl.fl_owner = (fl_owner_t)filp;
+                       fl.fl_flags &= ~FL_SLEEP;
+                       error = __posix_lock_file(inode, &fl, NULL);
+                       if (!error)
+                               break;
+               }
+
+               if (sleep)
+                       fl.fl_flags |= FL_SLEEP;
+               fl.fl_owner = current->files;
                error = __posix_lock_file(inode, &fl, NULL);
                if (error != FILE_LOCK_DEFERRED)
                        break;
@@ -1851,7 +1891,7 @@ EXPORT_SYMBOL_GPL(vfs_test_lock);
 
 static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl)
 {
-       flock->l_pid = fl->fl_pid;
+       flock->l_pid = IS_FILE_PVT(fl) ? -1 : fl->fl_pid;
 #if BITS_PER_LONG == 32
        /*
         * Make sure we can represent the posix lock via
@@ -1873,7 +1913,7 @@ static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl)
 #if BITS_PER_LONG == 32
 static void posix_lock_to_flock64(struct flock64 *flock, struct file_lock *fl)
 {
-       flock->l_pid = fl->fl_pid;
+       flock->l_pid = IS_FILE_PVT(fl) ? -1 : fl->fl_pid;
        flock->l_start = fl->fl_start;
        flock->l_len = fl->fl_end == OFFSET_MAX ? 0 :
                fl->fl_end - fl->fl_start + 1;
@@ -1885,7 +1925,7 @@ static void posix_lock_to_flock64(struct flock64 *flock, struct file_lock *fl)
 /* Report the first existing lock that would conflict with l.
  * This implements the F_GETLK and F_GETLKP commands of fcntl().
  */
-int fcntl_getlk(struct file *filp, struct flock __user *l)
+int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock __user *l)
 {
        struct file_lock file_lock;
        struct flock flock;
@@ -1902,6 +1942,16 @@ int fcntl_getlk(struct file *filp, struct flock __user *l)
        if (error)
                goto out;
 
+       if (cmd == F_GETLKP) {
+               error = -EINVAL;
+               if (flock.l_pid != 0)
+                       goto out;
+
+               cmd = F_GETLK;
+               file_lock.fl_flags |= FL_FILE_PVT;
+               file_lock.fl_owner = (fl_owner_t)filp;
+       }
+
        error = vfs_test_lock(filp, &file_lock);
        if (error)
                goto out;
@@ -2021,25 +2071,32 @@ again:
        error = flock_to_posix_lock(filp, file_lock, &flock);
        if (error)
                goto out;
-       if (cmd == F_SETLKW) {
-               file_lock->fl_flags |= FL_SLEEP;
-       }
-       
-       error = -EBADF;
-       switch (flock.l_type) {
-       case F_RDLCK:
-               if (!(filp->f_mode & FMODE_READ))
-                       goto out;
-               break;
-       case F_WRLCK:
-               if (!(filp->f_mode & FMODE_WRITE))
+
+       /*
+        * If the cmd is requesting file-private locks, then set the
+        * FL_FILE_PVT flag and override the owner.
+        */
+       switch (cmd) {
+       case F_SETLKP:
+               error = -EINVAL;
+               if (flock.l_pid != 0)
                        goto out;
+
+               cmd = F_SETLK;
+               file_lock->fl_flags |= FL_FILE_PVT;
+               file_lock->fl_owner = (fl_owner_t)filp;
                break;
-       case F_UNLCK:
-               break;
-       default:
+       case F_SETLKPW:
                error = -EINVAL;
-               goto out;
+               if (flock.l_pid != 0)
+                       goto out;
+
+               cmd = F_SETLKW;
+               file_lock->fl_flags |= FL_FILE_PVT;
+               file_lock->fl_owner = (fl_owner_t)filp;
+               /* Fallthrough */
+       case F_SETLKW:
+               file_lock->fl_flags |= FL_SLEEP;
        }
 
        error = do_lock_file_wait(filp, cmd, file_lock);
@@ -2070,7 +2127,7 @@ out:
 /* Report the first existing lock that would conflict with l.
  * This implements the F_GETLK64 and F_GETLKP commands of fcntl().
  */
-int fcntl_getlk64(struct file *filp, struct flock64 __user *l)
+int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 __user *l)
 {
        struct file_lock file_lock;
        struct flock64 flock;
@@ -2087,6 +2144,16 @@ int fcntl_getlk64(struct file *filp, struct flock64 __user *l)
        if (error)
                goto out;
 
+       if (cmd == F_GETLKP) {
+               error = -EINVAL;
+               if (flock.l_pid != 0)
+                       goto out;
+
+               cmd = F_GETLK64;
+               file_lock.fl_flags |= FL_FILE_PVT;
+               file_lock.fl_owner = (fl_owner_t)filp;
+       }
+
        error = vfs_test_lock(filp, &file_lock);
        if (error)
                goto out;
@@ -2139,25 +2206,32 @@ again:
        error = flock64_to_posix_lock(filp, file_lock, &flock);
        if (error)
                goto out;
-       if (cmd == F_SETLKW64) {
-               file_lock->fl_flags |= FL_SLEEP;
-       }
-       
-       error = -EBADF;
-       switch (flock.l_type) {
-       case F_RDLCK:
-               if (!(filp->f_mode & FMODE_READ))
-                       goto out;
-               break;
-       case F_WRLCK:
-               if (!(filp->f_mode & FMODE_WRITE))
+
+       /*
+        * If the cmd is requesting file-private locks, then set the
+        * FL_FILE_PVT flag and override the owner.
+        */
+       switch (cmd) {
+       case F_SETLKP:
+               error = -EINVAL;
+               if (flock.l_pid != 0)
                        goto out;
+
+               cmd = F_SETLK64;
+               file_lock->fl_flags |= FL_FILE_PVT;
+               file_lock->fl_owner = (fl_owner_t)filp;
                break;
-       case F_UNLCK:
-               break;
-       default:
+       case F_SETLKPW:
                error = -EINVAL;
-               goto out;
+               if (flock.l_pid != 0)
+                       goto out;
+
+               cmd = F_SETLKW64;
+               file_lock->fl_flags |= FL_FILE_PVT;
+               file_lock->fl_owner = (fl_owner_t)filp;
+               /* Fallthrough */
+       case F_SETLKW64:
+               file_lock->fl_flags |= FL_SLEEP;
        }
 
        error = do_lock_file_wait(filp, cmd, file_lock);
@@ -2218,7 +2292,7 @@ EXPORT_SYMBOL(locks_remove_posix);
 /*
  * This function is called on the last close of an open file.
  */
-void locks_remove_flock(struct file *filp)
+void locks_remove_file(struct file *filp)
 {
        struct inode * inode = file_inode(filp);
        struct file_lock *fl;
@@ -2227,6 +2301,8 @@ void locks_remove_flock(struct file *filp)
        if (!inode->i_flock)
                return;
 
+       locks_remove_posix(filp, (fl_owner_t)filp);
+
        if (filp->f_op->flock) {
                struct file_lock fl = {
                        .fl_pid = current->tgid,
@@ -2335,8 +2411,14 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl,
 
        seq_printf(f, "%lld:%s ", id, pfx);
        if (IS_POSIX(fl)) {
-               seq_printf(f, "%6s %s ",
-                            (fl->fl_flags & FL_ACCESS) ? "ACCESS" : "POSIX ",
+               if (fl->fl_flags & FL_ACCESS)
+                       seq_printf(f, "ACCESS");
+               else if (IS_FILE_PVT(fl))
+                       seq_printf(f, "FLPVT ");
+               else
+                       seq_printf(f, "POSIX ");
+
+               seq_printf(f, " %s ",
                             (inode == NULL) ? "*NOINODE*" :
                             mandatory_lock(inode) ? "MANDATORY" : "ADVISORY ");
        } else if (IS_FLOCK(fl)) {