Merge tag 'xfs-for-linus-4.1-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git...

author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 24 Apr 2015 14:08:41 +0000 (07:08 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 24 Apr 2015 14:08:41 +0000 (07:08 -0700)
diff --cc fs/open.c
Simple merge
diff --cc fs/xfs/xfs_aops.c
Simple merge
diff --cc fs/xfs/xfs_file.c

index 1f12ad0a8585b3d0f0788cfe5b88b6ab662a1547,3a5d305e60c9f505a71ec391b7af61f98d10e686..8121e75352ee9bddd4726ca685d6d3e855256bdd
--- 1/fs/xfs/xfs_file.c
--- 2/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@@ -544,22 -545,21 +544,22 @@@ xfs_zero_eof
    */
   STATIC ssize_t
   xfs_file_aio_write_checks(
- -      struct file             *file,
- -      loff_t                  *pos,
- -      size_t                  *count,
+ +      struct kiocb            *iocb,
+ +      struct iov_iter         *from,
         int                     *iolock)
   {
+ +      struct file             *file = iocb->ki_filp;
         struct inode            *inode = file->f_mapping->host;
         struct xfs_inode        *ip = XFS_I(inode);
- -      int                     error = 0;
+ +      ssize_t                 error = 0;
+ +      size_t                  count = iov_iter_count(from);
   
   restart:
- -      error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode));
- -      if (error)
+ +      error = generic_write_checks(iocb, from);
+ +      if (error <= 0)
                 return error;
   
-       error = xfs_break_layouts(inode, iolock);
+       error = xfs_break_layouts(inode, iolock, true);
         if (error)
                 return error;
   
@@@ -569,21 -569,41 +569,42 @@@
          * write.  If zeroing is needed and we are currently holding the
          * iolock shared, we need to update it to exclusive which implies
          * having to redo all checks before.
+        *
+        * We need to serialise against EOF updates that occur in IO
+        * completions here. We want to make sure that nobody is changing the
+        * size while we do this check until we have placed an IO barrier (i.e.
+        * hold the XFS_IOLOCK_EXCL) that prevents new IO from being dispatched.
+        * The spinlock effectively forms a memory barrier once we have the
+        * XFS_IOLOCK_EXCL so we are guaranteed to see the latest EOF value
+        * and hence be able to correctly determine if we need to run zeroing.
          */
- -      if (*pos > i_size_read(inode)) {
+       spin_lock(&ip->i_flags_lock);
+ +      if (iocb->ki_pos > i_size_read(inode)) {
                 bool    zero = false;
   
+               spin_unlock(&ip->i_flags_lock);
                 if (*iolock == XFS_IOLOCK_SHARED) {
                         xfs_rw_iunlock(ip, *iolock);
                         *iolock = XFS_IOLOCK_EXCL;
                         xfs_rw_ilock(ip, *iolock);
+ +                      iov_iter_reexpand(from, count);
+ 
+                       /*
+                        * We now have an IO submission barrier in place, but
+                        * AIO can do EOF updates during IO completion and hence
+                        * we now need to wait for all of them to drain. Non-AIO
+                        * DIO will have drained before we are given the
+                        * XFS_IOLOCK_EXCL, and so for most cases this wait is a
+                        * no-op.
+                        */
+                       inode_dio_wait(inode);
                         goto restart;
                 }
- -              error = xfs_zero_eof(ip, *pos, i_size_read(inode), &zero);
+ +              error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), &zero);
                 if (error)
                         return error;
-       }
+       } else
+               spin_unlock(&ip->i_flags_lock);
   
         /*
          * Updating the timestamps will grab the ilock again from
@@@ -680,11 -702,11 +703,12 @@@ xfs_file_dio_aio_write
                 xfs_rw_ilock(ip, iolock);
         }
   
- -      ret = xfs_file_aio_write_checks(file, &pos, &count, &iolock);
+ +      ret = xfs_file_aio_write_checks(iocb, from, &iolock);
         if (ret)
                 goto out;
- -      iov_iter_truncate(from, count);
+ +      count = iov_iter_count(from);
+ +      pos = iocb->ki_pos;
+       end = pos + count - 1;
   
         if (mapping->nrpages) {
                 ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
@@@ -715,8 -737,22 +739,22 @@@
         }
   
         trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0);
-       ret = generic_file_direct_write(iocb, from, pos);
   
- -      ret = mapping->a_ops->direct_IO(WRITE, iocb, &data, pos);
+       data = *from;
++      ret = mapping->a_ops->direct_IO(iocb, &data, pos);
+ 
+       /* see generic_file_direct_write() for why this is necessary */
+       if (mapping->nrpages) {
+               invalidate_inode_pages2_range(mapping,
+                                             pos >> PAGE_CACHE_SHIFT,
+                                             end >> PAGE_CACHE_SHIFT);
+       }
+ 
+       if (ret > 0) {
+               pos += ret;
+               iov_iter_advance(from, ret);
+               iocb->ki_pos = pos;
+       }
   out:
         xfs_rw_iunlock(ip, iolock);
   
@@@ -1385,8 -1449,59 +1449,57 @@@ xfs_file_llseek
         }
   }
   
+ /*
+  * Locking for serialisation of IO during page faults. This results in a lock
+  * ordering of:
+  *
+  * mmap_sem (MM)
+  *   i_mmap_lock (XFS - truncate serialisation)
+  *     page_lock (MM)
+  *       i_lock (XFS - extent map serialisation)
+  */
+ STATIC int
+ xfs_filemap_fault(
+       struct vm_area_struct   *vma,
+       struct vm_fault         *vmf)
+ {
+       struct xfs_inode        *ip = XFS_I(vma->vm_file->f_mapping->host);
+       int                     error;
+ 
+       trace_xfs_filemap_fault(ip);
+ 
+       xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
+       error = filemap_fault(vma, vmf);
+       xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
+ 
+       return error;
+ }
+ 
+ /*
+  * mmap()d file has taken write protection fault and is being made writable. We
+  * can set the page state up correctly for a writable page, which means we can
+  * do correct delalloc accounting (ENOSPC checking!) and unwritten extent
+  * mapping.
+  */
+ STATIC int
+ xfs_filemap_page_mkwrite(
+       struct vm_area_struct   *vma,
+       struct vm_fault         *vmf)
+ {
+       struct xfs_inode        *ip = XFS_I(vma->vm_file->f_mapping->host);
+       int                     error;
+ 
+       trace_xfs_filemap_page_mkwrite(ip);
+ 
+       xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
+       error = block_page_mkwrite(vma, vmf, xfs_get_blocks);
+       xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
+ 
+       return error;
+ }
+ 
   const struct file_operations xfs_file_operations = {
         .llseek         = xfs_file_llseek,
- -      .read           = new_sync_read,
- -      .write          = new_sync_write,
         .read_iter      = xfs_file_read_iter,
         .write_iter     = xfs_file_write_iter,
         .splice_read    = xfs_file_splice_read,
author	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 24 Apr 2015 14:08:41 +0000 (07:08 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 24 Apr 2015 14:08:41 +0000 (07:08 -0700)
		1	2
fs/open.c	patch |	diff1 |	diff2 |	blob | history
fs/xfs/xfs_aops.c	patch |	diff1 |	diff2 |	blob | history
fs/xfs/xfs_file.c	patch |	diff1 |	diff2 |	blob | history