Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso...

author Linus Torvalds <torvalds@linux-foundation.org>

Fri, 22 Jan 2016 19:23:35 +0000 (11:23 -0800)

committer Linus Torvalds <torvalds@linux-foundation.org>

Fri, 22 Jan 2016 19:23:35 +0000 (11:23 -0800)
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 22 Jan 2016 19:23:35 +0000 (11:23 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 22 Jan 2016 19:23:35 +0000 (11:23 -0800)
diff --combined fs/ext4/inode.c

index b3bd912df6bfaf475f9304eba7fb1b9ce6368e87,6770c07ab39f2532b6dea7be1b4fc7f562fb0501..d964195ea0e2ad15db38e5a5ae9e79a2a145d45b
--- 1/fs/ext4/inode.c
--- 2/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@@ -383,6 -383,21 +383,21 @@@ static int __check_block_validity(struc
         return 0;
   }
   
+ int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk, ext4_fsblk_t pblk,
+                      ext4_lblk_t len)
+ {
+       int ret;
+ 
+       if (ext4_encrypted_inode(inode))
+               return ext4_encrypted_zeroout(inode, lblk, pblk, len);
+ 
+       ret = sb_issue_zeroout(inode->i_sb, pblk, len, GFP_NOFS);
+       if (ret > 0)
+               ret = 0;
+ 
+       return ret;
+ }
+ 
   #define check_block_validity(inode, map)      \
         __check_block_validity((inode), __func__, __LINE__, (map))
   
@@@ -403,8 -418,7 +418,7 @@@ static void ext4_map_blocks_es_recheck(
          * out taking i_data_sem.  So at the time the unwritten extent
          * could be converted.
          */
-       if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
-               down_read(&EXT4_I(inode)->i_data_sem);
+       down_read(&EXT4_I(inode)->i_data_sem);
         if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
                 retval = ext4_ext_map_blocks(handle, inode, map, flags &
                                              EXT4_GET_BLOCKS_KEEP_SIZE);
@@@ -412,8 -426,7 +426,7 @@@
                 retval = ext4_ind_map_blocks(handle, inode, map, flags &
                                              EXT4_GET_BLOCKS_KEEP_SIZE);
         }
-       if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
-               up_read((&EXT4_I(inode)->i_data_sem));
+       up_read((&EXT4_I(inode)->i_data_sem));
   
         /*
          * We don't check m_len because extent will be collpased in status
@@@ -509,8 -522,7 +522,7 @@@ int ext4_map_blocks(handle_t *handle, s
          * Try to see if we can get the block without requesting a new
          * file system block.
          */
-       if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
-               down_read(&EXT4_I(inode)->i_data_sem);
+       down_read(&EXT4_I(inode)->i_data_sem);
         if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
                 retval = ext4_ext_map_blocks(handle, inode, map, flags &
                                              EXT4_GET_BLOCKS_KEEP_SIZE);
@@@ -541,8 -553,7 +553,7 @@@
                 if (ret < 0)
                         retval = ret;
         }
-       if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
-               up_read((&EXT4_I(inode)->i_data_sem));
+       up_read((&EXT4_I(inode)->i_data_sem));
   
   found:
         if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
@@@ -625,6 -636,22 +636,22 @@@
                         WARN_ON(1);
                 }
   
+               /*
+                * We have to zeroout blocks before inserting them into extent
+                * status tree. Otherwise someone could look them up there and
+                * use them before they are really zeroed.
+                */
+               if (flags & EXT4_GET_BLOCKS_ZERO &&
+                   map->m_flags & EXT4_MAP_MAPPED &&
+                   map->m_flags & EXT4_MAP_NEW) {
+                       ret = ext4_issue_zeroout(inode, map->m_lblk,
+                                                map->m_pblk, map->m_len);
+                       if (ret) {
+                               retval = ret;
+                               goto out_sem;
+                       }
+               }
+ 
                 /*
                  * If the extent has been zeroed out, we don't need to update
                  * extent status tree.
@@@ -632,7 -659,7 +659,7 @@@
                 if ((flags & EXT4_GET_BLOCKS_PRE_IO) &&
                     ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
                         if (ext4_es_is_written(&es))
-                               goto has_zeroout;
+                               goto out_sem;
                 }
                 status = map->m_flags & EXT4_MAP_UNWRITTEN ?
                                 EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
@@@ -643,11 -670,13 +670,13 @@@
                         status |= EXTENT_STATUS_DELAYED;
                 ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
                                             map->m_pblk, status);
-               if (ret < 0)
+               if (ret < 0) {
                         retval = ret;
+                       goto out_sem;
+               }
         }
   
- has_zeroout:
+ out_sem:
         up_write((&EXT4_I(inode)->i_data_sem));
         if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
                 ret = check_block_validity(inode, map);
@@@ -674,7 -703,7 +703,7 @@@ static int _ext4_get_block(struct inod
         map.m_lblk = iblock;
         map.m_len = bh->b_size >> inode->i_blkbits;
   
-       if (flags && !(flags & EXT4_GET_BLOCKS_NO_LOCK) && !handle) {
+       if (flags && !handle) {
                 /* Direct IO write... */
                 if (map.m_len > DIO_MAX_BLOCKS)
                         map.m_len = DIO_MAX_BLOCKS;
@@@ -694,16 -723,6 +723,6 @@@
   
                 map_bh(bh, inode->i_sb, map.m_pblk);
                 bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;
-               if (IS_DAX(inode) && buffer_unwritten(bh)) {
-                       /*
-                        * dgc: I suspect unwritten conversion on ext4+DAX is
-                        * fundamentally broken here when there are concurrent
-                        * read/write in progress on this inode.
-                        */
-                       WARN_ON_ONCE(io_end);
-                       bh->b_assoc_map = inode->i_mapping;
-                       bh->b_private = (void *)(unsigned long)iblock;
-               }
                 if (io_end && io_end->flag & EXT4_IO_END_UNWRITTEN)
                         set_buffer_defer_completion(bh);
                 bh->b_size = inode->i_sb->s_blocksize * map.m_len;
@@@ -879,9 -898,6 +898,6 @@@ int do_journal_get_write_access(handle_
         return ret;
   }
   
- static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock,
-                  struct buffer_head *bh_result, int create);
- 
   #ifdef CONFIG_EXT4_FS_ENCRYPTION
   static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len,
                                   get_block_t *get_block)
@@@ -3054,25 -3070,96 +3070,96 @@@ int ext4_get_block_write(struct inode *
                                EXT4_GET_BLOCKS_IO_CREATE_EXT);
   }
   
- static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock,
+ static int ext4_get_block_overwrite(struct inode *inode, sector_t iblock,
                    struct buffer_head *bh_result, int create)
   {
-       ext4_debug("ext4_get_block_write_nolock: inode %lu, create flag %d\n",
+       int ret;
+ 
+       ext4_debug("ext4_get_block_overwrite: inode %lu, create flag %d\n",
                    inode->i_ino, create);
-       return _ext4_get_block(inode, iblock, bh_result,
-                              EXT4_GET_BLOCKS_NO_LOCK);
+       ret = _ext4_get_block(inode, iblock, bh_result, 0);
+       /*
+        * Blocks should have been preallocated! ext4_file_write_iter() checks
+        * that.
+        */
+       WARN_ON_ONCE(!buffer_mapped(bh_result));
+ 
+       return ret;
   }
   
- int ext4_get_block_dax(struct inode *inode, sector_t iblock,
-                  struct buffer_head *bh_result, int create)
+ #ifdef CONFIG_FS_DAX
+ int ext4_dax_mmap_get_block(struct inode *inode, sector_t iblock,
+                           struct buffer_head *bh_result, int create)
   {
-       int flags = EXT4_GET_BLOCKS_PRE_IO | EXT4_GET_BLOCKS_UNWRIT_EXT;
-       if (create)
-               flags |= EXT4_GET_BLOCKS_CREATE;
-       ext4_debug("ext4_get_block_dax: inode %lu, create flag %d\n",
+       int ret, err;
+       int credits;
+       struct ext4_map_blocks map;
+       handle_t *handle = NULL;
+       int flags = 0;
+ 
+       ext4_debug("ext4_dax_mmap_get_block: inode %lu, create flag %d\n",
                    inode->i_ino, create);
-       return _ext4_get_block(inode, iblock, bh_result, flags);
+       map.m_lblk = iblock;
+       map.m_len = bh_result->b_size >> inode->i_blkbits;
+       credits = ext4_chunk_trans_blocks(inode, map.m_len);
+       if (create) {
+               flags |= EXT4_GET_BLOCKS_PRE_IO | EXT4_GET_BLOCKS_CREATE_ZERO;
+               handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, credits);
+               if (IS_ERR(handle)) {
+                       ret = PTR_ERR(handle);
+                       return ret;
+               }
+       }
+ 
+       ret = ext4_map_blocks(handle, inode, &map, flags);
+       if (create) {
+               err = ext4_journal_stop(handle);
+               if (ret >= 0 && err < 0)
+                       ret = err;
+       }
+       if (ret <= 0)
+               goto out;
+       if (map.m_flags & EXT4_MAP_UNWRITTEN) {
+               int err2;
+ 
+               /*
+                * We are protected by i_mmap_sem so we know block cannot go
+                * away from under us even though we dropped i_data_sem.
+                * Convert extent to written and write zeros there.
+                *
+                * Note: We may get here even when create == 0.
+                */
+               handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, credits);
+               if (IS_ERR(handle)) {
+                       ret = PTR_ERR(handle);
+                       goto out;
+               }
+ 
+               err = ext4_map_blocks(handle, inode, &map,
+                     EXT4_GET_BLOCKS_CONVERT | EXT4_GET_BLOCKS_CREATE_ZERO);
+               if (err < 0)
+                       ret = err;
+               err2 = ext4_journal_stop(handle);
+               if (err2 < 0 && ret > 0)
+                       ret = err2;
+       }
+ out:
+       WARN_ON_ONCE(ret == 0 && create);
+       if (ret > 0) {
+               map_bh(bh_result, inode->i_sb, map.m_pblk);
+               bh_result->b_state = (bh_result->b_state & ~EXT4_MAP_FLAGS) |
+                                       map.m_flags;
+               /*
+                * At least for now we have to clear BH_New so that DAX code
+                * doesn't attempt to zero blocks again in a racy way.
+                */
+               bh_result->b_state &= ~(1 << BH_New);
+               bh_result->b_size = map.m_len << inode->i_blkbits;
+               ret = 0;
+       }
+       return ret;
   }
+ #endif
   
   static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
                             ssize_t size, void *private)
@@@ -3143,10 -3230,8 +3230,8 @@@ static ssize_t ext4_ext_direct_IO(struc
         /* If we do a overwrite dio, i_mutex locking can be released */
         overwrite = *((int *)iocb->private);
   
-       if (overwrite) {
-               down_read(&EXT4_I(inode)->i_data_sem);
+       if (overwrite)
                 mutex_unlock(&inode->i_mutex);
-       }
   
         /*
          * We could direct write to holes and fallocate.
@@@ -3189,7 -3274,7 +3274,7 @@@
         }
   
         if (overwrite) {
-               get_block_func = ext4_get_block_write_nolock;
+               get_block_func = ext4_get_block_overwrite;
         } else {
                 get_block_func = ext4_get_block_write;
                 dio_flags = DIO_LOCKING;
@@@ -3245,10 -3330,8 +3330,8 @@@ retake_lock
         if (iov_iter_rw(iter) == WRITE)
                 inode_dio_end(inode);
         /* take i_mutex locking again if we do a ovewrite dio */
-       if (overwrite) {
-               up_read(&EXT4_I(inode)->i_data_sem);
+       if (overwrite)
                 mutex_lock(&inode->i_mutex);
-       }
   
         return ret;
   }
@@@ -3558,6 -3641,35 +3641,35 @@@ int ext4_can_truncate(struct inode *ino
         return 0;
   }
   
+ /*
+  * We have to make sure i_disksize gets properly updated before we truncate
+  * page cache due to hole punching or zero range. Otherwise i_disksize update
+  * can get lost as it may have been postponed to submission of writeback but
+  * that will never happen after we truncate page cache.
+  */
+ int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
+                                     loff_t len)
+ {
+       handle_t *handle;
+       loff_t size = i_size_read(inode);
+ 
+       WARN_ON(!mutex_is_locked(&inode->i_mutex));
+       if (offset > size || offset + len < size)
+               return 0;
+ 
+       if (EXT4_I(inode)->i_disksize >= size)
+               return 0;
+ 
+       handle = ext4_journal_start(inode, EXT4_HT_MISC, 1);
+       if (IS_ERR(handle))
+               return PTR_ERR(handle);
+       ext4_update_i_disksize(inode, size);
+       ext4_mark_inode_dirty(handle, inode);
+       ext4_journal_stop(handle);
+ 
+       return 0;
+ }
+ 
   /*
    * ext4_punch_hole: punches a hole in a file by releaseing the blocks
    * associated with the given offset and length
@@@ -3623,17 -3735,26 +3735,26 @@@ int ext4_punch_hole(struct inode *inode
   
         }
   
+       /* Wait all existing dio workers, newcomers will block on i_mutex */
+       ext4_inode_block_unlocked_dio(inode);
+       inode_dio_wait(inode);
+ 
+       /*
+        * Prevent page faults from reinstantiating pages we have released from
+        * page cache.
+        */
+       down_write(&EXT4_I(inode)->i_mmap_sem);
         first_block_offset = round_up(offset, sb->s_blocksize);
         last_block_offset = round_down((offset + length), sb->s_blocksize) - 1;
   
         /* Now release the pages and zero block aligned part of pages*/
-       if (last_block_offset > first_block_offset)
+       if (last_block_offset > first_block_offset) {
+               ret = ext4_update_disksize_before_punch(inode, offset, length);
+               if (ret)
+                       goto out_dio;
                 truncate_pagecache_range(inode, first_block_offset,
                                          last_block_offset);
- 
-       /* Wait all existing dio workers, newcomers will block on i_mutex */
-       ext4_inode_block_unlocked_dio(inode);
-       inode_dio_wait(inode);
+       }
   
         if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
                 credits = ext4_writepage_trans_blocks(inode);
@@@ -3680,16 -3801,12 +3801,12 @@@
         if (IS_SYNC(inode))
                 ext4_handle_sync(handle);
   
-       /* Now release the pages again to reduce race window */
-       if (last_block_offset > first_block_offset)
-               truncate_pagecache_range(inode, first_block_offset,
-                                        last_block_offset);
- 
         inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
         ext4_mark_inode_dirty(handle, inode);
   out_stop:
         ext4_journal_stop(handle);
   out_dio:
+       up_write(&EXT4_I(inode)->i_mmap_sem);
         ext4_inode_resume_unlocked_dio(inode);
   out_mutex:
         mutex_unlock(&inode->i_mutex);
@@@ -4076,6 -4193,14 +4193,14 @@@ static inline void ext4_iget_extra_inod
                 EXT4_I(inode)->i_inline_off = 0;
   }
   
+ int ext4_get_projid(struct inode *inode, kprojid_t *projid)
+ {
+       if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, EXT4_FEATURE_RO_COMPAT_PROJECT))
+               return -EOPNOTSUPP;
+       *projid = EXT4_I(inode)->i_projid;
+       return 0;
+ }
+ 
   struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
   {
         struct ext4_iloc iloc;
@@@ -4087,6 -4212,7 +4212,7 @@@
         int block;
         uid_t i_uid;
         gid_t i_gid;
+       projid_t i_projid;
   
         inode = iget_locked(sb, ino);
         if (!inode)
@@@ -4136,12 -4262,20 +4262,20 @@@
         inode->i_mode = le16_to_cpu(raw_inode->i_mode);
         i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
         i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
+       if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_PROJECT) &&
+           EXT4_INODE_SIZE(sb) > EXT4_GOOD_OLD_INODE_SIZE &&
+           EXT4_FITS_IN_INODE(raw_inode, ei, i_projid))
+               i_projid = (projid_t)le32_to_cpu(raw_inode->i_projid);
+       else
+               i_projid = EXT4_DEF_PROJID;
+ 
         if (!(test_opt(inode->i_sb, NO_UID32))) {
                 i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
                 i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
         }
         i_uid_write(inode, i_uid);
         i_gid_write(inode, i_gid);
+       ei->i_projid = make_kprojid(&init_user_ns, i_projid);
         set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
   
         ext4_clear_state_flags(ei);     /* Only relevant on 32-bit archs */
@@@ -4283,7 -4417,6 +4417,7 @@@
                         inode->i_op = &ext4_symlink_inode_operations;
                         ext4_set_aops(inode);
                 }
+ +              inode_nohighmem(inode);
         } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
               S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
                 inode->i_op = &ext4_special_inode_operations;
@@@ -4440,6 -4573,7 +4574,7 @@@ static int ext4_do_update_inode(handle_
         int need_datasync = 0, set_large_file = 0;
         uid_t i_uid;
         gid_t i_gid;
+       projid_t i_projid;
   
         spin_lock(&ei->i_raw_lock);
   
@@@ -4452,6 -4586,7 +4587,7 @@@
         raw_inode->i_mode = cpu_to_le16(inode->i_mode);
         i_uid = i_uid_read(inode);
         i_gid = i_gid_read(inode);
+       i_projid = from_kprojid(&init_user_ns, ei->i_projid);
         if (!(test_opt(inode->i_sb, NO_UID32))) {
                 raw_inode->i_uid_low = cpu_to_le16(low_16_bits(i_uid));
                 raw_inode->i_gid_low = cpu_to_le16(low_16_bits(i_gid));
@@@ -4529,6 -4664,15 +4665,15 @@@
                                 cpu_to_le16(ei->i_extra_isize);
                 }
         }
+ 
+       BUG_ON(!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
+                       EXT4_FEATURE_RO_COMPAT_PROJECT) &&
+              i_projid != EXT4_DEF_PROJID);
+ 
+       if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE &&
+           EXT4_FITS_IN_INODE(raw_inode, ei, i_projid))
+               raw_inode->i_projid = cpu_to_le32(i_projid);
+ 
         ext4_inode_csum_set(inode, raw_inode, ei);
         spin_unlock(&ei->i_raw_lock);
         if (inode->i_sb->s_flags & MS_LAZYTIME)
@@@ -4824,6 -4968,7 +4969,7 @@@ int ext4_setattr(struct dentry *dentry
                         } else
                                 ext4_wait_for_tail_page_commit(inode);
                 }
+               down_write(&EXT4_I(inode)->i_mmap_sem);
                 /*
                  * Truncate pagecache after we've waited for commit
                  * in data=journal mode to make pages freeable.
@@@ -4831,6 -4976,7 +4977,7 @@@
                 truncate_pagecache(inode, inode->i_size);
                 if (shrink)
                         ext4_truncate(inode);
+               up_write(&EXT4_I(inode)->i_mmap_sem);
         }
   
         if (!rc) {
@@@ -5279,6 -5425,8 +5426,8 @@@ int ext4_page_mkwrite(struct vm_area_st
   
         sb_start_pagefault(inode->i_sb);
         file_update_time(vma->vm_file);
+ 
+       down_read(&EXT4_I(inode)->i_mmap_sem);
         /* Delalloc case is easy... */
         if (test_opt(inode->i_sb, DELALLOC) &&
             !ext4_should_journal_data(inode) &&
@@@ -5348,6 -5496,19 +5497,19 @@@ retry_alloc
   out_ret:
         ret = block_page_mkwrite_return(ret);
   out:
+       up_read(&EXT4_I(inode)->i_mmap_sem);
         sb_end_pagefault(inode->i_sb);
         return ret;
   }
+ 
+ int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+ {
+       struct inode *inode = file_inode(vma->vm_file);
+       int err;
+ 
+       down_read(&EXT4_I(inode)->i_mmap_sem);
+       err = filemap_fault(vma, vmf);
+       up_read(&EXT4_I(inode)->i_mmap_sem);
+ 
+       return err;
+ }
diff --combined fs/ext4/namei.c

index f27e0c2598c59edb5685c27310c2766d0860838d,2047ff7c5fbcd7d06db5a2f171572ab5513961bd..854f75de4599b8e6f40dcd4a773319e9c8e009dd
--- 1/fs/ext4/namei.c
--- 2/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@@ -273,7 -273,7 +273,7 @@@ static struct buffer_head * ext4_dx_fin
                 struct ext4_filename *fname,
                 struct ext4_dir_entry_2 **res_dir);
   static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
-                            struct dentry *dentry, struct inode *inode);
+                            struct inode *dir, struct inode *inode);
   
   /* checksumming functions */
   void initialize_dirent_tail(struct ext4_dir_entry_tail *t,
@@@ -1928,10 -1928,9 +1928,9 @@@ static int add_dirent_to_buf(handle_t *
    * directory, and adds the dentry to the indexed directory.
    */
   static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
-                           struct dentry *dentry,
+                           struct inode *dir,
                             struct inode *inode, struct buffer_head *bh)
   {
-       struct inode    *dir = d_inode(dentry->d_parent);
         struct buffer_head *bh2;
         struct dx_root  *root;
         struct dx_frame frames[2], *frame;
@@@ -2086,8 -2085,7 +2085,7 @@@ static int ext4_add_entry(handle_t *han
                 return retval;
   
         if (ext4_has_inline_data(dir)) {
-               retval = ext4_try_add_inline_entry(handle, &fname,
-                                                  dentry, inode);
+               retval = ext4_try_add_inline_entry(handle, &fname, dir, inode);
                 if (retval < 0)
                         goto out;
                 if (retval == 1) {
@@@ -2097,7 -2095,7 +2095,7 @@@
         }
   
         if (is_dx(dir)) {
-               retval = ext4_dx_add_entry(handle, &fname, dentry, inode);
+               retval = ext4_dx_add_entry(handle, &fname, dir, inode);
                 if (!retval || (retval != ERR_BAD_DX_DIR))
                         goto out;
                 ext4_clear_inode_flag(dir, EXT4_INODE_INDEX);
@@@ -2119,7 -2117,7 +2117,7 @@@
   
                 if (blocks == 1 && !dx_fallback &&
                     ext4_has_feature_dir_index(sb)) {
-                       retval = make_indexed_dir(handle, &fname, dentry,
+                       retval = make_indexed_dir(handle, &fname, dir,
                                                   inode, bh);
                         bh = NULL; /* make_indexed_dir releases bh */
                         goto out;
@@@ -2154,12 -2152,11 +2152,11 @@@ out
    * Returns 0 for success, or a negative error value
    */
   static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
-                            struct dentry *dentry, struct inode *inode)
+                            struct inode *dir, struct inode *inode)
   {
         struct dx_frame frames[2], *frame;
         struct dx_entry *entries, *at;
         struct buffer_head *bh;
-       struct inode *dir = d_inode(dentry->d_parent);
         struct super_block *sb = dir->i_sb;
         struct ext4_dir_entry_2 *de;
         int err;
@@@ -3132,7 -3129,6 +3129,7 @@@ static int ext4_symlink(struct inode *d
         if ((disk_link.len > EXT4_N_BLOCKS * 4)) {
                 if (!encryption_required)
                         inode->i_op = &ext4_symlink_inode_operations;
+ +              inode_nohighmem(inode);
                 ext4_set_aops(inode);
                 /*
                  * We cannot call page_symlink() with transaction started
@@@ -3212,6 -3208,12 +3209,12 @@@ static int ext4_link(struct dentry *old
         if (ext4_encrypted_inode(dir) &&
             !ext4_is_child_context_consistent_with_parent(dir, inode))
                 return -EPERM;
+ 
+        if ((ext4_test_inode_flag(dir, EXT4_INODE_PROJINHERIT)) &&
+          (!projid_eq(EXT4_I(dir)->i_projid,
+                      EXT4_I(old_dentry->d_inode)->i_projid)))
+               return -EXDEV;
+ 
         err = dquot_initialize(dir);
         if (err)
                 return err;
@@@ -3492,6 -3494,11 +3495,11 @@@ static int ext4_rename(struct inode *ol
         int credits;
         u8 old_file_type;
   
+       if ((ext4_test_inode_flag(new_dir, EXT4_INODE_PROJINHERIT)) &&
+           (!projid_eq(EXT4_I(new_dir)->i_projid,
+                       EXT4_I(old_dentry->d_inode)->i_projid)))
+               return -EXDEV;
+ 
         retval = dquot_initialize(old.dir);
         if (retval)
                 return retval;
@@@ -3701,6 -3708,14 +3709,14 @@@ static int ext4_cross_rename(struct ino
                                                            new.inode)))
                 return -EPERM;
   
+       if ((ext4_test_inode_flag(new_dir, EXT4_INODE_PROJINHERIT) &&
+            !projid_eq(EXT4_I(new_dir)->i_projid,
+                       EXT4_I(old_dentry->d_inode)->i_projid)) ||
+           (ext4_test_inode_flag(old_dir, EXT4_INODE_PROJINHERIT) &&
+            !projid_eq(EXT4_I(old_dir)->i_projid,
+                       EXT4_I(new_dentry->d_inode)->i_projid)))
+               return -EXDEV;
+ 
         retval = dquot_initialize(old.dir);
         if (retval)
                 return retval;
diff --combined fs/ext4/super.c

index f1b56ff0120894e3e4cc79886928ea7852ac5e25,3aea58a7ea8f700826dc08259a0e9a4ee75b5b42..00c98fab6333562bfd42b5585bdf39ba2579e25c
--- 1/fs/ext4/super.c
--- 2/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@@ -80,6 -80,36 +80,36 @@@ static void ext4_destroy_lazyinit_threa
   static void ext4_unregister_li_request(struct super_block *sb);
   static void ext4_clear_request_list(void);
   
+ /*
+  * Lock ordering
+  *
+  * Note the difference between i_mmap_sem (EXT4_I(inode)->i_mmap_sem) and
+  * i_mmap_rwsem (inode->i_mmap_rwsem)!
+  *
+  * page fault path:
+  * mmap_sem -> sb_start_pagefault -> i_mmap_sem (r) -> transaction start ->
+  *   page lock -> i_data_sem (rw)
+  *
+  * buffered write path:
+  * sb_start_write -> i_mutex -> mmap_sem
+  * sb_start_write -> i_mutex -> transaction start -> page lock ->
+  *   i_data_sem (rw)
+  *
+  * truncate:
+  * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (w) -> i_mmap_sem (w) ->
+  *   i_mmap_rwsem (w) -> page lock
+  * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (w) -> i_mmap_sem (w) ->
+  *   transaction start -> i_data_sem (rw)
+  *
+  * direct IO:
+  * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (r) -> mmap_sem
+  * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (r) ->
+  *   transaction start -> i_data_sem (rw)
+  *
+  * writepages:
+  * transaction start -> page lock(s) -> i_data_sem (rw)
+  */
+ 
   #if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2)
   static struct file_system_type ext2_fs_type = {
         .owner          = THIS_MODULE,
@@@ -958,6 -988,7 +988,7 @@@ static void init_once(void *foo
         INIT_LIST_HEAD(&ei->i_orphan);
         init_rwsem(&ei->xattr_sem);
         init_rwsem(&ei->i_data_sem);
+       init_rwsem(&ei->i_mmap_sem);
         inode_init_once(&ei->vfs_inode);
   }
   
@@@ -966,7 -997,7 +997,7 @@@ static int __init init_inodecache(void
         ext4_inode_cachep = kmem_cache_create("ext4_inode_cache",
                                              sizeof(struct ext4_inode_info),
                                              0, (SLAB_RECLAIM_ACCOUNT|
- -                                              SLAB_MEM_SPREAD),
+ +                                              SLAB_MEM_SPREAD|SLAB_ACCOUNT),
                                              init_once);
         if (ext4_inode_cachep == NULL)
                 return -ENOMEM;
@@@ -1066,8 -1097,8 +1097,8 @@@ static int bdev_try_to_free_page(struc
   }
   
   #ifdef CONFIG_QUOTA
- #define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group")
- #define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))
+ static char *quotatypes[] = INITQFNAMES;
+ #define QTYPE2NAME(t) (quotatypes[t])
   
   static int ext4_write_dquot(struct dquot *dquot);
   static int ext4_acquire_dquot(struct dquot *dquot);
@@@ -1100,6 -1131,7 +1131,7 @@@ static const struct dquot_operations ex
         .write_info     = ext4_write_info,
         .alloc_dquot    = dquot_alloc,
         .destroy_dquot  = dquot_destroy,
+       .get_projid     = ext4_get_projid,
   };
   
   static const struct quotactl_ops ext4_qctl_operations = {
@@@ -2526,6 -2558,12 +2558,12 @@@ static int ext4_feature_set_ok(struct s
                          "without CONFIG_QUOTA");
                 return 0;
         }
+       if (ext4_has_feature_project(sb) && !readonly) {
+               ext4_msg(sb, KERN_ERR,
+                        "Filesystem with project quota feature cannot be mounted RDWR "
+                        "without CONFIG_QUOTA");
+               return 0;
+       }
   #endif  /* CONFIG_QUOTA */
         return 1;
   }
@@@ -3654,7 -3692,7 +3692,7 @@@ static int ext4_fill_super(struct super
                 sb->s_qcop = &dquot_quotactl_sysfile_ops;
         else
                 sb->s_qcop = &ext4_qctl_operations;
-       sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP;
+       sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
   #endif
         memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
   
@@@ -4790,6 -4828,48 +4828,48 @@@ restore_opts
         return err;
   }
   
+ #ifdef CONFIG_QUOTA
+ static int ext4_statfs_project(struct super_block *sb,
+                              kprojid_t projid, struct kstatfs *buf)
+ {
+       struct kqid qid;
+       struct dquot *dquot;
+       u64 limit;
+       u64 curblock;
+ 
+       qid = make_kqid_projid(projid);
+       dquot = dqget(sb, qid);
+       if (IS_ERR(dquot))
+               return PTR_ERR(dquot);
+       spin_lock(&dq_data_lock);
+ 
+       limit = (dquot->dq_dqb.dqb_bsoftlimit ?
+                dquot->dq_dqb.dqb_bsoftlimit :
+                dquot->dq_dqb.dqb_bhardlimit) >> sb->s_blocksize_bits;
+       if (limit && buf->f_blocks > limit) {
+               curblock = dquot->dq_dqb.dqb_curspace >> sb->s_blocksize_bits;
+               buf->f_blocks = limit;
+               buf->f_bfree = buf->f_bavail =
+                       (buf->f_blocks > curblock) ?
+                        (buf->f_blocks - curblock) : 0;
+       }
+ 
+       limit = dquot->dq_dqb.dqb_isoftlimit ?
+               dquot->dq_dqb.dqb_isoftlimit :
+               dquot->dq_dqb.dqb_ihardlimit;
+       if (limit && buf->f_files > limit) {
+               buf->f_files = limit;
+               buf->f_ffree =
+                       (buf->f_files > dquot->dq_dqb.dqb_curinodes) ?
+                        (buf->f_files - dquot->dq_dqb.dqb_curinodes) : 0;
+       }
+ 
+       spin_unlock(&dq_data_lock);
+       dqput(dquot);
+       return 0;
+ }
+ #endif
+ 
   static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
   {
         struct super_block *sb = dentry->d_sb;
@@@ -4822,6 -4902,11 +4902,11 @@@
         buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
         buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
   
+ #ifdef CONFIG_QUOTA
+       if (ext4_test_inode_flag(dentry->d_inode, EXT4_INODE_PROJINHERIT) &&
+           sb_has_quota_limits_enabled(sb, PRJQUOTA))
+               ext4_statfs_project(sb, EXT4_I(dentry->d_inode)->i_projid, buf);
+ #endif
         return 0;
   }
   
@@@ -4986,7 -5071,8 +5071,8 @@@ static int ext4_quota_enable(struct sup
         struct inode *qf_inode;
         unsigned long qf_inums[EXT4_MAXQUOTAS] = {
                 le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
-               le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum)
+               le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum),
+               le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum)
         };
   
         BUG_ON(!ext4_has_feature_quota(sb));
@@@ -5014,7 -5100,8 +5100,8 @@@ static int ext4_enable_quotas(struct su
         int type, err = 0;
         unsigned long qf_inums[EXT4_MAXQUOTAS] = {
                 le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
-               le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum)
+               le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum),
+               le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum)
         };
   
         sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
diff --combined include/uapi/linux/fs.h

index 8c8451f76633c566751e795d7616ff730740582e,c5083d2e2c0296949a383dde6ebb74ab89dcc30c..41e0433b4a8398b3dc3da6d2d6aacb8e46287150
--- 1/include/uapi/linux/fs.h
--- 2/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@@ -2,8 -2,11 +2,11 @@@
   #define _UAPI_LINUX_FS_H
   
   /*
-  * This file has definitions for some important file table
-  * structures etc.
+  * This file has definitions for some important file table structures
+  * and constants and structures used by various generic file system
+  * ioctl's.  Please do not make any changes in this file before
+  * sending patches for review to linux-fsdevel@vger.kernel.org and
+  * linux-api@vger.kernel.org.
    */
   
   #include <linux/limits.h>
@@@ -39,48 -42,12 +42,48 @@@
   #define RENAME_EXCHANGE               (1 << 1)        /* Exchange source and dest */
   #define RENAME_WHITEOUT               (1 << 2)        /* Whiteout source */
   
+ +struct file_clone_range {	/* argument structure of the FICLONERANGE ioctl */
+ +      __s64 src_fd;		/* presumably the fd of the file to clone from -- confirm */
+ +      __u64 src_offset;	/* presumably the start offset within the source file -- confirm */
+ +      __u64 src_length;	/* presumably the length of the source range -- confirm */
+ +      __u64 dest_offset;	/* presumably the start offset within the destination -- confirm */
+ +};
+ +
   struct fstrim_range {	/* argument structure of the FITRIM ioctl */
         __u64 start;	/* presumably the start of the range to trim -- confirm units */
         __u64 len;	/* presumably the length of the range to trim -- confirm units */
         __u64 minlen;	/* presumably the smallest extent worth trimming -- confirm */
   };
   
+ +/* extent-same (dedupe) ioctls; these MUST match the btrfs ioctl definitions */
+ +#define FILE_DEDUPE_RANGE_SAME                0
+ +#define FILE_DEDUPE_RANGE_DIFFERS     1
+ +
+ +/* from struct btrfs_ioctl_file_extent_same_info; element type of file_dedupe_range.info[] */
+ +struct file_dedupe_range_info {
+ +      __s64 dest_fd;          /* in - destination file */
+ +      __u64 dest_offset;      /* in - start of extent in destination */
+ +      __u64 bytes_deduped;    /* out - total # of bytes we were able
+ +                               * to dedupe from this file. */
+ +      /* status of this dedupe operation:
+ +       * < 0 for error
+ +       * == FILE_DEDUPE_RANGE_SAME if dedupe succeeds
+ +       * == FILE_DEDUPE_RANGE_DIFFERS if data differs
+ +       */
+ +      __s32 status;           /* out - see above description */
+ +      __u32 reserved;         /* must be zero */
+ +};
+ +
+ +/* from struct btrfs_ioctl_file_extent_same_args; argument structure of the FIDEDUPERANGE ioctl */
+ +struct file_dedupe_range {
+ +      __u64 src_offset;       /* in - start of extent in source */
+ +      __u64 src_length;       /* in - length of extent */
+ +      __u16 dest_count;       /* in - total elements in info array */
+ +      __u16 reserved1;        /* must be zero */
+ +      __u32 reserved2;        /* must be zero */
+ +      struct file_dedupe_range_info info[0];	/* trailing array; dest_count gives its element count */
+ +};
+ +
   /* And dynamically-tunable limits and defaults: */
   struct files_stat_struct {
         unsigned long nr_files;         /* read only */
@@@ -146,37 -113,6 +149,37 @@@ struct inodes_stat_t 
   #define MS_MGC_VAL 0xC0ED0000
   #define MS_MGC_MSK 0xffff0000
   
+ +/*
+ + * Structure for FS_IOC_FSGETXATTR[A] and FS_IOC_FSSETXATTR.
+ + */
+ +struct fsxattr {
+ +      __u32           fsx_xflags;     /* xflags field value (get/set) */
+ +      __u32           fsx_extsize;    /* extsize field value (get/set)*/
+ +      __u32           fsx_nextents;   /* nextents field value (get)   */
+ +      __u32           fsx_projid;     /* project identifier (get/set) */
+ +      unsigned char   fsx_pad[12];    /* padding; leaves the structure room to grow */
+ +};
+ +
+ +/*
+ + * Flags for the fsx_xflags field
+ + */
+ +#define FS_XFLAG_REALTIME     0x00000001      /* data in realtime volume */
+ +#define FS_XFLAG_PREALLOC     0x00000002      /* preallocated file extents */
+ +#define FS_XFLAG_IMMUTABLE    0x00000008      /* file cannot be modified */
+ +#define FS_XFLAG_APPEND               0x00000010      /* all writes append */
+ +#define FS_XFLAG_SYNC         0x00000020      /* all writes synchronous */
+ +#define FS_XFLAG_NOATIME      0x00000040      /* do not update access time */
+ +#define FS_XFLAG_NODUMP               0x00000080      /* do not include in backups */
+ +#define FS_XFLAG_RTINHERIT    0x00000100      /* create with rt bit set */
+ +#define FS_XFLAG_PROJINHERIT  0x00000200      /* create with parents projid */
+ +#define FS_XFLAG_NOSYMLINKS   0x00000400      /* disallow symlink creation */
+ +#define FS_XFLAG_EXTSIZE      0x00000800      /* extent size allocator hint */
+ +#define FS_XFLAG_EXTSZINHERIT 0x00001000      /* inherit inode extent size */
+ +#define FS_XFLAG_NODEFRAG     0x00002000      /* do not defragment */
+ +#define FS_XFLAG_FILESTREAM   0x00004000      /* use filestream allocator */
+ +#define FS_XFLAG_DAX          0x00008000      /* use DAX for IO */
+ +#define FS_XFLAG_HASATTR      0x80000000      /* no DIFLAG for this   */
+ +
   /* the read-only stuff doesn't really belong here, but any other place is
      probably as bad and I don't want to create yet another include file. */
   
@@@ -219,8 -155,6 +222,8 @@@
   #define BLKSECDISCARD _IO(0x12,125)
   #define BLKROTATIONAL _IO(0x12,126)
   #define BLKZEROOUT _IO(0x12,127)
+ +#define BLKDAXSET _IO(0x12,128)
+ +#define BLKDAXGET _IO(0x12,129)
   
   #define BMAP_IOCTL 1          /* obsolete - kept for compatibility */
   #define FIBMAP           _IO(0x00,1)  /* bmap access */
@@@ -228,9 -162,6 +231,9 @@@
   #define FIFREEZE      _IOWR('X', 119, int)    /* Freeze */
   #define FITHAW                _IOWR('X', 120, int)    /* Thaw */
   #define FITRIM                _IOWR('X', 121, struct fstrim_range)    /* Trim */
+ +#define FICLONE               _IOW(0x94, 9, int)
+ +#define FICLONERANGE  _IOW(0x94, 13, struct file_clone_range)
+ +#define FIDEDUPERANGE _IOWR(0x94, 54, struct file_dedupe_range)
   
   #define       FS_IOC_GETFLAGS                 _IOR('f', 1, long)
   #define       FS_IOC_SETFLAGS                 _IOW('f', 2, long)
@@@ -241,11 -172,26 +244,28 @@@
   #define FS_IOC32_SETFLAGS             _IOW('f', 2, int)
   #define FS_IOC32_GETVERSION           _IOR('v', 1, int)
   #define FS_IOC32_SETVERSION           _IOW('v', 2, int)
+ +#define FS_IOC_FSGETXATTR             _IOR ('X', 31, struct fsxattr)
+ +#define FS_IOC_FSSETXATTR             _IOW ('X', 32, struct fsxattr)
   
   /*
    * Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS)
+  *
+  * Note: for historical reasons, these flags were originally used and
+  * defined for use by ext2/ext3, and then other file systems started
+  * using these flags so they wouldn't need to write their own version
+  * of chattr/lsattr (which was shipped as part of e2fsprogs).  You
+  * should think twice before trying to use these flags in new
+  * contexts, or trying to assign these flags, since they are used both
+  * as the UAPI and the on-disk encoding for ext2/3/4.  Also, we are
+  * almost out of 32-bit flags.  :-)
+  *
+  * We have recently hoisted FS_IOC_FSGETXATTR / FS_IOC_FSSETXATTR from
+  * XFS to the generic FS level interface.  This uses a structure that
+  * has padding and hence has more room to grow, so it may be more
+  * appropriate for many new use cases.
+  *
+  * Please do not change these flags or interfaces before checking with
+  * linux-fsdevel@vger.kernel.org and linux-api@vger.kernel.org.
    */
   #define       FS_SECRM_FL                     0x00000001 /* Secure deletion */
   #define       FS_UNRM_FL                      0x00000002 /* Undelete */
@@@ -259,8 -205,8 +279,8 @@@
   #define FS_DIRTY_FL                   0x00000100
   #define FS_COMPRBLK_FL                        0x00000200 /* One or more compressed clusters */
   #define FS_NOCOMP_FL                  0x00000400 /* Don't compress */
- #define FS_ECOMPR_FL                  0x00000800 /* Compression error */
   /* End compression flags --- maybe not all used */
+ #define FS_ENCRYPT_FL                 0x00000800 /* Encrypted file */
   #define FS_BTREE_FL                   0x00001000 /* btree format dir */
   #define FS_INDEX_FL                   0x00001000 /* hash-indexed directory */
   #define FS_IMAGIC_FL                  0x00002000 /* AFS directory */
@@@ -268,9 -214,12 +288,12 @@@
   #define FS_NOTAIL_FL                  0x00008000 /* file tail should not be merged */
   #define FS_DIRSYNC_FL                 0x00010000 /* dirsync behaviour (directories only) */
   #define FS_TOPDIR_FL                  0x00020000 /* Top of directory hierarchies*/
+ #define FS_HUGE_FILE_FL                       0x00040000 /* Reserved for ext4 */
   #define FS_EXTENT_FL                  0x00080000 /* Extents */
- #define FS_DIRECTIO_FL                        0x00100000 /* Use direct i/o */
+ #define FS_EA_INODE_FL                        0x00200000 /* Inode used for large EA */
+ #define FS_EOFBLOCKS_FL                       0x00400000 /* Reserved for ext4 */
   #define FS_NOCOW_FL                   0x00800000 /* Do not cow file */
+ #define FS_INLINE_DATA_FL             0x10000000 /* Reserved for ext4 */
   #define FS_PROJINHERIT_FL             0x20000000 /* Create with parents projid */
   #define FS_RESERVED_FL                        0x80000000 /* reserved for ext2 lib */
author	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 22 Jan 2016 19:23:35 +0000 (11:23 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 22 Jan 2016 19:23:35 +0000 (11:23 -0800)
		1	2
fs/ext4/inode.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/ext4/namei.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/ext4/super.c	patch \|	diff1 \|	diff2 \|	blob \| history
include/uapi/linux/fs.h	patch \|	diff1 \|	diff2 \|	blob \| history