Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso...
authorLinus Torvalds <torvalds@linux-foundation.org>
Fri, 22 Jan 2016 19:23:35 +0000 (11:23 -0800)
committerLinus Torvalds <torvalds@linux-foundation.org>
Fri, 22 Jan 2016 19:23:35 +0000 (11:23 -0800)
Pull ext4 updates from Ted Ts'o:
 "Some locking and page fault bug fixes from Jan Kara, some ext4
  encryption fixes from me, and Li Xi's Project Quota commits"

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  fs: clean up the flags definition in uapi/linux/fs.h
  ext4: add FS_IOC_FSSETXATTR/FS_IOC_FSGETXATTR interface support
  ext4: add project quota support
  ext4: adds project ID support
  ext4 crypto: simplify interfaces to directory entry insert functions
  ext4 crypto: add missing locking for keyring_key access
  ext4: use pre-zeroed blocks for DAX page faults
  ext4: implement allocation of pre-zeroed blocks
  ext4: provide ext4_issue_zeroout()
  ext4: get rid of EXT4_GET_BLOCKS_NO_LOCK flag
  ext4: document lock ordering
  ext4: fix races of writeback with punch hole and zero range
  ext4: fix races between buffered IO and collapse / insert range
  ext4: move unlocked dio protection from ext4_alloc_file_blocks()
  ext4: fix races between page faults and hole punching

14 files changed:
fs/ext4/crypto.c
fs/ext4/crypto_key.c
fs/ext4/ext4.h
fs/ext4/extents.c
fs/ext4/file.c
fs/ext4/ialloc.c
fs/ext4/inline.c
fs/ext4/inode.c
fs/ext4/ioctl.c
fs/ext4/namei.c
fs/ext4/super.c
fs/ext4/truncate.h
include/trace/events/ext4.h
include/uapi/linux/fs.h

index 1a0835073663ff3f1dad1f376328de5e2b1332ce..c8021208a7eb1a98ba32e16a8536146f570d5c4a 100644 (file)
@@ -384,14 +384,12 @@ int ext4_decrypt(struct page *page)
                                EXT4_DECRYPT, page->index, page, page);
 }
 
-int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex)
+int ext4_encrypted_zeroout(struct inode *inode, ext4_lblk_t lblk,
+                          ext4_fsblk_t pblk, ext4_lblk_t len)
 {
        struct ext4_crypto_ctx  *ctx;
        struct page             *ciphertext_page = NULL;
        struct bio              *bio;
-       ext4_lblk_t             lblk = le32_to_cpu(ex->ee_block);
-       ext4_fsblk_t            pblk = ext4_ext_pblock(ex);
-       unsigned int            len = ext4_ext_get_actual_len(ex);
        int                     ret, err = 0;
 
 #if 0
index c5882b36e5582d0005582d0fe3ac86cab70d9c9c..9a16d1e75a493f9e4663bb4ea05b9da9980ba65d 100644 (file)
@@ -213,9 +213,11 @@ retry:
                res = -ENOKEY;
                goto out;
        }
+       down_read(&keyring_key->sem);
        ukp = user_key_payload(keyring_key);
        if (ukp->datalen != sizeof(struct ext4_encryption_key)) {
                res = -EINVAL;
+               up_read(&keyring_key->sem);
                goto out;
        }
        master_key = (struct ext4_encryption_key *)ukp->data;
@@ -226,10 +228,12 @@ retry:
                            "ext4: key size incorrect: %d\n",
                            master_key->size);
                res = -ENOKEY;
+               up_read(&keyring_key->sem);
                goto out;
        }
        res = ext4_derive_key_aes(ctx.nonce, master_key->raw,
                                  raw_key);
+       up_read(&keyring_key->sem);
        if (res)
                goto out;
 got_key:
index cc7ca4e87144a540332213ce03b63e80e601f40e..1c127213363a0cf98e2442a286805f1abef09177 100644 (file)
@@ -378,14 +378,22 @@ struct flex_groups {
 #define EXT4_PROJINHERIT_FL            0x20000000 /* Create with parents projid */
 #define EXT4_RESERVED_FL               0x80000000 /* reserved for ext4 lib */
 
-#define EXT4_FL_USER_VISIBLE           0x004BDFFF /* User visible flags */
-#define EXT4_FL_USER_MODIFIABLE                0x004380FF /* User modifiable flags */
+#define EXT4_FL_USER_VISIBLE           0x304BDFFF /* User visible flags */
+#define EXT4_FL_USER_MODIFIABLE                0x204380FF /* User modifiable flags */
+
+#define EXT4_FL_XFLAG_VISIBLE          (EXT4_SYNC_FL | \
+                                        EXT4_IMMUTABLE_FL | \
+                                        EXT4_APPEND_FL | \
+                                        EXT4_NODUMP_FL | \
+                                        EXT4_NOATIME_FL | \
+                                        EXT4_PROJINHERIT_FL)
 
 /* Flags that should be inherited by new inodes from their parent. */
 #define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
                           EXT4_SYNC_FL | EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
                           EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\
-                          EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL)
+                          EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL |\
+                          EXT4_PROJINHERIT_FL)
 
 /* Flags that are appropriate for regular files (all but dir-specific ones). */
 #define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL))
@@ -555,10 +563,12 @@ enum {
 #define EXT4_GET_BLOCKS_NO_NORMALIZE           0x0040
        /* Request will not result in inode size update (user for fallocate) */
 #define EXT4_GET_BLOCKS_KEEP_SIZE              0x0080
-       /* Do not take i_data_sem locking in ext4_map_blocks */
-#define EXT4_GET_BLOCKS_NO_LOCK                        0x0100
        /* Convert written extents to unwritten */
-#define EXT4_GET_BLOCKS_CONVERT_UNWRITTEN      0x0200
+#define EXT4_GET_BLOCKS_CONVERT_UNWRITTEN      0x0100
+       /* Write zeros to newly created written extents */
+#define EXT4_GET_BLOCKS_ZERO                   0x0200
+#define EXT4_GET_BLOCKS_CREATE_ZERO            (EXT4_GET_BLOCKS_CREATE |\
+                                       EXT4_GET_BLOCKS_ZERO)
 
 /*
  * The bit position of these flags must not overlap with any of the
@@ -616,6 +626,46 @@ enum {
 #define EXT4_IOC_GET_ENCRYPTION_PWSALT _IOW('f', 20, __u8[16])
 #define EXT4_IOC_GET_ENCRYPTION_POLICY _IOW('f', 21, struct ext4_encryption_policy)
 
+#ifndef FS_IOC_FSGETXATTR
+/* Until the uapi changes get merged for project quota... */
+
+#define FS_IOC_FSGETXATTR              _IOR('X', 31, struct fsxattr)
+#define FS_IOC_FSSETXATTR              _IOW('X', 32, struct fsxattr)
+
+/*
+ * Structure for FS_IOC_FSGETXATTR and FS_IOC_FSSETXATTR.
+ */
+struct fsxattr {
+       __u32           fsx_xflags;     /* xflags field value (get/set) */
+       __u32           fsx_extsize;    /* extsize field value (get/set)*/
+       __u32           fsx_nextents;   /* nextents field value (get)   */
+       __u32           fsx_projid;     /* project identifier (get/set) */
+       unsigned char   fsx_pad[12];
+};
+
+/*
+ * Flags for the fsx_xflags field
+ */
+#define FS_XFLAG_REALTIME      0x00000001      /* data in realtime volume */
+#define FS_XFLAG_PREALLOC      0x00000002      /* preallocated file extents */
+#define FS_XFLAG_IMMUTABLE     0x00000008      /* file cannot be modified */
+#define FS_XFLAG_APPEND                0x00000010      /* all writes append */
+#define FS_XFLAG_SYNC          0x00000020      /* all writes synchronous */
+#define FS_XFLAG_NOATIME       0x00000040      /* do not update access time */
+#define FS_XFLAG_NODUMP                0x00000080      /* do not include in backups */
+#define FS_XFLAG_RTINHERIT     0x00000100      /* create with rt bit set */
+#define FS_XFLAG_PROJINHERIT   0x00000200      /* create with parents projid */
+#define FS_XFLAG_NOSYMLINKS    0x00000400      /* disallow symlink creation */
+#define FS_XFLAG_EXTSIZE       0x00000800      /* extent size allocator hint */
+#define FS_XFLAG_EXTSZINHERIT  0x00001000      /* inherit inode extent size */
+#define FS_XFLAG_NODEFRAG      0x00002000      /* do not defragment */
+#define FS_XFLAG_FILESTREAM    0x00004000      /* use filestream allocator */
+#define FS_XFLAG_HASATTR       0x80000000      /* no DIFLAG for this */
+#endif /* !defined(FS_IOC_FSGETXATTR) */
+
+#define EXT4_IOC_FSGETXATTR            FS_IOC_FSGETXATTR
+#define EXT4_IOC_FSSETXATTR            FS_IOC_FSSETXATTR
+
 #if defined(__KERNEL__) && defined(CONFIG_COMPAT)
 /*
  * ioctl commands in 32 bit emulation
@@ -910,6 +960,15 @@ struct ext4_inode_info {
         * by other means, so we have i_data_sem.
         */
        struct rw_semaphore i_data_sem;
+       /*
+        * i_mmap_sem is for serializing page faults with truncate / punch hole
+        * operations. We have to make sure that new page cannot be faulted in
+        * a section of the inode that is being punched. We cannot easily use
+        * i_data_sem for this since we need protection for the whole punch
+        * operation and i_data_sem ranks below transaction start so we have
+        * to occasionally drop it.
+        */
+       struct rw_semaphore i_mmap_sem;
        struct inode vfs_inode;
        struct jbd2_inode *jinode;
 
@@ -993,6 +1052,7 @@ struct ext4_inode_info {
        /* Encryption params */
        struct ext4_crypt_info *i_crypt_info;
 #endif
+       kprojid_t i_projid;
 };
 
 /*
@@ -1248,7 +1308,7 @@ struct ext4_super_block {
 #endif
 
 /* Number of quota types we support */
-#define EXT4_MAXQUOTAS 2
+#define EXT4_MAXQUOTAS 3
 
 /*
  * fourth extended-fs super-block data in memory
@@ -1754,7 +1814,8 @@ EXT4_FEATURE_INCOMPAT_FUNCS(encrypt,              ENCRYPT)
                                         EXT4_FEATURE_RO_COMPAT_HUGE_FILE |\
                                         EXT4_FEATURE_RO_COMPAT_BIGALLOC |\
                                         EXT4_FEATURE_RO_COMPAT_METADATA_CSUM|\
-                                        EXT4_FEATURE_RO_COMPAT_QUOTA)
+                                        EXT4_FEATURE_RO_COMPAT_QUOTA |\
+                                        EXT4_FEATURE_RO_COMPAT_PROJECT)
 
 #define EXTN_FEATURE_FUNCS(ver) \
 static inline bool ext4_has_unknown_ext##ver##_compat_features(struct super_block *sb) \
@@ -1796,6 +1857,11 @@ static inline bool ext4_has_incompat_features(struct super_block *sb)
 #define        EXT4_DEF_RESUID         0
 #define        EXT4_DEF_RESGID         0
 
+/*
+ * Default project ID
+ */
+#define        EXT4_DEF_PROJID         0
+
 #define EXT4_DEF_INODE_READAHEAD_BLKS  32
 
 /*
@@ -2234,7 +2300,8 @@ void ext4_restore_control_page(struct page *data_page);
 struct page *ext4_encrypt(struct inode *inode,
                          struct page *plaintext_page);
 int ext4_decrypt(struct page *page);
-int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex);
+int ext4_encrypted_zeroout(struct inode *inode, ext4_lblk_t lblk,
+                          ext4_fsblk_t pblk, ext4_lblk_t len);
 
 #ifdef CONFIG_EXT4_FS_ENCRYPTION
 int ext4_init_crypto(void);
@@ -2440,8 +2507,8 @@ struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int);
 struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int);
 int ext4_get_block_write(struct inode *inode, sector_t iblock,
                         struct buffer_head *bh_result, int create);
-int ext4_get_block_dax(struct inode *inode, sector_t iblock,
-                        struct buffer_head *bh_result, int create);
+int ext4_dax_mmap_get_block(struct inode *inode, sector_t iblock,
+                           struct buffer_head *bh_result, int create);
 int ext4_get_block(struct inode *inode, sector_t iblock,
                                struct buffer_head *bh_result, int create);
 int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
@@ -2484,9 +2551,13 @@ extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
 extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
                             loff_t lstart, loff_t lend);
 extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
+extern int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
 extern qsize_t *ext4_get_reserved_space(struct inode *inode);
+extern int ext4_get_projid(struct inode *inode, kprojid_t *projid);
 extern void ext4_da_update_reserve_space(struct inode *inode,
                                        int used, int quota_claim);
+extern int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk,
+                             ext4_fsblk_t pblk, ext4_lblk_t len);
 
 /* indirect.c */
 extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
@@ -2848,6 +2919,9 @@ static inline int ext4_update_inode_size(struct inode *inode, loff_t newsize)
        return changed;
 }
 
+int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
+                                     loff_t len);
+
 struct ext4_group_info {
        unsigned long   bb_state;
        struct rb_root  bb_free_root;
@@ -2986,8 +3060,7 @@ extern int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos,
                                         struct page *page);
 extern int ext4_try_add_inline_entry(handle_t *handle,
                                     struct ext4_filename *fname,
-                                    struct dentry *dentry,
-                                    struct inode *inode);
+                                    struct inode *dir, struct inode *inode);
 extern int ext4_try_create_inline_dir(handle_t *handle,
                                      struct inode *parent,
                                      struct inode *inode);
index 551353b1b17ab930ead8eff62ee3a699b4cfe10f..b52fea3b72196b35f53b68b05ddb3d9e36b1277d 100644 (file)
@@ -3119,19 +3119,11 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
 {
        ext4_fsblk_t ee_pblock;
        unsigned int ee_len;
-       int ret;
 
        ee_len    = ext4_ext_get_actual_len(ex);
        ee_pblock = ext4_ext_pblock(ex);
-
-       if (ext4_encrypted_inode(inode))
-               return ext4_encrypted_zeroout(inode, ex);
-
-       ret = sb_issue_zeroout(inode->i_sb, ee_pblock, ee_len, GFP_NOFS);
-       if (ret > 0)
-               ret = 0;
-
-       return ret;
+       return ext4_issue_zeroout(inode, le32_to_cpu(ex->ee_block), ee_pblock,
+                                 ee_len);
 }
 
 /*
@@ -4052,6 +4044,14 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,
        }
        /* IO end_io complete, convert the filled extent to written */
        if (flags & EXT4_GET_BLOCKS_CONVERT) {
+               if (flags & EXT4_GET_BLOCKS_ZERO) {
+                       if (allocated > map->m_len)
+                               allocated = map->m_len;
+                       err = ext4_issue_zeroout(inode, map->m_lblk, newblock,
+                                                allocated);
+                       if (err < 0)
+                               goto out2;
+               }
                ret = ext4_convert_unwritten_extents_endio(handle, inode, map,
                                                           ppath);
                if (ret >= 0) {
@@ -4685,10 +4685,6 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
        if (len <= EXT_UNWRITTEN_MAX_LEN)
                flags |= EXT4_GET_BLOCKS_NO_NORMALIZE;
 
-       /* Wait all existing dio workers, newcomers will block on i_mutex */
-       ext4_inode_block_unlocked_dio(inode);
-       inode_dio_wait(inode);
-
        /*
         * credits to insert 1 extent into extent tree
         */
@@ -4752,8 +4748,6 @@ retry:
                goto retry;
        }
 
-       ext4_inode_resume_unlocked_dio(inode);
-
        return ret > 0 ? ret2 : ret;
 }
 
@@ -4770,7 +4764,6 @@ static long ext4_zero_range(struct file *file, loff_t offset,
        int partial_begin, partial_end;
        loff_t start, end;
        ext4_lblk_t lblk;
-       struct address_space *mapping = inode->i_mapping;
        unsigned int blkbits = inode->i_blkbits;
 
        trace_ext4_zero_range(inode, offset, len, mode);
@@ -4785,17 +4778,6 @@ static long ext4_zero_range(struct file *file, loff_t offset,
                        return ret;
        }
 
-       /*
-        * Write out all dirty pages to avoid race conditions
-        * Then release them.
-        */
-       if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
-               ret = filemap_write_and_wait_range(mapping, offset,
-                                                  offset + len - 1);
-               if (ret)
-                       return ret;
-       }
-
        /*
         * Round up offset. This is not fallocate, we neet to zero out
         * blocks, so convert interior block aligned part of the range to
@@ -4839,6 +4821,10 @@ static long ext4_zero_range(struct file *file, loff_t offset,
        if (mode & FALLOC_FL_KEEP_SIZE)
                flags |= EXT4_GET_BLOCKS_KEEP_SIZE;
 
+       /* Wait all existing dio workers, newcomers will block on i_mutex */
+       ext4_inode_block_unlocked_dio(inode);
+       inode_dio_wait(inode);
+
        /* Preallocate the range including the unaligned edges */
        if (partial_begin || partial_end) {
                ret = ext4_alloc_file_blocks(file,
@@ -4847,7 +4833,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
                                 round_down(offset, 1 << blkbits)) >> blkbits,
                                new_size, flags, mode);
                if (ret)
-                       goto out_mutex;
+                       goto out_dio;
 
        }
 
@@ -4856,16 +4842,23 @@ static long ext4_zero_range(struct file *file, loff_t offset,
                flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN |
                          EXT4_EX_NOCACHE);
 
-               /* Now release the pages and zero block aligned part of pages*/
+               /*
+                * Prevent page faults from reinstantiating pages we have
+                * released from page cache.
+                */
+               down_write(&EXT4_I(inode)->i_mmap_sem);
+               ret = ext4_update_disksize_before_punch(inode, offset, len);
+               if (ret) {
+                       up_write(&EXT4_I(inode)->i_mmap_sem);
+                       goto out_dio;
+               }
+               /* Now release the pages and zero block aligned part of pages */
                truncate_pagecache_range(inode, start, end - 1);
                inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
 
-               /* Wait all existing dio workers, newcomers will block on i_mutex */
-               ext4_inode_block_unlocked_dio(inode);
-               inode_dio_wait(inode);
-
                ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
                                             flags, mode);
+               up_write(&EXT4_I(inode)->i_mmap_sem);
                if (ret)
                        goto out_dio;
        }
@@ -4998,8 +4991,13 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
                        goto out;
        }
 
+       /* Wait all existing dio workers, newcomers will block on i_mutex */
+       ext4_inode_block_unlocked_dio(inode);
+       inode_dio_wait(inode);
+
        ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
                                     flags, mode);
+       ext4_inode_resume_unlocked_dio(inode);
        if (ret)
                goto out;
 
@@ -5494,21 +5492,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
                        return ret;
        }
 
-       /*
-        * Need to round down offset to be aligned with page size boundary
-        * for page size > block size.
-        */
-       ioffset = round_down(offset, PAGE_SIZE);
-
-       /* Write out all dirty pages */
-       ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
-                                          LLONG_MAX);
-       if (ret)
-               return ret;
-
-       /* Take mutex lock */
        mutex_lock(&inode->i_mutex);
-
        /*
         * There is no need to overlap collapse range with EOF, in which case
         * it is effectively a truncate operation
@@ -5524,17 +5508,43 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
                goto out_mutex;
        }
 
-       truncate_pagecache(inode, ioffset);
-
        /* Wait for existing dio to complete */
        ext4_inode_block_unlocked_dio(inode);
        inode_dio_wait(inode);
 
+       /*
+        * Prevent page faults from reinstantiating pages we have released from
+        * page cache.
+        */
+       down_write(&EXT4_I(inode)->i_mmap_sem);
+       /*
+        * Need to round down offset to be aligned with page size boundary
+        * for page size > block size.
+        */
+       ioffset = round_down(offset, PAGE_SIZE);
+       /*
+        * Write tail of the last page before removed range since it will get
+        * removed from the page cache below.
+        */
+       ret = filemap_write_and_wait_range(inode->i_mapping, ioffset, offset);
+       if (ret)
+               goto out_mmap;
+       /*
+        * Write data that will be shifted to preserve them when discarding
+        * page cache below. We are also protected from pages becoming dirty
+        * by i_mmap_sem.
+        */
+       ret = filemap_write_and_wait_range(inode->i_mapping, offset + len,
+                                          LLONG_MAX);
+       if (ret)
+               goto out_mmap;
+       truncate_pagecache(inode, ioffset);
+
        credits = ext4_writepage_trans_blocks(inode);
        handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
        if (IS_ERR(handle)) {
                ret = PTR_ERR(handle);
-               goto out_dio;
+               goto out_mmap;
        }
 
        down_write(&EXT4_I(inode)->i_data_sem);
@@ -5573,7 +5583,8 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
 
 out_stop:
        ext4_journal_stop(handle);
-out_dio:
+out_mmap:
+       up_write(&EXT4_I(inode)->i_mmap_sem);
        ext4_inode_resume_unlocked_dio(inode);
 out_mutex:
        mutex_unlock(&inode->i_mutex);
@@ -5627,21 +5638,7 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
                        return ret;
        }
 
-       /*
-        * Need to round down to align start offset to page size boundary
-        * for page size > block size.
-        */
-       ioffset = round_down(offset, PAGE_SIZE);
-
-       /* Write out all dirty pages */
-       ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
-                       LLONG_MAX);
-       if (ret)
-               return ret;
-
-       /* Take mutex lock */
        mutex_lock(&inode->i_mutex);
-
        /* Currently just for extent based files */
        if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
                ret = -EOPNOTSUPP;
@@ -5660,17 +5657,32 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
                goto out_mutex;
        }
 
-       truncate_pagecache(inode, ioffset);
-
        /* Wait for existing dio to complete */
        ext4_inode_block_unlocked_dio(inode);
        inode_dio_wait(inode);
 
+       /*
+        * Prevent page faults from reinstantiating pages we have released from
+        * page cache.
+        */
+       down_write(&EXT4_I(inode)->i_mmap_sem);
+       /*
+        * Need to round down to align start offset to page size boundary
+        * for page size > block size.
+        */
+       ioffset = round_down(offset, PAGE_SIZE);
+       /* Write out all dirty pages */
+       ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
+                       LLONG_MAX);
+       if (ret)
+               goto out_mmap;
+       truncate_pagecache(inode, ioffset);
+
        credits = ext4_writepage_trans_blocks(inode);
        handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
        if (IS_ERR(handle)) {
                ret = PTR_ERR(handle);
-               goto out_dio;
+               goto out_mmap;
        }
 
        /* Expand file to avoid data loss if there is error while shifting */
@@ -5741,7 +5753,8 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
 
 out_stop:
        ext4_journal_stop(handle);
-out_dio:
+out_mmap:
+       up_write(&EXT4_I(inode)->i_mmap_sem);
        ext4_inode_resume_unlocked_dio(inode);
 out_mutex:
        mutex_unlock(&inode->i_mutex);
index 113837e7ba98d5cf866ee365a40a89d7fc781a71..749b222e6498e8647126af37830dd1c312a9fb20 100644 (file)
@@ -193,43 +193,35 @@ out:
 }
 
 #ifdef CONFIG_FS_DAX
-static void ext4_end_io_unwritten(struct buffer_head *bh, int uptodate)
-{
-       struct inode *inode = bh->b_assoc_map->host;
-       /* XXX: breaks on 32-bit > 16TB. Is that even supported? */
-       loff_t offset = (loff_t)(uintptr_t)bh->b_private << inode->i_blkbits;
-       int err;
-       if (!uptodate)
-               return;
-       WARN_ON(!buffer_unwritten(bh));
-       err = ext4_convert_unwritten_extents(NULL, inode, offset, bh->b_size);
-}
-
 static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
        int result;
        handle_t *handle = NULL;
-       struct super_block *sb = file_inode(vma->vm_file)->i_sb;
+       struct inode *inode = file_inode(vma->vm_file);
+       struct super_block *sb = inode->i_sb;
        bool write = vmf->flags & FAULT_FLAG_WRITE;
 
        if (write) {
                sb_start_pagefault(sb);
                file_update_time(vma->vm_file);
+               down_read(&EXT4_I(inode)->i_mmap_sem);
                handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
                                                EXT4_DATA_TRANS_BLOCKS(sb));
-       }
+       } else
+               down_read(&EXT4_I(inode)->i_mmap_sem);
 
        if (IS_ERR(handle))
                result = VM_FAULT_SIGBUS;
        else
-               result = __dax_fault(vma, vmf, ext4_get_block_dax,
-                                               ext4_end_io_unwritten);
+               result = __dax_fault(vma, vmf, ext4_dax_mmap_get_block, NULL);
 
        if (write) {
                if (!IS_ERR(handle))
                        ext4_journal_stop(handle);
+               up_read(&EXT4_I(inode)->i_mmap_sem);
                sb_end_pagefault(sb);
-       }
+       } else
+               up_read(&EXT4_I(inode)->i_mmap_sem);
 
        return result;
 }
@@ -246,44 +238,86 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
        if (write) {
                sb_start_pagefault(sb);
                file_update_time(vma->vm_file);
+               down_read(&EXT4_I(inode)->i_mmap_sem);
                handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
                                ext4_chunk_trans_blocks(inode,
                                                        PMD_SIZE / PAGE_SIZE));
-       }
+       } else
+               down_read(&EXT4_I(inode)->i_mmap_sem);
 
        if (IS_ERR(handle))
                result = VM_FAULT_SIGBUS;
        else
                result = __dax_pmd_fault(vma, addr, pmd, flags,
-                               ext4_get_block_dax, ext4_end_io_unwritten);
+                               ext4_dax_mmap_get_block, NULL);
 
        if (write) {
                if (!IS_ERR(handle))
                        ext4_journal_stop(handle);
+               up_read(&EXT4_I(inode)->i_mmap_sem);
                sb_end_pagefault(sb);
-       }
+       } else
+               up_read(&EXT4_I(inode)->i_mmap_sem);
 
        return result;
 }
 
 static int ext4_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
-       return dax_mkwrite(vma, vmf, ext4_get_block_dax,
-                               ext4_end_io_unwritten);
+       int err;
+       struct inode *inode = file_inode(vma->vm_file);
+
+       sb_start_pagefault(inode->i_sb);
+       file_update_time(vma->vm_file);
+       down_read(&EXT4_I(inode)->i_mmap_sem);
+       err = __dax_mkwrite(vma, vmf, ext4_dax_mmap_get_block, NULL);
+       up_read(&EXT4_I(inode)->i_mmap_sem);
+       sb_end_pagefault(inode->i_sb);
+
+       return err;
+}
+
+/*
+ * Handle write fault for VM_MIXEDMAP mappings. Similarly to ext4_dax_mkwrite()
+ * handler we check for races agaist truncate. Note that since we cycle through
+ * i_mmap_sem, we are sure that also any hole punching that began before we
+ * were called is finished by now and so if it included part of the file we
+ * are working on, our pte will get unmapped and the check for pte_same() in
+ * wp_pfn_shared() fails. Thus fault gets retried and things work out as
+ * desired.
+ */
+static int ext4_dax_pfn_mkwrite(struct vm_area_struct *vma,
+                               struct vm_fault *vmf)
+{
+       struct inode *inode = file_inode(vma->vm_file);
+       struct super_block *sb = inode->i_sb;
+       int ret = VM_FAULT_NOPAGE;
+       loff_t size;
+
+       sb_start_pagefault(sb);
+       file_update_time(vma->vm_file);
+       down_read(&EXT4_I(inode)->i_mmap_sem);
+       size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
+       if (vmf->pgoff >= size)
+               ret = VM_FAULT_SIGBUS;
+       up_read(&EXT4_I(inode)->i_mmap_sem);
+       sb_end_pagefault(sb);
+
+       return ret;
 }
 
 static const struct vm_operations_struct ext4_dax_vm_ops = {
        .fault          = ext4_dax_fault,
        .pmd_fault      = ext4_dax_pmd_fault,
        .page_mkwrite   = ext4_dax_mkwrite,
-       .pfn_mkwrite    = dax_pfn_mkwrite,
+       .pfn_mkwrite    = ext4_dax_pfn_mkwrite,
 };
 #else
 #define ext4_dax_vm_ops        ext4_file_vm_ops
 #endif
 
 static const struct vm_operations_struct ext4_file_vm_ops = {
-       .fault          = filemap_fault,
+       .fault          = ext4_filemap_fault,
        .map_pages      = filemap_map_pages,
        .page_mkwrite   = ext4_page_mkwrite,
 };
index 1b8024d26f654c5458c7e84db5a2c439adc6500e..3fcfd50a2e8a0c05315d823fa1219bea2c87f6c8 100644 (file)
@@ -799,6 +799,13 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
                inode->i_gid = dir->i_gid;
        } else
                inode_init_owner(inode, dir, mode);
+
+       if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_PROJECT) &&
+           ext4_test_inode_flag(dir, EXT4_INODE_PROJINHERIT))
+               ei->i_projid = EXT4_I(dir)->i_projid;
+       else
+               ei->i_projid = make_kprojid(&init_user_ns, EXT4_DEF_PROJID);
+
        err = dquot_initialize(inode);
        if (err)
                goto out;
index d884989cc83dd99238a710f8131ab38b1139c7ca..dfe3b9bafc0d2b9cb4957c189290f8da68fb4dc4 100644 (file)
@@ -995,12 +995,11 @@ void ext4_show_inline_dir(struct inode *dir, struct buffer_head *bh,
  */
 static int ext4_add_dirent_to_inline(handle_t *handle,
                                     struct ext4_filename *fname,
-                                    struct dentry *dentry,
+                                    struct inode *dir,
                                     struct inode *inode,
                                     struct ext4_iloc *iloc,
                                     void *inline_start, int inline_size)
 {
-       struct inode    *dir = d_inode(dentry->d_parent);
        int             err;
        struct ext4_dir_entry_2 *de;
 
@@ -1245,12 +1244,11 @@ out:
  * the new created block.
  */
 int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname,
-                             struct dentry *dentry, struct inode *inode)
+                             struct inode *dir, struct inode *inode)
 {
        int ret, inline_size;
        void *inline_start;
        struct ext4_iloc iloc;
-       struct inode *dir = d_inode(dentry->d_parent);
 
        ret = ext4_get_inode_loc(dir, &iloc);
        if (ret)
@@ -1264,7 +1262,7 @@ int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname,
                                                 EXT4_INLINE_DOTDOT_SIZE;
        inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE;
 
-       ret = ext4_add_dirent_to_inline(handle, fname, dentry, inode, &iloc,
+       ret = ext4_add_dirent_to_inline(handle, fname, dir, inode, &iloc,
                                        inline_start, inline_size);
        if (ret != -ENOSPC)
                goto out;
@@ -1285,7 +1283,7 @@ int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname,
        if (inline_size) {
                inline_start = ext4_get_inline_xattr_pos(dir, &iloc);
 
-               ret = ext4_add_dirent_to_inline(handle, fname, dentry,
+               ret = ext4_add_dirent_to_inline(handle, fname, dir,
                                                inode, &iloc, inline_start,
                                                inline_size);
 
index b3bd912df6bfaf475f9304eba7fb1b9ce6368e87..d964195ea0e2ad15db38e5a5ae9e79a2a145d45b 100644 (file)
@@ -383,6 +383,21 @@ static int __check_block_validity(struct inode *inode, const char *func,
        return 0;
 }
 
+int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk, ext4_fsblk_t pblk,
+                      ext4_lblk_t len)
+{
+       int ret;
+
+       if (ext4_encrypted_inode(inode))
+               return ext4_encrypted_zeroout(inode, lblk, pblk, len);
+
+       ret = sb_issue_zeroout(inode->i_sb, pblk, len, GFP_NOFS);
+       if (ret > 0)
+               ret = 0;
+
+       return ret;
+}
+
 #define check_block_validity(inode, map)       \
        __check_block_validity((inode), __func__, __LINE__, (map))
 
@@ -403,8 +418,7 @@ static void ext4_map_blocks_es_recheck(handle_t *handle,
         * out taking i_data_sem.  So at the time the unwritten extent
         * could be converted.
         */
-       if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
-               down_read(&EXT4_I(inode)->i_data_sem);
+       down_read(&EXT4_I(inode)->i_data_sem);
        if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
                retval = ext4_ext_map_blocks(handle, inode, map, flags &
                                             EXT4_GET_BLOCKS_KEEP_SIZE);
@@ -412,8 +426,7 @@ static void ext4_map_blocks_es_recheck(handle_t *handle,
                retval = ext4_ind_map_blocks(handle, inode, map, flags &
                                             EXT4_GET_BLOCKS_KEEP_SIZE);
        }
-       if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
-               up_read((&EXT4_I(inode)->i_data_sem));
+       up_read((&EXT4_I(inode)->i_data_sem));
 
        /*
         * We don't check m_len because extent will be collpased in status
@@ -509,8 +522,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
         * Try to see if we can get the block without requesting a new
         * file system block.
         */
-       if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
-               down_read(&EXT4_I(inode)->i_data_sem);
+       down_read(&EXT4_I(inode)->i_data_sem);
        if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
                retval = ext4_ext_map_blocks(handle, inode, map, flags &
                                             EXT4_GET_BLOCKS_KEEP_SIZE);
@@ -541,8 +553,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
                if (ret < 0)
                        retval = ret;
        }
-       if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
-               up_read((&EXT4_I(inode)->i_data_sem));
+       up_read((&EXT4_I(inode)->i_data_sem));
 
 found:
        if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
@@ -625,6 +636,22 @@ found:
                        WARN_ON(1);
                }
 
+               /*
+                * We have to zeroout blocks before inserting them into extent
+                * status tree. Otherwise someone could look them up there and
+                * use them before they are really zeroed.
+                */
+               if (flags & EXT4_GET_BLOCKS_ZERO &&
+                   map->m_flags & EXT4_MAP_MAPPED &&
+                   map->m_flags & EXT4_MAP_NEW) {
+                       ret = ext4_issue_zeroout(inode, map->m_lblk,
+                                                map->m_pblk, map->m_len);
+                       if (ret) {
+                               retval = ret;
+                               goto out_sem;
+                       }
+               }
+
                /*
                 * If the extent has been zeroed out, we don't need to update
                 * extent status tree.
@@ -632,7 +659,7 @@ found:
                if ((flags & EXT4_GET_BLOCKS_PRE_IO) &&
                    ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
                        if (ext4_es_is_written(&es))
-                               goto has_zeroout;
+                               goto out_sem;
                }
                status = map->m_flags & EXT4_MAP_UNWRITTEN ?
                                EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
@@ -643,11 +670,13 @@ found:
                        status |= EXTENT_STATUS_DELAYED;
                ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
                                            map->m_pblk, status);
-               if (ret < 0)
+               if (ret < 0) {
                        retval = ret;
+                       goto out_sem;
+               }
        }
 
-has_zeroout:
+out_sem:
        up_write((&EXT4_I(inode)->i_data_sem));
        if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
                ret = check_block_validity(inode, map);
@@ -674,7 +703,7 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock,
        map.m_lblk = iblock;
        map.m_len = bh->b_size >> inode->i_blkbits;
 
-       if (flags && !(flags & EXT4_GET_BLOCKS_NO_LOCK) && !handle) {
+       if (flags && !handle) {
                /* Direct IO write... */
                if (map.m_len > DIO_MAX_BLOCKS)
                        map.m_len = DIO_MAX_BLOCKS;
@@ -694,16 +723,6 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock,
 
                map_bh(bh, inode->i_sb, map.m_pblk);
                bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;
-               if (IS_DAX(inode) && buffer_unwritten(bh)) {
-                       /*
-                        * dgc: I suspect unwritten conversion on ext4+DAX is
-                        * fundamentally broken here when there are concurrent
-                        * read/write in progress on this inode.
-                        */
-                       WARN_ON_ONCE(io_end);
-                       bh->b_assoc_map = inode->i_mapping;
-                       bh->b_private = (void *)(unsigned long)iblock;
-               }
                if (io_end && io_end->flag & EXT4_IO_END_UNWRITTEN)
                        set_buffer_defer_completion(bh);
                bh->b_size = inode->i_sb->s_blocksize * map.m_len;
@@ -879,9 +898,6 @@ int do_journal_get_write_access(handle_t *handle,
        return ret;
 }
 
-static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock,
-                  struct buffer_head *bh_result, int create);
-
 #ifdef CONFIG_EXT4_FS_ENCRYPTION
 static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len,
                                  get_block_t *get_block)
@@ -3054,25 +3070,96 @@ int ext4_get_block_write(struct inode *inode, sector_t iblock,
                               EXT4_GET_BLOCKS_IO_CREATE_EXT);
 }
 
-static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock,
+static int ext4_get_block_overwrite(struct inode *inode, sector_t iblock,
                   struct buffer_head *bh_result, int create)
 {
-       ext4_debug("ext4_get_block_write_nolock: inode %lu, create flag %d\n",
+       int ret;
+
+       ext4_debug("ext4_get_block_overwrite: inode %lu, create flag %d\n",
                   inode->i_ino, create);
-       return _ext4_get_block(inode, iblock, bh_result,
-                              EXT4_GET_BLOCKS_NO_LOCK);
+       ret = _ext4_get_block(inode, iblock, bh_result, 0);
+       /*
+        * Blocks should have been preallocated! ext4_file_write_iter() checks
+        * that.
+        */
+       WARN_ON_ONCE(!buffer_mapped(bh_result));
+
+       return ret;
 }
 
-int ext4_get_block_dax(struct inode *inode, sector_t iblock,
-                  struct buffer_head *bh_result, int create)
+#ifdef CONFIG_FS_DAX
+int ext4_dax_mmap_get_block(struct inode *inode, sector_t iblock,
+                           struct buffer_head *bh_result, int create)
 {
-       int flags = EXT4_GET_BLOCKS_PRE_IO | EXT4_GET_BLOCKS_UNWRIT_EXT;
-       if (create)
-               flags |= EXT4_GET_BLOCKS_CREATE;
-       ext4_debug("ext4_get_block_dax: inode %lu, create flag %d\n",
+       int ret, err;
+       int credits;
+       struct ext4_map_blocks map;
+       handle_t *handle = NULL;
+       int flags = 0;
+
+       ext4_debug("ext4_dax_mmap_get_block: inode %lu, create flag %d\n",
                   inode->i_ino, create);
-       return _ext4_get_block(inode, iblock, bh_result, flags);
+       map.m_lblk = iblock;
+       map.m_len = bh_result->b_size >> inode->i_blkbits;
+       credits = ext4_chunk_trans_blocks(inode, map.m_len);
+       if (create) {
+               flags |= EXT4_GET_BLOCKS_PRE_IO | EXT4_GET_BLOCKS_CREATE_ZERO;
+               handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, credits);
+               if (IS_ERR(handle)) {
+                       ret = PTR_ERR(handle);
+                       return ret;
+               }
+       }
+
+       ret = ext4_map_blocks(handle, inode, &map, flags);
+       if (create) {
+               err = ext4_journal_stop(handle);
+               if (ret >= 0 && err < 0)
+                       ret = err;
+       }
+       if (ret <= 0)
+               goto out;
+       if (map.m_flags & EXT4_MAP_UNWRITTEN) {
+               int err2;
+
+               /*
+                * We are protected by i_mmap_sem so we know block cannot go
+                * away from under us even though we dropped i_data_sem.
+                * Convert extent to written and write zeros there.
+                *
+                * Note: We may get here even when create == 0.
+                */
+               handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, credits);
+               if (IS_ERR(handle)) {
+                       ret = PTR_ERR(handle);
+                       goto out;
+               }
+
+               err = ext4_map_blocks(handle, inode, &map,
+                     EXT4_GET_BLOCKS_CONVERT | EXT4_GET_BLOCKS_CREATE_ZERO);
+               if (err < 0)
+                       ret = err;
+               err2 = ext4_journal_stop(handle);
+               if (err2 < 0 && ret > 0)
+                       ret = err2;
+       }
+out:
+       WARN_ON_ONCE(ret == 0 && create);
+       if (ret > 0) {
+               map_bh(bh_result, inode->i_sb, map.m_pblk);
+               bh_result->b_state = (bh_result->b_state & ~EXT4_MAP_FLAGS) |
+                                       map.m_flags;
+               /*
+                * At least for now we have to clear BH_New so that DAX code
+                * doesn't attempt to zero blocks again in a racy way.
+                */
+               bh_result->b_state &= ~(1 << BH_New);
+               bh_result->b_size = map.m_len << inode->i_blkbits;
+               ret = 0;
+       }
+       return ret;
 }
+#endif
 
 static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
                            ssize_t size, void *private)
@@ -3143,10 +3230,8 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
        /* If we do a overwrite dio, i_mutex locking can be released */
        overwrite = *((int *)iocb->private);
 
-       if (overwrite) {
-               down_read(&EXT4_I(inode)->i_data_sem);
+       if (overwrite)
                mutex_unlock(&inode->i_mutex);
-       }
 
        /*
         * We could direct write to holes and fallocate.
@@ -3189,7 +3274,7 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
        }
 
        if (overwrite) {
-               get_block_func = ext4_get_block_write_nolock;
+               get_block_func = ext4_get_block_overwrite;
        } else {
                get_block_func = ext4_get_block_write;
                dio_flags = DIO_LOCKING;
@@ -3245,10 +3330,8 @@ retake_lock:
        if (iov_iter_rw(iter) == WRITE)
                inode_dio_end(inode);
        /* take i_mutex locking again if we do a ovewrite dio */
-       if (overwrite) {
-               up_read(&EXT4_I(inode)->i_data_sem);
+       if (overwrite)
                mutex_lock(&inode->i_mutex);
-       }
 
        return ret;
 }
@@ -3558,6 +3641,35 @@ int ext4_can_truncate(struct inode *inode)
        return 0;
 }
 
+/*
+ * We have to make sure i_disksize gets properly updated before we truncate
+ * page cache due to hole punching or zero range. Otherwise i_disksize update
+ * can get lost as it may have been postponed to submission of writeback but
+ * that will never happen after we truncate page cache.
+ */
+int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
+                                     loff_t len)
+{
+       handle_t *handle;
+       loff_t size = i_size_read(inode);
+
+       WARN_ON(!mutex_is_locked(&inode->i_mutex));
+       if (offset > size || offset + len < size)
+               return 0;
+
+       if (EXT4_I(inode)->i_disksize >= size)
+               return 0;
+
+       handle = ext4_journal_start(inode, EXT4_HT_MISC, 1);
+       if (IS_ERR(handle))
+               return PTR_ERR(handle);
+       ext4_update_i_disksize(inode, size);
+       ext4_mark_inode_dirty(handle, inode);
+       ext4_journal_stop(handle);
+
+       return 0;
+}
+
 /*
  * ext4_punch_hole: punches a hole in a file by releaseing the blocks
  * associated with the given offset and length
@@ -3623,17 +3735,26 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
 
        }
 
+       /* Wait all existing dio workers, newcomers will block on i_mutex */
+       ext4_inode_block_unlocked_dio(inode);
+       inode_dio_wait(inode);
+
+       /*
+        * Prevent page faults from reinstantiating pages we have released from
+        * page cache.
+        */
+       down_write(&EXT4_I(inode)->i_mmap_sem);
        first_block_offset = round_up(offset, sb->s_blocksize);
        last_block_offset = round_down((offset + length), sb->s_blocksize) - 1;
 
        /* Now release the pages and zero block aligned part of pages*/
-       if (last_block_offset > first_block_offset)
+       if (last_block_offset > first_block_offset) {
+               ret = ext4_update_disksize_before_punch(inode, offset, length);
+               if (ret)
+                       goto out_dio;
                truncate_pagecache_range(inode, first_block_offset,
                                         last_block_offset);
-
-       /* Wait all existing dio workers, newcomers will block on i_mutex */
-       ext4_inode_block_unlocked_dio(inode);
-       inode_dio_wait(inode);
+       }
 
        if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
                credits = ext4_writepage_trans_blocks(inode);
@@ -3680,16 +3801,12 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
        if (IS_SYNC(inode))
                ext4_handle_sync(handle);
 
-       /* Now release the pages again to reduce race window */
-       if (last_block_offset > first_block_offset)
-               truncate_pagecache_range(inode, first_block_offset,
-                                        last_block_offset);
-
        inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
        ext4_mark_inode_dirty(handle, inode);
 out_stop:
        ext4_journal_stop(handle);
 out_dio:
+       up_write(&EXT4_I(inode)->i_mmap_sem);
        ext4_inode_resume_unlocked_dio(inode);
 out_mutex:
        mutex_unlock(&inode->i_mutex);
@@ -4076,6 +4193,14 @@ static inline void ext4_iget_extra_inode(struct inode *inode,
                EXT4_I(inode)->i_inline_off = 0;
 }
 
+int ext4_get_projid(struct inode *inode, kprojid_t *projid)
+{
+       if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, EXT4_FEATURE_RO_COMPAT_PROJECT))
+               return -EOPNOTSUPP;
+       *projid = EXT4_I(inode)->i_projid;
+       return 0;
+}
+
 struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
 {
        struct ext4_iloc iloc;
@@ -4087,6 +4212,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
        int block;
        uid_t i_uid;
        gid_t i_gid;
+       projid_t i_projid;
 
        inode = iget_locked(sb, ino);
        if (!inode)
@@ -4136,12 +4262,20 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
        inode->i_mode = le16_to_cpu(raw_inode->i_mode);
        i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
        i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
+       if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_PROJECT) &&
+           EXT4_INODE_SIZE(sb) > EXT4_GOOD_OLD_INODE_SIZE &&
+           EXT4_FITS_IN_INODE(raw_inode, ei, i_projid))
+               i_projid = (projid_t)le32_to_cpu(raw_inode->i_projid);
+       else
+               i_projid = EXT4_DEF_PROJID;
+
        if (!(test_opt(inode->i_sb, NO_UID32))) {
                i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
                i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
        }
        i_uid_write(inode, i_uid);
        i_gid_write(inode, i_gid);
+       ei->i_projid = make_kprojid(&init_user_ns, i_projid);
        set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
 
        ext4_clear_state_flags(ei);     /* Only relevant on 32-bit archs */
@@ -4440,6 +4574,7 @@ static int ext4_do_update_inode(handle_t *handle,
        int need_datasync = 0, set_large_file = 0;
        uid_t i_uid;
        gid_t i_gid;
+       projid_t i_projid;
 
        spin_lock(&ei->i_raw_lock);
 
@@ -4452,6 +4587,7 @@ static int ext4_do_update_inode(handle_t *handle,
        raw_inode->i_mode = cpu_to_le16(inode->i_mode);
        i_uid = i_uid_read(inode);
        i_gid = i_gid_read(inode);
+       i_projid = from_kprojid(&init_user_ns, ei->i_projid);
        if (!(test_opt(inode->i_sb, NO_UID32))) {
                raw_inode->i_uid_low = cpu_to_le16(low_16_bits(i_uid));
                raw_inode->i_gid_low = cpu_to_le16(low_16_bits(i_gid));
@@ -4529,6 +4665,15 @@ static int ext4_do_update_inode(handle_t *handle,
                                cpu_to_le16(ei->i_extra_isize);
                }
        }
+
+       BUG_ON(!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
+                       EXT4_FEATURE_RO_COMPAT_PROJECT) &&
+              i_projid != EXT4_DEF_PROJID);
+
+       if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE &&
+           EXT4_FITS_IN_INODE(raw_inode, ei, i_projid))
+               raw_inode->i_projid = cpu_to_le32(i_projid);
+
        ext4_inode_csum_set(inode, raw_inode, ei);
        spin_unlock(&ei->i_raw_lock);
        if (inode->i_sb->s_flags & MS_LAZYTIME)
@@ -4824,6 +4969,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
                        } else
                                ext4_wait_for_tail_page_commit(inode);
                }
+               down_write(&EXT4_I(inode)->i_mmap_sem);
                /*
                 * Truncate pagecache after we've waited for commit
                 * in data=journal mode to make pages freeable.
@@ -4831,6 +4977,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
                truncate_pagecache(inode, inode->i_size);
                if (shrink)
                        ext4_truncate(inode);
+               up_write(&EXT4_I(inode)->i_mmap_sem);
        }
 
        if (!rc) {
@@ -5279,6 +5426,8 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 
        sb_start_pagefault(inode->i_sb);
        file_update_time(vma->vm_file);
+
+       down_read(&EXT4_I(inode)->i_mmap_sem);
        /* Delalloc case is easy... */
        if (test_opt(inode->i_sb, DELALLOC) &&
            !ext4_should_journal_data(inode) &&
@@ -5348,6 +5497,19 @@ retry_alloc:
 out_ret:
        ret = block_page_mkwrite_return(ret);
 out:
+       up_read(&EXT4_I(inode)->i_mmap_sem);
        sb_end_pagefault(inode->i_sb);
        return ret;
 }
+
+int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+       struct inode *inode = file_inode(vma->vm_file);
+       int err;
+
+       down_read(&EXT4_I(inode)->i_mmap_sem);
+       err = filemap_fault(vma, vmf);
+       up_read(&EXT4_I(inode)->i_mmap_sem);
+
+       return err;
+}
index 5e872fd40e5e38655435fbcf91802244c9ce8959..2b0cb84255eb3662c38be467bf24101a094f0e86 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/mount.h>
 #include <linux/file.h>
 #include <linux/random.h>
+#include <linux/quotaops.h>
 #include <asm/uaccess.h>
 #include "ext4_jbd2.h"
 #include "ext4.h"
@@ -202,6 +203,238 @@ static int uuid_is_zero(__u8 u[16])
        return 1;
 }
 
+static int ext4_ioctl_setflags(struct inode *inode,
+                              unsigned int flags)
+{
+       struct ext4_inode_info *ei = EXT4_I(inode);
+       handle_t *handle = NULL;
+       int err = EPERM, migrate = 0;
+       struct ext4_iloc iloc;
+       unsigned int oldflags, mask, i;
+       unsigned int jflag;
+
+       /* Is it quota file? Do not allow user to mess with it */
+       if (IS_NOQUOTA(inode))
+               goto flags_out;
+
+       oldflags = ei->i_flags;
+
+       /* The JOURNAL_DATA flag is modifiable only by root */
+       jflag = flags & EXT4_JOURNAL_DATA_FL;
+
+       /*
+        * The IMMUTABLE and APPEND_ONLY flags can only be changed by
+        * the relevant capability.
+        *
+        * This test looks nicer. Thanks to Pauline Middelink
+        */
+       if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) {
+               if (!capable(CAP_LINUX_IMMUTABLE))
+                       goto flags_out;
+       }
+
+       /*
+        * The JOURNAL_DATA flag can only be changed by
+        * the relevant capability.
+        */
+       if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
+               if (!capable(CAP_SYS_RESOURCE))
+                       goto flags_out;
+       }
+       if ((flags ^ oldflags) & EXT4_EXTENTS_FL)
+               migrate = 1;
+
+       if (flags & EXT4_EOFBLOCKS_FL) {
+               /* we don't support adding EOFBLOCKS flag */
+               if (!(oldflags & EXT4_EOFBLOCKS_FL)) {
+                       err = -EOPNOTSUPP;
+                       goto flags_out;
+               }
+       } else if (oldflags & EXT4_EOFBLOCKS_FL)
+               ext4_truncate(inode);
+
+       handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
+       if (IS_ERR(handle)) {
+               err = PTR_ERR(handle);
+               goto flags_out;
+       }
+       if (IS_SYNC(inode))
+               ext4_handle_sync(handle);
+       err = ext4_reserve_inode_write(handle, inode, &iloc);
+       if (err)
+               goto flags_err;
+
+       for (i = 0, mask = 1; i < 32; i++, mask <<= 1) {
+               if (!(mask & EXT4_FL_USER_MODIFIABLE))
+                       continue;
+               if (mask & flags)
+                       ext4_set_inode_flag(inode, i);
+               else
+                       ext4_clear_inode_flag(inode, i);
+       }
+
+       ext4_set_inode_flags(inode);
+       inode->i_ctime = ext4_current_time(inode);
+
+       err = ext4_mark_iloc_dirty(handle, inode, &iloc);
+flags_err:
+       ext4_journal_stop(handle);
+       if (err)
+               goto flags_out;
+
+       if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL))
+               err = ext4_change_inode_journal_flag(inode, jflag);
+       if (err)
+               goto flags_out;
+       if (migrate) {
+               if (flags & EXT4_EXTENTS_FL)
+                       err = ext4_ext_migrate(inode);
+               else
+                       err = ext4_ind_migrate(inode);
+       }
+
+flags_out:
+       return err;
+}
+
+#ifdef CONFIG_QUOTA
+static int ext4_ioctl_setproject(struct file *filp, __u32 projid)
+{
+       struct inode *inode = file_inode(filp);
+       struct super_block *sb = inode->i_sb;
+       struct ext4_inode_info *ei = EXT4_I(inode);
+       int err, rc;
+       handle_t *handle;
+       kprojid_t kprojid;
+       struct ext4_iloc iloc;
+       struct ext4_inode *raw_inode;
+
+       if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
+                       EXT4_FEATURE_RO_COMPAT_PROJECT)) {
+               if (projid != EXT4_DEF_PROJID)
+                       return -EOPNOTSUPP;
+               else
+                       return 0;
+       }
+
+       if (EXT4_INODE_SIZE(sb) <= EXT4_GOOD_OLD_INODE_SIZE)
+               return -EOPNOTSUPP;
+
+       kprojid = make_kprojid(&init_user_ns, (projid_t)projid);
+
+       if (projid_eq(kprojid, EXT4_I(inode)->i_projid))
+               return 0;
+
+       err = mnt_want_write_file(filp);
+       if (err)
+               return err;
+
+       err = -EPERM;
+       mutex_lock(&inode->i_mutex);
+       /* Is it quota file? Do not allow user to mess with it */
+       if (IS_NOQUOTA(inode))
+               goto out_unlock;
+
+       err = ext4_get_inode_loc(inode, &iloc);
+       if (err)
+               goto out_unlock;
+
+       raw_inode = ext4_raw_inode(&iloc);
+       if (!EXT4_FITS_IN_INODE(raw_inode, ei, i_projid)) {
+               err = -EOVERFLOW;
+               brelse(iloc.bh);
+               goto out_unlock;
+       }
+       brelse(iloc.bh);
+
+       dquot_initialize(inode);
+
+       handle = ext4_journal_start(inode, EXT4_HT_QUOTA,
+               EXT4_QUOTA_INIT_BLOCKS(sb) +
+               EXT4_QUOTA_DEL_BLOCKS(sb) + 3);
+       if (IS_ERR(handle)) {
+               err = PTR_ERR(handle);
+               goto out_unlock;
+       }
+
+       err = ext4_reserve_inode_write(handle, inode, &iloc);
+       if (err)
+               goto out_stop;
+
+       if (sb_has_quota_limits_enabled(sb, PRJQUOTA)) {
+               struct dquot *transfer_to[MAXQUOTAS] = { };
+
+               transfer_to[PRJQUOTA] = dqget(sb, make_kqid_projid(kprojid));
+               if (transfer_to[PRJQUOTA]) {
+                       err = __dquot_transfer(inode, transfer_to);
+                       dqput(transfer_to[PRJQUOTA]);
+                       if (err)
+                               goto out_dirty;
+               }
+       }
+       EXT4_I(inode)->i_projid = kprojid;
+       inode->i_ctime = ext4_current_time(inode);
+out_dirty:
+       rc = ext4_mark_iloc_dirty(handle, inode, &iloc);
+       if (!err)
+               err = rc;
+out_stop:
+       ext4_journal_stop(handle);
+out_unlock:
+       mutex_unlock(&inode->i_mutex);
+       mnt_drop_write_file(filp);
+       return err;
+}
+#else
+static int ext4_ioctl_setproject(struct file *filp, __u32 projid)
+{
+       if (projid != EXT4_DEF_PROJID)
+               return -EOPNOTSUPP;
+       return 0;
+}
+#endif
+
+/* Transfer internal flags to xflags */
+static inline __u32 ext4_iflags_to_xflags(unsigned long iflags)
+{
+       __u32 xflags = 0;
+
+       if (iflags & EXT4_SYNC_FL)
+               xflags |= FS_XFLAG_SYNC;
+       if (iflags & EXT4_IMMUTABLE_FL)
+               xflags |= FS_XFLAG_IMMUTABLE;
+       if (iflags & EXT4_APPEND_FL)
+               xflags |= FS_XFLAG_APPEND;
+       if (iflags & EXT4_NODUMP_FL)
+               xflags |= FS_XFLAG_NODUMP;
+       if (iflags & EXT4_NOATIME_FL)
+               xflags |= FS_XFLAG_NOATIME;
+       if (iflags & EXT4_PROJINHERIT_FL)
+               xflags |= FS_XFLAG_PROJINHERIT;
+       return xflags;
+}
+
+/* Transfer xflags flags to internal */
+static inline unsigned long ext4_xflags_to_iflags(__u32 xflags)
+{
+       unsigned long iflags = 0;
+
+       if (xflags & FS_XFLAG_SYNC)
+               iflags |= EXT4_SYNC_FL;
+       if (xflags & FS_XFLAG_IMMUTABLE)
+               iflags |= EXT4_IMMUTABLE_FL;
+       if (xflags & FS_XFLAG_APPEND)
+               iflags |= EXT4_APPEND_FL;
+       if (xflags & FS_XFLAG_NODUMP)
+               iflags |= EXT4_NODUMP_FL;
+       if (xflags & FS_XFLAG_NOATIME)
+               iflags |= EXT4_NOATIME_FL;
+       if (xflags & FS_XFLAG_PROJINHERIT)
+               iflags |= EXT4_PROJINHERIT_FL;
+
+       return iflags;
+}
+
 long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
        struct inode *inode = file_inode(filp);
@@ -217,11 +450,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                flags = ei->i_flags & EXT4_FL_USER_VISIBLE;
                return put_user(flags, (int __user *) arg);
        case EXT4_IOC_SETFLAGS: {
-               handle_t *handle = NULL;
-               int err, migrate = 0;
-               struct ext4_iloc iloc;
-               unsigned int oldflags, mask, i;
-               unsigned int jflag;
+               int err;
 
                if (!inode_owner_or_capable(inode))
                        return -EACCES;
@@ -235,89 +464,8 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 
                flags = ext4_mask_flags(inode->i_mode, flags);
 
-               err = -EPERM;
                mutex_lock(&inode->i_mutex);
-               /* Is it quota file? Do not allow user to mess with it */
-               if (IS_NOQUOTA(inode))
-                       goto flags_out;
-
-               oldflags = ei->i_flags;
-
-               /* The JOURNAL_DATA flag is modifiable only by root */
-               jflag = flags & EXT4_JOURNAL_DATA_FL;
-
-               /*
-                * The IMMUTABLE and APPEND_ONLY flags can only be changed by
-                * the relevant capability.
-                *
-                * This test looks nicer. Thanks to Pauline Middelink
-                */
-               if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) {
-                       if (!capable(CAP_LINUX_IMMUTABLE))
-                               goto flags_out;
-               }
-
-               /*
-                * The JOURNAL_DATA flag can only be changed by
-                * the relevant capability.
-                */
-               if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
-                       if (!capable(CAP_SYS_RESOURCE))
-                               goto flags_out;
-               }
-               if ((flags ^ oldflags) & EXT4_EXTENTS_FL)
-                       migrate = 1;
-
-               if (flags & EXT4_EOFBLOCKS_FL) {
-                       /* we don't support adding EOFBLOCKS flag */
-                       if (!(oldflags & EXT4_EOFBLOCKS_FL)) {
-                               err = -EOPNOTSUPP;
-                               goto flags_out;
-                       }
-               } else if (oldflags & EXT4_EOFBLOCKS_FL)
-                       ext4_truncate(inode);
-
-               handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
-               if (IS_ERR(handle)) {
-                       err = PTR_ERR(handle);
-                       goto flags_out;
-               }
-               if (IS_SYNC(inode))
-                       ext4_handle_sync(handle);
-               err = ext4_reserve_inode_write(handle, inode, &iloc);
-               if (err)
-                       goto flags_err;
-
-               for (i = 0, mask = 1; i < 32; i++, mask <<= 1) {
-                       if (!(mask & EXT4_FL_USER_MODIFIABLE))
-                               continue;
-                       if (mask & flags)
-                               ext4_set_inode_flag(inode, i);
-                       else
-                               ext4_clear_inode_flag(inode, i);
-               }
-
-               ext4_set_inode_flags(inode);
-               inode->i_ctime = ext4_current_time(inode);
-
-               err = ext4_mark_iloc_dirty(handle, inode, &iloc);
-flags_err:
-               ext4_journal_stop(handle);
-               if (err)
-                       goto flags_out;
-
-               if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL))
-                       err = ext4_change_inode_journal_flag(inode, jflag);
-               if (err)
-                       goto flags_out;
-               if (migrate) {
-                       if (flags & EXT4_EXTENTS_FL)
-                               err = ext4_ext_migrate(inode);
-                       else
-                               err = ext4_ind_migrate(inode);
-               }
-
-flags_out:
+               err = ext4_ioctl_setflags(inode, flags);
                mutex_unlock(&inode->i_mutex);
                mnt_drop_write_file(filp);
                return err;
@@ -689,6 +837,60 @@ encryption_policy_out:
                return -EOPNOTSUPP;
 #endif
        }
+       case EXT4_IOC_FSGETXATTR:
+       {
+               struct fsxattr fa;
+
+               memset(&fa, 0, sizeof(struct fsxattr));
+               ext4_get_inode_flags(ei);
+               fa.fsx_xflags = ext4_iflags_to_xflags(ei->i_flags & EXT4_FL_USER_VISIBLE);
+
+               if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
+                               EXT4_FEATURE_RO_COMPAT_PROJECT)) {
+                       fa.fsx_projid = (__u32)from_kprojid(&init_user_ns,
+                               EXT4_I(inode)->i_projid);
+               }
+
+               if (copy_to_user((struct fsxattr __user *)arg,
+                                &fa, sizeof(fa)))
+                       return -EFAULT;
+               return 0;
+       }
+       case EXT4_IOC_FSSETXATTR:
+       {
+               struct fsxattr fa;
+               int err;
+
+               if (copy_from_user(&fa, (struct fsxattr __user *)arg,
+                                  sizeof(fa)))
+                       return -EFAULT;
+
+               /* Make sure caller has proper permission */
+               if (!inode_owner_or_capable(inode))
+                       return -EACCES;
+
+               err = mnt_want_write_file(filp);
+               if (err)
+                       return err;
+
+               flags = ext4_xflags_to_iflags(fa.fsx_xflags);
+               flags = ext4_mask_flags(inode->i_mode, flags);
+
+               mutex_lock(&inode->i_mutex);
+               flags = (ei->i_flags & ~EXT4_FL_XFLAG_VISIBLE) |
+                        (flags & EXT4_FL_XFLAG_VISIBLE);
+               err = ext4_ioctl_setflags(inode, flags);
+               mutex_unlock(&inode->i_mutex);
+               mnt_drop_write_file(filp);
+               if (err)
+                       return err;
+
+               err = ext4_ioctl_setproject(filp, fa.fsx_projid);
+               if (err)
+                       return err;
+
+               return 0;
+       }
        default:
                return -ENOTTY;
        }
index f27e0c2598c59edb5685c27310c2766d0860838d..854f75de4599b8e6f40dcd4a773319e9c8e009dd 100644 (file)
@@ -273,7 +273,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
                struct ext4_filename *fname,
                struct ext4_dir_entry_2 **res_dir);
 static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
-                            struct dentry *dentry, struct inode *inode);
+                            struct inode *dir, struct inode *inode);
 
 /* checksumming functions */
 void initialize_dirent_tail(struct ext4_dir_entry_tail *t,
@@ -1928,10 +1928,9 @@ static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname,
  * directory, and adds the dentry to the indexed directory.
  */
 static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
-                           struct dentry *dentry,
+                           struct inode *dir,
                            struct inode *inode, struct buffer_head *bh)
 {
-       struct inode    *dir = d_inode(dentry->d_parent);
        struct buffer_head *bh2;
        struct dx_root  *root;
        struct dx_frame frames[2], *frame;
@@ -2086,8 +2085,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
                return retval;
 
        if (ext4_has_inline_data(dir)) {
-               retval = ext4_try_add_inline_entry(handle, &fname,
-                                                  dentry, inode);
+               retval = ext4_try_add_inline_entry(handle, &fname, dir, inode);
                if (retval < 0)
                        goto out;
                if (retval == 1) {
@@ -2097,7 +2095,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
        }
 
        if (is_dx(dir)) {
-               retval = ext4_dx_add_entry(handle, &fname, dentry, inode);
+               retval = ext4_dx_add_entry(handle, &fname, dir, inode);
                if (!retval || (retval != ERR_BAD_DX_DIR))
                        goto out;
                ext4_clear_inode_flag(dir, EXT4_INODE_INDEX);
@@ -2119,7 +2117,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
 
                if (blocks == 1 && !dx_fallback &&
                    ext4_has_feature_dir_index(sb)) {
-                       retval = make_indexed_dir(handle, &fname, dentry,
+                       retval = make_indexed_dir(handle, &fname, dir,
                                                  inode, bh);
                        bh = NULL; /* make_indexed_dir releases bh */
                        goto out;
@@ -2154,12 +2152,11 @@ out:
  * Returns 0 for success, or a negative error value
  */
 static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
-                            struct dentry *dentry, struct inode *inode)
+                            struct inode *dir, struct inode *inode)
 {
        struct dx_frame frames[2], *frame;
        struct dx_entry *entries, *at;
        struct buffer_head *bh;
-       struct inode *dir = d_inode(dentry->d_parent);
        struct super_block *sb = dir->i_sb;
        struct ext4_dir_entry_2 *de;
        int err;
@@ -3212,6 +3209,12 @@ static int ext4_link(struct dentry *old_dentry,
        if (ext4_encrypted_inode(dir) &&
            !ext4_is_child_context_consistent_with_parent(dir, inode))
                return -EPERM;
+
+       if ((ext4_test_inode_flag(dir, EXT4_INODE_PROJINHERIT)) &&
+          (!projid_eq(EXT4_I(dir)->i_projid,
+                      EXT4_I(old_dentry->d_inode)->i_projid)))
+               return -EXDEV;
+
        err = dquot_initialize(dir);
        if (err)
                return err;
@@ -3492,6 +3495,11 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
        int credits;
        u8 old_file_type;
 
+       if ((ext4_test_inode_flag(new_dir, EXT4_INODE_PROJINHERIT)) &&
+           (!projid_eq(EXT4_I(new_dir)->i_projid,
+                       EXT4_I(old_dentry->d_inode)->i_projid)))
+               return -EXDEV;
+
        retval = dquot_initialize(old.dir);
        if (retval)
                return retval;
@@ -3701,6 +3709,14 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
                                                           new.inode)))
                return -EPERM;
 
+       if ((ext4_test_inode_flag(new_dir, EXT4_INODE_PROJINHERIT) &&
+            !projid_eq(EXT4_I(new_dir)->i_projid,
+                       EXT4_I(old_dentry->d_inode)->i_projid)) ||
+           (ext4_test_inode_flag(old_dir, EXT4_INODE_PROJINHERIT) &&
+            !projid_eq(EXT4_I(old_dir)->i_projid,
+                       EXT4_I(new_dentry->d_inode)->i_projid)))
+               return -EXDEV;
+
        retval = dquot_initialize(old.dir);
        if (retval)
                return retval;
index f1b56ff0120894e3e4cc79886928ea7852ac5e25..00c98fab6333562bfd42b5585bdf39ba2579e25c 100644 (file)
@@ -80,6 +80,36 @@ static void ext4_destroy_lazyinit_thread(void);
 static void ext4_unregister_li_request(struct super_block *sb);
 static void ext4_clear_request_list(void);
 
+/*
+ * Lock ordering
+ *
+ * Note the difference between i_mmap_sem (EXT4_I(inode)->i_mmap_sem) and
+ * i_mmap_rwsem (inode->i_mmap_rwsem)!
+ *
+ * page fault path:
+ * mmap_sem -> sb_start_pagefault -> i_mmap_sem (r) -> transaction start ->
+ *   page lock -> i_data_sem (rw)
+ *
+ * buffered write path:
+ * sb_start_write -> i_mutex -> mmap_sem
+ * sb_start_write -> i_mutex -> transaction start -> page lock ->
+ *   i_data_sem (rw)
+ *
+ * truncate:
+ * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (w) -> i_mmap_sem (w) ->
+ *   i_mmap_rwsem (w) -> page lock
+ * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (w) -> i_mmap_sem (w) ->
+ *   transaction start -> i_data_sem (rw)
+ *
+ * direct IO:
+ * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (r) -> mmap_sem
+ * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (r) ->
+ *   transaction start -> i_data_sem (rw)
+ *
+ * writepages:
+ * transaction start -> page lock(s) -> i_data_sem (rw)
+ */
+
 #if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2)
 static struct file_system_type ext2_fs_type = {
        .owner          = THIS_MODULE,
@@ -958,6 +988,7 @@ static void init_once(void *foo)
        INIT_LIST_HEAD(&ei->i_orphan);
        init_rwsem(&ei->xattr_sem);
        init_rwsem(&ei->i_data_sem);
+       init_rwsem(&ei->i_mmap_sem);
        inode_init_once(&ei->vfs_inode);
 }
 
@@ -1066,8 +1097,8 @@ static int bdev_try_to_free_page(struct super_block *sb, struct page *page,
 }
 
 #ifdef CONFIG_QUOTA
-#define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group")
-#define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))
+static char *quotatypes[] = INITQFNAMES;
+#define QTYPE2NAME(t) (quotatypes[t])
 
 static int ext4_write_dquot(struct dquot *dquot);
 static int ext4_acquire_dquot(struct dquot *dquot);
@@ -1100,6 +1131,7 @@ static const struct dquot_operations ext4_quota_operations = {
        .write_info     = ext4_write_info,
        .alloc_dquot    = dquot_alloc,
        .destroy_dquot  = dquot_destroy,
+       .get_projid     = ext4_get_projid,
 };
 
 static const struct quotactl_ops ext4_qctl_operations = {
@@ -2526,6 +2558,12 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly)
                         "without CONFIG_QUOTA");
                return 0;
        }
+       if (ext4_has_feature_project(sb) && !readonly) {
+               ext4_msg(sb, KERN_ERR,
+                        "Filesystem with project quota feature cannot be mounted RDWR "
+                        "without CONFIG_QUOTA");
+               return 0;
+       }
 #endif  /* CONFIG_QUOTA */
        return 1;
 }
@@ -3654,7 +3692,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                sb->s_qcop = &dquot_quotactl_sysfile_ops;
        else
                sb->s_qcop = &ext4_qctl_operations;
-       sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP;
+       sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
 #endif
        memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
 
@@ -4790,6 +4828,48 @@ restore_opts:
        return err;
 }
 
+#ifdef CONFIG_QUOTA
+static int ext4_statfs_project(struct super_block *sb,
+                              kprojid_t projid, struct kstatfs *buf)
+{
+       struct kqid qid;
+       struct dquot *dquot;
+       u64 limit;
+       u64 curblock;
+
+       qid = make_kqid_projid(projid);
+       dquot = dqget(sb, qid);
+       if (IS_ERR(dquot))
+               return PTR_ERR(dquot);
+       spin_lock(&dq_data_lock);
+
+       limit = (dquot->dq_dqb.dqb_bsoftlimit ?
+                dquot->dq_dqb.dqb_bsoftlimit :
+                dquot->dq_dqb.dqb_bhardlimit) >> sb->s_blocksize_bits;
+       if (limit && buf->f_blocks > limit) {
+               curblock = dquot->dq_dqb.dqb_curspace >> sb->s_blocksize_bits;
+               buf->f_blocks = limit;
+               buf->f_bfree = buf->f_bavail =
+                       (buf->f_blocks > curblock) ?
+                        (buf->f_blocks - curblock) : 0;
+       }
+
+       limit = dquot->dq_dqb.dqb_isoftlimit ?
+               dquot->dq_dqb.dqb_isoftlimit :
+               dquot->dq_dqb.dqb_ihardlimit;
+       if (limit && buf->f_files > limit) {
+               buf->f_files = limit;
+               buf->f_ffree =
+                       (buf->f_files > dquot->dq_dqb.dqb_curinodes) ?
+                        (buf->f_files - dquot->dq_dqb.dqb_curinodes) : 0;
+       }
+
+       spin_unlock(&dq_data_lock);
+       dqput(dquot);
+       return 0;
+}
+#endif
+
 static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
        struct super_block *sb = dentry->d_sb;
@@ -4822,6 +4902,11 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
        buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
        buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
 
+#ifdef CONFIG_QUOTA
+       if (ext4_test_inode_flag(dentry->d_inode, EXT4_INODE_PROJINHERIT) &&
+           sb_has_quota_limits_enabled(sb, PRJQUOTA))
+               ext4_statfs_project(sb, EXT4_I(dentry->d_inode)->i_projid, buf);
+#endif
        return 0;
 }
 
@@ -4986,7 +5071,8 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
        struct inode *qf_inode;
        unsigned long qf_inums[EXT4_MAXQUOTAS] = {
                le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
-               le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum)
+               le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum),
+               le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum)
        };
 
        BUG_ON(!ext4_has_feature_quota(sb));
@@ -5014,7 +5100,8 @@ static int ext4_enable_quotas(struct super_block *sb)
        int type, err = 0;
        unsigned long qf_inums[EXT4_MAXQUOTAS] = {
                le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
-               le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum)
+               le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum),
+               le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum)
        };
 
        sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
index 011ba6670d990285f6f61b56ea3fb8b421b70e59..c70d06a383e28819cc556f1027ea118272e5f738 100644 (file)
  */
 static inline void ext4_truncate_failed_write(struct inode *inode)
 {
+       down_write(&EXT4_I(inode)->i_mmap_sem);
        truncate_inode_pages(inode->i_mapping, inode->i_size);
        ext4_truncate(inode);
+       up_write(&EXT4_I(inode)->i_mmap_sem);
 }
 
 /*
index 594b4b29a2241ba0493e22e09b0b66ea2d6c3cc4..4e4b2fa786097c3638e78a6df76995e598a4cecf 100644 (file)
@@ -43,7 +43,7 @@ struct extent_status;
        { EXT4_GET_BLOCKS_METADATA_NOFAIL,      "METADATA_NOFAIL" },    \
        { EXT4_GET_BLOCKS_NO_NORMALIZE,         "NO_NORMALIZE" },       \
        { EXT4_GET_BLOCKS_KEEP_SIZE,            "KEEP_SIZE" },          \
-       { EXT4_GET_BLOCKS_NO_LOCK,              "NO_LOCK" })
+       { EXT4_GET_BLOCKS_ZERO,                 "ZERO" })
 
 #define show_mflags(flags) __print_flags(flags, "",    \
        { EXT4_MAP_NEW,         "N" },                  \
index 8c8451f76633c566751e795d7616ff730740582e..41e0433b4a8398b3dc3da6d2d6aacb8e46287150 100644 (file)
@@ -2,8 +2,11 @@
 #define _UAPI_LINUX_FS_H
 
 /*
- * This file has definitions for some important file table
- * structures etc.
+ * This file has definitions for some important file table structures
+ * and constants and structures used by various generic file system
+ * ioctl's.  Please do not make any changes in this file before
+ * sending patches for review to linux-fsdevel@vger.kernel.org and
+ * linux-api@vger.kernel.org.
  */
 
 #include <linux/limits.h>
@@ -246,6 +249,23 @@ struct fsxattr {
 
 /*
  * Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS)
+ *
+ * Note: for historical reasons, these flags were originally used and
+ * defined for use by ext2/ext3, and then other file systems started
+ * using these flags so they wouldn't need to write their own version
+ * of chattr/lsattr (which was shipped as part of e2fsprogs).  You
+ * should think twice before trying to use these flags in new
+ * contexts, or trying to assign these flags, since they are used both
+ * as the UAPI and the on-disk encoding for ext2/3/4.  Also, we are
+ * almost out of 32-bit flags.  :-)
+ *
+ * We have recently hoisted FS_IOC_FSGETXATTR / FS_IOC_FSSETXATTR from
+ * XFS to the generic FS level interface.  This uses a structure that
+ * has padding and hence has more room to grow, so it may be more
+ * appropriate for many new use cases.
+ *
+ * Please do not change these flags or interfaces before checking with
+ * linux-fsdevel@vger.kernel.org and linux-api@vger.kernel.org.
  */
 #define        FS_SECRM_FL                     0x00000001 /* Secure deletion */
 #define        FS_UNRM_FL                      0x00000002 /* Undelete */
@@ -259,8 +279,8 @@ struct fsxattr {
 #define FS_DIRTY_FL                    0x00000100
 #define FS_COMPRBLK_FL                 0x00000200 /* One or more compressed clusters */
 #define FS_NOCOMP_FL                   0x00000400 /* Don't compress */
-#define FS_ECOMPR_FL                   0x00000800 /* Compression error */
 /* End compression flags --- maybe not all used */
+#define FS_ENCRYPT_FL                  0x00000800 /* Encrypted file */
 #define FS_BTREE_FL                    0x00001000 /* btree format dir */
 #define FS_INDEX_FL                    0x00001000 /* hash-indexed directory */
 #define FS_IMAGIC_FL                   0x00002000 /* AFS directory */
@@ -268,9 +288,12 @@ struct fsxattr {
 #define FS_NOTAIL_FL                   0x00008000 /* file tail should not be merged */
 #define FS_DIRSYNC_FL                  0x00010000 /* dirsync behaviour (directories only) */
 #define FS_TOPDIR_FL                   0x00020000 /* Top of directory hierarchies*/
+#define FS_HUGE_FILE_FL                        0x00040000 /* Reserved for ext4 */
 #define FS_EXTENT_FL                   0x00080000 /* Extents */
-#define FS_DIRECTIO_FL                 0x00100000 /* Use direct i/o */
+#define FS_EA_INODE_FL                 0x00200000 /* Inode used for large EA */
+#define FS_EOFBLOCKS_FL                        0x00400000 /* Reserved for ext4 */
 #define FS_NOCOW_FL                    0x00800000 /* Do not cow file */
+#define FS_INLINE_DATA_FL              0x10000000 /* Reserved for ext4 */
 #define FS_PROJINHERIT_FL              0x20000000 /* Create with parents projid */
 #define FS_RESERVED_FL                 0x80000000 /* reserved for ext2 lib */