vfs,ext2: introduce IS_DAX(inode)
author Matthew Wilcox <matthew.r.wilcox@intel.com>
Mon, 16 Feb 2015 23:58:53 +0000 (15:58 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 17 Feb 2015 01:56:03 +0000 (17:56 -0800)
Use an inode flag to tag inodes which should avoid using the page cache.
Convert ext2 to use it instead of mapping_is_xip().  Prevent I/Os to files
tagged with the DAX flag from falling back to buffered I/O.

Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Andreas Dilger <andreas.dilger@intel.com>
Cc: Boaz Harrosh <boaz@plexistor.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
fs/ext2/inode.c
fs/ext2/xip.h
include/linux/fs.h
mm/filemap.c

index 36d35c36311d69a025c5b804e8d8597cbd9cb2b2..0cb04486577d63791c362831d8fb14fbf02d9b15 100644 (file)
@@ -731,7 +731,7 @@ static int ext2_get_blocks(struct inode *inode,
                goto cleanup;
        }
 
-       if (ext2_use_xip(inode->i_sb)) {
+       if (IS_DAX(inode)) {
                /*
                 * we need to clear the block
                 */
@@ -1201,7 +1201,7 @@ static int ext2_setsize(struct inode *inode, loff_t newsize)
 
        inode_dio_wait(inode);
 
-       if (mapping_is_xip(inode->i_mapping))
+       if (IS_DAX(inode))
                error = xip_truncate_page(inode->i_mapping, newsize);
        else if (test_opt(inode->i_sb, NOBH))
                error = nobh_truncate_page(inode->i_mapping,
@@ -1273,7 +1273,8 @@ void ext2_set_inode_flags(struct inode *inode)
 {
        unsigned int flags = EXT2_I(inode)->i_flags;
 
-       inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
+       inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME |
+                               S_DIRSYNC | S_DAX);
        if (flags & EXT2_SYNC_FL)
                inode->i_flags |= S_SYNC;
        if (flags & EXT2_APPEND_FL)
@@ -1284,6 +1285,8 @@ void ext2_set_inode_flags(struct inode *inode)
                inode->i_flags |= S_NOATIME;
        if (flags & EXT2_DIRSYNC_FL)
                inode->i_flags |= S_DIRSYNC;
+       if (test_opt(inode->i_sb, XIP))
+               inode->i_flags |= S_DAX;
 }
 
 /* Propagate flags from i_flags to EXT2_I(inode)->i_flags */
index 18b34d2f31b3784fa95565103ea914bab45809b5..29be73781419b8b8fc4cdfc383b8406cc0339e30 100644 (file)
@@ -16,9 +16,7 @@ static inline int ext2_use_xip (struct super_block *sb)
 }
 int ext2_get_xip_mem(struct address_space *, pgoff_t, int,
                                void **, unsigned long *);
-#define mapping_is_xip(map) unlikely(map->a_ops->get_xip_mem)
 #else
-#define mapping_is_xip(map)                    0
 #define ext2_xip_verify_sb(sb)                 do { } while (0)
 #define ext2_use_xip(sb)                       0
 #define ext2_clear_xip_target(inode, chain)    0
index e49f10cc8a738340be7f3c89dc473cd8b9c004cd..fb373bb5cf03bf6a10dc416530b22cd1c02928dd 100644 (file)
@@ -1677,6 +1677,11 @@ struct super_operations {
 #define S_IMA          1024    /* Inode has an associated IMA struct */
 #define S_AUTOMOUNT    2048    /* Automount/referral quasi-directory */
 #define S_NOSEC                4096    /* no suid or xattr security attributes */
+#ifdef CONFIG_FS_XIP
+#define S_DAX          8192    /* Direct Access, avoiding the page cache */
+#else
+#define S_DAX          0       /* Make all the DAX code disappear */
+#endif
 
 /*
  * Note that nosuid etc flags are inode-specific: setting some file-system
@@ -1714,6 +1719,7 @@ struct super_operations {
 #define IS_IMA(inode)          ((inode)->i_flags & S_IMA)
 #define IS_AUTOMOUNT(inode)    ((inode)->i_flags & S_AUTOMOUNT)
 #define IS_NOSEC(inode)                ((inode)->i_flags & S_NOSEC)
+#define IS_DAX(inode)          ((inode)->i_flags & S_DAX)
 
 #define IS_WHITEOUT(inode)     (S_ISCHR(inode->i_mode) && \
                                 (inode)->i_rdev == WHITEOUT_DEV)
index d9f5336552d7b12cad62315a2c512a9ca922bf55..1578c224285e636ed4e088bb591e958813bd4a74 100644 (file)
@@ -1723,9 +1723,11 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
                 * we've already read everything we wanted to, or if
                 * there was a short read because we hit EOF, go ahead
                 * and return.  Otherwise fallthrough to buffered io for
-                * the rest of the read.
+                * the rest of the read.  Buffered reads will not work for
+                * DAX files, so don't bother trying.
                 */
-               if (retval < 0 || !iov_iter_count(iter) || *ppos >= size) {
+               if (retval < 0 || !iov_iter_count(iter) || *ppos >= size ||
+                   IS_DAX(inode)) {
                        file_accessed(file);
                        goto out;
                }
@@ -2587,13 +2589,16 @@ ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
                loff_t endbyte;
 
                written = generic_file_direct_write(iocb, from, pos);
-               if (written < 0 || written == count)
-                       goto out;
-
                /*
-                * direct-io write to a hole: fall through to buffered I/O
-                * for completing the rest of the request.
+                * If the write stopped short of completing, fall back to
+                * buffered writes.  Some filesystems do this for writes to
+                * holes, for example.  For DAX files, a buffered write will
+                * not succeed (even if it did, DAX does not handle dirty
+                * page-cache pages correctly).
                 */
+               if (written < 0 || written == count || IS_DAX(inode))
+                       goto out;
+
                pos += written;
                count -= written;