ext4: fix data integrity sync in ordered mode
author Namjae Jeon <namjae.jeon@samsung.com>
Mon, 12 May 2014 12:12:25 +0000 (08:12 -0400)
committer Jiri Slaby <jslaby@suse.cz>
Wed, 2 Jul 2014 10:06:15 +0000 (12:06 +0200)
commit 1c8349a17137b93f0a83f276c764a6df1b9a116e upstream.

When we perform a data integrity sync we tag all the dirty pages with
PAGECACHE_TAG_TOWRITE at the start of ext4_da_writepages.  Later we check
for this tag in write_cache_pages_da and create a struct
mpage_da_data containing contiguously indexed pages tagged with this
tag and sync these pages with a call to mpage_da_map_and_submit.  This
process is done in a while loop until all the PAGECACHE_TAG_TOWRITE
pages are synced. We also do journal start and stop in each iteration.
journal_stop could initiate a journal commit, which would call
ext4_writepage, which in turn will call ext4_bio_write_page even for
delayed or unwritten buffers. When ext4_bio_write_page is called for
such buffers, it does not sync them, but it still clears the
PAGECACHE_TAG_TOWRITE tag of the corresponding page; hence these pages
are not synced by the currently running data integrity sync either. We
end up with dirty pages even though the sync has completed.

This could cause data loss when the sync call is followed
by a truncate_pagecache call, which is exactly the case in
collapse_range.  (It causes a generic/127 failure in xfstests.)

To avoid this issue, we can use set_page_writeback_keepwrite, which
does not clear the TOWRITE tag, instead of set_page_writeback.

Signed-off-by: Namjae Jeon <namjae.jeon@samsung.com>
Signed-off-by: Ashish Sangwan <a.sangwan@samsung.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Jiri Slaby <jslaby@suse.cz>
fs/ext4/ext4.h
fs/ext4/inode.c
fs/ext4/page-io.c
include/linux/page-flags.h
mm/page-writeback.c

index 1e25d6b57bc514dfc089dad545ba3bde1f0083bc..54d94db2cf036892baf04254c90326282d186cc1 100644 (file)
@@ -2749,7 +2749,8 @@ extern void ext4_io_submit(struct ext4_io_submit *io);
 extern int ext4_bio_write_page(struct ext4_io_submit *io,
                               struct page *page,
                               int len,
-                              struct writeback_control *wbc);
+                              struct writeback_control *wbc,
+                              bool keep_towrite);
 
 /* mmp.c */
 extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t);
index ea9793d8a77f7cd91c5a01825c9e91b475db7ed0..e5d9908c0bc38393e19b12922e5ffeb2107e6ae8 100644 (file)
@@ -1835,6 +1835,7 @@ static int ext4_writepage(struct page *page,
        struct buffer_head *page_bufs = NULL;
        struct inode *inode = page->mapping->host;
        struct ext4_io_submit io_submit;
+       bool keep_towrite = false;
 
        trace_ext4_writepage(page);
        size = i_size_read(inode);
@@ -1865,6 +1866,7 @@ static int ext4_writepage(struct page *page,
                        unlock_page(page);
                        return 0;
                }
+               keep_towrite = true;
        }
 
        if (PageChecked(page) && ext4_should_journal_data(inode))
@@ -1881,7 +1883,7 @@ static int ext4_writepage(struct page *page,
                unlock_page(page);
                return -ENOMEM;
        }
-       ret = ext4_bio_write_page(&io_submit, page, len, wbc);
+       ret = ext4_bio_write_page(&io_submit, page, len, wbc, keep_towrite);
        ext4_io_submit(&io_submit);
        /* Drop io_end reference we got from init */
        ext4_put_io_end_defer(io_submit.io_end);
@@ -1900,7 +1902,7 @@ static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page)
        else
                len = PAGE_CACHE_SIZE;
        clear_page_dirty_for_io(page);
-       err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc);
+       err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc, false);
        if (!err)
                mpd->wbc->nr_to_write--;
        mpd->first_page++;
index 02e94ef1489b33934f270dc0f8e2fefc6104c808..0881ede35baa34d83106ebe4ceb84796060f0227 100644 (file)
@@ -400,7 +400,8 @@ submit_and_retry:
 int ext4_bio_write_page(struct ext4_io_submit *io,
                        struct page *page,
                        int len,
-                       struct writeback_control *wbc)
+                       struct writeback_control *wbc,
+                       bool keep_towrite)
 {
        struct inode *inode = page->mapping->host;
        unsigned block_start, blocksize;
@@ -413,7 +414,10 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
        BUG_ON(!PageLocked(page));
        BUG_ON(PageWriteback(page));
 
-       set_page_writeback(page);
+       if (keep_towrite)
+               set_page_writeback_keepwrite(page);
+       else
+               set_page_writeback(page);
        ClearPageError(page);
 
        /*
index 6d53675c2b54691225b12f3f23c914aca86c35ac..dd7d45b5c496b7be3fb365b959a46d1b20946dcc 100644 (file)
@@ -317,13 +317,23 @@ CLEARPAGEFLAG(Uptodate, uptodate)
 extern void cancel_dirty_page(struct page *page, unsigned int account_size);
 
 int test_clear_page_writeback(struct page *page);
-int test_set_page_writeback(struct page *page);
+int __test_set_page_writeback(struct page *page, bool keep_write);
+
+#define test_set_page_writeback(page)                  \
+       __test_set_page_writeback(page, false)
+#define test_set_page_writeback_keepwrite(page)        \
+       __test_set_page_writeback(page, true)
 
 static inline void set_page_writeback(struct page *page)
 {
        test_set_page_writeback(page);
 }
 
+static inline void set_page_writeback_keepwrite(struct page *page)
+{
+       test_set_page_writeback_keepwrite(page);
+}
+
 #ifdef CONFIG_PAGEFLAGS_EXTENDED
 /*
  * System with lots of page flags available. This allows separate
index 8f6daa62206dea0ecde66afb911f45c064ca39bd..d013dba21429f2e1fcba4d8321a119baa617fb1c 100644 (file)
@@ -2398,7 +2398,7 @@ int test_clear_page_writeback(struct page *page)
        return ret;
 }
 
-int test_set_page_writeback(struct page *page)
+int __test_set_page_writeback(struct page *page, bool keep_write)
 {
        struct address_space *mapping = page_mapping(page);
        int ret;
@@ -2423,9 +2423,10 @@ int test_set_page_writeback(struct page *page)
                        radix_tree_tag_clear(&mapping->page_tree,
                                                page_index(page),
                                                PAGECACHE_TAG_DIRTY);
-               radix_tree_tag_clear(&mapping->page_tree,
-                                    page_index(page),
-                                    PAGECACHE_TAG_TOWRITE);
+               if (!keep_write)
+                       radix_tree_tag_clear(&mapping->page_tree,
+                                               page_index(page),
+                                               PAGECACHE_TAG_TOWRITE);
                spin_unlock_irqrestore(&mapping->tree_lock, flags);
        } else {
                ret = TestSetPageWriteback(page);
@@ -2436,7 +2437,7 @@ int test_set_page_writeback(struct page *page)
        return ret;
 
 }
-EXPORT_SYMBOL(test_set_page_writeback);
+EXPORT_SYMBOL(__test_set_page_writeback);
 
 /*
  * Return true if any of the pages in the mapping are marked with the