Merge tag 'md/3.15' of git://neil.brown.name/md
author: Linus Torvalds <torvalds@linux-foundation.org>
Sat, 12 Apr 2014 00:20:38 +0000 (17:20 -0700)
committer: Linus Torvalds <torvalds@linux-foundation.org>
Sat, 12 Apr 2014 00:20:38 +0000 (17:20 -0700)
Pull md updates from Neil Brown:
 "Just a few md patches for the 3.15 merge window.

  Not much happening in md/raid at the moment.  Just a few bug fixes
  (one for -stable) and a couple of performance tweaks"

* tag 'md/3.15' of git://neil.brown.name/md:
  raid5: get_active_stripe avoids device_lock
  raid5: make_request does less prepare wait
  md: avoid oops on unload if some process is in poll or select.
  md/raid1: r1buf_pool_alloc: free allocate pages when subsequent allocation fails.
  md/bitmap: don't abuse i_writecount for bitmap files.

drivers/md/bitmap.c
drivers/md/md.c
drivers/md/md.h
drivers/md/raid1.c
drivers/md/raid5.c

index 4195a01b15359bcf44950b2904c723194c4d048b..9a8e66ae04f51e95c169ea9a38f48b0d83f6ed03 100644 (file)
@@ -1988,7 +1988,6 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
                if (mddev->bitmap_info.file) {
                        struct file *f = mddev->bitmap_info.file;
                        mddev->bitmap_info.file = NULL;
-                       restore_bitmap_write_access(f);
                        fput(f);
                }
        } else {
index 4ad5cc4e63e8438ca3c32fea1f40f69ec71657fb..8fda38d23e3847aa4d96ecd147e996514a5a4af7 100644 (file)
@@ -5181,32 +5181,6 @@ static int restart_array(struct mddev *mddev)
        return 0;
 }
 
-/* similar to deny_write_access, but accounts for our holding a reference
- * to the file ourselves */
-static int deny_bitmap_write_access(struct file * file)
-{
-       struct inode *inode = file->f_mapping->host;
-
-       spin_lock(&inode->i_lock);
-       if (atomic_read(&inode->i_writecount) > 1) {
-               spin_unlock(&inode->i_lock);
-               return -ETXTBSY;
-       }
-       atomic_set(&inode->i_writecount, -1);
-       spin_unlock(&inode->i_lock);
-
-       return 0;
-}
-
-void restore_bitmap_write_access(struct file *file)
-{
-       struct inode *inode = file->f_mapping->host;
-
-       spin_lock(&inode->i_lock);
-       atomic_set(&inode->i_writecount, 1);
-       spin_unlock(&inode->i_lock);
-}
-
 static void md_clean(struct mddev *mddev)
 {
        mddev->array_sectors = 0;
@@ -5427,7 +5401,6 @@ static int do_md_stop(struct mddev * mddev, int mode,
 
                bitmap_destroy(mddev);
                if (mddev->bitmap_info.file) {
-                       restore_bitmap_write_access(mddev->bitmap_info.file);
                        fput(mddev->bitmap_info.file);
                        mddev->bitmap_info.file = NULL;
                }
@@ -5979,7 +5952,7 @@ abort_export:
 
 static int set_bitmap_file(struct mddev *mddev, int fd)
 {
-       int err;
+       int err = 0;
 
        if (mddev->pers) {
                if (!mddev->pers->quiesce)
@@ -5991,6 +5964,7 @@ static int set_bitmap_file(struct mddev *mddev, int fd)
 
 
        if (fd >= 0) {
+               struct inode *inode;
                if (mddev->bitmap)
                        return -EEXIST; /* cannot add when bitmap is present */
                mddev->bitmap_info.file = fget(fd);
@@ -6001,10 +5975,21 @@ static int set_bitmap_file(struct mddev *mddev, int fd)
                        return -EBADF;
                }
 
-               err = deny_bitmap_write_access(mddev->bitmap_info.file);
-               if (err) {
+               inode = mddev->bitmap_info.file->f_mapping->host;
+               if (!S_ISREG(inode->i_mode)) {
+                       printk(KERN_ERR "%s: error: bitmap file must be a regular file\n",
+                              mdname(mddev));
+                       err = -EBADF;
+               } else if (!(mddev->bitmap_info.file->f_mode & FMODE_WRITE)) {
+                       printk(KERN_ERR "%s: error: bitmap file must open for write\n",
+                              mdname(mddev));
+                       err = -EBADF;
+               } else if (atomic_read(&inode->i_writecount) != 1) {
                        printk(KERN_ERR "%s: error: bitmap file is already in use\n",
                               mdname(mddev));
+                       err = -EBUSY;
+               }
+               if (err) {
                        fput(mddev->bitmap_info.file);
                        mddev->bitmap_info.file = NULL;
                        return err;
@@ -6027,10 +6012,8 @@ static int set_bitmap_file(struct mddev *mddev, int fd)
                mddev->pers->quiesce(mddev, 0);
        }
        if (fd < 0) {
-               if (mddev->bitmap_info.file) {
-                       restore_bitmap_write_access(mddev->bitmap_info.file);
+               if (mddev->bitmap_info.file)
                        fput(mddev->bitmap_info.file);
-               }
                mddev->bitmap_info.file = NULL;
        }
 
@@ -7182,11 +7165,14 @@ static int md_seq_open(struct inode *inode, struct file *file)
        return error;
 }
 
+static int md_unloading;
 static unsigned int mdstat_poll(struct file *filp, poll_table *wait)
 {
        struct seq_file *seq = filp->private_data;
        int mask;
 
+       if (md_unloading)
+               return POLLIN|POLLRDNORM|POLLERR|POLLPRI;;
        poll_wait(filp, &md_event_waiters, wait);
 
        /* always allow read */
@@ -8672,6 +8658,7 @@ static __exit void md_exit(void)
 {
        struct mddev *mddev;
        struct list_head *tmp;
+       int delay = 1;
 
        blk_unregister_region(MKDEV(MD_MAJOR,0), 1U << MINORBITS);
        blk_unregister_region(MKDEV(mdp_major,0), 1U << MINORBITS);
@@ -8680,7 +8667,19 @@ static __exit void md_exit(void)
        unregister_blkdev(mdp_major, "mdp");
        unregister_reboot_notifier(&md_notifier);
        unregister_sysctl_table(raid_table_header);
+
+       /* We cannot unload the modules while some process is
+        * waiting for us in select() or poll() - wake them up
+        */
+       md_unloading = 1;
+       while (waitqueue_active(&md_event_waiters)) {
+               /* not safe to leave yet */
+               wake_up(&md_event_waiters);
+               msleep(delay);
+               delay += delay;
+       }
        remove_proc_entry("mdstat", NULL);
+
        for_each_mddev(mddev, tmp) {
                export_array(mddev);
                mddev->hold_active = 0;
index 07bba96de26047d3f4b868eefc78d3727f0be52d..a49d991f3fe11d4d9fa1622096c1ca2735c0572d 100644 (file)
@@ -605,7 +605,6 @@ extern int md_check_no_bitmap(struct mddev *mddev);
 extern int md_integrity_register(struct mddev *mddev);
 extern void md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev);
 extern int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale);
-extern void restore_bitmap_write_access(struct file *file);
 
 extern void mddev_init(struct mddev *mddev);
 extern int md_run(struct mddev *mddev);
index 4a6ca1cb2e78539679b96a00b89542f6f0eab8f0..56e24c072b629324ec382037ab17fd43310ec3ef 100644 (file)
@@ -97,6 +97,7 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
        struct pool_info *pi = data;
        struct r1bio *r1_bio;
        struct bio *bio;
+       int need_pages;
        int i, j;
 
        r1_bio = r1bio_pool_alloc(gfp_flags, pi);
@@ -119,15 +120,15 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
         * RESYNC_PAGES for each bio.
         */
        if (test_bit(MD_RECOVERY_REQUESTED, &pi->mddev->recovery))
-               j = pi->raid_disks;
+               need_pages = pi->raid_disks;
        else
-               j = 1;
-       while(j--) {
+               need_pages = 1;
+       for (j = 0; j < need_pages; j++) {
                bio = r1_bio->bios[j];
                bio->bi_vcnt = RESYNC_PAGES;
 
                if (bio_alloc_pages(bio, gfp_flags))
-                       goto out_free_bio;
+                       goto out_free_pages;
        }
        /* If not user-requests, copy the page pointers to all bios */
        if (!test_bit(MD_RECOVERY_REQUESTED, &pi->mddev->recovery)) {
@@ -141,6 +142,14 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
 
        return r1_bio;
 
+out_free_pages:
+       while (--j >= 0) {
+               struct bio_vec *bv;
+
+               bio_for_each_segment_all(bv, r1_bio->bios[j], i)
+                       __free_page(bv->bv_page);
+       }
+
 out_free_bio:
        while (++j < pi->raid_disks)
                bio_put(r1_bio->bios[j]);
index 16f5c21963db5391ed25fd1e185ab8399f353e74..25247a8529124f7cc93eda0db7033655d52f2ef7 100644 (file)
@@ -679,14 +679,9 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
                                init_stripe(sh, sector, previous);
                                atomic_inc(&sh->count);
                        }
-               } else {
+               } else if (!atomic_inc_not_zero(&sh->count)) {
                        spin_lock(&conf->device_lock);
-                       if (atomic_read(&sh->count)) {
-                               BUG_ON(!list_empty(&sh->lru)
-                                   && !test_bit(STRIPE_EXPANDING, &sh->state)
-                                   && !test_bit(STRIPE_ON_UNPLUG_LIST, &sh->state)
-                                       );
-                       } else {
+                       if (!atomic_read(&sh->count)) {
                                if (!test_bit(STRIPE_HANDLE, &sh->state))
                                        atomic_inc(&conf->active_stripes);
                                BUG_ON(list_empty(&sh->lru) &&
@@ -4552,6 +4547,8 @@ static void make_request(struct mddev *mddev, struct bio * bi)
        struct stripe_head *sh;
        const int rw = bio_data_dir(bi);
        int remaining;
+       DEFINE_WAIT(w);
+       bool do_prepare;
 
        if (unlikely(bi->bi_rw & REQ_FLUSH)) {
                md_flush_request(mddev, bi);
@@ -4575,15 +4572,18 @@ static void make_request(struct mddev *mddev, struct bio * bi)
        bi->bi_next = NULL;
        bi->bi_phys_segments = 1;       /* over-loaded to count active stripes */
 
+       prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE);
        for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) {
-               DEFINE_WAIT(w);
                int previous;
                int seq;
 
+               do_prepare = false;
        retry:
                seq = read_seqcount_begin(&conf->gen_lock);
                previous = 0;
-               prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE);
+               if (do_prepare)
+                       prepare_to_wait(&conf->wait_for_overlap, &w,
+                               TASK_UNINTERRUPTIBLE);
                if (unlikely(conf->reshape_progress != MaxSector)) {
                        /* spinlock is needed as reshape_progress may be
                         * 64bit on a 32bit platform, and so it might be
@@ -4604,6 +4604,7 @@ static void make_request(struct mddev *mddev, struct bio * bi)
                                    : logical_sector >= conf->reshape_safe) {
                                        spin_unlock_irq(&conf->device_lock);
                                        schedule();
+                                       do_prepare = true;
                                        goto retry;
                                }
                        }
@@ -4640,6 +4641,7 @@ static void make_request(struct mddev *mddev, struct bio * bi)
                                if (must_retry) {
                                        release_stripe(sh);
                                        schedule();
+                                       do_prepare = true;
                                        goto retry;
                                }
                        }
@@ -4663,8 +4665,10 @@ static void make_request(struct mddev *mddev, struct bio * bi)
                                prepare_to_wait(&conf->wait_for_overlap,
                                                &w, TASK_INTERRUPTIBLE);
                                if (logical_sector >= mddev->suspend_lo &&
-                                   logical_sector < mddev->suspend_hi)
+                                   logical_sector < mddev->suspend_hi) {
                                        schedule();
+                                       do_prepare = true;
+                               }
                                goto retry;
                        }
 
@@ -4677,9 +4681,9 @@ static void make_request(struct mddev *mddev, struct bio * bi)
                                md_wakeup_thread(mddev->thread);
                                release_stripe(sh);
                                schedule();
+                               do_prepare = true;
                                goto retry;
                        }
-                       finish_wait(&conf->wait_for_overlap, &w);
                        set_bit(STRIPE_HANDLE, &sh->state);
                        clear_bit(STRIPE_DELAYED, &sh->state);
                        if ((bi->bi_rw & REQ_SYNC) &&
@@ -4689,10 +4693,10 @@ static void make_request(struct mddev *mddev, struct bio * bi)
                } else {
                        /* cannot get stripe for read-ahead, just give-up */
                        clear_bit(BIO_UPTODATE, &bi->bi_flags);
-                       finish_wait(&conf->wait_for_overlap, &w);
                        break;
                }
        }
+       finish_wait(&conf->wait_for_overlap, &w);
 
        remaining = raid5_dec_bi_active_stripes(bi);
        if (remaining == 0) {