md: use MD_RECOVERY_INTR instead of kthread_should_stop in resync thread.
authorNeilBrown <neilb@suse.de>
Tue, 19 Nov 2013 01:02:01 +0000 (12:02 +1100)
committerNeilBrown <neilb@suse.de>
Tue, 19 Nov 2013 04:19:17 +0000 (15:19 +1100)
We currently use kthread_should_stop() in various places in the
sync/reshape code to abort early.
However some places set MD_RECOVERY_INTR but don't immediately call
md_reap_sync_thread() (and we will shortly get another one).
When this happens we are relying on md_check_recovery() to reap the
thread and that only happen when it finishes normally.
So MD_RECOVERY_INTR must lead to a normal finish without the
kthread_should_stop() test.

So replace all relevant tests, and be more careful when the thread is
interrupted not to acknowledge that latest step in a reshape as it may
not be fully committed yet.

Also add a test on MD_RECOVERY_INTR in the 'is_mddev_idle' loop
so we don't wait have to wait for the speed to drop before we can abort.

Signed-off-by: NeilBrown <neilb@suse.de>
drivers/md/md.c
drivers/md/raid10.c
drivers/md/raid5.c

index 1ca47b0f47793169207c51045bfeb0e92c01b620..a74045df7bab17caf64f0f58ac4abb5771500de7 100644 (file)
@@ -7410,9 +7410,6 @@ void md_do_sync(struct md_thread *thread)
                mddev->curr_resync = 2;
 
        try_again:
-               if (kthread_should_stop())
-                       set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-
                if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
                        goto skip;
                for_each_mddev(mddev2, tmp) {
@@ -7437,7 +7434,7 @@ void md_do_sync(struct md_thread *thread)
                                 * be caught by 'softlockup'
                                 */
                                prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE);
-                               if (!kthread_should_stop() &&
+                               if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
                                    mddev2->curr_resync >= mddev->curr_resync) {
                                        printk(KERN_INFO "md: delaying %s of %s"
                                               " until %s has finished (they"
@@ -7513,7 +7510,7 @@ void md_do_sync(struct md_thread *thread)
        last_check = 0;
 
        if (j>2) {
-               printk(KERN_INFO 
+               printk(KERN_INFO
                       "md: resuming %s of %s from checkpoint.\n",
                       desc, mdname(mddev));
                mddev->curr_resync = j;
@@ -7550,7 +7547,8 @@ void md_do_sync(struct md_thread *thread)
                        sysfs_notify(&mddev->kobj, NULL, "sync_completed");
                }
 
-               while (j >= mddev->resync_max && !kthread_should_stop()) {
+               while (j >= mddev->resync_max &&
+                      !test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
                        /* As this condition is controlled by user-space,
                         * we can block indefinitely, so use '_interruptible'
                         * to avoid triggering warnings.
@@ -7558,17 +7556,18 @@ void md_do_sync(struct md_thread *thread)
                        flush_signals(current); /* just in case */
                        wait_event_interruptible(mddev->recovery_wait,
                                                 mddev->resync_max > j
-                                                || kthread_should_stop());
+                                                || test_bit(MD_RECOVERY_INTR,
+                                                            &mddev->recovery));
                }
 
-               if (kthread_should_stop())
-                       goto interrupted;
+               if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
+                       break;
 
                sectors = mddev->pers->sync_request(mddev, j, &skipped,
                                                  currspeed < speed_min(mddev));
                if (sectors == 0) {
                        set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-                       goto out;
+                       break;
                }
 
                if (!skipped) { /* actual IO requested */
@@ -7605,10 +7604,8 @@ void md_do_sync(struct md_thread *thread)
                        last_mark = next;
                }
 
-
-               if (kthread_should_stop())
-                       goto interrupted;
-
+               if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
+                       break;
 
                /*
                 * this loop exits only if either when we are slower than
@@ -7631,11 +7628,12 @@ void md_do_sync(struct md_thread *thread)
                        }
                }
        }
-       printk(KERN_INFO "md: %s: %s done.\n",mdname(mddev), desc);
+       printk(KERN_INFO "md: %s: %s %s.\n",mdname(mddev), desc,
+              test_bit(MD_RECOVERY_INTR, &mddev->recovery)
+              ? "interrupted" : "done");
        /*
         * this also signals 'finished resyncing' to md_stop
         */
- out:
        blk_finish_plug(&plug);
        wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
 
@@ -7689,16 +7687,6 @@ void md_do_sync(struct md_thread *thread)
        set_bit(MD_RECOVERY_DONE, &mddev->recovery);
        md_wakeup_thread(mddev->thread);
        return;
-
- interrupted:
-       /*
-        * got a signal, exit.
-        */
-       printk(KERN_INFO
-              "md: md_do_sync() got signal ... exiting\n");
-       set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-       goto out;
-
 }
 EXPORT_SYMBOL_GPL(md_do_sync);
 
index 73dc8a377522e19af92c9bed26519a479f36dbea..4f87ba5f3a66cd57b448e99ba26d1e48adca4fc2 100644 (file)
@@ -4386,7 +4386,11 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
                set_bit(MD_CHANGE_DEVS, &mddev->flags);
                md_wakeup_thread(mddev->thread);
                wait_event(mddev->sb_wait, mddev->flags == 0 ||
-                          kthread_should_stop());
+                          test_bit(MD_RECOVERY_INTR, &mddev->recovery));
+               if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
+                       allow_barrier(conf);
+                       return sectors_done;
+               }
                conf->reshape_safe = mddev->reshape_position;
                allow_barrier(conf);
        }
index 2c890770610953aefcd40e88af124e2c9698c290..02f6bc2ac2dbf4295846f95ff6fdea908af6edf0 100644 (file)
@@ -4842,14 +4842,19 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
            time_after(jiffies, conf->reshape_checkpoint + 10*HZ)) {
                /* Cannot proceed until we've updated the superblock... */
                wait_event(conf->wait_for_overlap,
-                          atomic_read(&conf->reshape_stripes)==0);
+                          atomic_read(&conf->reshape_stripes)==0
+                          || test_bit(MD_RECOVERY_INTR, &mddev->recovery));
+               if (atomic_read(&conf->reshape_stripes) != 0)
+                       return 0;
                mddev->reshape_position = conf->reshape_progress;
                mddev->curr_resync_completed = sector_nr;
                conf->reshape_checkpoint = jiffies;
                set_bit(MD_CHANGE_DEVS, &mddev->flags);
                md_wakeup_thread(mddev->thread);
                wait_event(mddev->sb_wait, mddev->flags == 0 ||
-                          kthread_should_stop());
+                          test_bit(MD_RECOVERY_INTR, &mddev->recovery));
+               if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
+                       return 0;
                spin_lock_irq(&conf->device_lock);
                conf->reshape_safe = mddev->reshape_position;
                spin_unlock_irq(&conf->device_lock);
@@ -4932,7 +4937,10 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
            >= mddev->resync_max - mddev->curr_resync_completed) {
                /* Cannot proceed until we've updated the superblock... */
                wait_event(conf->wait_for_overlap,
-                          atomic_read(&conf->reshape_stripes) == 0);
+                          atomic_read(&conf->reshape_stripes) == 0
+                          || test_bit(MD_RECOVERY_INTR, &mddev->recovery));
+               if (atomic_read(&conf->reshape_stripes) != 0)
+                       goto ret;
                mddev->reshape_position = conf->reshape_progress;
                mddev->curr_resync_completed = sector_nr;
                conf->reshape_checkpoint = jiffies;
@@ -4940,13 +4948,16 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
                md_wakeup_thread(mddev->thread);
                wait_event(mddev->sb_wait,
                           !test_bit(MD_CHANGE_DEVS, &mddev->flags)
-                          || kthread_should_stop());
+                          || test_bit(MD_RECOVERY_INTR, &mddev->recovery));
+               if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
+                       goto ret;
                spin_lock_irq(&conf->device_lock);
                conf->reshape_safe = mddev->reshape_position;
                spin_unlock_irq(&conf->device_lock);
                wake_up(&conf->wait_for_overlap);
                sysfs_notify(&mddev->kobj, NULL, "sync_completed");
        }
+ret:
        return reshape_sectors;
 }