raid1: Rewrite the implementation of iobarrier.

author majianpeng <majianpeng@gmail.com>

Fri, 15 Nov 2013 06:55:02 +0000 (14:55 +0800)

committer NeilBrown <neilb@suse.de>

Tue, 19 Nov 2013 04:19:18 +0000 (15:19 +1100)
author majianpeng <majianpeng@gmail.com>
Fri, 15 Nov 2013 06:55:02 +0000 (14:55 +0800)
committer NeilBrown <neilb@suse.de>
Tue, 19 Nov 2013 04:19:18 +0000 (15:19 +1100)
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c

index 78da3392f577633320a04e1e8d290e8aca43bc2f..d9ee4edd7a53ae5119aedb84edd9824538701d5c 100644 (file)
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -66,7 +66,8 @@
   */
  static int max_queued_requests = 1024;
  
-static void allow_barrier(struct r1conf *conf);
+static void allow_barrier(struct r1conf *conf, sector_t start_next_window,
+                         sector_t bi_sector);
  static void lower_barrier(struct r1conf *conf);
  
  static void * r1bio_pool_alloc(gfp_t gfp_flags, void *data)
@@ -227,6 +228,8 @@ static void call_bio_endio(struct r1bio *r1_bio)
         struct bio *bio = r1_bio->master_bio;
         int done;
         struct r1conf *conf = r1_bio->mddev->private;
+       sector_t start_next_window = r1_bio->start_next_window;
+       sector_t bi_sector = bio->bi_sector;
  
         if (bio->bi_phys_segments) {
                 unsigned long flags;
@@ -234,6 +237,11 @@ static void call_bio_endio(struct r1bio *r1_bio)
                 bio->bi_phys_segments--;
                 done = (bio->bi_phys_segments == 0);
                 spin_unlock_irqrestore(&conf->device_lock, flags);
+               /*
+                * make_request() might be waiting for
+                * bi_phys_segments to decrease
+                */
+               wake_up(&conf->wait_barrier);
         } else
                 done = 1;
  
@@ -245,7 +253,7 @@ static void call_bio_endio(struct r1bio *r1_bio)
                  * Wake up any possible resync thread that waits for the device
                  * to go idle.
                  */
-               allow_barrier(conf);
+               allow_barrier(conf, start_next_window, bi_sector);
         }
  }
  
@@ -827,10 +835,19 @@ static void raise_barrier(struct r1conf *conf)
         /* block any new IO from starting */
         conf->barrier++;
  
-       /* Now wait for all pending IO to complete */
+       /* For these conditions we must wait:
+        * A: while the array is in frozen state
+        * B: while barrier >= RESYNC_DEPTH, meaning resync has reached
+        *    the maximum count allowed.
+        * C: next_resync + RESYNC_SECTORS > start_next_window, meaning
+        *    the next resync would reach into the window in which
+        *    normal bios are being handled.
+        */
         wait_event_lock_irq(conf->wait_barrier,
                             !conf->array_frozen &&
-                           !conf->nr_pending && conf->barrier < RESYNC_DEPTH,
+                           conf->barrier < RESYNC_DEPTH &&
+                           (conf->start_next_window >=
+                            conf->next_resync + RESYNC_SECTORS),
                             conf->resync_lock);
  
         spin_unlock_irq(&conf->resync_lock);
@@ -846,10 +863,33 @@ static void lower_barrier(struct r1conf *conf)
         wake_up(&conf->wait_barrier);
  }
  
-static void wait_barrier(struct r1conf *conf)
+static bool need_to_wait_for_sync(struct r1conf *conf, struct bio *bio)
+{
+       bool wait = false;
+
+       if (conf->array_frozen || !bio)
+               wait = true;
+       else if (conf->barrier && bio_data_dir(bio) == WRITE) {
+               if (conf->next_resync < RESYNC_WINDOW_SECTORS)
+                       wait = true;
+               else if ((conf->next_resync - RESYNC_WINDOW_SECTORS
+                               >= bio_end_sector(bio)) ||
+                        (conf->next_resync + NEXT_NORMALIO_DISTANCE
+                               <= bio->bi_sector))
+                       wait = false;
+               else
+                       wait = true;
+       }
+
+       return wait;
+}
+
+static sector_t wait_barrier(struct r1conf *conf, struct bio *bio)
  {
+       sector_t sector = 0;
+
         spin_lock_irq(&conf->resync_lock);
-       if (conf->barrier) {
+       if (need_to_wait_for_sync(conf, bio)) {
                 conf->nr_waiting++;
                 /* Wait for the barrier to drop.
                  * However if there are already pending
@@ -863,21 +903,65 @@ static void wait_barrier(struct r1conf *conf)
                 wait_event_lock_irq(conf->wait_barrier,
                                     !conf->array_frozen &&
                                     (!conf->barrier ||
-                                   (conf->nr_pending &&
+                                   ((conf->start_next_window <
+                                     conf->next_resync + RESYNC_SECTORS) &&
                                      current->bio_list &&
                                      !bio_list_empty(current->bio_list))),
                                     conf->resync_lock);
                 conf->nr_waiting--;
         }
+
+       if (bio && bio_data_dir(bio) == WRITE) {
+               if (conf->next_resync + NEXT_NORMALIO_DISTANCE
+                   <= bio->bi_sector) {
+                       if (conf->start_next_window == MaxSector)
+                               conf->start_next_window =
+                                       conf->next_resync +
+                                       NEXT_NORMALIO_DISTANCE;
+
+                       if ((conf->start_next_window + NEXT_NORMALIO_DISTANCE)
+                           <= bio->bi_sector)
+                               conf->next_window_requests++;
+                       else
+                               conf->current_window_requests++;
+               }
+               if (bio->bi_sector >= conf->start_next_window)
+                       sector = conf->start_next_window;
+       }
+
         conf->nr_pending++;
         spin_unlock_irq(&conf->resync_lock);
+       return sector;
  }
  
-static void allow_barrier(struct r1conf *conf)
+static void allow_barrier(struct r1conf *conf, sector_t start_next_window,
+                         sector_t bi_sector)
  {
         unsigned long flags;
+
         spin_lock_irqsave(&conf->resync_lock, flags);
         conf->nr_pending--;
+       if (start_next_window) {
+               if (start_next_window == conf->start_next_window) {
+                       if (conf->start_next_window + NEXT_NORMALIO_DISTANCE
+                           <= bi_sector)
+                               conf->next_window_requests--;
+                       else
+                               conf->current_window_requests--;
+               } else
+                       conf->current_window_requests--;
+
+               if (!conf->current_window_requests) {
+                       if (conf->next_window_requests) {
+                               conf->current_window_requests =
+                                       conf->next_window_requests;
+                               conf->next_window_requests = 0;
+                               conf->start_next_window +=
+                                       NEXT_NORMALIO_DISTANCE;
+                       } else
+                               conf->start_next_window = MaxSector;
+               }
+       }
         spin_unlock_irqrestore(&conf->resync_lock, flags);
         wake_up(&conf->wait_barrier);
  }
@@ -1012,6 +1096,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
         int first_clone;
         int sectors_handled;
         int max_sectors;
+       sector_t start_next_window;
  
         /*
          * Register the new request and wait if the reconstruction
@@ -1041,7 +1126,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
                 finish_wait(&conf->wait_barrier, &w);
         }
  
-       wait_barrier(conf);
+       start_next_window = wait_barrier(conf, bio);
  
         bitmap = mddev->bitmap;
  
@@ -1162,6 +1247,7 @@ read_again:
  
         disks = conf->raid_disks * 2;
   retry_write:
+       r1_bio->start_next_window = start_next_window;
         blocked_rdev = NULL;
         rcu_read_lock();
         max_sectors = r1_bio->sectors;
@@ -1230,14 +1316,24 @@ read_again:
         if (unlikely(blocked_rdev)) {
                 /* Wait for this device to become unblocked */
                 int j;
+               sector_t old = start_next_window;
  
                 for (j = 0; j < i; j++)
                         if (r1_bio->bios[j])
                                 rdev_dec_pending(conf->mirrors[j].rdev, mddev);
                 r1_bio->state = 0;
-               allow_barrier(conf);
+               allow_barrier(conf, start_next_window, bio->bi_sector);
                 md_wait_for_blocked_rdev(blocked_rdev, mddev);
-               wait_barrier(conf);
+               start_next_window = wait_barrier(conf, bio);
+               /*
+                * We must make sure that all the r1bios of this bio
+                * have the same value of start_next_window
+                */
+               if (bio->bi_phys_segments && old &&
+                   old != start_next_window)
+                       /* Wait for the former r1bio(s) to complete */
+                       wait_event(conf->wait_barrier,
+                                  bio->bi_phys_segments == 1);
                 goto retry_write;
         }
  
@@ -1437,11 +1533,14 @@ static void print_conf(struct r1conf *conf)
  
  static void close_sync(struct r1conf *conf)
  {
-       wait_barrier(conf);
-       allow_barrier(conf);
+       wait_barrier(conf, NULL);
+       allow_barrier(conf, 0, 0);
  
         mempool_destroy(conf->r1buf_pool);
         conf->r1buf_pool = NULL;
+
+       conf->next_resync = 0;
+       conf->start_next_window = MaxSector;
  }
  
  static int raid1_spare_active(struct mddev *mddev)
@@ -2713,6 +2812,9 @@ static struct r1conf *setup_conf(struct mddev *mddev)
         conf->pending_count = 0;
         conf->recovery_disabled = mddev->recovery_disabled - 1;
  
+       conf->start_next_window = MaxSector;
+       conf->current_window_requests = conf->next_window_requests = 0;
+
         err = -EIO;
         for (i = 0; i < conf->raid_disks * 2; i++) {
  
diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h

index 331a98a231b4e3a3dff585055655f065c0f68de9..9bebca7bff2fbc4ec4780031190e6666f7abf56d 100644 (file)
--- a/drivers/md/raid1.h
+++ b/drivers/md/raid1.h
@@ -41,6 +41,19 @@ struct r1conf {
          */
         sector_t                next_resync;
  
+       /* When raid1 starts resync, we divide array into four partitions
+        * |---------|--------------|---------------------|-------------|
+        *        next_resync   start_next_window       end_window
+        * start_next_window = next_resync + NEXT_NORMALIO_DISTANCE
+        * end_window = start_next_window + NEXT_NORMALIO_DISTANCE
+        * current_window_requests counts the normal I/O requests between
+        *   start_next_window and end_window.
+        * next_window_requests counts the normal I/O requests after end_window.
+        * */
+       sector_t                start_next_window;
+       int                     current_window_requests;
+       int                     next_window_requests;
+
         spinlock_t              device_lock;
  
         /* list of 'struct r1bio' that need to be processed by raid1d,
@@ -112,6 +125,7 @@ struct r1bio {
                                                  * in this BehindIO request
                                                  */
         sector_t                sector;
+       sector_t                start_next_window;
         int                     sectors;
         unsigned long           state;
         struct mddev            *mddev;
author	majianpeng <majianpeng@gmail.com>
	Fri, 15 Nov 2013 06:55:02 +0000 (14:55 +0800)
committer	NeilBrown <neilb@suse.de>
	Tue, 19 Nov 2013 04:19:18 +0000 (15:19 +1100)
drivers/md/raid1.c		patch \| blob \| history
drivers/md/raid1.h		patch \| blob \| history