Merge tag 'dm-3.20-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device...
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 13 Feb 2015 00:36:31 +0000 (16:36 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 13 Feb 2015 00:36:31 +0000 (16:36 -0800)
Pull device mapper changes from Mike Snitzer:

 - The most significant change this cycle is that request-based DM now
   supports stacking on top of blk-mq devices.  This blk-mq support
   changes the model request-based DM uses to clone a request: the clone
   is now obtained by calling blk_get_request() directly on the
   underlying blk-mq device (a rough sketch of this model follows the
   list below).

   An early consumer of this code is Intel's emerging NVMe hardware;
   thanks to Keith Busch for working on, and pushing for, these changes.

 - A few small fixes and cleanups across other DM targets.
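
   As a rough illustration of the new model (this is not code from the
   series, and the use of ti->private is an assumption made for the
   example), a target's clone_and_map_rq hook would allocate the clone
   from the underlying device's queue along these lines:

        #include <linux/blkdev.h>
        #include <linux/device-mapper.h>

        /*
         * Sketch only: the target, not the DM core, allocates the clone
         * by calling blk_get_request() on the underlying blk-mq device's
         * queue.  Assumes the target's constructor stored the underlying
         * struct block_device in ti->private.
         */
        static int example_clone_and_map_rq(struct dm_target *ti,
                                            struct request *rq,
                                            union map_info *map_context,
                                            struct request **__clone)
        {
                struct block_device *bdev = ti->private;

                *__clone = blk_get_request(bdev_get_queue(bdev),
                                           rq_data_dir(rq), GFP_ATOMIC);
                if (IS_ERR(*__clone))
                        /* allocation failed: DM core requeues the original */
                        return DM_MAPIO_REQUEUE;

                /*
                 * DM core runs setup_clone() on *__clone and dispatches it;
                 * a matching release_clone_rq hook would blk_put_request()
                 * the clone once DM core is done with it.
                 */
                return DM_MAPIO_REMAPPED;
        }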

* tag 'dm-3.20-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm: inherit QUEUE_FLAG_SG_GAPS flags from underlying queues
  dm snapshot: remove unnecessary NULL checks before vfree() calls
  dm mpath: simplify failure path of dm_multipath_init()
  dm thin metadata: remove unused dm_pool_get_data_block_size()
  dm ioctl: fix stale comment above dm_get_inactive_table()
  dm crypt: update url in CONFIG_DM_CRYPT help text
  dm bufio: fix time comparison to use time_after_eq()
  dm: use time_in_range() and time_after()
  dm raid: fix a couple integer overflows
  dm table: train hybrid target type detection to select blk-mq if appropriate
  dm: allocate requests in target when stacking on blk-mq devices
  dm: prepare for allocating blk-mq clone requests in target
  dm: submit stacked requests in irq enabled context
  dm: split request structure out from dm_rq_target_io structure
  dm: remove exports for request-based interfaces without external callers
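
  For the time-comparison entries above, the change (visible in the
  dm-cache and dm-thin hunks below) replaces open-coded jiffies
  comparisons, which go wrong when jiffies wraps, with the wrap-safe
  helpers from <linux/jiffies.h> such as time_in_range() and
  time_after_eq().  A minimal restatement of the need_commit_due_to_time()
  pattern (the function name here is illustrative):

        #include <linux/jiffies.h>

        /* Wrap-safe "has commit_period elapsed since last_commit_jiffies?",
         * as need_commit_due_to_time() now does in dm-cache and dm-thin. */
        static int commit_period_elapsed(unsigned long last_commit_jiffies,
                                         unsigned long commit_period)
        {
                return !time_in_range(jiffies, last_commit_jiffies,
                                      last_commit_jiffies + commit_period);
        }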

1  2 
drivers/md/Kconfig
drivers/md/dm-cache-target.c
drivers/md/dm-raid.c
drivers/md/dm-thin.c
drivers/md/dm.c

diff --combined drivers/md/Kconfig
index c355a226a0247c824770457179731bc05d3a0667,09c89a4b014d4bf1724a2fb238bedfc8d6b846fc..c39644478aa4e660f0ec2ddefedea4efbfd776b2
@@@ -5,7 -5,6 +5,7 @@@
  menuconfig MD
        bool "Multiple devices driver support (RAID and LVM)"
        depends on BLOCK
 +      select SRCU
        help
          Support multiple physical spindles through a single logical device.
          Required for RAID and logical volume management.
@@@ -231,9 -230,8 +231,8 @@@ config DM_CRYP
          transparently encrypts the data on it. You'll need to activate
          the ciphers you're going to use in the cryptoapi configuration.
  
-         Information on how to use dm-crypt can be found on
-         <http://www.saout.de/misc/dm-crypt/>
+         For further information on dm-crypt and userspace tools see:
+         <http://code.google.com/p/cryptsetup/wiki/DMCrypt>
  
          To compile this code as a module, choose M here: the module will
          be called dm-crypt.
index e1650539cc2f826d9efe7f878352570bcc31e101,2eca128a9d6ac301f591bd58d41378ed0f4bae03..7755af35186762a4319e8cff52d4e95b26524d3e
@@@ -11,6 -11,7 +11,7 @@@
  
  #include <linux/dm-io.h>
  #include <linux/dm-kcopyd.h>
+ #include <linux/jiffies.h>
  #include <linux/init.h>
  #include <linux/mempool.h>
  #include <linux/module.h>
@@@ -221,13 -222,7 +222,13 @@@ struct cache 
        struct list_head need_commit_migrations;
        sector_t migration_threshold;
        wait_queue_head_t migration_wait;
 -      atomic_t nr_migrations;
 +      atomic_t nr_allocated_migrations;
 +
 +      /*
 +       * The number of in flight migrations that are performing
 +       * background io. eg, promotion, writeback.
 +       */
 +      atomic_t nr_io_migrations;
  
        wait_queue_head_t quiescing_wait;
        atomic_t quiescing;
        struct dm_deferred_set *all_io_ds;
  
        mempool_t *migration_pool;
 -      struct dm_cache_migration *next_migration;
  
        struct dm_cache_policy *policy;
        unsigned policy_nr_args;
@@@ -355,31 -351,10 +356,31 @@@ static void free_prison_cell(struct cac
        dm_bio_prison_free_cell(cache->prison, cell);
  }
  
 +static struct dm_cache_migration *alloc_migration(struct cache *cache)
 +{
 +      struct dm_cache_migration *mg;
 +
 +      mg = mempool_alloc(cache->migration_pool, GFP_NOWAIT);
 +      if (mg) {
 +              mg->cache = cache;
 +              atomic_inc(&mg->cache->nr_allocated_migrations);
 +      }
 +
 +      return mg;
 +}
 +
 +static void free_migration(struct dm_cache_migration *mg)
 +{
 +      if (atomic_dec_and_test(&mg->cache->nr_allocated_migrations))
 +              wake_up(&mg->cache->migration_wait);
 +
 +      mempool_free(mg, mg->cache->migration_pool);
 +}
 +
  static int prealloc_data_structs(struct cache *cache, struct prealloc *p)
  {
        if (!p->mg) {
 -              p->mg = mempool_alloc(cache->migration_pool, GFP_NOWAIT);
 +              p->mg = alloc_migration(cache);
                if (!p->mg)
                        return -ENOMEM;
        }
@@@ -408,7 -383,7 +409,7 @@@ static void prealloc_free_structs(struc
                free_prison_cell(cache, p->cell1);
  
        if (p->mg)
 -              mempool_free(p->mg, cache->migration_pool);
 +              free_migration(p->mg);
  }
  
  static struct dm_cache_migration *prealloc_get_migration(struct prealloc *p)
@@@ -880,14 -855,24 +881,14 @@@ static void remap_to_origin_then_cache(
   * Migration covers moving data from the origin device to the cache, or
   * vice versa.
   *--------------------------------------------------------------*/
 -static void free_migration(struct dm_cache_migration *mg)
 -{
 -      mempool_free(mg, mg->cache->migration_pool);
 -}
 -
 -static void inc_nr_migrations(struct cache *cache)
 +static void inc_io_migrations(struct cache *cache)
  {
 -      atomic_inc(&cache->nr_migrations);
 +      atomic_inc(&cache->nr_io_migrations);
  }
  
 -static void dec_nr_migrations(struct cache *cache)
 +static void dec_io_migrations(struct cache *cache)
  {
 -      atomic_dec(&cache->nr_migrations);
 -
 -      /*
 -       * Wake the worker in case we're suspending the target.
 -       */
 -      wake_up(&cache->migration_wait);
 +      atomic_dec(&cache->nr_io_migrations);
  }
  
  static void __cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell,
@@@ -910,10 -895,11 +911,10 @@@ static void cell_defer(struct cache *ca
        wake_worker(cache);
  }
  
 -static void cleanup_migration(struct dm_cache_migration *mg)
 +static void free_io_migration(struct dm_cache_migration *mg)
  {
 -      struct cache *cache = mg->cache;
 +      dec_io_migrations(mg->cache);
        free_migration(mg);
 -      dec_nr_migrations(cache);
  }
  
  static void migration_failure(struct dm_cache_migration *mg)
                cell_defer(cache, mg->new_ocell, true);
        }
  
 -      cleanup_migration(mg);
 +      free_io_migration(mg);
  }
  
  static void migration_success_pre_commit(struct dm_cache_migration *mg)
        if (mg->writeback) {
                clear_dirty(cache, mg->old_oblock, mg->cblock);
                cell_defer(cache, mg->old_ocell, false);
 -              cleanup_migration(mg);
 +              free_io_migration(mg);
                return;
  
        } else if (mg->demote) {
                                             mg->old_oblock);
                        if (mg->promote)
                                cell_defer(cache, mg->new_ocell, true);
 -                      cleanup_migration(mg);
 +                      free_io_migration(mg);
                        return;
                }
        } else {
                if (dm_cache_insert_mapping(cache->cmd, mg->cblock, mg->new_oblock)) {
                        DMWARN_LIMIT("promotion failed; couldn't update on disk metadata");
                        policy_remove_mapping(cache->policy, mg->new_oblock);
 -                      cleanup_migration(mg);
 +                      free_io_migration(mg);
                        return;
                }
        }
@@@ -999,7 -985,7 +1000,7 @@@ static void migration_success_post_comm
                } else {
                        if (mg->invalidate)
                                policy_remove_mapping(cache->policy, mg->old_oblock);
 -                      cleanup_migration(mg);
 +                      free_io_migration(mg);
                }
  
        } else {
                        bio_endio(mg->new_ocell->holder, 0);
                        cell_defer(cache, mg->new_ocell, false);
                }
 -              cleanup_migration(mg);
 +              free_io_migration(mg);
        }
  }
  
@@@ -1266,7 -1252,7 +1267,7 @@@ static void promote(struct cache *cache
        mg->new_ocell = cell;
        mg->start_jiffies = jiffies;
  
 -      inc_nr_migrations(cache);
 +      inc_io_migrations(cache);
        quiesce_migration(mg);
  }
  
@@@ -1290,7 -1276,7 +1291,7 @@@ static void writeback(struct cache *cac
        mg->new_ocell = NULL;
        mg->start_jiffies = jiffies;
  
 -      inc_nr_migrations(cache);
 +      inc_io_migrations(cache);
        quiesce_migration(mg);
  }
  
@@@ -1317,7 -1303,7 +1318,7 @@@ static void demote_then_promote(struct 
        mg->new_ocell = new_ocell;
        mg->start_jiffies = jiffies;
  
 -      inc_nr_migrations(cache);
 +      inc_io_migrations(cache);
        quiesce_migration(mg);
  }
  
@@@ -1345,7 -1331,7 +1346,7 @@@ static void invalidate(struct cache *ca
        mg->new_ocell = NULL;
        mg->start_jiffies = jiffies;
  
 -      inc_nr_migrations(cache);
 +      inc_io_migrations(cache);
        quiesce_migration(mg);
  }
  
@@@ -1427,7 -1413,7 +1428,7 @@@ static void process_discard_bio(struct 
  
  static bool spare_migration_bandwidth(struct cache *cache)
  {
 -      sector_t current_volume = (atomic_read(&cache->nr_migrations) + 1) *
 +      sector_t current_volume = (atomic_read(&cache->nr_io_migrations) + 1) *
                cache->sectors_per_block;
        return current_volume < cache->migration_threshold;
  }
@@@ -1562,8 -1548,8 +1563,8 @@@ static void process_bio(struct cache *c
  
  static int need_commit_due_to_time(struct cache *cache)
  {
-       return jiffies < cache->last_commit_jiffies ||
-              jiffies > cache->last_commit_jiffies + COMMIT_PERIOD;
+       return !time_in_range(jiffies, cache->last_commit_jiffies,
+                             cache->last_commit_jiffies + COMMIT_PERIOD);
  }
  
  static int commit_if_needed(struct cache *cache)
@@@ -1779,7 -1765,7 +1780,7 @@@ static void stop_quiescing(struct cach
  
  static void wait_for_migrations(struct cache *cache)
  {
 -      wait_event(cache->migration_wait, !atomic_read(&cache->nr_migrations));
 +      wait_event(cache->migration_wait, !atomic_read(&cache->nr_allocated_migrations));
  }
  
  static void stop_worker(struct cache *cache)
@@@ -1891,6 -1877,9 +1892,6 @@@ static void destroy(struct cache *cache
  {
        unsigned i;
  
 -      if (cache->next_migration)
 -              mempool_free(cache->next_migration, cache->migration_pool);
 -
        if (cache->migration_pool)
                mempool_destroy(cache->migration_pool);
  
@@@ -2436,8 -2425,7 +2437,8 @@@ static int cache_create(struct cache_ar
        INIT_LIST_HEAD(&cache->quiesced_migrations);
        INIT_LIST_HEAD(&cache->completed_migrations);
        INIT_LIST_HEAD(&cache->need_commit_migrations);
 -      atomic_set(&cache->nr_migrations, 0);
 +      atomic_set(&cache->nr_allocated_migrations, 0);
 +      atomic_set(&cache->nr_io_migrations, 0);
        init_waitqueue_head(&cache->migration_wait);
  
        init_waitqueue_head(&cache->quiescing_wait);
                goto bad;
        }
  
 -      cache->next_migration = NULL;
 -
        cache->need_tick_bio = true;
        cache->sized = false;
        cache->invalidate = false;
diff --combined drivers/md/dm-raid.c
index 777d9ba2acad646d7a0a20cea72a056ab1684239,41acc9dd7342f1ab3bc0a05a04fc2892ba372ba4..88e4c7f249864e6875796674d7177c8ecd6d6ad4
@@@ -746,7 -746,13 +746,7 @@@ static int raid_is_congested(struct dm_
  {
        struct raid_set *rs = container_of(cb, struct raid_set, callbacks);
  
 -      if (rs->raid_type->level == 1)
 -              return md_raid1_congested(&rs->md, bits);
 -
 -      if (rs->raid_type->level == 10)
 -              return md_raid10_congested(&rs->md, bits);
 -
 -      return md_raid5_congested(&rs->md, bits);
 +      return mddev_congested(&rs->md, bits);
  }
  
  /*
@@@ -1237,7 -1243,7 +1237,7 @@@ static int raid_ctr(struct dm_target *t
        argv++;
  
        /* Skip over RAID params for now and find out # of devices */
-       if (num_raid_params + 1 > argc) {
+       if (num_raid_params >= argc) {
                ti->error = "Arguments do not agree with counts given";
                return -EINVAL;
        }
                return -EINVAL;
        }
  
+       argc -= num_raid_params + 1; /* +1: we already have num_raid_devs */
+       if (argc != (num_raid_devs * 2)) {
+               ti->error = "Supplied RAID devices does not match the count given";
+               return -EINVAL;
+       }
        rs = context_alloc(ti, rt, (unsigned)num_raid_devs);
        if (IS_ERR(rs))
                return PTR_ERR(rs);
        if (ret)
                goto bad;
  
-       ret = -EINVAL;
-       argc -= num_raid_params + 1; /* +1: we already have num_raid_devs */
        argv += num_raid_params + 1;
  
-       if (argc != (num_raid_devs * 2)) {
-               ti->error = "Supplied RAID devices does not match the count given";
-               goto bad;
-       }
        ret = dev_parms(rs, argv);
        if (ret)
                goto bad;
diff --combined drivers/md/dm-thin.c
index 07705ee181e3d2837c47954626276f9dea52cac0,0f781451ea3f2b344dd830f085a22ba73643607a..654773cb1eeea23b39db0fcf0e6fb00d91d9476d
@@@ -11,6 -11,7 +11,7 @@@
  #include <linux/device-mapper.h>
  #include <linux/dm-io.h>
  #include <linux/dm-kcopyd.h>
+ #include <linux/jiffies.h>
  #include <linux/log2.h>
  #include <linux/list.h>
  #include <linux/rculist.h>
@@@ -1700,8 -1701,8 +1701,8 @@@ static void process_cell_fail(struct th
   */
  static int need_commit_due_to_time(struct pool *pool)
  {
-       return jiffies < pool->last_commit_jiffies ||
-              jiffies > pool->last_commit_jiffies + COMMIT_PERIOD;
+       return !time_in_range(jiffies, pool->last_commit_jiffies,
+                             pool->last_commit_jiffies + COMMIT_PERIOD);
  }
  
  #define thin_pbd(node) rb_entry((node), struct dm_thin_endio_hook, rb_node)
@@@ -3385,12 -3386,6 +3386,12 @@@ static int pool_message(struct dm_targe
        struct pool_c *pt = ti->private;
        struct pool *pool = pt->pool;
  
 +      if (get_pool_mode(pool) >= PM_READ_ONLY) {
 +              DMERR("%s: unable to service pool target messages in READ_ONLY or FAIL mode",
 +                    dm_device_name(pool->pool_md));
 +              return -EINVAL;
 +      }
 +
        if (!strcasecmp(argv[0], "create_thin"))
                r = process_create_thin_mesg(argc, argv, pool);
  
diff --combined drivers/md/dm.c
index 68c1b535c52ec5bfe53fe36d0a38b3e3695ca974,549b815999a1e082da40b2a59039c499e1766ee5..ec1444f49de14ac185ae39cfb214deee3ba66998
@@@ -20,6 -20,7 +20,7 @@@
  #include <linux/hdreg.h>
  #include <linux/delay.h>
  #include <linux/wait.h>
+ #include <linux/kthread.h>
  
  #include <trace/events/block.h>
  
@@@ -78,7 -79,8 +79,8 @@@ struct dm_io 
  struct dm_rq_target_io {
        struct mapped_device *md;
        struct dm_target *ti;
-       struct request *orig, clone;
+       struct request *orig, *clone;
+       struct kthread_work work;
        int error;
        union map_info info;
  };
@@@ -179,6 -181,7 +181,7 @@@ struct mapped_device 
         * io objects are allocated from here.
         */
        mempool_t *io_pool;
+       mempool_t *rq_pool;
  
        struct bio_set *bs;
  
        /* zero-length flush that will be cloned and submitted to targets */
        struct bio flush_bio;
  
 +      /* the number of internal suspends */
 +      unsigned internal_suspend_count;
 +
        struct dm_stats stats;
+       struct kthread_worker kworker;
+       struct task_struct *kworker_task;
  };
  
  /*
   */
  struct dm_md_mempools {
        mempool_t *io_pool;
+       mempool_t *rq_pool;
        struct bio_set *bs;
  };
  
@@@ -231,6 -235,7 +238,7 @@@ struct table_device 
  #define RESERVED_MAX_IOS              1024
  static struct kmem_cache *_io_cache;
  static struct kmem_cache *_rq_tio_cache;
+ static struct kmem_cache *_rq_cache;
  
  /*
   * Bio-based DM's mempools' reserved IOs set by the user.
@@@ -288,9 -293,14 +296,14 @@@ static int __init local_init(void
        if (!_rq_tio_cache)
                goto out_free_io_cache;
  
+       _rq_cache = kmem_cache_create("dm_clone_request", sizeof(struct request),
+                                     __alignof__(struct request), 0, NULL);
+       if (!_rq_cache)
+               goto out_free_rq_tio_cache;
        r = dm_uevent_init();
        if (r)
-               goto out_free_rq_tio_cache;
+               goto out_free_rq_cache;
  
        deferred_remove_workqueue = alloc_workqueue("kdmremove", WQ_UNBOUND, 1);
        if (!deferred_remove_workqueue) {
@@@ -312,6 -322,8 +325,8 @@@ out_free_workqueue
        destroy_workqueue(deferred_remove_workqueue);
  out_uevent_exit:
        dm_uevent_exit();
+ out_free_rq_cache:
+       kmem_cache_destroy(_rq_cache);
  out_free_rq_tio_cache:
        kmem_cache_destroy(_rq_tio_cache);
  out_free_io_cache:
@@@ -325,6 -337,7 +340,7 @@@ static void local_exit(void
        flush_scheduled_work();
        destroy_workqueue(deferred_remove_workqueue);
  
+       kmem_cache_destroy(_rq_cache);
        kmem_cache_destroy(_rq_tio_cache);
        kmem_cache_destroy(_io_cache);
        unregister_blkdev(_major, _name);
@@@ -577,6 -590,17 +593,17 @@@ static void free_rq_tio(struct dm_rq_ta
        mempool_free(tio, tio->md->io_pool);
  }
  
+ static struct request *alloc_clone_request(struct mapped_device *md,
+                                          gfp_t gfp_mask)
+ {
+       return mempool_alloc(md->rq_pool, gfp_mask);
+ }
+ static void free_clone_request(struct mapped_device *md, struct request *rq)
+ {
+       mempool_free(rq, md->rq_pool);
+ }
  static int md_in_flight(struct mapped_device *md)
  {
        return atomic_read(&md->pending[READ]) +
@@@ -992,7 -1016,7 +1019,7 @@@ static void end_clone_bio(struct bio *c
   * the md may be freed in dm_put() at the end of this function.
   * Or do dm_get() before calling this function and dm_put() later.
   */
- static void rq_completed(struct mapped_device *md, int rw, int run_queue)
+ static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
  {
        atomic_dec(&md->pending[rw]);
  
@@@ -1020,12 -1044,17 +1047,17 @@@ static void free_rq_clone(struct reques
        struct dm_rq_target_io *tio = clone->end_io_data;
  
        blk_rq_unprep_clone(clone);
+       if (clone->q && clone->q->mq_ops)
+               tio->ti->type->release_clone_rq(clone);
+       else
+               free_clone_request(tio->md, clone);
        free_rq_tio(tio);
  }
  
  /*
   * Complete the clone and the original request.
-  * Must be called without queue lock.
+  * Must be called without clone's queue lock held,
+  * see end_clone_request() for more details.
   */
  static void dm_end_request(struct request *clone, int error)
  {
  
  static void dm_unprep_request(struct request *rq)
  {
-       struct request *clone = rq->special;
+       struct dm_rq_target_io *tio = rq->special;
+       struct request *clone = tio->clone;
  
        rq->special = NULL;
        rq->cmd_flags &= ~REQ_DONTPREP;
  
-       free_rq_clone(clone);
+       if (clone)
+               free_rq_clone(clone);
  }
  
  /*
   * Requeue the original request of a clone.
   */
- void dm_requeue_unmapped_request(struct request *clone)
+ static void dm_requeue_unmapped_original_request(struct mapped_device *md,
+                                                struct request *rq)
  {
-       int rw = rq_data_dir(clone);
-       struct dm_rq_target_io *tio = clone->end_io_data;
-       struct mapped_device *md = tio->md;
-       struct request *rq = tio->orig;
+       int rw = rq_data_dir(rq);
        struct request_queue *q = rq->q;
        unsigned long flags;
  
        blk_requeue_request(q, rq);
        spin_unlock_irqrestore(q->queue_lock, flags);
  
-       rq_completed(md, rw, 0);
+       rq_completed(md, rw, false);
+ }
+ static void dm_requeue_unmapped_request(struct request *clone)
+ {
+       struct dm_rq_target_io *tio = clone->end_io_data;
+       dm_requeue_unmapped_original_request(tio->md, tio->orig);
  }
- EXPORT_SYMBOL_GPL(dm_requeue_unmapped_request);
  
  static void __stop_queue(struct request_queue *q)
  {
@@@ -1151,8 -1186,15 +1189,15 @@@ static void dm_done(struct request *clo
  static void dm_softirq_done(struct request *rq)
  {
        bool mapped = true;
-       struct request *clone = rq->completion_data;
-       struct dm_rq_target_io *tio = clone->end_io_data;
+       struct dm_rq_target_io *tio = rq->special;
+       struct request *clone = tio->clone;
+       if (!clone) {
+               blk_end_request_all(rq, tio->error);
+               rq_completed(tio->md, rq_data_dir(rq), false);
+               free_rq_tio(tio);
+               return;
+       }
  
        if (rq->cmd_flags & REQ_FAILED)
                mapped = false;
   * Complete the clone and the original request with the error status
   * through softirq context.
   */
- static void dm_complete_request(struct request *clone, int error)
+ static void dm_complete_request(struct request *rq, int error)
  {
-       struct dm_rq_target_io *tio = clone->end_io_data;
-       struct request *rq = tio->orig;
+       struct dm_rq_target_io *tio = rq->special;
  
        tio->error = error;
-       rq->completion_data = clone;
        blk_complete_request(rq);
  }
  
   * Complete the not-mapped clone and the original request with the error status
   * through softirq context.
   * Target's rq_end_io() function isn't called.
-  * This may be used when the target's map_rq() function fails.
+  * This may be used when the target's map_rq() or clone_and_map_rq() functions fail.
   */
- void dm_kill_unmapped_request(struct request *clone, int error)
+ static void dm_kill_unmapped_request(struct request *rq, int error)
  {
-       struct dm_rq_target_io *tio = clone->end_io_data;
-       struct request *rq = tio->orig;
        rq->cmd_flags |= REQ_FAILED;
-       dm_complete_request(clone, error);
+       dm_complete_request(rq, error);
  }
- EXPORT_SYMBOL_GPL(dm_kill_unmapped_request);
  
  /*
-  * Called with the queue lock held
+  * Called with the clone's queue lock held
   */
  static void end_clone_request(struct request *clone, int error)
  {
-       /*
-        * For just cleaning up the information of the queue in which
-        * the clone was dispatched.
-        * The clone is *NOT* freed actually here because it is alloced from
-        * dm own mempool and REQ_ALLOCED isn't set in clone->cmd_flags.
-        */
-       __blk_put_request(clone->q, clone);
+       struct dm_rq_target_io *tio = clone->end_io_data;
+       if (!clone->q->mq_ops) {
+               /*
+                * For just cleaning up the information of the queue in which
+                * the clone was dispatched.
+                * The clone is *NOT* freed actually here because it is alloced
+                * from dm own mempool (REQ_ALLOCED isn't set).
+                */
+               __blk_put_request(clone->q, clone);
+       }
  
        /*
         * Actual request completion is done in a softirq context which doesn't
-        * hold the queue lock.  Otherwise, deadlock could occur because:
+        * hold the clone's queue lock.  Otherwise, deadlock could occur because:
         *     - another request may be submitted by the upper level driver
         *       of the stacking during the completion
         *     - the submission which requires queue lock may be done
-        *       against this queue
+        *       against this clone's queue
         */
-       dm_complete_request(clone, error);
+       dm_complete_request(tio->orig, error);
  }
  
  /*
@@@ -1689,19 -1729,19 +1732,19 @@@ static void dm_request(struct request_q
                _dm_request(q, bio);
  }
  
- void dm_dispatch_request(struct request *rq)
+ static void dm_dispatch_clone_request(struct request *clone, struct request *rq)
  {
        int r;
  
-       if (blk_queue_io_stat(rq->q))
-               rq->cmd_flags |= REQ_IO_STAT;
+       if (blk_queue_io_stat(clone->q))
+               clone->cmd_flags |= REQ_IO_STAT;
  
-       rq->start_time = jiffies;
-       r = blk_insert_cloned_request(rq->q, rq);
+       clone->start_time = jiffies;
+       r = blk_insert_cloned_request(clone->q, clone);
        if (r)
+               /* must complete clone in terms of original request */
                dm_complete_request(rq, r);
  }
- EXPORT_SYMBOL_GPL(dm_dispatch_request);
  
  static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig,
                                 void *data)
  }
  
  static int setup_clone(struct request *clone, struct request *rq,
-                      struct dm_rq_target_io *tio)
+                      struct dm_rq_target_io *tio, gfp_t gfp_mask)
  {
        int r;
  
-       blk_rq_init(NULL, clone);
-       r = blk_rq_prep_clone(clone, rq, tio->md->bs, GFP_ATOMIC,
+       r = blk_rq_prep_clone(clone, rq, tio->md->bs, gfp_mask,
                              dm_rq_bio_constructor, tio);
        if (r)
                return r;
        clone->end_io = end_clone_request;
        clone->end_io_data = tio;
  
+       tio->clone = clone;
        return 0;
  }
  
  static struct request *clone_rq(struct request *rq, struct mapped_device *md,
-                               gfp_t gfp_mask)
+                               struct dm_rq_target_io *tio, gfp_t gfp_mask)
+ {
+       struct request *clone = alloc_clone_request(md, gfp_mask);
+       if (!clone)
+               return NULL;
+       blk_rq_init(NULL, clone);
+       if (setup_clone(clone, rq, tio, gfp_mask)) {
+               /* -ENOMEM */
+               free_clone_request(md, clone);
+               return NULL;
+       }
+       return clone;
+ }
+ static void map_tio_request(struct kthread_work *work);
+ static struct dm_rq_target_io *prep_tio(struct request *rq,
+                                       struct mapped_device *md, gfp_t gfp_mask)
  {
-       struct request *clone;
        struct dm_rq_target_io *tio;
+       int srcu_idx;
+       struct dm_table *table;
  
        tio = alloc_rq_tio(md, gfp_mask);
        if (!tio)
  
        tio->md = md;
        tio->ti = NULL;
+       tio->clone = NULL;
        tio->orig = rq;
        tio->error = 0;
        memset(&tio->info, 0, sizeof(tio->info));
-       clone = &tio->clone;
-       if (setup_clone(clone, rq, tio)) {
-               /* -ENOMEM */
-               free_rq_tio(tio);
-               return NULL;
+       init_kthread_work(&tio->work, map_tio_request);
+       table = dm_get_live_table(md, &srcu_idx);
+       if (!dm_table_mq_request_based(table)) {
+               if (!clone_rq(rq, md, tio, gfp_mask)) {
+                       dm_put_live_table(md, srcu_idx);
+                       free_rq_tio(tio);
+                       return NULL;
+               }
        }
+       dm_put_live_table(md, srcu_idx);
  
-       return clone;
+       return tio;
  }
  
  /*
  static int dm_prep_fn(struct request_queue *q, struct request *rq)
  {
        struct mapped_device *md = q->queuedata;
-       struct request *clone;
+       struct dm_rq_target_io *tio;
  
        if (unlikely(rq->special)) {
                DMWARN("Already has something in rq->special.");
                return BLKPREP_KILL;
        }
  
-       clone = clone_rq(rq, md, GFP_ATOMIC);
-       if (!clone)
+       tio = prep_tio(rq, md, GFP_ATOMIC);
+       if (!tio)
                return BLKPREP_DEFER;
  
-       rq->special = clone;
+       rq->special = tio;
        rq->cmd_flags |= REQ_DONTPREP;
  
        return BLKPREP_OK;
  
  /*
   * Returns:
-  * 0  : the request has been processed (not requeued)
-  * !0 : the request has been requeued
+  * 0                : the request has been processed
+  * DM_MAPIO_REQUEUE : the original request needs to be requeued
+  * < 0              : the request was completed due to failure
   */
- static int map_request(struct dm_target *ti, struct request *clone,
+ static int map_request(struct dm_target *ti, struct request *rq,
                       struct mapped_device *md)
  {
-       int r, requeued = 0;
-       struct dm_rq_target_io *tio = clone->end_io_data;
+       int r;
+       struct dm_rq_target_io *tio = rq->special;
+       struct request *clone = NULL;
+       if (tio->clone) {
+               clone = tio->clone;
+               r = ti->type->map_rq(ti, clone, &tio->info);
+       } else {
+               r = ti->type->clone_and_map_rq(ti, rq, &tio->info, &clone);
+               if (r < 0) {
+                       /* The target wants to complete the I/O */
+                       dm_kill_unmapped_request(rq, r);
+                       return r;
+               }
+               if (IS_ERR(clone))
+                       return DM_MAPIO_REQUEUE;
+               if (setup_clone(clone, rq, tio, GFP_KERNEL)) {
+                       /* -ENOMEM */
+                       ti->type->release_clone_rq(clone);
+                       return DM_MAPIO_REQUEUE;
+               }
+       }
  
-       tio->ti = ti;
-       r = ti->type->map_rq(ti, clone, &tio->info);
        switch (r) {
        case DM_MAPIO_SUBMITTED:
                /* The target has taken the I/O to submit by itself later */
        case DM_MAPIO_REMAPPED:
                /* The target has remapped the I/O so dispatch it */
                trace_block_rq_remap(clone->q, clone, disk_devt(dm_disk(md)),
-                                    blk_rq_pos(tio->orig));
-               dm_dispatch_request(clone);
+                                    blk_rq_pos(rq));
+               dm_dispatch_clone_request(clone, rq);
                break;
        case DM_MAPIO_REQUEUE:
                /* The target wants to requeue the I/O */
                dm_requeue_unmapped_request(clone);
-               requeued = 1;
                break;
        default:
                if (r > 0) {
                }
  
                /* The target wants to complete the I/O */
-               dm_kill_unmapped_request(clone, r);
-               break;
+               dm_kill_unmapped_request(rq, r);
+               return r;
        }
  
-       return requeued;
+       return 0;
  }
  
- static struct request *dm_start_request(struct mapped_device *md, struct request *orig)
+ static void map_tio_request(struct kthread_work *work)
  {
-       struct request *clone;
+       struct dm_rq_target_io *tio = container_of(work, struct dm_rq_target_io, work);
+       struct request *rq = tio->orig;
+       struct mapped_device *md = tio->md;
  
+       if (map_request(tio->ti, rq, md) == DM_MAPIO_REQUEUE)
+               dm_requeue_unmapped_original_request(md, rq);
+ }
+ static void dm_start_request(struct mapped_device *md, struct request *orig)
+ {
        blk_start_request(orig);
-       clone = orig->special;
-       atomic_inc(&md->pending[rq_data_dir(clone)]);
+       atomic_inc(&md->pending[rq_data_dir(orig)]);
  
        /*
         * Hold the md reference here for the in-flight I/O.
         * See the comment in rq_completed() too.
         */
        dm_get(md);
-       return clone;
  }
  
  /*
@@@ -1858,7 -1948,8 +1951,8 @@@ static void dm_request_fn(struct reques
        int srcu_idx;
        struct dm_table *map = dm_get_live_table(md, &srcu_idx);
        struct dm_target *ti;
-       struct request *rq, *clone;
+       struct request *rq;
+       struct dm_rq_target_io *tio;
        sector_t pos;
  
        /*
                ti = dm_table_find_target(map, pos);
                if (!dm_target_is_valid(ti)) {
                        /*
-                        * Must perform setup, that dm_done() requires,
+                        * Must perform setup, that rq_completed() requires,
                         * before calling dm_kill_unmapped_request
                         */
                        DMERR_LIMIT("request attempted access beyond the end of device");
-                       clone = dm_start_request(md, rq);
-                       dm_kill_unmapped_request(clone, -EIO);
+                       dm_start_request(md, rq);
+                       dm_kill_unmapped_request(rq, -EIO);
                        continue;
                }
  
                if (ti->type->busy && ti->type->busy(ti))
                        goto delay_and_out;
  
-               clone = dm_start_request(md, rq);
-               spin_unlock(q->queue_lock);
-               if (map_request(ti, clone, md))
-                       goto requeued;
+               dm_start_request(md, rq);
  
+               tio = rq->special;
+               /* Establish tio->ti before queuing work (map_tio_request) */
+               tio->ti = ti;
+               queue_kthread_work(&md->kworker, &tio->work);
                BUG_ON(!irqs_disabled());
-               spin_lock(q->queue_lock);
        }
  
        goto out;
  
- requeued:
-       BUG_ON(!irqs_disabled());
-       spin_lock(q->queue_lock);
  delay_and_out:
        blk_delay_queue(q, HZ / 10);
  out:
@@@ -2093,6 -2179,7 +2182,7 @@@ static struct mapped_device *alloc_dev(
        INIT_WORK(&md->work, dm_wq_work);
        init_waitqueue_head(&md->eventq);
        init_completion(&md->kobj_holder.completion);
+       md->kworker_task = NULL;
  
        md->disk->major = _major;
        md->disk->first_minor = minor;
@@@ -2153,8 -2240,13 +2243,13 @@@ static void free_dev(struct mapped_devi
        unlock_fs(md);
        bdput(md->bdev);
        destroy_workqueue(md->wq);
+       if (md->kworker_task)
+               kthread_stop(md->kworker_task);
        if (md->io_pool)
                mempool_destroy(md->io_pool);
+       if (md->rq_pool)
+               mempool_destroy(md->rq_pool);
        if (md->bs)
                bioset_free(md->bs);
        blk_integrity_unregister(md->disk);
@@@ -2188,23 -2280,24 +2283,24 @@@ static void __bind_mempools(struct mapp
                        bioset_free(md->bs);
                        md->bs = p->bs;
                        p->bs = NULL;
-               } else if (dm_table_get_type(t) == DM_TYPE_REQUEST_BASED) {
-                       /*
-                        * There's no need to reload with request-based dm
-                        * because the size of front_pad doesn't change.
-                        * Note for future: If you are to reload bioset,
-                        * prep-ed requests in the queue may refer
-                        * to bio from the old bioset, so you must walk
-                        * through the queue to unprep.
-                        */
                }
+               /*
+                * There's no need to reload with request-based dm
+                * because the size of front_pad doesn't change.
+                * Note for future: If you are to reload bioset,
+                * prep-ed requests in the queue may refer
+                * to bio from the old bioset, so you must walk
+                * through the queue to unprep.
+                */
                goto out;
        }
  
-       BUG_ON(!p || md->io_pool || md->bs);
+       BUG_ON(!p || md->io_pool || md->rq_pool || md->bs);
  
        md->io_pool = p->io_pool;
        p->io_pool = NULL;
+       md->rq_pool = p->rq_pool;
+       p->rq_pool = NULL;
        md->bs = p->bs;
        p->bs = NULL;
  
@@@ -2407,6 -2500,14 +2503,14 @@@ unsigned dm_get_md_type(struct mapped_d
        return md->type;
  }
  
+ static bool dm_md_type_request_based(struct mapped_device *md)
+ {
+       unsigned table_type = dm_get_md_type(md);
+       return (table_type == DM_TYPE_REQUEST_BASED ||
+               table_type == DM_TYPE_MQ_REQUEST_BASED);
+ }
  struct target_type *dm_get_immutable_target_type(struct mapped_device *md)
  {
        return md->immutable_target_type;
@@@ -2444,6 -2545,11 +2548,11 @@@ static int dm_init_request_based_queue(
        blk_queue_prep_rq(md->queue, dm_prep_fn);
        blk_queue_lld_busy(md->queue, dm_lld_busy);
  
+       /* Also initialize the request-based DM worker thread */
+       init_kthread_worker(&md->kworker);
+       md->kworker_task = kthread_run(kthread_worker_fn, &md->kworker,
+                                      "kdmwork-%s", dm_device_name(md));
        elv_register_queue(md->queue);
  
        return 1;
   */
  int dm_setup_md_queue(struct mapped_device *md)
  {
-       if ((dm_get_md_type(md) == DM_TYPE_REQUEST_BASED) &&
-           !dm_init_request_based_queue(md)) {
+       if (dm_md_type_request_based(md) && !dm_init_request_based_queue(md)) {
                DMWARN("Cannot initialize queue for request-based mapped device");
                return -EINVAL;
        }
@@@ -2534,6 -2639,9 +2642,9 @@@ static void __dm_destroy(struct mapped_
        set_bit(DMF_FREEING, &md->flags);
        spin_unlock(&_minor_lock);
  
+       if (dm_request_based(md))
+               flush_kthread_worker(&md->kworker);
        if (!dm_suspended_md(md)) {
                dm_table_presuspend_targets(map);
                dm_table_postsuspend_targets(map);
@@@ -2777,8 -2885,10 +2888,10 @@@ static int __dm_suspend(struct mapped_d
         * Stop md->queue before flushing md->wq in case request-based
         * dm defers requests to md->wq from md->queue.
         */
-       if (dm_request_based(md))
+       if (dm_request_based(md)) {
                stop_queue(md->queue);
+               flush_kthread_worker(&md->kworker);
+       }
  
        flush_workqueue(md->wq);
  
@@@ -2932,7 -3042,7 +3045,7 @@@ static void __dm_internal_suspend(struc
  {
        struct dm_table *map = NULL;
  
 -      if (dm_suspended_internally_md(md))
 +      if (md->internal_suspend_count++)
                return; /* nested internal suspend */
  
        if (dm_suspended_md(md)) {
  
  static void __dm_internal_resume(struct mapped_device *md)
  {
 -      if (!dm_suspended_internally_md(md))
 +      BUG_ON(!md->internal_suspend_count);
 +
 +      if (--md->internal_suspend_count)
                return; /* resume from nested internal suspend */
  
        if (dm_suspended_md(md))
@@@ -3124,24 -3232,35 +3237,35 @@@ struct dm_md_mempools *dm_alloc_md_memp
  {
        struct dm_md_mempools *pools = kzalloc(sizeof(*pools), GFP_KERNEL);
        struct kmem_cache *cachep;
-       unsigned int pool_size;
+       unsigned int pool_size = 0;
        unsigned int front_pad;
  
        if (!pools)
                return NULL;
  
-       if (type == DM_TYPE_BIO_BASED) {
+       switch (type) {
+       case DM_TYPE_BIO_BASED:
                cachep = _io_cache;
                pool_size = dm_get_reserved_bio_based_ios();
                front_pad = roundup(per_bio_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone);
-       } else if (type == DM_TYPE_REQUEST_BASED) {
-               cachep = _rq_tio_cache;
+               break;
+       case DM_TYPE_REQUEST_BASED:
                pool_size = dm_get_reserved_rq_based_ios();
+               pools->rq_pool = mempool_create_slab_pool(pool_size, _rq_cache);
+               if (!pools->rq_pool)
+                       goto out;
+               /* fall through to setup remaining rq-based pools */
+       case DM_TYPE_MQ_REQUEST_BASED:
+               cachep = _rq_tio_cache;
+               if (!pool_size)
+                       pool_size = dm_get_reserved_rq_based_ios();
                front_pad = offsetof(struct dm_rq_clone_bio_info, clone);
                /* per_bio_data_size is not used. See __bind_mempools(). */
                WARN_ON(per_bio_data_size != 0);
-       } else
+               break;
+       default:
                goto out;
+       }
  
        pools->io_pool = mempool_create_slab_pool(pool_size, cachep);
        if (!pools->io_pool)
@@@ -3170,6 -3289,9 +3294,9 @@@ void dm_free_md_mempools(struct dm_md_m
        if (pools->io_pool)
                mempool_destroy(pools->io_pool);
  
+       if (pools->rq_pool)
+               mempool_destroy(pools->rq_pool);
        if (pools->bs)
                bioset_free(pools->bs);