Merge branch 'stable/for-jens-3.14' of git://git.kernel.org/pub/scm/linux/kernel...
author Jens Axboe <axboe@fb.com>
Mon, 10 Feb 2014 19:52:34 +0000 (12:52 -0700)
committer Jens Axboe <axboe@fb.com>
Mon, 10 Feb 2014 19:52:34 +0000 (12:52 -0700)
Konrad writes:

Please git pull the following branch:

 git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip.git stable/for-jens-3.14

which is based on v3.13-rc6. If you would like me to rebase it on
a different branch/tag, I would be more than happy to do so.

The patches are all bug-fixes and hopefully can go in 3.14.

They deal with xen-blkback shutdown, fixing memory leaks as well as
shutdown races. They should also go to the stable tree, and if you are
OK with that I will ask for those fixes to be backported.

There is also a fix to xen-blkfront to deal with an unexpected state
transition. And lastly a fix to the header, where __aligned__ was
being used unnecessarily.
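
For context on that last fix: dropping the hand-padded __packed__ variant
is safe because natural alignment of the 4-byte grant_ref_t already rounds
the unified struct blkif_request_segment up to the same 8 bytes. A minimal
standalone sketch of that layout check, assuming grant_ref_t is uint32_t
(as in the Xen public headers) and a typical ABI:

	/* Sketch only: checks that the unified segment struct keeps the
	 * 8-byte layout the old _pad + __packed__ combination enforced by
	 * hand. Assumes grant_ref_t is uint32_t (Xen public headers). */
	#include <assert.h>
	#include <stdint.h>
	#include <stdio.h>

	typedef uint32_t grant_ref_t;       /* assumption, see above */

	struct blkif_request_segment {
		grant_ref_t gref;           /* offset 0, 4 bytes          */
		uint8_t     first_sect;     /* offset 4                   */
		uint8_t     last_sect;      /* offset 5, then 2 bytes pad */
	};

	int main(void)
	{
		/* natural 4-byte alignment of gref rounds the struct up to
		 * 8 bytes, so a 4096-byte indirect page still holds 512
		 * segments, exactly as the old padded struct did */
		static_assert(sizeof(struct blkif_request_segment) == 8,
			      "segment ABI changed");
		printf("segments per 4K page: %zu\n",
		       (size_t)4096 / sizeof(struct blkif_request_segment));
		return 0;
	}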

drivers/block/xen-blkback/blkback.c
drivers/block/xen-blkback/common.h
drivers/block/xen-blkback/xenbus.c
drivers/block/xen-blkfront.c
include/xen/interface/io/blkif.h

index 4b97b86da9265b4ca5dcb3a7ab562dbf1eb5bac0..765fc7348b6607c36556563be66674bd7a5fe253 100644 (file)
@@ -375,7 +375,7 @@ static void purge_persistent_gnt(struct xen_blkif *blkif)
 
        pr_debug(DRV_PFX "Going to purge %u persistent grants\n", num_clean);
 
-       INIT_LIST_HEAD(&blkif->persistent_purge_list);
+       BUG_ON(!list_empty(&blkif->persistent_purge_list));
        root = &blkif->persistent_gnts;
 purge_list:
        foreach_grant_safe(persistent_gnt, n, root, node) {
@@ -625,9 +625,23 @@ purge_gnt_list:
                        print_stats(blkif);
        }
 
-       /* Since we are shutting down remove all pages from the buffer */
-       shrink_free_pagepool(blkif, 0 /* All */);
+       /* Drain pending purge work */
+       flush_work(&blkif->persistent_purge_work);
 
+       if (log_stats)
+               print_stats(blkif);
+
+       blkif->xenblkd = NULL;
+       xen_blkif_put(blkif);
+
+       return 0;
+}
+
+/*
+ * Remove persistent grants and empty the pool of free pages
+ */
+void xen_blkbk_free_caches(struct xen_blkif *blkif)
+{
        /* Free all persistent grant pages */
        if (!RB_EMPTY_ROOT(&blkif->persistent_gnts))
                free_persistent_gnts(blkif, &blkif->persistent_gnts,
@@ -636,13 +650,8 @@ purge_gnt_list:
        BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts));
        blkif->persistent_gnt_c = 0;
 
-       if (log_stats)
-               print_stats(blkif);
-
-       blkif->xenblkd = NULL;
-       xen_blkif_put(blkif);
-
-       return 0;
+       /* Since we are shutting down remove all pages from the buffer */
+       shrink_free_pagepool(blkif, 0 /* All */);
 }
 
 /*
@@ -838,7 +847,7 @@ static int xen_blkbk_parse_indirect(struct blkif_request *req,
        struct grant_page **pages = pending_req->indirect_pages;
        struct xen_blkif *blkif = pending_req->blkif;
        int indirect_grefs, rc, n, nseg, i;
-       struct blkif_request_segment_aligned *segments = NULL;
+       struct blkif_request_segment *segments = NULL;
 
        nseg = pending_req->nr_pages;
        indirect_grefs = INDIRECT_PAGES(nseg);
@@ -934,9 +943,7 @@ static void xen_blk_drain_io(struct xen_blkif *blkif)
 {
        atomic_set(&blkif->drain, 1);
        do {
-               /* The initial value is one, and one refcnt taken at the
-                * start of the xen_blkif_schedule thread. */
-               if (atomic_read(&blkif->refcnt) <= 2)
+               if (atomic_read(&blkif->inflight) == 0)
                        break;
                wait_for_completion_interruptible_timeout(
                                &blkif->drain_complete, HZ);
@@ -976,17 +983,30 @@ static void __end_block_io_op(struct pending_req *pending_req, int error)
         * the proper response on the ring.
         */
        if (atomic_dec_and_test(&pending_req->pendcnt)) {
-               xen_blkbk_unmap(pending_req->blkif,
+               struct xen_blkif *blkif = pending_req->blkif;
+
+               xen_blkbk_unmap(blkif,
                                pending_req->segments,
                                pending_req->nr_pages);
-               make_response(pending_req->blkif, pending_req->id,
+               make_response(blkif, pending_req->id,
                              pending_req->operation, pending_req->status);
-               xen_blkif_put(pending_req->blkif);
-               if (atomic_read(&pending_req->blkif->refcnt) <= 2) {
-                       if (atomic_read(&pending_req->blkif->drain))
-                               complete(&pending_req->blkif->drain_complete);
+               free_req(blkif, pending_req);
+               /*
+                * Make sure the request is freed before releasing blkif,
+                * or there could be a race between free_req and the
+                * cleanup done in xen_blkif_free during shutdown.
+                *
+                * NB: The fact that we might try to wake up pending_free_wq
+                * before drain_complete (in case there's a drain going on)
+                * it's not a problem with our current implementation
+                * because we can assure there's no thread waiting on
+                * pending_free_wq if there's a drain going on, but it has
+                * to be taken into account if the current model is changed.
+                */
+               if (atomic_dec_and_test(&blkif->inflight) && atomic_read(&blkif->drain)) {
+                       complete(&blkif->drain_complete);
                }
-               free_req(pending_req->blkif, pending_req);
+               xen_blkif_put(blkif);
        }
 }
 
@@ -1240,6 +1260,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
         * below (in "!bio") if we are handling a BLKIF_OP_DISCARD.
         */
        xen_blkif_get(blkif);
+       atomic_inc(&blkif->inflight);
 
        for (i = 0; i < nseg; i++) {
                while ((bio == NULL) ||
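
To make the new quiescence logic above easier to follow: dispatch_rw_block_io()
now bumps blkif->inflight for every request it issues, and the completion that
drops the counter back to zero wakes a waiting drainer, instead of inferring
idleness from the refcount. A rough, self-contained userspace analogue of that
pattern (hypothetical names; C11 atomics and pthreads standing in for the
kernel primitives; compile with cc -std=c11 -pthread):

	/* Sketch only: userspace analogue of the inflight/drain accounting
	 * added above; not kernel code. */
	#include <pthread.h>
	#include <stdatomic.h>
	#include <stdio.h>

	struct blkif_sim {                /* hypothetical stand-in for xen_blkif */
		atomic_int      inflight; /* requests issued but not completed   */
		atomic_int      drain;    /* set while a barrier drains the queue */
		pthread_mutex_t lock;
		pthread_cond_t  drain_complete;
	};

	static void dispatch(struct blkif_sim *b)
	{
		/* mirrors atomic_inc(&blkif->inflight) in dispatch_rw_block_io() */
		atomic_fetch_add(&b->inflight, 1);
	}

	static void complete_io(struct blkif_sim *b)
	{
		/* mirrors __end_block_io_op(): only the completion that drops
		 * inflight to zero signals a waiting drainer */
		if (atomic_fetch_sub(&b->inflight, 1) == 1 &&
		    atomic_load(&b->drain)) {
			pthread_mutex_lock(&b->lock);
			pthread_cond_signal(&b->drain_complete);
			pthread_mutex_unlock(&b->lock);
		}
	}

	static void drain_io(struct blkif_sim *b)
	{
		/* mirrors xen_blk_drain_io(): wait until nothing is in flight */
		atomic_store(&b->drain, 1);
		pthread_mutex_lock(&b->lock);
		while (atomic_load(&b->inflight) != 0)
			pthread_cond_wait(&b->drain_complete, &b->lock);
		pthread_mutex_unlock(&b->lock);
		atomic_store(&b->drain, 0);
	}

	int main(void)
	{
		struct blkif_sim b = {
			.lock = PTHREAD_MUTEX_INITIALIZER,
			.drain_complete = PTHREAD_COND_INITIALIZER,
		};
		dispatch(&b);
		complete_io(&b);
		drain_io(&b);   /* returns at once: nothing left in flight */
		puts("drained");
		return 0;
	}

Because the drainer rechecks the counter under the mutex before sleeping, a
signal issued between its check and its wait cannot be lost; the kernel code
gets the same guarantee from the completion API (and retries every HZ anyway).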
index 8d8807563d9967afd28b70dcb0cd072bed35e8ec..9eb34e24b4fe9a5156ae6c548620c5722adefb5e 100644 (file)
@@ -57,7 +57,7 @@
 #define MAX_INDIRECT_SEGMENTS 256
 
 #define SEGS_PER_INDIRECT_FRAME \
-       (PAGE_SIZE/sizeof(struct blkif_request_segment_aligned))
+       (PAGE_SIZE/sizeof(struct blkif_request_segment))
 #define MAX_INDIRECT_PAGES \
        ((MAX_INDIRECT_SEGMENTS + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME)
 #define INDIRECT_PAGES(_segs) \
@@ -278,6 +278,7 @@ struct xen_blkif {
        /* for barrier (drain) requests */
        struct completion       drain_complete;
        atomic_t                drain;
+       atomic_t                inflight;
        /* One thread per one blkif. */
        struct task_struct      *xenblkd;
        unsigned int            waiting_reqs;
@@ -376,6 +377,7 @@ int xen_blkif_xenbus_init(void);
 irqreturn_t xen_blkif_be_int(int irq, void *dev_id);
 int xen_blkif_schedule(void *arg);
 int xen_blkif_purge_persistent(void *arg);
+void xen_blkbk_free_caches(struct xen_blkif *blkif);
 
 int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
                              struct backend_info *be, int state);
index c2014a0aa206b2ec1b1ee328e84bde4407026616..84973c6a856aba0c33c1ebf305d75fd7fb200fe0 100644 (file)
@@ -125,8 +125,10 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
        blkif->persistent_gnts.rb_node = NULL;
        spin_lock_init(&blkif->free_pages_lock);
        INIT_LIST_HEAD(&blkif->free_pages);
+       INIT_LIST_HEAD(&blkif->persistent_purge_list);
        blkif->free_pages_num = 0;
        atomic_set(&blkif->persistent_gnt_in_use, 0);
+       atomic_set(&blkif->inflight, 0);
 
        INIT_LIST_HEAD(&blkif->pending_free);
 
@@ -259,6 +261,17 @@ static void xen_blkif_free(struct xen_blkif *blkif)
        if (!atomic_dec_and_test(&blkif->refcnt))
                BUG();
 
+       /* Remove all persistent grants and the cache of ballooned pages. */
+       xen_blkbk_free_caches(blkif);
+
+       /* Make sure everything is drained before shutting down */
+       BUG_ON(blkif->persistent_gnt_c != 0);
+       BUG_ON(atomic_read(&blkif->persistent_gnt_in_use) != 0);
+       BUG_ON(blkif->free_pages_num != 0);
+       BUG_ON(!list_empty(&blkif->persistent_purge_list));
+       BUG_ON(!list_empty(&blkif->free_pages));
+       BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts));
+
        /* Check that there is no request in use */
        list_for_each_entry_safe(req, n, &blkif->pending_free, free_list) {
                list_del(&req->free_list);
index 8dcfb54f160302e0e1d91c232387f758b2f8e0f6..efe1b4761735a79faa30867ad625fdd51e043081 100644 (file)
@@ -162,7 +162,7 @@ static DEFINE_SPINLOCK(minor_lock);
 #define DEV_NAME       "xvd"   /* name in /dev */
 
 #define SEGS_PER_INDIRECT_FRAME \
-       (PAGE_SIZE/sizeof(struct blkif_request_segment_aligned))
+       (PAGE_SIZE/sizeof(struct blkif_request_segment))
 #define INDIRECT_GREFS(_segs) \
        ((_segs + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME)
 
@@ -393,7 +393,7 @@ static int blkif_queue_request(struct request *req)
        unsigned long id;
        unsigned int fsect, lsect;
        int i, ref, n;
-       struct blkif_request_segment_aligned *segments = NULL;
+       struct blkif_request_segment *segments = NULL;
 
        /*
         * Used to store if we are able to queue the request by just using
@@ -550,7 +550,7 @@ static int blkif_queue_request(struct request *req)
                        } else {
                                n = i % SEGS_PER_INDIRECT_FRAME;
                                segments[n] =
-                                       (struct blkif_request_segment_aligned) {
+                                       (struct blkif_request_segment) {
                                                        .gref       = ref,
                                                        .first_sect = fsect,
                                                        .last_sect  = lsect };
@@ -1904,13 +1904,16 @@ static void blkback_changed(struct xenbus_device *dev,
        case XenbusStateReconfiguring:
        case XenbusStateReconfigured:
        case XenbusStateUnknown:
-       case XenbusStateClosed:
                break;
 
        case XenbusStateConnected:
                blkfront_connect(info);
                break;
 
+       case XenbusStateClosed:
+               if (dev->state == XenbusStateClosed)
+                       break;
+               /* Missed the backend's Closing state -- fallthrough */
        case XenbusStateClosing:
                blkfront_closing(info);
                break;
index ae665ac59c36b6812a0470d5a7e0124c3e13a69f..32ec05a6572f779cb735b7a9b3ca5567cd1aed8f 100644 (file)
@@ -113,13 +113,13 @@ typedef uint64_t blkif_sector_t;
  * it's less than the number provided by the backend. The indirect_grefs field
  * in blkif_request_indirect should be filled by the frontend with the
  * grant references of the pages that are holding the indirect segments.
- * This pages are filled with an array of blkif_request_segment_aligned
- * that hold the information about the segments. The number of indirect
- * pages to use is determined by the maximum number of segments
- * a indirect request contains. Every indirect page can contain a maximum
- * of 512 segments (PAGE_SIZE/sizeof(blkif_request_segment_aligned)),
- * so to calculate the number of indirect pages to use we have to do
- * ceil(indirect_segments/512).
+ * These pages are filled with an array of blkif_request_segment that hold the
+ * information about the segments. The number of indirect pages to use is
+ * determined by the number of segments an indirect request contains. Every
+ * indirect page can contain a maximum of
+ * (PAGE_SIZE / sizeof(struct blkif_request_segment)) segments, so to
+ * calculate the number of indirect pages to use we have to do
+ * ceil(indirect_segments / (PAGE_SIZE / sizeof(struct blkif_request_segment))).
  *
  * If a backend does not recognize BLKIF_OP_INDIRECT, it should *not*
  * create the "feature-max-indirect-segments" node!
@@ -135,13 +135,12 @@ typedef uint64_t blkif_sector_t;
 
 #define BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST 8
 
-struct blkif_request_segment_aligned {
-       grant_ref_t gref;        /* reference to I/O buffer frame        */
-       /* @first_sect: first sector in frame to transfer (inclusive).   */
-       /* @last_sect: last sector in frame to transfer (inclusive).     */
-       uint8_t     first_sect, last_sect;
-       uint16_t    _pad; /* padding to make it 8 bytes, so it's cache-aligned */
-} __attribute__((__packed__));
+struct blkif_request_segment {
+               grant_ref_t gref;        /* reference to I/O buffer frame        */
+               /* @first_sect: first sector in frame to transfer (inclusive).   */
+               /* @last_sect: last sector in frame to transfer (inclusive).     */
+               uint8_t     first_sect, last_sect;
+};
 
 struct blkif_request_rw {
        uint8_t        nr_segments;  /* number of segments                   */
@@ -151,12 +150,7 @@ struct blkif_request_rw {
 #endif
        uint64_t       id;           /* private guest value, echoed in resp  */
        blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
-       struct blkif_request_segment {
-               grant_ref_t gref;        /* reference to I/O buffer frame        */
-               /* @first_sect: first sector in frame to transfer (inclusive).   */
-               /* @last_sect: last sector in frame to transfer (inclusive).     */
-               uint8_t     first_sect, last_sect;
-       } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+       struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 } __attribute__((__packed__));
 
 struct blkif_request_discard {
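
To make the ceil() in the rewritten comment concrete: with 4 KiB pages the
8-byte segment struct gives SEGS_PER_INDIRECT_FRAME = 512, so blkback's
MAX_INDIRECT_SEGMENTS of 256 fits in a single indirect page, comfortably
under BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST (8). A small sketch of the
arithmetic, with the constants copied from the hunks above:

	/* Sketch: reproduces the SEGS_PER_INDIRECT_FRAME / INDIRECT_PAGES
	 * arithmetic from the hunks above for a 4 KiB page. */
	#include <stdio.h>

	#define PAGE_SIZE    4096u
	#define SEGMENT_SIZE 8u   /* sizeof(struct blkif_request_segment) */
	#define SEGS_PER_INDIRECT_FRAME (PAGE_SIZE / SEGMENT_SIZE)
	#define INDIRECT_PAGES(segs) \
		(((segs) + SEGS_PER_INDIRECT_FRAME - 1) / SEGS_PER_INDIRECT_FRAME)

	int main(void)
	{
		printf("segments per indirect page: %u\n",
		       SEGS_PER_INDIRECT_FRAME);          /* 512 */
		printf("pages for 256 segments:     %u\n",
		       INDIRECT_PAGES(256u));             /* ceil(256/512) = 1 */
		printf("pages for 513 segments:     %u\n",
		       INDIRECT_PAGES(513u));             /* ceil(513/512) = 2 */
		return 0;
	}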