block: make /sys/block/<dev>/queue/discard_max_bytes writeable
author Jens Axboe <axboe@fb.com>
Thu, 16 Jul 2015 15:14:26 +0000 (09:14 -0600)
committer Jens Axboe <axboe@fb.com>
Fri, 17 Jul 2015 14:41:53 +0000 (08:41 -0600)
Lots of devices support huge discard sizes these days. Depending
on how the device handles them internally, huge discards can
introduce massive latencies (hundreds of msec) on the device side.

We have a sysfs file, discard_max_bytes, that advertises the max
hardware supported discard size. Make this writeable, and split
the settings into a soft and hard limit. The soft limit can be set
anywhere from 'discard_granularity' up to the hardware limit.

Add a new sysfs file, 'discard_max_hw_bytes', that shows the hw
set limit.

Reviewed-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
Documentation/block/queue-sysfs.txt
block/blk-settings.c
block/blk-sysfs.c
include/linux/blkdev.h

index 3a29f8914df9cea8a27048960f3b8b489e8d3ce4..e5d914845be6de4e59d59b7c1e905392d6bc5fb5 100644 (file)
@@ -20,7 +20,7 @@ This shows the size of internal allocation of the device in bytes, if
 reported by the device. A value of '0' means device does not support
 the discard functionality.
 
-discard_max_bytes (RO)
+discard_max_hw_bytes (RO)
 ----------------------
 Devices that support discard functionality may have internal limits on
 the number of bytes that can be trimmed or unmapped in a single operation.
@@ -29,6 +29,14 @@ number of bytes that can be discarded in a single operation. Discard
 requests issued to the device must not exceed this limit. A discard_max_bytes
 value of 0 means that the device does not support discard functionality.
 
+discard_max_bytes (RW)
+----------------------
+While discard_max_hw_bytes is the hardware limit for the device, this
+setting is the software limit. Some devices exhibit large latencies when
+large discards are issued; setting this value lower will make Linux issue
+smaller discards and potentially help reduce latencies induced by large
+discard operations.
+
 discard_zeroes_data (RO)
 ------------------------
 When read, this file will show if the discarded block are zeroed by the
index 12600bfffca93f4547e2325eeda9669ff443a7a7..b38d8d723276254dcb4bb2d582f7ae35ec672cfc 100644 (file)
@@ -116,6 +116,7 @@ void blk_set_default_limits(struct queue_limits *lim)
        lim->chunk_sectors = 0;
        lim->max_write_same_sectors = 0;
        lim->max_discard_sectors = 0;
+       lim->max_hw_discard_sectors = 0;
        lim->discard_granularity = 0;
        lim->discard_alignment = 0;
        lim->discard_misaligned = 0;
@@ -303,6 +304,7 @@ EXPORT_SYMBOL(blk_queue_chunk_sectors);
 void blk_queue_max_discard_sectors(struct request_queue *q,
                unsigned int max_discard_sectors)
 {
+       q->limits.max_hw_discard_sectors = max_discard_sectors;
        q->limits.max_discard_sectors = max_discard_sectors;
 }
 EXPORT_SYMBOL(blk_queue_max_discard_sectors);
@@ -641,6 +643,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 
                t->max_discard_sectors = min_not_zero(t->max_discard_sectors,
                                                      b->max_discard_sectors);
+               t->max_hw_discard_sectors = min_not_zero(t->max_hw_discard_sectors,
+                                                        b->max_hw_discard_sectors);
                t->discard_granularity = max(t->discard_granularity,
                                             b->discard_granularity);
                t->discard_alignment = lcm_not_zero(t->discard_alignment, alignment) %
index 6264b382d4d1ba8765dc3b22cead4fd9bf384d99..b1f34e463c0f0fd86c816bed9b7ee421d43b6b58 100644 (file)
@@ -145,12 +145,43 @@ static ssize_t queue_discard_granularity_show(struct request_queue *q, char *pag
        return queue_var_show(q->limits.discard_granularity, page);
 }
 
+/* sysfs show for discard_max_hw_bytes: hardware discard limit, in bytes. */
+static ssize_t queue_discard_max_hw_show(struct request_queue *q, char *page)
+{
+       /* Widen before shifting: a 32-bit "sectors << 9" wraps at 4GiB. */
+       return sprintf(page, "%llu\n",
+               (unsigned long long)q->limits.max_hw_discard_sectors << 9);
+}
+
 static ssize_t queue_discard_max_show(struct request_queue *q, char *page)
 {
        return sprintf(page, "%llu\n",
                       (unsigned long long)q->limits.max_discard_sectors << 9);
 }
 
+static ssize_t queue_discard_max_store(struct request_queue *q,
+                                      const char *page, size_t count)
+{
+       unsigned long max_discard;      /* bytes at first, sectors after the shift */
+       ssize_t ret = queue_var_store(&max_discard, page, count);
+
+       if (ret < 0)
+               return ret;             /* pass the helper's error through */
+
+       if (max_discard & (q->limits.discard_granularity - 1))
+               return -EINVAL;         /* bits set under the granularity mask */
+
+       max_discard >>= 9;              /* divide by 512 */
+       if (max_discard > UINT_MAX)
+               return -EINVAL;         /* too big for the unsigned int limit */
+
+       if (max_discard > q->limits.max_hw_discard_sectors)
+               max_discard = q->limits.max_hw_discard_sectors; /* clamp to hw limit */
+
+       q->limits.max_discard_sectors = max_discard;
+       return ret;                     /* non-negative result of queue_var_store() */
+}
+
 static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *page)
 {
        return queue_var_show(queue_discard_zeroes_data(q), page);
@@ -360,9 +391,15 @@ static struct queue_sysfs_entry queue_discard_granularity_entry = {
        .show = queue_discard_granularity_show,
 };
 
+static struct queue_sysfs_entry queue_discard_max_hw_entry = {
+       .attr = {.name = "discard_max_hw_bytes", .mode = S_IRUGO },
+       .show = queue_discard_max_hw_show,      /* no .store: read-only */
+};
+
 static struct queue_sysfs_entry queue_discard_max_entry = {
-       .attr = {.name = "discard_max_bytes", .mode = S_IRUGO },
+       .attr = {.name = "discard_max_bytes", .mode = S_IRUGO | S_IWUSR },
        .show = queue_discard_max_show,
+       .store = queue_discard_max_store,
 };
 
 static struct queue_sysfs_entry queue_discard_zeroes_data_entry = {
@@ -421,6 +458,7 @@ static struct attribute *default_attrs[] = {
        &queue_io_opt_entry.attr,
        &queue_discard_granularity_entry.attr,
        &queue_discard_max_entry.attr,
+       &queue_discard_max_hw_entry.attr,
        &queue_discard_zeroes_data_entry.attr,
        &queue_write_same_max_entry.attr,
        &queue_nonrot_entry.attr,
index d4068c17d0df9152f8da58c8244ecf6cdf5d2414..243f29e779ec5890d40ccdad1752444ca620a098 100644 (file)
@@ -268,6 +268,7 @@ struct queue_limits {
        unsigned int            io_min;
        unsigned int            io_opt;
        unsigned int            max_discard_sectors;
+       unsigned int            max_hw_discard_sectors;
        unsigned int            max_write_same_sectors;
        unsigned int            discard_granularity;
        unsigned int            discard_alignment;