Squashfs: add multi-threaded decompression using percpu variable
authorPhillip Lougher <phillip@squashfs.org.uk>
Mon, 18 Nov 2013 02:31:36 +0000 (02:31 +0000)
committerPhillip Lougher <phillip@squashfs.org.uk>
Wed, 20 Nov 2013 03:58:03 +0000 (03:58 +0000)
Add a multi-threaded decompression implementation which uses
percpu variables.

Using percpu variables has advantages and disadvantages over
implementations which do not use percpu variables.

Advantages:
  * the nature of percpu variables ensures decompression is
    load-balanced across the multiple cores.
  * simplicity.

Disadvantages: it limits decompression to one thread per core.

Signed-off-by: Phillip Lougher <phillip@squashfs.org.uk>
fs/squashfs/Kconfig
fs/squashfs/Makefile
fs/squashfs/decompressor_multi_percpu.c [new file with mode: 0644]

index 1c6d340fc61f868570cbe1323ac56ae265cd19eb..159bd6676dc2f652cab5a947de88377ffae9c690 100644 (file)
@@ -25,6 +25,50 @@ config SQUASHFS
 
          If unsure, say N.
 
+choice
+       prompt "Decompressor parallelisation options"
+       depends on SQUASHFS
+       help
+         Squashfs now supports three parallelisation options for
+         decompression.  Each one exhibits various trade-offs between
+         decompression performance and CPU and memory usage.
+
+         If in doubt, select "Single threaded compression"
+
+config SQUASHFS_DECOMP_SINGLE
+       bool "Single threaded compression"
+       help
+         Traditionally Squashfs has used single-threaded decompression.
+         Only one block (data or metadata) can be decompressed at any
+         one time.  This limits CPU and memory usage to a minimum.
+
+config SQUASHFS_DECOMP_MULTI
+       bool "Use multiple decompressors for parallel I/O"
+       help
+         By default Squashfs uses a single decompressor but it gives
+         poor performance on parallel I/O workloads when using multiple CPU
+         machines due to waiting on decompressor availability.
+
+         If you have a parallel I/O workload and your system has enough memory,
+         using this option may improve overall I/O performance.
+
+         This decompressor implementation uses up to two parallel
+         decompressors per core.  It dynamically allocates decompressors
+         on a demand basis.
+
+config SQUASHFS_DECOMP_MULTI_PERCPU
+       bool "Use percpu multiple decompressors for parallel I/O"
+       help
+         By default Squashfs uses a single decompressor but it gives
+         poor performance on parallel I/O workloads when using multiple CPU
+         machines due to waiting on decompressor availability.
+
+         This decompressor implementation uses a maximum of one
+         decompressor per core.  It uses percpu variables to ensure
+         decompression is load-balanced across the cores.
+
+endchoice
+
 config SQUASHFS_XATTR
        bool "Squashfs XATTR support"
        depends on SQUASHFS
@@ -63,19 +107,6 @@ config SQUASHFS_LZO
 
          If unsure, say N.
 
-config SQUASHFS_MULTI_DECOMPRESSOR
-       bool "Use multiple decompressors for handling parallel I/O"
-       depends on SQUASHFS
-       help
-         By default Squashfs uses a single decompressor but it gives
-         poor performance on parallel I/O workloads when using multiple CPU
-         machines due to waiting on decompressor availability.
-
-         If you have a parallel I/O workload and your system has enough memory,
-         using this option may improve overall I/O performance.
-
-         If unsure, say N.
-
 config SQUASHFS_XZ
        bool "Include support for XZ compressed file systems"
        depends on SQUASHFS
index dfebc3b12d61f48d2eaf58cff938b13e3e64a01e..5833b96ee69c14b4f9e3cfe99092e12c278bbea1 100644 (file)
@@ -5,14 +5,10 @@
 obj-$(CONFIG_SQUASHFS) += squashfs.o
 squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o
 squashfs-y += namei.o super.o symlink.o decompressor.o
-
+squashfs-$(CONFIG_SQUASHFS_DECOMP_SINGLE) += decompressor_single.o
+squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI) += decompressor_multi.o
+squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU) += decompressor_multi_percpu.o
 squashfs-$(CONFIG_SQUASHFS_XATTR) += xattr.o xattr_id.o
 squashfs-$(CONFIG_SQUASHFS_LZO) += lzo_wrapper.o
 squashfs-$(CONFIG_SQUASHFS_XZ) += xz_wrapper.o
 squashfs-$(CONFIG_SQUASHFS_ZLIB) += zlib_wrapper.o
-
-ifdef CONFIG_SQUASHFS_MULTI_DECOMPRESSOR
-       squashfs-y              += decompressor_multi.o
-else
-       squashfs-y              += decompressor_single.o
-endif
diff --git a/fs/squashfs/decompressor_multi_percpu.c b/fs/squashfs/decompressor_multi_percpu.c
new file mode 100644 (file)
index 0000000..0e7b679
--- /dev/null
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2013
+ * Phillip Lougher <phillip@squashfs.org.uk>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/percpu.h>
+#include <linux/buffer_head.h>
+
+#include "squashfs_fs.h"
+#include "squashfs_fs_sb.h"
+#include "decompressor.h"
+#include "squashfs.h"
+
+/*
+ * This file implements multi-threaded decompression using percpu
+ * variables, one thread per cpu core.
+ */
+
+struct squashfs_stream {
+       void            *stream;
+};
+
+void *squashfs_decompressor_create(struct squashfs_sb_info *msblk,
+                                               void *comp_opts)
+{
+       struct squashfs_stream *stream;
+       struct squashfs_stream __percpu *percpu;
+       int err, cpu;
+
+       percpu = alloc_percpu(struct squashfs_stream);
+       if (percpu == NULL)
+               return ERR_PTR(-ENOMEM);
+
+       for_each_possible_cpu(cpu) {
+               stream = per_cpu_ptr(percpu, cpu);
+               stream->stream = msblk->decompressor->init(msblk, comp_opts);
+               if (IS_ERR(stream->stream)) {
+                       err = PTR_ERR(stream->stream);
+                       goto out;
+               }
+       }
+
+       kfree(comp_opts);
+       return (__force void *) percpu;
+
+out:
+       for_each_possible_cpu(cpu) {
+               stream = per_cpu_ptr(percpu, cpu);
+               if (!IS_ERR_OR_NULL(stream->stream))
+                       msblk->decompressor->free(stream->stream);
+       }
+       free_percpu(percpu);
+       return ERR_PTR(err);
+}
+
+void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk)
+{
+       struct squashfs_stream __percpu *percpu =
+                       (struct squashfs_stream __percpu *) msblk->stream;
+       struct squashfs_stream *stream;
+       int cpu;
+
+       if (msblk->stream) {
+               for_each_possible_cpu(cpu) {
+                       stream = per_cpu_ptr(percpu, cpu);
+                       msblk->decompressor->free(stream->stream);
+               }
+               free_percpu(percpu);
+       }
+}
+
+int squashfs_decompress(struct squashfs_sb_info *msblk,
+       void **buffer, struct buffer_head **bh, int b, int offset, int length,
+       int srclength, int pages)
+{
+       struct squashfs_stream __percpu *percpu =
+                       (struct squashfs_stream __percpu *) msblk->stream;
+       struct squashfs_stream *stream = get_cpu_ptr(percpu);
+       int res = msblk->decompressor->decompress(msblk, stream->stream, buffer,
+               bh, b, offset, length, srclength, pages);
+       put_cpu_ptr(stream);
+
+       if (res < 0)
+               ERROR("%s decompression failed, data probably corrupt\n",
+                       msblk->decompressor->name);
+
+       return res;
+}
+
+int squashfs_max_decompressors(void)
+{
+       return num_possible_cpus();
+}