Skip to content

Commit

Permalink
Squashfs: add multi-threaded decompression using percpu variable
Browse files Browse the repository at this point in the history
Add a multi-threaded decompression implementation which uses
percpu variables.

Using percpu variables has advantages and disadvantages over
implementations which do not use percpu variables.

Advantages:
  * the nature of percpu variables ensures decompression is
    load-balanced across the multiple cores.
  * simplicity.

Disadvantages: it limits decompression to one thread per core.

Signed-off-by: Phillip Lougher <[email protected]>
  • Loading branch information
plougher committed Nov 20, 2013
1 parent cd59c2e commit d208383
Show file tree
Hide file tree
Showing 3 changed files with 145 additions and 20 deletions.
57 changes: 44 additions & 13 deletions fs/squashfs/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,50 @@ config SQUASHFS

If unsure, say N.

choice
prompt "Decompressor parallelisation options"
depends on SQUASHFS
help
Squashfs now supports three parallelisation options for
decompression. Each one exhibits various trade-offs between
decompression performance and CPU and memory usage.

If in doubt, select "Single threaded decompression"

config SQUASHFS_DECOMP_SINGLE
bool "Single threaded decompression"
help
Traditionally Squashfs has used single-threaded decompression.
Only one block (data or metadata) can be decompressed at any
one time. This limits CPU and memory usage to a minimum.

config SQUASHFS_DECOMP_MULTI
bool "Use multiple decompressors for parallel I/O"
help
By default Squashfs uses a single decompressor, but this gives
poor performance on parallel I/O workloads on multi-CPU
machines due to waiting on decompressor availability.

If you have a parallel I/O workload and your system has enough memory,
using this option may improve overall I/O performance.

This decompressor implementation uses up to two parallel
decompressors per core. It dynamically allocates decompressors
on a demand basis.

config SQUASHFS_DECOMP_MULTI_PERCPU
bool "Use percpu multiple decompressors for parallel I/O"
help
By default Squashfs uses a single decompressor, but this gives
poor performance on parallel I/O workloads on multi-CPU
machines due to waiting on decompressor availability.

This decompressor implementation uses a maximum of one
decompressor per core. It uses percpu variables to ensure
decompression is load-balanced across the cores.

endchoice

config SQUASHFS_XATTR
bool "Squashfs XATTR support"
depends on SQUASHFS
Expand Down Expand Up @@ -63,19 +107,6 @@ config SQUASHFS_LZO

If unsure, say N.

config SQUASHFS_MULTI_DECOMPRESSOR
bool "Use multiple decompressors for handling parallel I/O"
depends on SQUASHFS
help
By default Squashfs uses a single decompressor but it gives
poor performance on parallel I/O workloads when using multiple CPU
machines due to waiting on decompressor availability.

If you have a parallel I/O workload and your system has enough memory,
using this option may improve overall I/O performance.

If unsure, say N.

config SQUASHFS_XZ
bool "Include support for XZ compressed file systems"
depends on SQUASHFS
Expand Down
10 changes: 3 additions & 7 deletions fs/squashfs/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,10 @@
obj-$(CONFIG_SQUASHFS) += squashfs.o
squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o
squashfs-y += namei.o super.o symlink.o decompressor.o

squashfs-$(CONFIG_SQUASHFS_DECOMP_SINGLE) += decompressor_single.o
squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI) += decompressor_multi.o
squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU) += decompressor_multi_percpu.o
squashfs-$(CONFIG_SQUASHFS_XATTR) += xattr.o xattr_id.o
squashfs-$(CONFIG_SQUASHFS_LZO) += lzo_wrapper.o
squashfs-$(CONFIG_SQUASHFS_XZ) += xz_wrapper.o
squashfs-$(CONFIG_SQUASHFS_ZLIB) += zlib_wrapper.o

ifdef CONFIG_SQUASHFS_MULTI_DECOMPRESSOR
squashfs-y += decompressor_multi.o
else
squashfs-y += decompressor_single.o
endif
98 changes: 98 additions & 0 deletions fs/squashfs/decompressor_multi_percpu.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
/*
* Copyright (c) 2013
* Phillip Lougher <[email protected]>
*
* This work is licensed under the terms of the GNU GPL, version 2. See
* the COPYING file in the top-level directory.
*/

#include <linux/types.h>
#include <linux/slab.h>
#include <linux/percpu.h>
#include <linux/buffer_head.h>

#include "squashfs_fs.h"
#include "squashfs_fs_sb.h"
#include "decompressor.h"
#include "squashfs.h"

/*
* This file implements multi-threaded decompression using percpu
* variables, one thread per cpu core.
*/

/*
 * Per-cpu decompressor state.  One instance of this structure is
 * allocated for every possible cpu by squashfs_decompressor_create().
 *
 * stream: opaque handle returned by msblk->decompressor->init(); it is
 *         handed back unchanged to the decompressor's ->decompress()
 *         and ->free() callbacks.
 */
struct squashfs_stream {
void *stream;
};

void *squashfs_decompressor_create(struct squashfs_sb_info *msblk,
void *comp_opts)
{
struct squashfs_stream *stream;
struct squashfs_stream __percpu *percpu;
int err, cpu;

percpu = alloc_percpu(struct squashfs_stream);
if (percpu == NULL)
return ERR_PTR(-ENOMEM);

for_each_possible_cpu(cpu) {
stream = per_cpu_ptr(percpu, cpu);
stream->stream = msblk->decompressor->init(msblk, comp_opts);
if (IS_ERR(stream->stream)) {
err = PTR_ERR(stream->stream);
goto out;
}
}

kfree(comp_opts);
return (__force void *) percpu;

out:
for_each_possible_cpu(cpu) {
stream = per_cpu_ptr(percpu, cpu);
if (!IS_ERR_OR_NULL(stream->stream))
msblk->decompressor->free(stream->stream);
}
free_percpu(percpu);
return ERR_PTR(err);
}

void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk)
{
struct squashfs_stream __percpu *percpu =
(struct squashfs_stream __percpu *) msblk->stream;
struct squashfs_stream *stream;
int cpu;

if (msblk->stream) {
for_each_possible_cpu(cpu) {
stream = per_cpu_ptr(percpu, cpu);
msblk->decompressor->free(stream->stream);
}
free_percpu(percpu);
}
}

int squashfs_decompress(struct squashfs_sb_info *msblk,
void **buffer, struct buffer_head **bh, int b, int offset, int length,
int srclength, int pages)
{
struct squashfs_stream __percpu *percpu =
(struct squashfs_stream __percpu *) msblk->stream;
struct squashfs_stream *stream = get_cpu_ptr(percpu);
int res = msblk->decompressor->decompress(msblk, stream->stream, buffer,
bh, b, offset, length, srclength, pages);
put_cpu_ptr(stream);

if (res < 0)
ERROR("%s decompression failed, data probably corrupt\n",
msblk->decompressor->name);

return res;
}

/*
 * Upper bound on the number of decompressions that can run in parallel
 * with this implementation: the value of num_possible_cpus().
 */
int squashfs_max_decompressors(void)
{
	int limit = num_possible_cpus();

	return limit;
}

0 comments on commit d208383

Please sign in to comment.