Skip to content

Commit

Permalink
crypto: cryptd - Per-CPU thread implementation based on kcrypto_wq
Browse files Browse the repository at this point in the history
Original cryptd thread implementation has scalability issue, this
patch solve the issue with a per-CPU thread implementation.

struct cryptd_queue is defined to be a per-CPU queue, which holds one
struct cryptd_cpu_queue for each CPU. In struct cryptd_cpu_queue, a
struct crypto_queue holds all requests for the CPU, a struct
work_struct is used to run all requests for the CPU.

Testing based on dm-crypt on an Intel Core 2 E6400 (two cores) machine
shows 19.2% performance gain. The testing script is as follow:

-------------------- script begin ---------------------------
#!/bin/sh

dmc_create()
{
        # Create a crypt device using dmsetup
        dmsetup create $2 --table "0 `blockdev --getsize $1` crypt cbc(aes-asm)?cryptd?plain:plain babebabebabebabebabebabebabebabe 0 $1 0"
}

dmsetup remove crypt0
dmsetup remove crypt1

dd if=/dev/zero of=/dev/ram0 bs=1M count=4 >& /dev/null
dd if=/dev/zero of=/dev/ram1 bs=1M count=4 >& /dev/null

dmc_create /dev/ram0 crypt0
dmc_create /dev/ram1 crypt1

cat >tr.sh <<EOF
#!/bin/sh

for n in \$(seq 10); do
        dd if=/dev/dm-0 of=/dev/null >& /dev/null &
        dd if=/dev/dm-1 of=/dev/null >& /dev/null &
done
wait
EOF

for n in $(seq 10); do
        /usr/bin/time sh tr.sh
done
rm tr.sh
-------------------- script end   ---------------------------

The separator of dm-crypt parameter is changed from "-" to "?", because
"-" is used in some cipher driver name too, and cryptds need to specify
cipher driver name instead of cipher name.

The test result on an Intel Core2 E6400 (two cores) is as follow:

without patch:
-----------------wo begin --------------------------
0.04user 0.38system 0:00.39elapsed 107%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+6566minor)pagefaults 0swaps
0.07user 0.35system 0:00.35elapsed 121%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+6567minor)pagefaults 0swaps
0.06user 0.34system 0:00.30elapsed 135%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+6562minor)pagefaults 0swaps
0.05user 0.37system 0:00.36elapsed 119%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+6607minor)pagefaults 0swaps
0.06user 0.36system 0:00.35elapsed 120%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+6562minor)pagefaults 0swaps
0.05user 0.37system 0:00.31elapsed 136%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+6594minor)pagefaults 0swaps
0.04user 0.34system 0:00.30elapsed 126%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+6597minor)pagefaults 0swaps
0.06user 0.32system 0:00.31elapsed 125%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+6571minor)pagefaults 0swaps
0.06user 0.34system 0:00.31elapsed 134%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+6581minor)pagefaults 0swaps
0.05user 0.38system 0:00.31elapsed 138%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+6600minor)pagefaults 0swaps
-----------------wo end   --------------------------


with patch:
------------------w begin --------------------------
0.02user 0.31system 0:00.24elapsed 141%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+6554minor)pagefaults 0swaps
0.05user 0.34system 0:00.31elapsed 127%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+6606minor)pagefaults 0swaps
0.07user 0.33system 0:00.26elapsed 155%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+6559minor)pagefaults 0swaps
0.07user 0.32system 0:00.26elapsed 151%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+6562minor)pagefaults 0swaps
0.05user 0.34system 0:00.26elapsed 150%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+6603minor)pagefaults 0swaps
0.03user 0.36system 0:00.31elapsed 124%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+6562minor)pagefaults 0swaps
0.04user 0.35system 0:00.26elapsed 147%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+6586minor)pagefaults 0swaps
0.03user 0.37system 0:00.27elapsed 146%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+6562minor)pagefaults 0swaps
0.04user 0.36system 0:00.26elapsed 154%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+6594minor)pagefaults 0swaps
0.04user 0.35system 0:00.26elapsed 154%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+6557minor)pagefaults 0swaps
------------------w end   --------------------------

The middle value of elapsed time is:
wo cryptwq: 0.31
w  cryptwq: 0.26

The performance gain is about (0.31-0.26)/0.26 = 0.192.

Signed-off-by: Huang Ying <[email protected]>
Signed-off-by: Herbert Xu <[email protected]>
  • Loading branch information
yhuang-intel authored and herbertx committed Feb 19, 2009
1 parent 25c38d3 commit 254eff7
Show file tree
Hide file tree
Showing 2 changed files with 104 additions and 117 deletions.
1 change: 1 addition & 0 deletions crypto/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ config CRYPTO_CRYPTD
select CRYPTO_BLKCIPHER
select CRYPTO_HASH
select CRYPTO_MANAGER
select CRYPTO_WORKQUEUE
help
This is a generic software asynchronous crypto daemon that
converts an arbitrary synchronous software crypto algorithm
Expand Down
220 changes: 103 additions & 117 deletions crypto/cryptd.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,30 +13,30 @@
#include <crypto/algapi.h>
#include <crypto/internal/hash.h>
#include <crypto/cryptd.h>
#include <crypto/crypto_wq.h>
#include <linux/err.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/scatterlist.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

#define CRYPTD_MAX_QLEN 100
#define CRYPTD_MAX_CPU_QLEN 100

struct cryptd_state {
spinlock_t lock;
struct mutex mutex;
struct cryptd_cpu_queue {
struct crypto_queue queue;
struct task_struct *task;
struct work_struct work;
};

struct cryptd_queue {
struct cryptd_cpu_queue *cpu_queue;
};

struct cryptd_instance_ctx {
struct crypto_spawn spawn;
struct cryptd_state *state;
struct cryptd_queue *queue;
};

struct cryptd_blkcipher_ctx {
Expand All @@ -55,11 +55,85 @@ struct cryptd_hash_request_ctx {
crypto_completion_t complete;
};

static inline struct cryptd_state *cryptd_get_state(struct crypto_tfm *tfm)
static void cryptd_queue_worker(struct work_struct *work);

static int cryptd_init_queue(struct cryptd_queue *queue,
unsigned int max_cpu_qlen)
{
int cpu;
struct cryptd_cpu_queue *cpu_queue;

queue->cpu_queue = alloc_percpu(struct cryptd_cpu_queue);
if (!queue->cpu_queue)
return -ENOMEM;
for_each_possible_cpu(cpu) {
cpu_queue = per_cpu_ptr(queue->cpu_queue, cpu);
crypto_init_queue(&cpu_queue->queue, max_cpu_qlen);
INIT_WORK(&cpu_queue->work, cryptd_queue_worker);
}
return 0;
}

static void cryptd_fini_queue(struct cryptd_queue *queue)
{
int cpu;
struct cryptd_cpu_queue *cpu_queue;

for_each_possible_cpu(cpu) {
cpu_queue = per_cpu_ptr(queue->cpu_queue, cpu);
BUG_ON(cpu_queue->queue.qlen);
}
free_percpu(queue->cpu_queue);
}

static int cryptd_enqueue_request(struct cryptd_queue *queue,
struct crypto_async_request *request)
{
int cpu, err;
struct cryptd_cpu_queue *cpu_queue;

cpu = get_cpu();
cpu_queue = per_cpu_ptr(queue->cpu_queue, cpu);
err = crypto_enqueue_request(&cpu_queue->queue, request);
queue_work_on(cpu, kcrypto_wq, &cpu_queue->work);
put_cpu();

return err;
}

/* Called in workqueue context, do one real cryption work (via
* req->complete) and reschedule itself if there are more work to
* do. */
static void cryptd_queue_worker(struct work_struct *work)
{
struct cryptd_cpu_queue *cpu_queue;
struct crypto_async_request *req, *backlog;

cpu_queue = container_of(work, struct cryptd_cpu_queue, work);
/* Only handle one request at a time to avoid hogging crypto
* workqueue. preempt_disable/enable is used to prevent
* being preempted by cryptd_enqueue_request() */
preempt_disable();
backlog = crypto_get_backlog(&cpu_queue->queue);
req = crypto_dequeue_request(&cpu_queue->queue);
preempt_enable();

if (!req)
return;

if (backlog)
backlog->complete(backlog, -EINPROGRESS);
req->complete(req, 0);

if (cpu_queue->queue.qlen)
queue_work(kcrypto_wq, &cpu_queue->work);
}

static inline struct cryptd_queue *cryptd_get_queue(struct crypto_tfm *tfm)
{
struct crypto_instance *inst = crypto_tfm_alg_instance(tfm);
struct cryptd_instance_ctx *ictx = crypto_instance_ctx(inst);
return ictx->state;
return ictx->queue;
}

static int cryptd_blkcipher_setkey(struct crypto_ablkcipher *parent,
Expand Down Expand Up @@ -131,19 +205,13 @@ static int cryptd_blkcipher_enqueue(struct ablkcipher_request *req,
{
struct cryptd_blkcipher_request_ctx *rctx = ablkcipher_request_ctx(req);
struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
struct cryptd_state *state =
cryptd_get_state(crypto_ablkcipher_tfm(tfm));
int err;
struct cryptd_queue *queue;

queue = cryptd_get_queue(crypto_ablkcipher_tfm(tfm));
rctx->complete = req->base.complete;
req->base.complete = complete;

spin_lock_bh(&state->lock);
err = ablkcipher_enqueue_request(&state->queue, req);
spin_unlock_bh(&state->lock);

wake_up_process(state->task);
return err;
return cryptd_enqueue_request(queue, &req->base);
}

static int cryptd_blkcipher_encrypt_enqueue(struct ablkcipher_request *req)
Expand Down Expand Up @@ -177,21 +245,12 @@ static int cryptd_blkcipher_init_tfm(struct crypto_tfm *tfm)
static void cryptd_blkcipher_exit_tfm(struct crypto_tfm *tfm)
{
struct cryptd_blkcipher_ctx *ctx = crypto_tfm_ctx(tfm);
struct cryptd_state *state = cryptd_get_state(tfm);
int active;

mutex_lock(&state->mutex);
active = ablkcipher_tfm_in_queue(&state->queue,
__crypto_ablkcipher_cast(tfm));
mutex_unlock(&state->mutex);

BUG_ON(active);

crypto_free_blkcipher(ctx->child);
}

static struct crypto_instance *cryptd_alloc_instance(struct crypto_alg *alg,
struct cryptd_state *state)
struct cryptd_queue *queue)
{
struct crypto_instance *inst;
struct cryptd_instance_ctx *ctx;
Expand All @@ -214,7 +273,7 @@ static struct crypto_instance *cryptd_alloc_instance(struct crypto_alg *alg,
if (err)
goto out_free_inst;

ctx->state = state;
ctx->queue = queue;

memcpy(inst->alg.cra_name, alg->cra_name, CRYPTO_MAX_ALG_NAME);

Expand All @@ -232,7 +291,7 @@ static struct crypto_instance *cryptd_alloc_instance(struct crypto_alg *alg,
}

static struct crypto_instance *cryptd_alloc_blkcipher(
struct rtattr **tb, struct cryptd_state *state)
struct rtattr **tb, struct cryptd_queue *queue)
{
struct crypto_instance *inst;
struct crypto_alg *alg;
Expand All @@ -242,7 +301,7 @@ static struct crypto_instance *cryptd_alloc_blkcipher(
if (IS_ERR(alg))
return ERR_CAST(alg);

inst = cryptd_alloc_instance(alg, state);
inst = cryptd_alloc_instance(alg, queue);
if (IS_ERR(inst))
goto out_put_alg;

Expand Down Expand Up @@ -290,15 +349,6 @@ static int cryptd_hash_init_tfm(struct crypto_tfm *tfm)
static void cryptd_hash_exit_tfm(struct crypto_tfm *tfm)
{
struct cryptd_hash_ctx *ctx = crypto_tfm_ctx(tfm);
struct cryptd_state *state = cryptd_get_state(tfm);
int active;

mutex_lock(&state->mutex);
active = ahash_tfm_in_queue(&state->queue,
__crypto_ahash_cast(tfm));
mutex_unlock(&state->mutex);

BUG_ON(active);

crypto_free_hash(ctx->child);
}
Expand All @@ -324,19 +374,13 @@ static int cryptd_hash_enqueue(struct ahash_request *req,
{
struct cryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
struct cryptd_state *state =
cryptd_get_state(crypto_ahash_tfm(tfm));
int err;
struct cryptd_queue *queue =
cryptd_get_queue(crypto_ahash_tfm(tfm));

rctx->complete = req->base.complete;
req->base.complete = complete;

spin_lock_bh(&state->lock);
err = ahash_enqueue_request(&state->queue, req);
spin_unlock_bh(&state->lock);

wake_up_process(state->task);
return err;
return cryptd_enqueue_request(queue, &req->base);
}

static void cryptd_hash_init(struct crypto_async_request *req_async, int err)
Expand Down Expand Up @@ -469,7 +513,7 @@ static int cryptd_hash_digest_enqueue(struct ahash_request *req)
}

static struct crypto_instance *cryptd_alloc_hash(
struct rtattr **tb, struct cryptd_state *state)
struct rtattr **tb, struct cryptd_queue *queue)
{
struct crypto_instance *inst;
struct crypto_alg *alg;
Expand All @@ -479,7 +523,7 @@ static struct crypto_instance *cryptd_alloc_hash(
if (IS_ERR(alg))
return ERR_PTR(PTR_ERR(alg));

inst = cryptd_alloc_instance(alg, state);
inst = cryptd_alloc_instance(alg, queue);
if (IS_ERR(inst))
goto out_put_alg;

Expand All @@ -503,7 +547,7 @@ static struct crypto_instance *cryptd_alloc_hash(
return inst;
}

static struct cryptd_state state;
static struct cryptd_queue queue;

static struct crypto_instance *cryptd_alloc(struct rtattr **tb)
{
Expand All @@ -515,9 +559,9 @@ static struct crypto_instance *cryptd_alloc(struct rtattr **tb)

switch (algt->type & algt->mask & CRYPTO_ALG_TYPE_MASK) {
case CRYPTO_ALG_TYPE_BLKCIPHER:
return cryptd_alloc_blkcipher(tb, &state);
return cryptd_alloc_blkcipher(tb, &queue);
case CRYPTO_ALG_TYPE_DIGEST:
return cryptd_alloc_hash(tb, &state);
return cryptd_alloc_hash(tb, &queue);
}

return ERR_PTR(-EINVAL);
Expand Down Expand Up @@ -572,82 +616,24 @@ void cryptd_free_ablkcipher(struct cryptd_ablkcipher *tfm)
}
EXPORT_SYMBOL_GPL(cryptd_free_ablkcipher);

static inline int cryptd_create_thread(struct cryptd_state *state,
int (*fn)(void *data), const char *name)
{
spin_lock_init(&state->lock);
mutex_init(&state->mutex);
crypto_init_queue(&state->queue, CRYPTD_MAX_QLEN);

state->task = kthread_run(fn, state, name);
if (IS_ERR(state->task))
return PTR_ERR(state->task);

return 0;
}

static inline void cryptd_stop_thread(struct cryptd_state *state)
{
BUG_ON(state->queue.qlen);
kthread_stop(state->task);
}

static int cryptd_thread(void *data)
{
struct cryptd_state *state = data;
int stop;

current->flags |= PF_NOFREEZE;

do {
struct crypto_async_request *req, *backlog;

mutex_lock(&state->mutex);
__set_current_state(TASK_INTERRUPTIBLE);

spin_lock_bh(&state->lock);
backlog = crypto_get_backlog(&state->queue);
req = crypto_dequeue_request(&state->queue);
spin_unlock_bh(&state->lock);

stop = kthread_should_stop();

if (stop || req) {
__set_current_state(TASK_RUNNING);
if (req) {
if (backlog)
backlog->complete(backlog,
-EINPROGRESS);
req->complete(req, 0);
}
}

mutex_unlock(&state->mutex);

schedule();
} while (!stop);

return 0;
}

static int __init cryptd_init(void)
{
int err;

err = cryptd_create_thread(&state, cryptd_thread, "cryptd");
err = cryptd_init_queue(&queue, CRYPTD_MAX_CPU_QLEN);
if (err)
return err;

err = crypto_register_template(&cryptd_tmpl);
if (err)
kthread_stop(state.task);
cryptd_fini_queue(&queue);

return err;
}

static void __exit cryptd_exit(void)
{
cryptd_stop_thread(&state);
cryptd_fini_queue(&queue);
crypto_unregister_template(&cryptd_tmpl);
}

Expand Down

0 comments on commit 254eff7

Please sign in to comment.