@@ -23,6 +23,9 @@
#define AEGIS128_MIN_AUTH_SIZE 8
#define AEGIS128_MAX_AUTH_SIZE 16
+/* avoid scheduler/RCU stalls from long kernel_fpu_begin/end sections */
+static const unsigned int bytes_per_fpu = 4 * 1024;
+
asmlinkage void crypto_aegis128_aesni_init(void *state, void *key, void *iv);
asmlinkage void crypto_aegis128_aesni_ad(
@@ -85,15 +88,19 @@ static void crypto_aegis128_aesni_process_ad(
if (pos > 0) {
unsigned int fill = AEGIS128_BLOCK_SIZE - pos;
memcpy(buf.bytes + pos, src, fill);
- crypto_aegis128_aesni_ad(state,
+ kernel_fpu_begin();
+ crypto_aegis128_aesni_ad(state->blocks,
AEGIS128_BLOCK_SIZE,
buf.bytes);
+ kernel_fpu_end();
pos = 0;
left -= fill;
src += fill;
}
- crypto_aegis128_aesni_ad(state, left, src);
+ kernel_fpu_begin();
+ crypto_aegis128_aesni_ad(state->blocks, left, src);
+ kernel_fpu_end();
src += left & ~(AEGIS128_BLOCK_SIZE - 1);
left &= AEGIS128_BLOCK_SIZE - 1;
@@ -110,7 +117,9 @@ static void crypto_aegis128_aesni_process_ad(
if (pos > 0) {
memset(buf.bytes + pos, 0, AEGIS128_BLOCK_SIZE - pos);
- crypto_aegis128_aesni_ad(state, AEGIS128_BLOCK_SIZE, buf.bytes);
+ kernel_fpu_begin();
+ crypto_aegis128_aesni_ad(state->blocks, AEGIS128_BLOCK_SIZE, buf.bytes);
+ kernel_fpu_end();
}
}
@@ -119,15 +128,23 @@ static void crypto_aegis128_aesni_process_crypt(
const struct aegis_crypt_ops *ops)
{
while (walk->nbytes >= AEGIS128_BLOCK_SIZE) {
- ops->crypt_blocks(state,
- round_down(walk->nbytes, AEGIS128_BLOCK_SIZE),
+ unsigned int chunk = min(walk->nbytes, bytes_per_fpu);
+
+ chunk = round_down(chunk, AEGIS128_BLOCK_SIZE);
+
+ kernel_fpu_begin();
+ ops->crypt_blocks(state->blocks, chunk,
walk->src.virt.addr, walk->dst.virt.addr);
- skcipher_walk_done(walk, walk->nbytes % AEGIS128_BLOCK_SIZE);
+ kernel_fpu_end();
+
+ skcipher_walk_done(walk, walk->nbytes - chunk);
}
if (walk->nbytes) {
- ops->crypt_tail(state, walk->nbytes, walk->src.virt.addr,
+ kernel_fpu_begin();
+ ops->crypt_tail(state->blocks, walk->nbytes, walk->src.virt.addr,
walk->dst.virt.addr);
+ kernel_fpu_end();
skcipher_walk_done(walk, 0);
}
}
@@ -172,15 +189,17 @@ static void crypto_aegis128_aesni_crypt(struct aead_request *req,
struct skcipher_walk walk;
struct aegis_state state;
- ops->skcipher_walk_init(&walk, req, true);
+ ops->skcipher_walk_init(&walk, req, false);
kernel_fpu_begin();
+ crypto_aegis128_aesni_init(&state.blocks, ctx->key.bytes, req->iv);
+ kernel_fpu_end();
- crypto_aegis128_aesni_init(&state, ctx->key.bytes, req->iv);
crypto_aegis128_aesni_process_ad(&state, req->src, req->assoclen);
crypto_aegis128_aesni_process_crypt(&state, &walk, ops);
- crypto_aegis128_aesni_final(&state, tag_xor, req->assoclen, cryptlen);
+ kernel_fpu_begin();
+ crypto_aegis128_aesni_final(&state.blocks, tag_xor, req->assoclen, cryptlen);
kernel_fpu_end();
}
Make kernel_fpu_begin() and kernel_fpu_end() calls around each
assembly language function that uses FPU context, rather than
around the entire set (init, ad, crypt, final).

Limit the processing of bulk data to bytes_per_fpu bytes at a time,
so multiple blocks are processed within one FPU context (associated
data is not limited).

Allow the skcipher_walk functions to sleep again, since they are
no longer called inside FPU context.

Motivation: calling crypto_aead_encrypt() with a single
scatter-gather list entry pointing to a 1 MiB plaintext buffer
caused the aesni_encrypt function to receive a length of
1048576 bytes and consume 306348 cycles within FPU context
to process that data.

Fixes: 1d373d4e8e15 ("crypto: x86 - Add optimized AEGIS implementations")
Fixes: ba6771c0a0bc ("crypto: x86/aegis - fix handling chunked inputs and MAY_SLEEP")
Signed-off-by: Robert Elliott <elliott@hpe.com>
---
 arch/x86/crypto/aegis128-aesni-glue.c | 39 ++++++++++++++++++++-------
 1 file changed, 29 insertions(+), 10 deletions(-)
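
As a note after the diffstat: below is a minimal standalone C sketch of
the chunking pattern used in crypto_aegis128_aesni_process_crypt()
above, illustrating the control flow only, not the kernel code itself.
The names fpu_begin(), fpu_end(), and process_blocks() are hypothetical
stand-ins for kernel_fpu_begin(), kernel_fpu_end(), and the AES-NI
assembly routine; BLOCK_SIZE and BYTES_PER_FPU mirror
AEGIS128_BLOCK_SIZE and bytes_per_fpu.

#include <stddef.h>
#include <stdio.h>

#define BLOCK_SIZE	16		/* AEGIS128_BLOCK_SIZE */
#define BYTES_PER_FPU	(4 * 1024)	/* bytes_per_fpu */

#define ROUND_DOWN(x, a)	((x) & ~((size_t)(a) - 1))
#define MIN(a, b)		((a) < (b) ? (a) : (b))

static void fpu_begin(void) { /* kernel_fpu_begin(): preemption off */ }
static void fpu_end(void)   { /* kernel_fpu_end(): preemption back on */ }

/* stand-in for ops->crypt_blocks(); just reports the chunk size */
static void process_blocks(const unsigned char *src, size_t len)
{
	(void)src;
	printf("processed %zu bytes in one FPU section\n", len);
}

static void process_bulk(const unsigned char *src, size_t nbytes)
{
	while (nbytes >= BLOCK_SIZE) {
		/* cap the FPU section, then trim to whole blocks */
		size_t chunk = MIN(nbytes, (size_t)BYTES_PER_FPU);

		chunk = ROUND_DOWN(chunk, BLOCK_SIZE);

		fpu_begin();
		process_blocks(src, chunk);
		fpu_end();

		src += chunk;
		nbytes -= chunk;
	}
	/* a sub-block tail would be handled like ops->crypt_tail() */
}

int main(void)
{
	static unsigned char buf[1 << 20];	/* the 1 MiB case cited above */

	/* yields 256 FPU sections of 4 KiB instead of one 1 MiB section */
	process_bulk(buf, sizeof(buf));
	return 0;
}

Splitting the walk this way is also what allows skcipher_walk_init()
to pass atomic=false: skcipher_walk_done() now runs outside the
kernel_fpu_begin()/kernel_fpu_end() pair, so it may sleep safely.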