@@ -20,8 +20,11 @@
#include <asm/cpu_device_id.h>
#include <asm/simd.h>
-#define GHASH_BLOCK_SIZE 16
-#define GHASH_DIGEST_SIZE 16
+#define GHASH_BLOCK_SIZE 16U
+#define GHASH_DIGEST_SIZE 16U
+
+/* avoid kernel_fpu_begin/end scheduler/rcu stalls */
+static const unsigned int bytes_per_fpu = 50 * 1024;
void clmul_ghash_mul(u8 *dst, const u128 *shash);
@@ -80,9 +83,11 @@ static int ghash_update(struct shash_desc *desc,
struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
u8 *dst = dctx->buffer;
+ BUILD_BUG_ON(bytes_per_fpu < GHASH_BLOCK_SIZE);
+
if (dctx->bytes) {
int n = min(srclen, dctx->bytes);
- u8 *pos = dst + (GHASH_BLOCK_SIZE - dctx->bytes);
+ u8 *pos = dst + GHASH_BLOCK_SIZE - dctx->bytes;
dctx->bytes -= n;
srclen -= n;
@@ -97,13 +102,18 @@ static int ghash_update(struct shash_desc *desc,
}
}
- kernel_fpu_begin();
- clmul_ghash_update(dst, src, srclen, &ctx->shash);
- kernel_fpu_end();
+ while (srclen >= GHASH_BLOCK_SIZE) {
+ unsigned int chunk = min(srclen, bytes_per_fpu);
+
+ kernel_fpu_begin();
+ clmul_ghash_update(dst, src, chunk, &ctx->shash);
+ kernel_fpu_end();
+
+ src += chunk & ~(GHASH_BLOCK_SIZE - 1);
+ srclen -= chunk & ~(GHASH_BLOCK_SIZE - 1);
+ }
- if (srclen & 0xf) {
- src += srclen - (srclen & 0xf);
- srclen &= 0xf;
+ if (srclen) {
dctx->bytes = GHASH_BLOCK_SIZE - srclen;
while (srclen--)
*dst++ ^= *src++;
Limit the number of bytes processed between kernel_fpu_begin() and
kernel_fpu_end() calls. Those functions call preempt_disable() and
preempt_enable(), so the CPU core is unavailable for scheduling while
running, leading to:

    rcu: INFO: rcu_preempt detected expedited stalls on CPUs/tasks: ...

Fixes: 0e1227d356e9 ("crypto: ghash - Add PCLMULQDQ accelerated implementation")
Suggested-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Robert Elliott <elliott@hpe.com>
---
v3 change to static int, simplify while loop
---
 arch/x86/crypto/ghash-clmulni-intel_glue.c | 28 +++++++++++++++-------
 1 file changed, 19 insertions(+), 9 deletions(-)
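
For reference, here is a minimal user-space sketch of the chunking pattern
the patch introduces. It is only an illustration: fake_fpu_begin(),
fake_fpu_end(), and process_blocks() are hypothetical stand-ins for
kernel_fpu_begin(), kernel_fpu_end(), and clmul_ghash_update(), which only
consumes whole 16-byte blocks.

#include <stdio.h>

#define BLOCK_SIZE    16U
#define BYTES_PER_FPU (50U * 1024U)      /* cap on work per FPU section */

static void fake_fpu_begin(void) { }     /* would disable preemption */
static void fake_fpu_end(void)   { }     /* would re-enable preemption */

static void process_blocks(const unsigned char *src, unsigned int len)
{
	/* pretend to hash len / BLOCK_SIZE full blocks of src */
	(void)src;
	printf("processed %u bytes (%u blocks)\n", len, len / BLOCK_SIZE);
}

int main(void)
{
	static unsigned char buf[200 * 1024];
	const unsigned char *src = buf;
	unsigned int srclen = sizeof(buf) - 5;   /* leave a partial tail */

	while (srclen >= BLOCK_SIZE) {
		/* never hold the "FPU" section for more than BYTES_PER_FPU bytes */
		unsigned int chunk = srclen < BYTES_PER_FPU ? srclen : BYTES_PER_FPU;

		fake_fpu_begin();
		process_blocks(src, chunk);
		fake_fpu_end();

		/* only whole blocks were consumed; keep any tail for later */
		src += chunk & ~(BLOCK_SIZE - 1);
		srclen -= chunk & ~(BLOCK_SIZE - 1);
	}

	printf("%u tail bytes left for the partial-block buffer\n", srclen);
	return 0;
}

The rounding with chunk & ~(BLOCK_SIZE - 1) mirrors the patch: each pass
advances only by the whole blocks actually hashed, so a trailing partial
block falls out of the loop and is buffered for the next update call.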