diff --git a/arch/arm/crypto/chacha-glue.c b/arch/arm/crypto/chacha-glue.c
--- a/arch/arm/crypto/chacha-glue.c
+++ b/arch/arm/crypto/chacha-glue.c
@@ -91,9 +91,19 @@ void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
 		return;
 	}
 
-	kernel_neon_begin();
-	chacha_doneon(state, dst, src, bytes, nrounds);
-	kernel_neon_end();
+	for (;;) {
+		unsigned int todo = min_t(unsigned int, PAGE_SIZE, bytes);
+
+		kernel_neon_begin();
+		chacha_doneon(state, dst, src, todo, nrounds);
+		kernel_neon_end();
+
+		bytes -= todo;
+		if (!bytes)
+			break;
+		src += todo;
+		dst += todo;
+	}
 }
 EXPORT_SYMBOL(chacha_crypt_arch);
diff --git a/arch/arm/crypto/poly1305-glue.c b/arch/arm/crypto/poly1305-glue.c
--- a/arch/arm/crypto/poly1305-glue.c
+++ b/arch/arm/crypto/poly1305-glue.c
@@ -160,13 +160,22 @@ void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
 		unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);
 
 		if (static_branch_likely(&have_neon) && do_neon) {
-			kernel_neon_begin();
-			poly1305_blocks_neon(&dctx->h, src, len, 1);
-			kernel_neon_end();
+			for (;;) {
+				unsigned int todo = min_t(unsigned int, PAGE_SIZE, len);
+
+				kernel_neon_begin();
+				poly1305_blocks_neon(&dctx->h, src, todo, 1);
+				kernel_neon_end();
+
+				len -= todo;
+				src += todo;
+				if (!len)
+					break;
+			}
 		} else {
 			poly1305_blocks_arm(&dctx->h, src, len, 1);
+			src += len;
 		}
-		src += len;
 		nbytes %= POLY1305_BLOCK_SIZE;
 	}
diff --git a/arch/arm64/crypto/chacha-neon-glue.c b/arch/arm64/crypto/chacha-neon-glue.c
--- a/arch/arm64/crypto/chacha-neon-glue.c
+++ b/arch/arm64/crypto/chacha-neon-glue.c
@@ -87,9 +87,19 @@ void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
 	    !crypto_simd_usable())
 		return chacha_crypt_generic(state, dst, src, bytes, nrounds);
 
-	kernel_neon_begin();
-	chacha_doneon(state, dst, src, bytes, nrounds);
-	kernel_neon_end();
+	for (;;) {
+		unsigned int todo = min_t(unsigned int, PAGE_SIZE, bytes);
+
+		kernel_neon_begin();
+		chacha_doneon(state, dst, src, todo, nrounds);
+		kernel_neon_end();
+
+		bytes -= todo;
+		if (!bytes)
+			break;
+		src += todo;
+		dst += todo;
+	}
 }
 EXPORT_SYMBOL(chacha_crypt_arch);
diff --git a/arch/arm64/crypto/poly1305-glue.c b/arch/arm64/crypto/poly1305-glue.c
--- a/arch/arm64/crypto/poly1305-glue.c
+++ b/arch/arm64/crypto/poly1305-glue.c
@@ -143,13 +143,22 @@ void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
 		unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);
 
 		if (static_branch_likely(&have_neon) && crypto_simd_usable()) {
-			kernel_neon_begin();
-			poly1305_blocks_neon(&dctx->h, src, len, 1);
-			kernel_neon_end();
+			for (;;) {
+				unsigned int todo = min_t(unsigned int, PAGE_SIZE, len);
+
+				kernel_neon_begin();
+				poly1305_blocks_neon(&dctx->h, src, todo, 1);
+				kernel_neon_end();
+
+				len -= todo;
+				src += todo;
+				if (!len)
+					break;
+			}
 		} else {
 			poly1305_blocks(&dctx->h, src, len, 1);
+			src += len;
 		}
-		src += len;
 		nbytes %= POLY1305_BLOCK_SIZE;
 	}
diff --git a/arch/x86/crypto/chacha_glue.c b/arch/x86/crypto/chacha_glue.c
--- a/arch/x86/crypto/chacha_glue.c
+++ b/arch/x86/crypto/chacha_glue.c
@@ -153,9 +153,19 @@ void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
 	    bytes <= CHACHA_BLOCK_SIZE)
 		return chacha_crypt_generic(state, dst, src, bytes, nrounds);
 
-	kernel_fpu_begin();
-	chacha_dosimd(state, dst, src, bytes, nrounds);
-	kernel_fpu_end();
+	for (;;) {
+		unsigned int todo = min_t(unsigned int, PAGE_SIZE, bytes);
+
+		kernel_fpu_begin();
+		chacha_dosimd(state, dst, src, todo, nrounds);
+		kernel_fpu_end();
+
+		bytes -= todo;
+		if (!bytes)
+			break;
+		src += todo;
+		dst += todo;
+	}
 }
 EXPORT_SYMBOL(chacha_crypt_arch);
The initial Zinc patchset, after some mailing list discussion, contained
code to ensure that kernel_fpu_begin() would not be kept on for more than
a PAGE_SIZE chunk, since it disables preemption. The choice of PAGE_SIZE
isn't totally scientific, but it's not a bad guess either, and it's what's
used in both the x86 poly1305 and blake2s library code already.
Unfortunately it appears to have been left out of the final patchset that
actually added the glue code. So, this commit adds back the PAGE_SIZE
chunking.

Fixes: 84e03fa39fbe ("crypto: x86/chacha - expose SIMD ChaCha routine as library function")
Fixes: b3aad5bad26a ("crypto: arm64/chacha - expose arm64 ChaCha routine as library function")
Fixes: a44a3430d71b ("crypto: arm/chacha - expose ARM ChaCha routine as library function")
Fixes: f569ca164751 ("crypto: arm64/poly1305 - incorporate OpenSSL/CRYPTOGAMS NEON implementation")
Fixes: a6b803b3ddc7 ("crypto: arm/poly1305 - incorporate OpenSSL/CRYPTOGAMS NEON implementation")
Cc: Eric Biggers <ebiggers@google.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: stable@vger.kernel.org
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
Eric, Ard - I'm wondering if this was in fact just an oversight in Ard's
patches, or if there was actually some later discussion in which we
concluded that the PAGE_SIZE chunking wasn't required, perhaps because of
FPU changes. If that's the case, please do let me know, in which case I'll
submit a _different_ patch that removes the chunking from x86 poly and
blake. I can't find any emails that would indicate that, but I might be
mistaken.

 arch/arm/crypto/chacha-glue.c        | 16 +++++++++++++---
 arch/arm/crypto/poly1305-glue.c      | 17 +++++++++++++----
 arch/arm64/crypto/chacha-neon-glue.c | 16 +++++++++++++---
 arch/arm64/crypto/poly1305-glue.c    | 17 +++++++++++++----
 arch/x86/crypto/chacha_glue.c        | 16 +++++++++++++---
 5 files changed, 65 insertions(+), 17 deletions(-)
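
All five hunks apply the same transformation, so for reference, here is a
minimal standalone sketch of the pattern. The names simd_begin(),
simd_end(), do_simd_crypt(), crypt_chunked(), and CHUNK_SIZE are
hypothetical stand-ins for kernel_{neon,fpu}_begin(),
kernel_{neon,fpu}_end(), the arch-specific SIMD routine, and PAGE_SIZE;
none of them appear in the patch itself:

#define CHUNK_SIZE 4096	/* stand-in for PAGE_SIZE */

/* Stubs for illustration; the kernel versions also toggle preemption. */
static void simd_begin(void) { /* e.g. kernel_fpu_begin() */ }
static void simd_end(void)   { /* e.g. kernel_fpu_end() */ }

static void do_simd_crypt(unsigned char *dst, const unsigned char *src,
			  unsigned int len)
{
	/* Placeholder for chacha_dosimd()/chacha_doneon() and friends. */
	while (len--)
		*dst++ = *src++ ^ 0x55;
}

static void crypt_chunked(unsigned char *dst, const unsigned char *src,
			  unsigned int bytes)
{
	for (;;) {
		/* Bound the non-preemptible region to one chunk. */
		unsigned int todo = bytes < CHUNK_SIZE ? bytes : CHUNK_SIZE;

		simd_begin();
		do_simd_crypt(dst, src, todo);
		simd_end();

		bytes -= todo;
		if (!bytes)
			break;
		src += todo;
		dst += todo;
	}
}

The shape of the loop is the point: the begin/end pair sits inside the
loop body, so preemption is re-enabled between chunks and a large request
can keep the CPU non-preemptible for at most CHUNK_SIZE bytes at a time.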