From patchwork Mon Oct 7 01:24:21 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Eric Biggers X-Patchwork-Id: 834162 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 51CC5C156; Mon, 7 Oct 2024 01:24:56 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1728264296; cv=none; b=XP9gYtrq2p2P+Hsi3iiTwPGAPfzDcDDVZa2sFU/Tec11x+XOPBHAaRYxQvCp9qAYEI2tlce37Hz3CXkyv98GrVwbGG/aCE642pFaqxG4yfJlNtN12ZNw2jMeZBZ2oL7RxTdI9Q8dLBbACBtTfv6BoVJ/COk1O9noIAKqiCML8NA= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1728264296; c=relaxed/simple; bh=WCm/xfTBodlqj4bAKzAchL/blZIThgBRMTL2D1XRc6E=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=AgMUSyKQWRByXdiSPyTSfneRgt5Np3RvdQ+bDkyfjCb1VSMiWYHBqkbKXW3B5ynkQcE4ZpYZfjhjVPeqej1J3VyBl0yNzDO883i/XT3eCHriF1hgbLTAQB6JkUc4nsjIp/XK4ZOiDINmdO+FTulQqUMWsJW09yuM1TepUR2R8TI= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=dHCH8QY0; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="dHCH8QY0" Received: by smtp.kernel.org (Postfix) with ESMTPSA id B34E6C4CECC; Mon, 7 Oct 2024 01:24:55 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1728264295; bh=WCm/xfTBodlqj4bAKzAchL/blZIThgBRMTL2D1XRc6E=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=dHCH8QY0ZXyEg2C+QRZGwoRijb0ZYP98j15RbnIj0xpbmInWugI1WP/16g5MG0gkq sJLKqd47QLOFtzx86pGpGw6HbaD3D2COBr8JA5Yp1kHFiUfEw1zabd4aiJTKvl1J+a ZQDvdAixuafMQ6YotvbpxU7p6W00zg9wj37b9WRsQHpYRotTGsCRHsPVw5z3hlxMwJ sZn6A99qgdUvuj7whS9oJOYonxQKROWvoy7NDVyRlFZyaN1jyGcgMOqZmPz666tNit 6l4pH1XdvzgS7X3elzgyOotEJZHdfLsiLrths1XG1qsRIS5d90F1DSHlAWunn24IFG xoNd569qORQcQ== From: Eric Biggers To: linux-crypto@vger.kernel.org Cc: x86@kernel.org, Ondrej Mosnacek , stable@vger.kernel.org Subject: [PATCH 01/10] crypto: x86/aegis128 - access 32-bit arguments as 32-bit Date: Sun, 6 Oct 2024 18:24:21 -0700 Message-ID: <20241007012430.163606-2-ebiggers@kernel.org> X-Mailer: git-send-email 2.46.2 In-Reply-To: <20241007012430.163606-1-ebiggers@kernel.org> References: <20241007012430.163606-1-ebiggers@kernel.org> Precedence: bulk X-Mailing-List: linux-crypto@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Eric Biggers Fix the AEGIS assembly code to access 'unsigned int' arguments as 32-bit values instead of 64-bit, since the upper bits of the corresponding 64-bit registers are not guaranteed to be zero. Note: there haven't been any reports of this bug actually causing incorrect behavior. Neither gcc nor clang guarantee zero-extension to 64 bits, but zero-extension is likely to happen in practice because most instructions that operate on 32-bit registers zero-extend to 64 bits. Fixes: 1d373d4e8e15 ("crypto: x86 - Add optimized AEGIS implementations") Cc: stable@vger.kernel.org Signed-off-by: Eric Biggers --- arch/x86/crypto/aegis128-aesni-asm.S | 29 ++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/arch/x86/crypto/aegis128-aesni-asm.S b/arch/x86/crypto/aegis128-aesni-asm.S index ad7f4c891625..2de859173940 100644 --- a/arch/x86/crypto/aegis128-aesni-asm.S +++ b/arch/x86/crypto/aegis128-aesni-asm.S @@ -19,11 +19,11 @@ #define MSG %xmm5 #define T0 %xmm6 #define T1 %xmm7 #define STATEP %rdi -#define LEN %rsi +#define LEN %esi #define SRC %rdx #define DST %rcx .section .rodata.cst16.aegis128_const, "aM", @progbits, 32 .align 16 @@ -74,50 +74,50 @@ */ SYM_FUNC_START_LOCAL(__load_partial) xor %r9d, %r9d pxor MSG, MSG - mov LEN, %r8 + mov LEN, %r8d and $0x1, %r8 jz .Lld_partial_1 - mov LEN, %r8 + mov LEN, %r8d and $0x1E, %r8 add SRC, %r8 mov (%r8), %r9b .Lld_partial_1: - mov LEN, %r8 + mov LEN, %r8d and $0x2, %r8 jz .Lld_partial_2 - mov LEN, %r8 + mov LEN, %r8d and $0x1C, %r8 add SRC, %r8 shl $0x10, %r9 mov (%r8), %r9w .Lld_partial_2: - mov LEN, %r8 + mov LEN, %r8d and $0x4, %r8 jz .Lld_partial_4 - mov LEN, %r8 + mov LEN, %r8d and $0x18, %r8 add SRC, %r8 shl $32, %r9 mov (%r8), %r8d xor %r8, %r9 .Lld_partial_4: movq %r9, MSG - mov LEN, %r8 + mov LEN, %r8d and $0x8, %r8 jz .Lld_partial_8 - mov LEN, %r8 + mov LEN, %r8d and $0x10, %r8 add SRC, %r8 pslldq $8, MSG movq (%r8), T0 pxor T0, MSG @@ -137,11 +137,11 @@ SYM_FUNC_END(__load_partial) * %r8 * %r9 * %r10 */ SYM_FUNC_START_LOCAL(__store_partial) - mov LEN, %r8 + mov LEN, %r8d mov DST, %r9 movq T0, %r10 cmp $8, %r8 @@ -675,11 +675,11 @@ SYM_TYPED_FUNC_START(crypto_aegis128_aesni_dec_tail) movdqa MSG, T0 call __store_partial /* mask with byte count: */ - movq LEN, T0 + movd LEN, T0 punpcklbw T0, T0 punpcklbw T0, T0 punpcklbw T0, T0 punpcklbw T0, T0 movdqa .Laegis128_counter(%rip), T1 @@ -700,11 +700,12 @@ SYM_TYPED_FUNC_START(crypto_aegis128_aesni_dec_tail) RET SYM_FUNC_END(crypto_aegis128_aesni_dec_tail) /* * void crypto_aegis128_aesni_final(void *state, void *tag_xor, - * u64 assoclen, u64 cryptlen); + * unsigned int assoclen, + * unsigned int cryptlen); */ SYM_FUNC_START(crypto_aegis128_aesni_final) FRAME_BEGIN /* load the state: */ @@ -713,12 +714,12 @@ SYM_FUNC_START(crypto_aegis128_aesni_final) movdqu 0x20(STATEP), STATE2 movdqu 0x30(STATEP), STATE3 movdqu 0x40(STATEP), STATE4 /* prepare length block: */ - movq %rdx, MSG - movq %rcx, T0 + movd %edx, MSG + movd %ecx, T0 pslldq $8, T0 pxor T0, MSG psllq $3, MSG /* multiply by 8 (to get bit count) */ pxor STATE3, MSG From patchwork Mon Oct 7 01:24:22 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Eric Biggers X-Patchwork-Id: 833402 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 97F00E552 for ; Mon, 7 Oct 2024 01:24:56 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1728264296; cv=none; b=T5DqYdPcwi8XXlXUMXG4v/fLV5A+48fbfnY3OU9LNUSALBbHx5xSAso6lYKTnVCyCSTBswIoAFulKjwu2uGQ+0USz9MI3y7Q46DSs2VHXKhKideyLmDPjm+RL5sH4z/NYRyYV+E8IiD+HCtEB/noauZ5h+pRflTdt4r72KF52Dg= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1728264296; c=relaxed/simple; bh=HcQC+jvPVUJx2RKcjbg0itHn7eBtluu0O12lQvkpdxE=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=mrxSsZpmRdkXebw+fFQzfMGSyXZ8yvyx2zcwmzfL8swi7ZKPaDG1n8KzQtbRQtSr5AQnCK6PFowBorsLT170UXQjs45ns+I3X/HaDoPKWRL2Wv8QuTCLSTMf+R2YOOBH58cYe6Rgc+INYR8EaUX63sy2hh2v8ehX8DpQo4NgAVs= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=tS6OFWY8; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="tS6OFWY8" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 0AFF0C4CED1; Mon, 7 Oct 2024 01:24:56 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1728264296; bh=HcQC+jvPVUJx2RKcjbg0itHn7eBtluu0O12lQvkpdxE=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=tS6OFWY8NDw8G8vPUTuSHHERsO9WiRbh9GgcfOJPNo1TAVFUbf1F8FexwE4yoe7fP JYrXPR3TEekFx4Tur4Ay0Zc7kS/XaW7Z5qe08U8jcblS9/buwCAKDYp+1sPC4Qqx+f XKpXceC9rBAACkJLntGOsDr3v5haenc4H2yt2G+4vTX2xIR1hOhRk/ANEdVIbawd33 Ek483kQKYGAuKM2A+J3PIBQ/s/QzmuFXqZTp3B94vxF7pCaz7mnso0h0ZG/1f0CCLQ jEcPDtt7kUR2pw0R3CCaylbgnQwfuL4HMrn87DcubznldFiGA/yg8L6CZBRXHZSsOV +udhEF/gcKsAg== From: Eric Biggers To: linux-crypto@vger.kernel.org Cc: x86@kernel.org, Ondrej Mosnacek Subject: [PATCH 02/10] crypto: x86/aegis128 - remove no-op init and exit functions Date: Sun, 6 Oct 2024 18:24:22 -0700 Message-ID: <20241007012430.163606-3-ebiggers@kernel.org> X-Mailer: git-send-email 2.46.2 In-Reply-To: <20241007012430.163606-1-ebiggers@kernel.org> References: <20241007012430.163606-1-ebiggers@kernel.org> Precedence: bulk X-Mailing-List: linux-crypto@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Eric Biggers Don't bother providing empty stubs for the init and exit methods in struct aead_alg, since they are optional anyway. Signed-off-by: Eric Biggers --- arch/x86/crypto/aegis128-aesni-glue.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/arch/x86/crypto/aegis128-aesni-glue.c b/arch/x86/crypto/aegis128-aesni-glue.c index 4623189000d8..96586470154e 100644 --- a/arch/x86/crypto/aegis128-aesni-glue.c +++ b/arch/x86/crypto/aegis128-aesni-glue.c @@ -225,26 +225,15 @@ static int crypto_aegis128_aesni_decrypt(struct aead_request *req) crypto_aegis128_aesni_crypt(req, &tag, cryptlen, &OPS); return crypto_memneq(tag.bytes, zeros.bytes, authsize) ? -EBADMSG : 0; } -static int crypto_aegis128_aesni_init_tfm(struct crypto_aead *aead) -{ - return 0; -} - -static void crypto_aegis128_aesni_exit_tfm(struct crypto_aead *aead) -{ -} - static struct aead_alg crypto_aegis128_aesni_alg = { .setkey = crypto_aegis128_aesni_setkey, .setauthsize = crypto_aegis128_aesni_setauthsize, .encrypt = crypto_aegis128_aesni_encrypt, .decrypt = crypto_aegis128_aesni_decrypt, - .init = crypto_aegis128_aesni_init_tfm, - .exit = crypto_aegis128_aesni_exit_tfm, .ivsize = AEGIS128_NONCE_SIZE, .maxauthsize = AEGIS128_MAX_AUTH_SIZE, .chunksize = AEGIS128_BLOCK_SIZE, From patchwork Mon Oct 7 01:24:23 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Eric Biggers X-Patchwork-Id: 834161 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id D6A0C1078B for ; Mon, 7 Oct 2024 01:24:56 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1728264296; cv=none; b=BIBFEGbKWZ++hHLjMrLg2uqoMPX4s9lsMG2GH/mh8KBnzN3Hq2VErdRej53bS5lkzNeSZlLxKc5zIvxSiuH1V3+RHhFGA0mu0lZnBcfv0he0sKked+vYIM7oTIsUDzrxtbWSQojV2LItL1B5uyDOYiYHaIfo8h9AqO9o5JFtt4U= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1728264296; c=relaxed/simple; bh=P8dQXy2eYS/6EZXuzq3z/fT3xjKCg0HAAYYIHO3D688=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=r6+ftaqhYuchgQCWq/SXVDXuZj7FKZ60fJSLdddmqNVBFq3p0pCTirPFpTff8TAEyV+NCX/wznLR8VrhgCqXgm9GFgG/dp5QiaWgBOwJUpH2udqdpD7of6fdWqzj0wR8xYQaKoFNs6AUh1texSWftiK6My23gAOZcsOA5EGsYD4= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=FfM8xReo; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="FfM8xReo" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 53D71C4CED4; Mon, 7 Oct 2024 01:24:56 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1728264296; bh=P8dQXy2eYS/6EZXuzq3z/fT3xjKCg0HAAYYIHO3D688=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=FfM8xReoj77S7wqHAHs7qBZRb319TsMavjx1Gmq6E0IBsEbN2vHDKyom1b56btWsj nk4qABwSk30puSoLHVGqdaDgsAbl8QpcSTzV0cOgXB1cKKtUNkqjbxMYQ0cJRrMi+S fR1cf6dn2meQNEewLLLi9eOHRfaWgDfRIDU0FcftjHprB3KTkEBBT13wMG2OiXyFm/ WitBPB+XPoUhIyc5B+vreobAUKIcoJFr8vxJV6fIJ2WHWHAGqZ26ov+O4EJTABnq8k H8Kb8+cqv55zyft+ryD3GKHlfUgzV8vO48EsB6F+802umE0mldxttCJXMgHLjdtDRI +3W/lfu60rvgA== From: Eric Biggers To: linux-crypto@vger.kernel.org Cc: x86@kernel.org, Ondrej Mosnacek Subject: [PATCH 03/10] crypto: x86/aegis128 - eliminate some indirect calls Date: Sun, 6 Oct 2024 18:24:23 -0700 Message-ID: <20241007012430.163606-4-ebiggers@kernel.org> X-Mailer: git-send-email 2.46.2 In-Reply-To: <20241007012430.163606-1-ebiggers@kernel.org> References: <20241007012430.163606-1-ebiggers@kernel.org> Precedence: bulk X-Mailing-List: linux-crypto@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Eric Biggers Instead of using a struct of function pointers to decide whether to call the encryption or decryption assembly functions, use a conditional branch on a bool. Force-inline the functions to avoid actually generating the branch. This improves performance slightly since indirect calls are slow. Remove the now-unnecessary CFI stubs. Signed-off-by: Eric Biggers --- arch/x86/crypto/aegis128-aesni-asm.S | 9 ++-- arch/x86/crypto/aegis128-aesni-glue.c | 74 +++++++++++++-------------- 2 files changed, 40 insertions(+), 43 deletions(-) diff --git a/arch/x86/crypto/aegis128-aesni-asm.S b/arch/x86/crypto/aegis128-aesni-asm.S index 2de859173940..1b57558548c7 100644 --- a/arch/x86/crypto/aegis128-aesni-asm.S +++ b/arch/x86/crypto/aegis128-aesni-asm.S @@ -5,11 +5,10 @@ * Copyright (c) 2017-2018 Ondrej Mosnacek * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved. */ #include -#include #include #define STATE0 %xmm0 #define STATE1 %xmm1 #define STATE2 %xmm2 @@ -401,11 +400,11 @@ SYM_FUNC_END(crypto_aegis128_aesni_ad) /* * void crypto_aegis128_aesni_enc(void *state, unsigned int length, * const void *src, void *dst); */ -SYM_TYPED_FUNC_START(crypto_aegis128_aesni_enc) +SYM_FUNC_START(crypto_aegis128_aesni_enc) FRAME_BEGIN cmp $0x10, LEN jb .Lenc_out @@ -498,11 +497,11 @@ SYM_FUNC_END(crypto_aegis128_aesni_enc) /* * void crypto_aegis128_aesni_enc_tail(void *state, unsigned int length, * const void *src, void *dst); */ -SYM_TYPED_FUNC_START(crypto_aegis128_aesni_enc_tail) +SYM_FUNC_START(crypto_aegis128_aesni_enc_tail) FRAME_BEGIN /* load the state: */ movdqu 0x00(STATEP), STATE0 movdqu 0x10(STATEP), STATE1 @@ -555,11 +554,11 @@ SYM_FUNC_END(crypto_aegis128_aesni_enc_tail) /* * void crypto_aegis128_aesni_dec(void *state, unsigned int length, * const void *src, void *dst); */ -SYM_TYPED_FUNC_START(crypto_aegis128_aesni_dec) +SYM_FUNC_START(crypto_aegis128_aesni_dec) FRAME_BEGIN cmp $0x10, LEN jb .Ldec_out @@ -652,11 +651,11 @@ SYM_FUNC_END(crypto_aegis128_aesni_dec) /* * void crypto_aegis128_aesni_dec_tail(void *state, unsigned int length, * const void *src, void *dst); */ -SYM_TYPED_FUNC_START(crypto_aegis128_aesni_dec_tail) +SYM_FUNC_START(crypto_aegis128_aesni_dec_tail) FRAME_BEGIN /* load the state: */ movdqu 0x00(STATEP), STATE0 movdqu 0x10(STATEP), STATE1 diff --git a/arch/x86/crypto/aegis128-aesni-glue.c b/arch/x86/crypto/aegis128-aesni-glue.c index 96586470154e..deb39cef0be1 100644 --- a/arch/x86/crypto/aegis128-aesni-glue.c +++ b/arch/x86/crypto/aegis128-aesni-glue.c @@ -54,20 +54,10 @@ struct aegis_state { struct aegis_ctx { struct aegis_block key; }; -struct aegis_crypt_ops { - int (*skcipher_walk_init)(struct skcipher_walk *walk, - struct aead_request *req, bool atomic); - - void (*crypt_blocks)(void *state, unsigned int length, const void *src, - void *dst); - void (*crypt_tail)(void *state, unsigned int length, const void *src, - void *dst); -}; - static void crypto_aegis128_aesni_process_ad( struct aegis_state *state, struct scatterlist *sg_src, unsigned int assoclen) { struct scatter_walk walk; @@ -112,24 +102,41 @@ static void crypto_aegis128_aesni_process_ad( memset(buf.bytes + pos, 0, AEGIS128_BLOCK_SIZE - pos); crypto_aegis128_aesni_ad(state, AEGIS128_BLOCK_SIZE, buf.bytes); } } -static void crypto_aegis128_aesni_process_crypt( - struct aegis_state *state, struct skcipher_walk *walk, - const struct aegis_crypt_ops *ops) +static __always_inline void +crypto_aegis128_aesni_process_crypt(struct aegis_state *state, + struct skcipher_walk *walk, bool enc) { while (walk->nbytes >= AEGIS128_BLOCK_SIZE) { - ops->crypt_blocks(state, - round_down(walk->nbytes, AEGIS128_BLOCK_SIZE), - walk->src.virt.addr, walk->dst.virt.addr); + if (enc) + crypto_aegis128_aesni_enc( + state, + round_down(walk->nbytes, + AEGIS128_BLOCK_SIZE), + walk->src.virt.addr, + walk->dst.virt.addr); + else + crypto_aegis128_aesni_dec( + state, + round_down(walk->nbytes, + AEGIS128_BLOCK_SIZE), + walk->src.virt.addr, + walk->dst.virt.addr); skcipher_walk_done(walk, walk->nbytes % AEGIS128_BLOCK_SIZE); } if (walk->nbytes) { - ops->crypt_tail(state, walk->nbytes, walk->src.virt.addr, - walk->dst.virt.addr); + if (enc) + crypto_aegis128_aesni_enc_tail(state, walk->nbytes, + walk->src.virt.addr, + walk->dst.virt.addr); + else + crypto_aegis128_aesni_dec_tail(state, walk->nbytes, + walk->src.virt.addr, + walk->dst.virt.addr); skcipher_walk_done(walk, 0); } } static struct aegis_ctx *crypto_aegis128_aesni_ctx(struct crypto_aead *aead) @@ -160,71 +167,62 @@ static int crypto_aegis128_aesni_setauthsize(struct crypto_aead *tfm, if (authsize < AEGIS128_MIN_AUTH_SIZE) return -EINVAL; return 0; } -static void crypto_aegis128_aesni_crypt(struct aead_request *req, - struct aegis_block *tag_xor, - unsigned int cryptlen, - const struct aegis_crypt_ops *ops) +static __always_inline void +crypto_aegis128_aesni_crypt(struct aead_request *req, + struct aegis_block *tag_xor, + unsigned int cryptlen, bool enc) { struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct aegis_ctx *ctx = crypto_aegis128_aesni_ctx(tfm); struct skcipher_walk walk; struct aegis_state state; - ops->skcipher_walk_init(&walk, req, true); + if (enc) + skcipher_walk_aead_encrypt(&walk, req, true); + else + skcipher_walk_aead_decrypt(&walk, req, true); kernel_fpu_begin(); crypto_aegis128_aesni_init(&state, ctx->key.bytes, req->iv); crypto_aegis128_aesni_process_ad(&state, req->src, req->assoclen); - crypto_aegis128_aesni_process_crypt(&state, &walk, ops); + crypto_aegis128_aesni_process_crypt(&state, &walk, enc); crypto_aegis128_aesni_final(&state, tag_xor, req->assoclen, cryptlen); kernel_fpu_end(); } static int crypto_aegis128_aesni_encrypt(struct aead_request *req) { - static const struct aegis_crypt_ops OPS = { - .skcipher_walk_init = skcipher_walk_aead_encrypt, - .crypt_blocks = crypto_aegis128_aesni_enc, - .crypt_tail = crypto_aegis128_aesni_enc_tail, - }; - struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct aegis_block tag = {}; unsigned int authsize = crypto_aead_authsize(tfm); unsigned int cryptlen = req->cryptlen; - crypto_aegis128_aesni_crypt(req, &tag, cryptlen, &OPS); + crypto_aegis128_aesni_crypt(req, &tag, cryptlen, true); scatterwalk_map_and_copy(tag.bytes, req->dst, req->assoclen + cryptlen, authsize, 1); return 0; } static int crypto_aegis128_aesni_decrypt(struct aead_request *req) { static const struct aegis_block zeros = {}; - static const struct aegis_crypt_ops OPS = { - .skcipher_walk_init = skcipher_walk_aead_decrypt, - .crypt_blocks = crypto_aegis128_aesni_dec, - .crypt_tail = crypto_aegis128_aesni_dec_tail, - }; - struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct aegis_block tag; unsigned int authsize = crypto_aead_authsize(tfm); unsigned int cryptlen = req->cryptlen - authsize; scatterwalk_map_and_copy(tag.bytes, req->src, req->assoclen + cryptlen, authsize, 0); - crypto_aegis128_aesni_crypt(req, &tag, cryptlen, &OPS); + crypto_aegis128_aesni_crypt(req, &tag, cryptlen, false); return crypto_memneq(tag.bytes, zeros.bytes, authsize) ? -EBADMSG : 0; } static struct aead_alg crypto_aegis128_aesni_alg = { From patchwork Mon Oct 7 01:24:24 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Eric Biggers X-Patchwork-Id: 833401 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 1A63F10A1C for ; Mon, 7 Oct 2024 01:24:56 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1728264297; cv=none; b=VjeCnQEP4LWFP4fUUgncK7WSFJd0RrKwWigpuTOILFjiWVVYWDyriUFCGLAc/6g4lg+zGhOWy1ttMlujBGjaR73DhAUG96je6EtU1oWve/GKQYnmr5Lf9LcsWFnO9/QxJC7R1ROv8gQas8Ovv30jCo8PKKPdenz5mtgEWA0sj4g= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1728264297; c=relaxed/simple; bh=UQ5z+M013V+7Q3EhUtmWmtTjR4pQo9By567fNgUaNWg=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=Oa1RqhrzhEp2HfLLhLq7TsHc3eV+5cVSlL1FLtzZlLcBAaOewm9SOCdlqnkyMJE5u+6Aq+3RPwdq125LCag1MoQTA883G1G3FUdVF2PI+IKxIzbAe2k7FWW36bT0vn1l0djKM+U7Tq3Ug0eqF/EkiRrN9+q8GF8LBYSrRjlPWuw= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=QemuRG2Y; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="QemuRG2Y" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 98ABFC4CECC; Mon, 7 Oct 2024 01:24:56 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1728264296; bh=UQ5z+M013V+7Q3EhUtmWmtTjR4pQo9By567fNgUaNWg=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=QemuRG2YytQL4/K99qiB8t11XgHEOcsxVxW2ChAdKqCzm5fi73imxP5CTIFC+V69r 2etbOEbuYaRQugmJbJo4WkQvBw+C+qKWNccXfb+AZJ9ol0IhMJ4rGj5OPKCFdwQ3lC ZjyV02vPQ8camU769Jfs30EMFCt/FP4BFoQ4N9bU1vICCl+Lne6MI0UEEFv96QnRWY M9x3tIY0nu5+6h7m6DixoRTARzFR88lZsJvVVoP5AE12tO3UzxkOPvk5Wuwf9lu1Wo OSWuslswh0KIKab/GGfe3oelOiQnQQIS0tqM7F+6g/lAcaRoj1i3fOeXD3vV9wTfsd 810l+IDp04P5A== From: Eric Biggers To: linux-crypto@vger.kernel.org Cc: x86@kernel.org, Ondrej Mosnacek Subject: [PATCH 04/10] crypto: x86/aegis128 - don't bother with special code for aligned data Date: Sun, 6 Oct 2024 18:24:24 -0700 Message-ID: <20241007012430.163606-5-ebiggers@kernel.org> X-Mailer: git-send-email 2.46.2 In-Reply-To: <20241007012430.163606-1-ebiggers@kernel.org> References: <20241007012430.163606-1-ebiggers@kernel.org> Precedence: bulk X-Mailing-List: linux-crypto@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Eric Biggers Remove the AEGIS assembly code paths that were "optimized" to operate on 16-byte aligned data using movdqa, and instead just use the code paths that use movdqu and can handle data with any alignment. This does not reduce performance. movdqa is basically a historical artifact; on aligned data, movdqu and movdqa have had the same performance since Intel Nehalem (2008) and AMD Bulldozer (2011). And code that requires AES-NI cannot run on CPUs older than those anyway. Signed-off-by: Eric Biggers --- arch/x86/crypto/aegis128-aesni-asm.S | 122 +++++---------------------- 1 file changed, 22 insertions(+), 100 deletions(-) diff --git a/arch/x86/crypto/aegis128-aesni-asm.S b/arch/x86/crypto/aegis128-aesni-asm.S index 1b57558548c7..5541aca2fd0d 100644 --- a/arch/x86/crypto/aegis128-aesni-asm.S +++ b/arch/x86/crypto/aegis128-aesni-asm.S @@ -243,56 +243,12 @@ SYM_FUNC_START(crypto_aegis128_aesni_ad) movdqu 0x10(STATEP), STATE1 movdqu 0x20(STATEP), STATE2 movdqu 0x30(STATEP), STATE3 movdqu 0x40(STATEP), STATE4 - mov SRC, %r8 - and $0xF, %r8 - jnz .Lad_u_loop - -.align 8 -.Lad_a_loop: - movdqa 0x00(SRC), MSG - aegis128_update - pxor MSG, STATE4 - sub $0x10, LEN - cmp $0x10, LEN - jl .Lad_out_1 - - movdqa 0x10(SRC), MSG - aegis128_update - pxor MSG, STATE3 - sub $0x10, LEN - cmp $0x10, LEN - jl .Lad_out_2 - - movdqa 0x20(SRC), MSG - aegis128_update - pxor MSG, STATE2 - sub $0x10, LEN - cmp $0x10, LEN - jl .Lad_out_3 - - movdqa 0x30(SRC), MSG - aegis128_update - pxor MSG, STATE1 - sub $0x10, LEN - cmp $0x10, LEN - jl .Lad_out_4 - - movdqa 0x40(SRC), MSG - aegis128_update - pxor MSG, STATE0 - sub $0x10, LEN - cmp $0x10, LEN - jl .Lad_out_0 - - add $0x50, SRC - jmp .Lad_a_loop - .align 8 -.Lad_u_loop: +.Lad_loop: movdqu 0x00(SRC), MSG aegis128_update pxor MSG, STATE4 sub $0x10, LEN cmp $0x10, LEN @@ -325,11 +281,11 @@ SYM_FUNC_START(crypto_aegis128_aesni_ad) sub $0x10, LEN cmp $0x10, LEN jl .Lad_out_0 add $0x50, SRC - jmp .Lad_u_loop + jmp .Lad_loop /* store the state: */ .Lad_out_0: movdqu STATE0, 0x00(STATEP) movdqu STATE1, 0x10(STATEP) @@ -378,19 +334,19 @@ SYM_FUNC_START(crypto_aegis128_aesni_ad) .Lad_out: FRAME_END RET SYM_FUNC_END(crypto_aegis128_aesni_ad) -.macro encrypt_block a s0 s1 s2 s3 s4 i - movdq\a (\i * 0x10)(SRC), MSG +.macro encrypt_block s0 s1 s2 s3 s4 i + movdqu (\i * 0x10)(SRC), MSG movdqa MSG, T0 pxor \s1, T0 pxor \s4, T0 movdqa \s2, T1 pand \s3, T1 pxor T1, T0 - movdq\a T0, (\i * 0x10)(DST) + movdqu T0, (\i * 0x10)(DST) aegis128_update pxor MSG, \s4 sub $0x10, LEN @@ -413,38 +369,21 @@ SYM_FUNC_START(crypto_aegis128_aesni_enc) movdqu 0x10(STATEP), STATE1 movdqu 0x20(STATEP), STATE2 movdqu 0x30(STATEP), STATE3 movdqu 0x40(STATEP), STATE4 - mov SRC, %r8 - or DST, %r8 - and $0xF, %r8 - jnz .Lenc_u_loop - .align 8 -.Lenc_a_loop: - encrypt_block a STATE0 STATE1 STATE2 STATE3 STATE4 0 - encrypt_block a STATE4 STATE0 STATE1 STATE2 STATE3 1 - encrypt_block a STATE3 STATE4 STATE0 STATE1 STATE2 2 - encrypt_block a STATE2 STATE3 STATE4 STATE0 STATE1 3 - encrypt_block a STATE1 STATE2 STATE3 STATE4 STATE0 4 +.Lenc_loop: + encrypt_block STATE0 STATE1 STATE2 STATE3 STATE4 0 + encrypt_block STATE4 STATE0 STATE1 STATE2 STATE3 1 + encrypt_block STATE3 STATE4 STATE0 STATE1 STATE2 2 + encrypt_block STATE2 STATE3 STATE4 STATE0 STATE1 3 + encrypt_block STATE1 STATE2 STATE3 STATE4 STATE0 4 add $0x50, SRC add $0x50, DST - jmp .Lenc_a_loop - -.align 8 -.Lenc_u_loop: - encrypt_block u STATE0 STATE1 STATE2 STATE3 STATE4 0 - encrypt_block u STATE4 STATE0 STATE1 STATE2 STATE3 1 - encrypt_block u STATE3 STATE4 STATE0 STATE1 STATE2 2 - encrypt_block u STATE2 STATE3 STATE4 STATE0 STATE1 3 - encrypt_block u STATE1 STATE2 STATE3 STATE4 STATE0 4 - - add $0x50, SRC - add $0x50, DST - jmp .Lenc_u_loop + jmp .Lenc_loop /* store the state: */ .Lenc_out_0: movdqu STATE4, 0x00(STATEP) movdqu STATE0, 0x10(STATEP) @@ -533,18 +472,18 @@ SYM_FUNC_START(crypto_aegis128_aesni_enc_tail) FRAME_END RET SYM_FUNC_END(crypto_aegis128_aesni_enc_tail) -.macro decrypt_block a s0 s1 s2 s3 s4 i - movdq\a (\i * 0x10)(SRC), MSG +.macro decrypt_block s0 s1 s2 s3 s4 i + movdqu (\i * 0x10)(SRC), MSG pxor \s1, MSG pxor \s4, MSG movdqa \s2, T1 pand \s3, T1 pxor T1, MSG - movdq\a MSG, (\i * 0x10)(DST) + movdqu MSG, (\i * 0x10)(DST) aegis128_update pxor MSG, \s4 sub $0x10, LEN @@ -567,38 +506,21 @@ SYM_FUNC_START(crypto_aegis128_aesni_dec) movdqu 0x10(STATEP), STATE1 movdqu 0x20(STATEP), STATE2 movdqu 0x30(STATEP), STATE3 movdqu 0x40(STATEP), STATE4 - mov SRC, %r8 - or DST, %r8 - and $0xF, %r8 - jnz .Ldec_u_loop - -.align 8 -.Ldec_a_loop: - decrypt_block a STATE0 STATE1 STATE2 STATE3 STATE4 0 - decrypt_block a STATE4 STATE0 STATE1 STATE2 STATE3 1 - decrypt_block a STATE3 STATE4 STATE0 STATE1 STATE2 2 - decrypt_block a STATE2 STATE3 STATE4 STATE0 STATE1 3 - decrypt_block a STATE1 STATE2 STATE3 STATE4 STATE0 4 - - add $0x50, SRC - add $0x50, DST - jmp .Ldec_a_loop - .align 8 -.Ldec_u_loop: - decrypt_block u STATE0 STATE1 STATE2 STATE3 STATE4 0 - decrypt_block u STATE4 STATE0 STATE1 STATE2 STATE3 1 - decrypt_block u STATE3 STATE4 STATE0 STATE1 STATE2 2 - decrypt_block u STATE2 STATE3 STATE4 STATE0 STATE1 3 - decrypt_block u STATE1 STATE2 STATE3 STATE4 STATE0 4 +.Ldec_loop: + decrypt_block STATE0 STATE1 STATE2 STATE3 STATE4 0 + decrypt_block STATE4 STATE0 STATE1 STATE2 STATE3 1 + decrypt_block STATE3 STATE4 STATE0 STATE1 STATE2 2 + decrypt_block STATE2 STATE3 STATE4 STATE0 STATE1 3 + decrypt_block STATE1 STATE2 STATE3 STATE4 STATE0 4 add $0x50, SRC add $0x50, DST - jmp .Ldec_u_loop + jmp .Ldec_loop /* store the state: */ .Ldec_out_0: movdqu STATE4, 0x00(STATEP) movdqu STATE0, 0x10(STATEP) From patchwork Mon Oct 7 01:24:25 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Eric Biggers X-Patchwork-Id: 834160 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 27A651171D for ; Mon, 7 Oct 2024 01:24:57 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1728264297; cv=none; b=j0WIqNcGzdHRtwYmigJmcPelN7Viu3H0oXkVWhDhIndCWwfzz/elywxE3RfDYJqBNxxNl1IsUgvD68P33xVEc16FoJuxQYYJRYttM3vwYJ0PM4NLZlseeg5FiVzxLXarGvALuwLzLSWewh/ig+X/gWXOnpqmt6GLvukErRhkHOw= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1728264297; c=relaxed/simple; bh=k/mTYX02KvpLmDuBNEGrzMYyk+wYM9HVP9OgbxTq8PQ=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=DGnkkhC6YpzQuS6ENSmJ0Z+uT17M1g9vPkikT4KxU60bwKSbG4denlSqzLHU+Pt0F5JHDeYXZPzqyaAPF/cka0MDmuruVf0sTlg2j2M3ZJr7xEfwB4oXK6pV/jUHK0mdqCuVVZNwppAdajrGg9UJaG7jpJXKzTsLRFBPc3Cx9p8= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=Lh15lw+n; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="Lh15lw+n" Received: by smtp.kernel.org (Postfix) with ESMTPSA id E09D4C4CECF; Mon, 7 Oct 2024 01:24:56 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1728264297; bh=k/mTYX02KvpLmDuBNEGrzMYyk+wYM9HVP9OgbxTq8PQ=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=Lh15lw+nth2TNTF9ldvf7nb2WlIoUvc8vb1ZjOmLED+AyquimRleOOX8akWMTXL7Q Ke0T6c4grBvfiinb+KUu0lnekkZBT0gyKqvSkQIUfHanUsX+pcdQXbRpW3zg1Y2Cb6 hy+KUFjQY9VpZJWvmQSK9MzLbI6Gsui4tRn8Dqg6oB/YyN5lfl+MhwUrnQG10Re6iI il3bDf+szYEMjdQnbLGYg7wklYo8O2mmQGSPlCvvmHIy+tcTliIWi9TNyXIh5j3A+f gKCFcWw71wz5iUQc+psRfvlLMZX9L6TLuNrwYdNVwqbK61op4M+hwc6GSjXdPEPMW5 +wdQrvrPMNYwg== From: Eric Biggers To: linux-crypto@vger.kernel.org Cc: x86@kernel.org, Ondrej Mosnacek Subject: [PATCH 05/10] crypto: x86/aegis128 - optimize length block preparation using SSE4.1 Date: Sun, 6 Oct 2024 18:24:25 -0700 Message-ID: <20241007012430.163606-6-ebiggers@kernel.org> X-Mailer: git-send-email 2.46.2 In-Reply-To: <20241007012430.163606-1-ebiggers@kernel.org> References: <20241007012430.163606-1-ebiggers@kernel.org> Precedence: bulk X-Mailing-List: linux-crypto@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Eric Biggers Start using SSE4.1 instructions in the AES-NI AEGIS code, with the first use case being preparing the length block in fewer instructions. In practice this does not reduce the set of CPUs on which the code can run, because all Intel and AMD CPUs with AES-NI also have SSE4.1. Upgrade the existing SSE2 feature check to SSE4.1, though it seems this check is not strictly necessary; the aesni-intel module has been getting away with using SSE4.1 despite checking for AES-NI only. Signed-off-by: Eric Biggers --- arch/x86/crypto/Kconfig | 4 ++-- arch/x86/crypto/aegis128-aesni-asm.S | 6 ++---- arch/x86/crypto/aegis128-aesni-glue.c | 6 +++--- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/arch/x86/crypto/Kconfig b/arch/x86/crypto/Kconfig index 7b1bebed879d..3d2e38ba5240 100644 --- a/arch/x86/crypto/Kconfig +++ b/arch/x86/crypto/Kconfig @@ -361,20 +361,20 @@ config CRYPTO_CHACHA20_X86_64 - SSSE3 (Supplemental SSE3) - AVX2 (Advanced Vector Extensions 2) - AVX-512VL (Advanced Vector Extensions-512VL) config CRYPTO_AEGIS128_AESNI_SSE2 - tristate "AEAD ciphers: AEGIS-128 (AES-NI/SSE2)" + tristate "AEAD ciphers: AEGIS-128 (AES-NI/SSE4.1)" depends on X86 && 64BIT select CRYPTO_AEAD select CRYPTO_SIMD help AEGIS-128 AEAD algorithm Architecture: x86_64 using: - AES-NI (AES New Instructions) - - SSE2 (Streaming SIMD Extensions 2) + - SSE4.1 (Streaming SIMD Extensions 4.1) config CRYPTO_NHPOLY1305_SSE2 tristate "Hash functions: NHPoly1305 (SSE2)" depends on X86 && 64BIT select CRYPTO_NHPOLY1305 diff --git a/arch/x86/crypto/aegis128-aesni-asm.S b/arch/x86/crypto/aegis128-aesni-asm.S index 5541aca2fd0d..6ed4bc452c29 100644 --- a/arch/x86/crypto/aegis128-aesni-asm.S +++ b/arch/x86/crypto/aegis128-aesni-asm.S @@ -1,8 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * AES-NI + SSE2 implementation of AEGIS-128 + * AES-NI + SSE4.1 implementation of AEGIS-128 * * Copyright (c) 2017-2018 Ondrej Mosnacek * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved. */ @@ -636,13 +636,11 @@ SYM_FUNC_START(crypto_aegis128_aesni_final) movdqu 0x30(STATEP), STATE3 movdqu 0x40(STATEP), STATE4 /* prepare length block: */ movd %edx, MSG - movd %ecx, T0 - pslldq $8, T0 - pxor T0, MSG + pinsrd $2, %ecx, MSG psllq $3, MSG /* multiply by 8 (to get bit count) */ pxor STATE3, MSG /* update state: */ diff --git a/arch/x86/crypto/aegis128-aesni-glue.c b/arch/x86/crypto/aegis128-aesni-glue.c index deb39cef0be1..4dd2d981a514 100644 --- a/arch/x86/crypto/aegis128-aesni-glue.c +++ b/arch/x86/crypto/aegis128-aesni-glue.c @@ -1,9 +1,9 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* * The AEGIS-128 Authenticated-Encryption Algorithm - * Glue for AES-NI + SSE2 implementation + * Glue for AES-NI + SSE4.1 implementation * * Copyright (c) 2017-2018 Ondrej Mosnacek * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved. */ @@ -252,11 +252,11 @@ static struct aead_alg crypto_aegis128_aesni_alg = { static struct simd_aead_alg *simd_alg; static int __init crypto_aegis128_aesni_module_init(void) { - if (!boot_cpu_has(X86_FEATURE_XMM2) || + if (!boot_cpu_has(X86_FEATURE_XMM4_1) || !boot_cpu_has(X86_FEATURE_AES) || !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL)) return -ENODEV; return simd_register_aeads_compat(&crypto_aegis128_aesni_alg, 1, @@ -271,8 +271,8 @@ static void __exit crypto_aegis128_aesni_module_exit(void) module_init(crypto_aegis128_aesni_module_init); module_exit(crypto_aegis128_aesni_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Ondrej Mosnacek "); -MODULE_DESCRIPTION("AEGIS-128 AEAD algorithm -- AESNI+SSE2 implementation"); +MODULE_DESCRIPTION("AEGIS-128 AEAD algorithm -- AESNI+SSE4.1 implementation"); MODULE_ALIAS_CRYPTO("aegis128"); MODULE_ALIAS_CRYPTO("aegis128-aesni"); From patchwork Mon Oct 7 01:24:26 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Eric Biggers X-Patchwork-Id: 833400 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 8452C12E75 for ; Mon, 7 Oct 2024 01:24:57 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1728264297; cv=none; b=FbGL73lUfqrUaN3IrB/gtdEODqT1EervWu9TUYgIHCgQ66tXvHAJ8ezKAvEBsHhsMtDLSpEoxuMi+TzXyI5RBQ3+TR2LMDAAU7ozjuCH3p3cIqmfv030OTsQTduZg9lNd7zIsUd7oquqgMF/DQJIksMAiAwJhgI6bPn2BFXbSeg= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1728264297; c=relaxed/simple; bh=31DJiemRPlFdVVw+DLb8cEaSXPnuQ5jPsj3suRzUXaI=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=bQdCcJSSS6d/BsBFuHW1Y4IDsajdRk0sBUz6btsWx0N66f4CNPnU4qf55S2NTv1Xyvm/Qle4G0PA4e9NweRfiN3Bqb9PFISWYw4xotf4VxHBNxng9Hgmm/QR+Dv/9rEiXEWDZofA56lmQCi5RgnKqzC4kXKDzs60lcfrHaym92o= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=uyH8RdFy; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="uyH8RdFy" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 37F36C4CED3; Mon, 7 Oct 2024 01:24:57 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1728264297; bh=31DJiemRPlFdVVw+DLb8cEaSXPnuQ5jPsj3suRzUXaI=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=uyH8RdFyBN+yZxTepY0EQ78DkTdC+FcYiG+Yegb5+yoQ+Us8Z8VQVNUTVL8pl6jn1 yV3RPFUkKFhptlPH0oKFMS5ts2G+3S+JpwrSOaLtrmbfWb0RBK4Qj8HnOSqv9Vr2zS gT/xqlQJNO5zAlqBLJsPKzpX6zxiynBsitZgWvPCGme8dQo70PFm3wTiyCSQ8h6kHz yQZmxsHPoBhsMr0u9KYsFANKN9YMH3j+Q0aHTUbuASTJtGPoMmp5B+RDzt5Rz9EpEP 9v5L4lLwNj71/B67kIl6s49Dgv3oef7wKCBS1GlZkgPE+xUkYwobb3y3ZqSbwVxKXe NTtuNMQagvsYg== From: Eric Biggers To: linux-crypto@vger.kernel.org Cc: x86@kernel.org, Ondrej Mosnacek Subject: [PATCH 06/10] crypto: x86/aegis128 - improve assembly function prototypes Date: Sun, 6 Oct 2024 18:24:26 -0700 Message-ID: <20241007012430.163606-7-ebiggers@kernel.org> X-Mailer: git-send-email 2.46.2 In-Reply-To: <20241007012430.163606-1-ebiggers@kernel.org> References: <20241007012430.163606-1-ebiggers@kernel.org> Precedence: bulk X-Mailing-List: linux-crypto@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Eric Biggers Adjust the prototypes of the AEGIS assembly functions: - Use proper types instead of 'void *', when applicable. - Move the length parameter to after the buffers it describes rather than before, to match the usual convention. Also shorten its name to just len (which is the name used in the assembly code). - Declare register aliases at the beginning of each function rather than once per file. This was necessary because len was moved, but also it allows adding some aliases where raw registers were used before. - Remove the unnecessary "crypto_" prefix. Signed-off-by: Eric Biggers --- arch/x86/crypto/aegis128-aesni-asm.S | 105 ++++++++++++++++---------- arch/x86/crypto/aegis128-aesni-glue.c | 92 +++++++++++----------- 2 files changed, 112 insertions(+), 85 deletions(-) diff --git a/arch/x86/crypto/aegis128-aesni-asm.S b/arch/x86/crypto/aegis128-aesni-asm.S index 6ed4bc452c29..8131903cc7ff 100644 --- a/arch/x86/crypto/aegis128-aesni-asm.S +++ b/arch/x86/crypto/aegis128-aesni-asm.S @@ -17,15 +17,10 @@ #define KEY %xmm5 #define MSG %xmm5 #define T0 %xmm6 #define T1 %xmm7 -#define STATEP %rdi -#define LEN %esi -#define SRC %rdx -#define DST %rcx - .section .rodata.cst16.aegis128_const, "aM", @progbits, 32 .align 16 .Laegis128_const_0: .byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d .byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62 @@ -70,10 +65,12 @@ * T0 * %r8 * %r9 */ SYM_FUNC_START_LOCAL(__load_partial) + .set LEN, %ecx + .set SRC, %rsi xor %r9d, %r9d pxor MSG, MSG mov LEN, %r8d and $0x1, %r8 @@ -136,10 +133,12 @@ SYM_FUNC_END(__load_partial) * %r8 * %r9 * %r10 */ SYM_FUNC_START_LOCAL(__store_partial) + .set LEN, %ecx + .set DST, %rdx mov LEN, %r8d mov DST, %r9 movq T0, %r10 @@ -182,20 +181,25 @@ SYM_FUNC_START_LOCAL(__store_partial) .Lst_partial_1: RET SYM_FUNC_END(__store_partial) /* - * void crypto_aegis128_aesni_init(void *state, const void *key, const void *iv); + * void aegis128_aesni_init(struct aegis_state *state, + * const struct aegis_block *key, + * const u8 iv[AEGIS128_NONCE_SIZE]); */ -SYM_FUNC_START(crypto_aegis128_aesni_init) +SYM_FUNC_START(aegis128_aesni_init) + .set STATEP, %rdi + .set KEYP, %rsi + .set IVP, %rdx FRAME_BEGIN /* load IV: */ - movdqu (%rdx), T1 + movdqu (IVP), T1 /* load key: */ - movdqa (%rsi), KEY + movdqa (KEYP), KEY pxor KEY, T1 movdqa T1, STATE0 movdqa KEY, STATE3 movdqa KEY, STATE4 @@ -224,17 +228,20 @@ SYM_FUNC_START(crypto_aegis128_aesni_init) movdqu STATE3, 0x30(STATEP) movdqu STATE4, 0x40(STATEP) FRAME_END RET -SYM_FUNC_END(crypto_aegis128_aesni_init) +SYM_FUNC_END(aegis128_aesni_init) /* - * void crypto_aegis128_aesni_ad(void *state, unsigned int length, - * const void *data); + * void aegis128_aesni_ad(struct aegis_state *state, const u8 *data, + * unsigned int len); */ -SYM_FUNC_START(crypto_aegis128_aesni_ad) +SYM_FUNC_START(aegis128_aesni_ad) + .set STATEP, %rdi + .set SRC, %rsi + .set LEN, %edx FRAME_BEGIN cmp $0x10, LEN jb .Lad_out @@ -332,11 +339,11 @@ SYM_FUNC_START(crypto_aegis128_aesni_ad) RET .Lad_out: FRAME_END RET -SYM_FUNC_END(crypto_aegis128_aesni_ad) +SYM_FUNC_END(aegis128_aesni_ad) .macro encrypt_block s0 s1 s2 s3 s4 i movdqu (\i * 0x10)(SRC), MSG movdqa MSG, T0 pxor \s1, T0 @@ -353,14 +360,18 @@ SYM_FUNC_END(crypto_aegis128_aesni_ad) cmp $0x10, LEN jl .Lenc_out_\i .endm /* - * void crypto_aegis128_aesni_enc(void *state, unsigned int length, - * const void *src, void *dst); + * void aegis128_aesni_enc(struct aegis_state *state, const u8 *src, u8 *dst, + * unsigned int len); */ -SYM_FUNC_START(crypto_aegis128_aesni_enc) +SYM_FUNC_START(aegis128_aesni_enc) + .set STATEP, %rdi + .set SRC, %rsi + .set DST, %rdx + .set LEN, %ecx FRAME_BEGIN cmp $0x10, LEN jb .Lenc_out @@ -430,17 +441,21 @@ SYM_FUNC_START(crypto_aegis128_aesni_enc) RET .Lenc_out: FRAME_END RET -SYM_FUNC_END(crypto_aegis128_aesni_enc) +SYM_FUNC_END(aegis128_aesni_enc) /* - * void crypto_aegis128_aesni_enc_tail(void *state, unsigned int length, - * const void *src, void *dst); + * void aegis128_aesni_enc_tail(struct aegis_state *state, const u8 *src, + * u8 *dst, unsigned int len); */ -SYM_FUNC_START(crypto_aegis128_aesni_enc_tail) +SYM_FUNC_START(aegis128_aesni_enc_tail) + .set STATEP, %rdi + .set SRC, %rsi + .set DST, %rdx + .set LEN, %ecx FRAME_BEGIN /* load the state: */ movdqu 0x00(STATEP), STATE0 movdqu 0x10(STATEP), STATE1 @@ -470,11 +485,11 @@ SYM_FUNC_START(crypto_aegis128_aesni_enc_tail) movdqu STATE2, 0x30(STATEP) movdqu STATE3, 0x40(STATEP) FRAME_END RET -SYM_FUNC_END(crypto_aegis128_aesni_enc_tail) +SYM_FUNC_END(aegis128_aesni_enc_tail) .macro decrypt_block s0 s1 s2 s3 s4 i movdqu (\i * 0x10)(SRC), MSG pxor \s1, MSG pxor \s4, MSG @@ -490,14 +505,18 @@ SYM_FUNC_END(crypto_aegis128_aesni_enc_tail) cmp $0x10, LEN jl .Ldec_out_\i .endm /* - * void crypto_aegis128_aesni_dec(void *state, unsigned int length, - * const void *src, void *dst); + * void aegis128_aesni_dec(struct aegis_state *state, const u8 *src, u8 *dst, + * unsigned int len); */ -SYM_FUNC_START(crypto_aegis128_aesni_dec) +SYM_FUNC_START(aegis128_aesni_dec) + .set STATEP, %rdi + .set SRC, %rsi + .set DST, %rdx + .set LEN, %ecx FRAME_BEGIN cmp $0x10, LEN jb .Ldec_out @@ -567,17 +586,21 @@ SYM_FUNC_START(crypto_aegis128_aesni_dec) RET .Ldec_out: FRAME_END RET -SYM_FUNC_END(crypto_aegis128_aesni_dec) +SYM_FUNC_END(aegis128_aesni_dec) /* - * void crypto_aegis128_aesni_dec_tail(void *state, unsigned int length, - * const void *src, void *dst); + * void aegis128_aesni_dec_tail(struct aegis_state *state, const u8 *src, + * u8 *dst, unsigned int len); */ -SYM_FUNC_START(crypto_aegis128_aesni_dec_tail) +SYM_FUNC_START(aegis128_aesni_dec_tail) + .set STATEP, %rdi + .set SRC, %rsi + .set DST, %rdx + .set LEN, %ecx FRAME_BEGIN /* load the state: */ movdqu 0x00(STATEP), STATE0 movdqu 0x10(STATEP), STATE1 @@ -617,30 +640,34 @@ SYM_FUNC_START(crypto_aegis128_aesni_dec_tail) movdqu STATE2, 0x30(STATEP) movdqu STATE3, 0x40(STATEP) FRAME_END RET -SYM_FUNC_END(crypto_aegis128_aesni_dec_tail) +SYM_FUNC_END(aegis128_aesni_dec_tail) /* - * void crypto_aegis128_aesni_final(void *state, void *tag_xor, - * unsigned int assoclen, - * unsigned int cryptlen); + * void aegis128_aesni_final(struct aegis_state *state, + * struct aegis_block *tag_xor, + * unsigned int cryptlen, unsigned int assoclen); */ -SYM_FUNC_START(crypto_aegis128_aesni_final) +SYM_FUNC_START(aegis128_aesni_final) + .set STATEP, %rdi + .set TAG_XOR, %rsi + .set ASSOCLEN, %edx + .set CRYPTLEN, %ecx FRAME_BEGIN /* load the state: */ movdqu 0x00(STATEP), STATE0 movdqu 0x10(STATEP), STATE1 movdqu 0x20(STATEP), STATE2 movdqu 0x30(STATEP), STATE3 movdqu 0x40(STATEP), STATE4 /* prepare length block: */ - movd %edx, MSG - pinsrd $2, %ecx, MSG + movd ASSOCLEN, MSG + pinsrd $2, CRYPTLEN, MSG psllq $3, MSG /* multiply by 8 (to get bit count) */ pxor STATE3, MSG /* update state: */ @@ -651,18 +678,18 @@ SYM_FUNC_START(crypto_aegis128_aesni_final) aegis128_update; pxor MSG, STATE0 aegis128_update; pxor MSG, STATE4 aegis128_update; pxor MSG, STATE3 /* xor tag: */ - movdqu (%rsi), MSG + movdqu (TAG_XOR), MSG pxor STATE0, MSG pxor STATE1, MSG pxor STATE2, MSG pxor STATE3, MSG pxor STATE4, MSG - movdqu MSG, (%rsi) + movdqu MSG, (TAG_XOR) FRAME_END RET -SYM_FUNC_END(crypto_aegis128_aesni_final) +SYM_FUNC_END(aegis128_aesni_final) diff --git a/arch/x86/crypto/aegis128-aesni-glue.c b/arch/x86/crypto/aegis128-aesni-glue.c index 4dd2d981a514..739d92c85790 100644 --- a/arch/x86/crypto/aegis128-aesni-glue.c +++ b/arch/x86/crypto/aegis128-aesni-glue.c @@ -21,31 +21,10 @@ #define AEGIS128_STATE_BLOCKS 5 #define AEGIS128_KEY_SIZE 16 #define AEGIS128_MIN_AUTH_SIZE 8 #define AEGIS128_MAX_AUTH_SIZE 16 -asmlinkage void crypto_aegis128_aesni_init(void *state, void *key, void *iv); - -asmlinkage void crypto_aegis128_aesni_ad( - void *state, unsigned int length, const void *data); - -asmlinkage void crypto_aegis128_aesni_enc( - void *state, unsigned int length, const void *src, void *dst); - -asmlinkage void crypto_aegis128_aesni_dec( - void *state, unsigned int length, const void *src, void *dst); - -asmlinkage void crypto_aegis128_aesni_enc_tail( - void *state, unsigned int length, const void *src, void *dst); - -asmlinkage void crypto_aegis128_aesni_dec_tail( - void *state, unsigned int length, const void *src, void *dst); - -asmlinkage void crypto_aegis128_aesni_final( - void *state, void *tag_xor, unsigned int cryptlen, - unsigned int assoclen); - struct aegis_block { u8 bytes[AEGIS128_BLOCK_SIZE] __aligned(AEGIS128_BLOCK_ALIGN); }; struct aegis_state { @@ -54,10 +33,36 @@ struct aegis_state { struct aegis_ctx { struct aegis_block key; }; +asmlinkage void aegis128_aesni_init(struct aegis_state *state, + const struct aegis_block *key, + const u8 iv[AEGIS128_NONCE_SIZE]); + +asmlinkage void aegis128_aesni_ad(struct aegis_state *state, const u8 *data, + unsigned int len); + +asmlinkage void aegis128_aesni_enc(struct aegis_state *state, const u8 *src, + u8 *dst, unsigned int len); + +asmlinkage void aegis128_aesni_dec(struct aegis_state *state, const u8 *src, + u8 *dst, unsigned int len); + +asmlinkage void aegis128_aesni_enc_tail(struct aegis_state *state, + const u8 *src, u8 *dst, + unsigned int len); + +asmlinkage void aegis128_aesni_dec_tail(struct aegis_state *state, + const u8 *src, u8 *dst, + unsigned int len); + +asmlinkage void aegis128_aesni_final(struct aegis_state *state, + struct aegis_block *tag_xor, + unsigned int cryptlen, + unsigned int assoclen); + static void crypto_aegis128_aesni_process_ad( struct aegis_state *state, struct scatterlist *sg_src, unsigned int assoclen) { struct scatter_walk walk; @@ -73,19 +78,18 @@ static void crypto_aegis128_aesni_process_ad( if (pos + size >= AEGIS128_BLOCK_SIZE) { if (pos > 0) { unsigned int fill = AEGIS128_BLOCK_SIZE - pos; memcpy(buf.bytes + pos, src, fill); - crypto_aegis128_aesni_ad(state, - AEGIS128_BLOCK_SIZE, - buf.bytes); + aegis128_aesni_ad(state, buf.bytes, + AEGIS128_BLOCK_SIZE); pos = 0; left -= fill; src += fill; } - crypto_aegis128_aesni_ad(state, left, src); + aegis128_aesni_ad(state, src, left); src += left & ~(AEGIS128_BLOCK_SIZE - 1); left &= AEGIS128_BLOCK_SIZE - 1; } @@ -98,45 +102,41 @@ static void crypto_aegis128_aesni_process_ad( scatterwalk_done(&walk, 0, assoclen); } if (pos > 0) { memset(buf.bytes + pos, 0, AEGIS128_BLOCK_SIZE - pos); - crypto_aegis128_aesni_ad(state, AEGIS128_BLOCK_SIZE, buf.bytes); + aegis128_aesni_ad(state, buf.bytes, AEGIS128_BLOCK_SIZE); } } static __always_inline void crypto_aegis128_aesni_process_crypt(struct aegis_state *state, struct skcipher_walk *walk, bool enc) { while (walk->nbytes >= AEGIS128_BLOCK_SIZE) { if (enc) - crypto_aegis128_aesni_enc( - state, - round_down(walk->nbytes, - AEGIS128_BLOCK_SIZE), - walk->src.virt.addr, - walk->dst.virt.addr); + aegis128_aesni_enc(state, walk->src.virt.addr, + walk->dst.virt.addr, + round_down(walk->nbytes, + AEGIS128_BLOCK_SIZE)); else - crypto_aegis128_aesni_dec( - state, - round_down(walk->nbytes, - AEGIS128_BLOCK_SIZE), - walk->src.virt.addr, - walk->dst.virt.addr); + aegis128_aesni_dec(state, walk->src.virt.addr, + walk->dst.virt.addr, + round_down(walk->nbytes, + AEGIS128_BLOCK_SIZE)); skcipher_walk_done(walk, walk->nbytes % AEGIS128_BLOCK_SIZE); } if (walk->nbytes) { if (enc) - crypto_aegis128_aesni_enc_tail(state, walk->nbytes, - walk->src.virt.addr, - walk->dst.virt.addr); + aegis128_aesni_enc_tail(state, walk->src.virt.addr, + walk->dst.virt.addr, + walk->nbytes); else - crypto_aegis128_aesni_dec_tail(state, walk->nbytes, - walk->src.virt.addr, - walk->dst.virt.addr); + aegis128_aesni_dec_tail(state, walk->src.virt.addr, + walk->dst.virt.addr, + walk->nbytes); skcipher_walk_done(walk, 0); } } static struct aegis_ctx *crypto_aegis128_aesni_ctx(struct crypto_aead *aead) @@ -184,14 +184,14 @@ crypto_aegis128_aesni_crypt(struct aead_request *req, else skcipher_walk_aead_decrypt(&walk, req, true); kernel_fpu_begin(); - crypto_aegis128_aesni_init(&state, ctx->key.bytes, req->iv); + aegis128_aesni_init(&state, &ctx->key, req->iv); crypto_aegis128_aesni_process_ad(&state, req->src, req->assoclen); crypto_aegis128_aesni_process_crypt(&state, &walk, enc); - crypto_aegis128_aesni_final(&state, tag_xor, req->assoclen, cryptlen); + aegis128_aesni_final(&state, tag_xor, req->assoclen, cryptlen); kernel_fpu_end(); } static int crypto_aegis128_aesni_encrypt(struct aead_request *req) From patchwork Mon Oct 7 01:24:27 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Eric Biggers X-Patchwork-Id: 834159 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id C51651400A for ; Mon, 7 Oct 2024 01:24:57 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1728264297; cv=none; b=Kvj9PnrLFIrIBxgk8CUFZ6Zo2X/Y/s3fWGRa8WqhzBLrWU0MzpUzN7hF/LuS2E/uSnsB+T735uVsewb48kKgAKU85TjpvBGu64ee6XWuBlvTdOgElcIq1RhxN71dfpLE8ZRQN7e87OgkqSqcQ5tWm8TpEZf4411z2gNJGR0hs+w= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1728264297; c=relaxed/simple; bh=FXaqnIAcJV0N/u6qISeo3QSKQe4ZIHKKUs0+RYVsROo=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=NETEhA4zV2mOdLfulKXqd0HY6pd7y9lFrRAkaU9Q6J/c9P1BD21n2l1DSc3R2ObG9BMkbmowemPq2wJSfOmp2fINu6AkpCBSpHhycALjRk38IaOROZhJiWnjcQDvOS5NnKsjKAAEWkGeED6Gnttcwj/UtoyrMMFSwgo+pDlPYr0= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=cwW+4si7; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="cwW+4si7" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 87FCEC4CECF; Mon, 7 Oct 2024 01:24:57 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1728264297; bh=FXaqnIAcJV0N/u6qISeo3QSKQe4ZIHKKUs0+RYVsROo=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=cwW+4si7BuZ1e/P9jb6orFFaJ4ulAG9OgWzSXi/sM+wU40jVCFGuHwF8XUrVuUnDF +Q4pV/9A8pguddscDYl9B9tfY3HlF79MbUZqIwhsHMJAmwTOTW4s03k+yoDR58anoa 9gQtuR1qNjkNk3zR++6rM4ERpQREDaUxFJHZBPzWHAlCdhXM2Z8iP7T0V5pdwMsIC4 ARtdOpzZSDfyYQPKL+jjJV6iyUxwwXpWn3sVIDKQzT1w7hUjfr4KgHia1V1wDuDrKw DjYPE35zedXj3jsM3kDPj/AShaySTWe7JTH59vfwmnz2zFJAk1Ut6J0ilQ2Ijb4Ak0 W2ynl57SA+gXw== From: Eric Biggers To: linux-crypto@vger.kernel.org Cc: x86@kernel.org, Ondrej Mosnacek Subject: [PATCH 07/10] crypto: x86/aegis128 - optimize partial block handling using SSE4.1 Date: Sun, 6 Oct 2024 18:24:27 -0700 Message-ID: <20241007012430.163606-8-ebiggers@kernel.org> X-Mailer: git-send-email 2.46.2 In-Reply-To: <20241007012430.163606-1-ebiggers@kernel.org> References: <20241007012430.163606-1-ebiggers@kernel.org> Precedence: bulk X-Mailing-List: linux-crypto@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Eric Biggers Optimize the code that loads and stores partial blocks, taking advantage of SSE4.1. The code is adapted from that in aes-gcm-aesni-x86_64.S. Signed-off-by: Eric Biggers --- arch/x86/crypto/aegis128-aesni-asm.S | 236 +++++++++++---------------- 1 file changed, 95 insertions(+), 141 deletions(-) diff --git a/arch/x86/crypto/aegis128-aesni-asm.S b/arch/x86/crypto/aegis128-aesni-asm.S index 8131903cc7ff..b5c7abc9a0d4 100644 --- a/arch/x86/crypto/aegis128-aesni-asm.S +++ b/arch/x86/crypto/aegis128-aesni-asm.S @@ -2,10 +2,11 @@ /* * AES-NI + SSE4.1 implementation of AEGIS-128 * * Copyright (c) 2017-2018 Ondrej Mosnacek * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved. + * Copyright 2024 Google LLC */ #include #include @@ -26,15 +27,15 @@ .byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62 .Laegis128_const_1: .byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1 .byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd -.section .rodata.cst16.aegis128_counter, "aM", @progbits, 16 -.align 16 -.Laegis128_counter: - .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 - .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f +.section .rodata.cst32.zeropad_mask, "aM", @progbits, 32 +.align 32 +.Lzeropad_mask: + .octa 0xffffffffffffffffffffffffffffffff + .octa 0 .text /* * aegis128_update @@ -53,136 +54,90 @@ aesenc STATE3, STATE2 aesenc T0, STATE3 .endm /* - * __load_partial: internal ABI - * input: - * LEN - bytes - * SRC - src - * output: - * MSG - message block - * changed: - * T0 - * %r8 - * %r9 + * Load 1 <= LEN (%ecx) <= 15 bytes from the pointer SRC into the xmm register + * MSG and zeroize any remaining bytes. Clobbers %rax, %rcx, and %r8. */ -SYM_FUNC_START_LOCAL(__load_partial) - .set LEN, %ecx - .set SRC, %rsi - xor %r9d, %r9d - pxor MSG, MSG - - mov LEN, %r8d - and $0x1, %r8 - jz .Lld_partial_1 - - mov LEN, %r8d - and $0x1E, %r8 - add SRC, %r8 - mov (%r8), %r9b - -.Lld_partial_1: - mov LEN, %r8d - and $0x2, %r8 - jz .Lld_partial_2 - - mov LEN, %r8d - and $0x1C, %r8 - add SRC, %r8 - shl $0x10, %r9 - mov (%r8), %r9w - -.Lld_partial_2: - mov LEN, %r8d - and $0x4, %r8 - jz .Lld_partial_4 - - mov LEN, %r8d - and $0x18, %r8 - add SRC, %r8 - shl $32, %r9 - mov (%r8), %r8d - xor %r8, %r9 - -.Lld_partial_4: - movq %r9, MSG - - mov LEN, %r8d - and $0x8, %r8 - jz .Lld_partial_8 - - mov LEN, %r8d - and $0x10, %r8 - add SRC, %r8 - pslldq $8, MSG - movq (%r8), T0 - pxor T0, MSG - -.Lld_partial_8: - RET -SYM_FUNC_END(__load_partial) +.macro load_partial + sub $8, %ecx /* LEN - 8 */ + jle .Lle8\@ + + /* Load 9 <= LEN <= 15 bytes: */ + movq (SRC), MSG /* Load first 8 bytes */ + mov (SRC, %rcx), %rax /* Load last 8 bytes */ + neg %ecx + shl $3, %ecx + shr %cl, %rax /* Discard overlapping bytes */ + pinsrq $1, %rax, MSG + jmp .Ldone\@ + +.Lle8\@: + add $4, %ecx /* LEN - 4 */ + jl .Llt4\@ + + /* Load 4 <= LEN <= 8 bytes: */ + mov (SRC), %eax /* Load first 4 bytes */ + mov (SRC, %rcx), %r8d /* Load last 4 bytes */ + jmp .Lcombine\@ + +.Llt4\@: + /* Load 1 <= LEN <= 3 bytes: */ + add $2, %ecx /* LEN - 2 */ + movzbl (SRC), %eax /* Load first byte */ + jl .Lmovq\@ + movzwl (SRC, %rcx), %r8d /* Load last 2 bytes */ +.Lcombine\@: + shl $3, %ecx + shl %cl, %r8 + or %r8, %rax /* Combine the two parts */ +.Lmovq\@: + movq %rax, MSG +.Ldone\@: +.endm /* - * __store_partial: internal ABI - * input: - * LEN - bytes - * DST - dst - * output: - * T0 - message block - * changed: - * %r8 - * %r9 - * %r10 + * Store 1 <= LEN (%ecx) <= 15 bytes from the xmm register \msg to the pointer + * DST. Clobbers %rax, %rcx, and %r8. */ -SYM_FUNC_START_LOCAL(__store_partial) - .set LEN, %ecx - .set DST, %rdx - mov LEN, %r8d - mov DST, %r9 - - movq T0, %r10 - - cmp $8, %r8 - jl .Lst_partial_8 - - mov %r10, (%r9) - psrldq $8, T0 - movq T0, %r10 - - sub $8, %r8 - add $8, %r9 - -.Lst_partial_8: - cmp $4, %r8 - jl .Lst_partial_4 - - mov %r10d, (%r9) - shr $32, %r10 - - sub $4, %r8 - add $4, %r9 - -.Lst_partial_4: - cmp $2, %r8 - jl .Lst_partial_2 - - mov %r10w, (%r9) - shr $0x10, %r10 - - sub $2, %r8 - add $2, %r9 - -.Lst_partial_2: - cmp $1, %r8 - jl .Lst_partial_1 - - mov %r10b, (%r9) - -.Lst_partial_1: - RET -SYM_FUNC_END(__store_partial) +.macro store_partial msg + sub $8, %ecx /* LEN - 8 */ + jl .Llt8\@ + + /* Store 8 <= LEN <= 15 bytes: */ + pextrq $1, \msg, %rax + mov %ecx, %r8d + shl $3, %ecx + ror %cl, %rax + mov %rax, (DST, %r8) /* Store last LEN - 8 bytes */ + movq \msg, (DST) /* Store first 8 bytes */ + jmp .Ldone\@ + +.Llt8\@: + add $4, %ecx /* LEN - 4 */ + jl .Llt4\@ + + /* Store 4 <= LEN <= 7 bytes: */ + pextrd $1, \msg, %eax + mov %ecx, %r8d + shl $3, %ecx + ror %cl, %eax + mov %eax, (DST, %r8) /* Store last LEN - 4 bytes */ + movd \msg, (DST) /* Store first 4 bytes */ + jmp .Ldone\@ + +.Llt4\@: + /* Store 1 <= LEN <= 3 bytes: */ + pextrb $0, \msg, 0(DST) + cmp $-2, %ecx /* LEN - 4 == -2, i.e. LEN == 2? */ + jl .Ldone\@ + pextrb $1, \msg, 1(DST) + je .Ldone\@ + pextrb $2, \msg, 2(DST) +.Ldone\@: +.endm /* * void aegis128_aesni_init(struct aegis_state *state, * const struct aegis_block *key, * const u8 iv[AEGIS128_NONCE_SIZE]); @@ -451,31 +406,33 @@ SYM_FUNC_END(aegis128_aesni_enc) */ SYM_FUNC_START(aegis128_aesni_enc_tail) .set STATEP, %rdi .set SRC, %rsi .set DST, %rdx - .set LEN, %ecx + .set LEN, %ecx /* {load,store}_partial rely on this being %ecx */ FRAME_BEGIN /* load the state: */ movdqu 0x00(STATEP), STATE0 movdqu 0x10(STATEP), STATE1 movdqu 0x20(STATEP), STATE2 movdqu 0x30(STATEP), STATE3 movdqu 0x40(STATEP), STATE4 /* encrypt message: */ - call __load_partial + mov LEN, %r9d + load_partial movdqa MSG, T0 pxor STATE1, T0 pxor STATE4, T0 movdqa STATE2, T1 pand STATE3, T1 pxor T1, T0 - call __store_partial + mov %r9d, LEN + store_partial T0 aegis128_update pxor MSG, STATE4 /* store the state: */ @@ -596,40 +553,37 @@ SYM_FUNC_END(aegis128_aesni_dec) */ SYM_FUNC_START(aegis128_aesni_dec_tail) .set STATEP, %rdi .set SRC, %rsi .set DST, %rdx - .set LEN, %ecx + .set LEN, %ecx /* {load,store}_partial rely on this being %ecx */ FRAME_BEGIN /* load the state: */ movdqu 0x00(STATEP), STATE0 movdqu 0x10(STATEP), STATE1 movdqu 0x20(STATEP), STATE2 movdqu 0x30(STATEP), STATE3 movdqu 0x40(STATEP), STATE4 /* decrypt message: */ - call __load_partial + mov LEN, %r9d + load_partial pxor STATE1, MSG pxor STATE4, MSG movdqa STATE2, T1 pand STATE3, T1 pxor T1, MSG - movdqa MSG, T0 - call __store_partial + mov %r9d, LEN + store_partial MSG /* mask with byte count: */ - movd LEN, T0 - punpcklbw T0, T0 - punpcklbw T0, T0 - punpcklbw T0, T0 - punpcklbw T0, T0 - movdqa .Laegis128_counter(%rip), T1 - pcmpgtb T1, T0 + lea .Lzeropad_mask+16(%rip), %rax + sub %r9, %rax + movdqu (%rax), T0 pand T0, MSG aegis128_update pxor MSG, STATE4 From patchwork Mon Oct 7 01:24:28 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Eric Biggers X-Patchwork-Id: 833399 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 67B22171A7 for ; Mon, 7 Oct 2024 01:24:58 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1728264298; cv=none; b=lffOSZUlB0XTPUpVtL9qnVVCr2GhS7K1b1YnLeEozPQ+towYSEpTYca6qkH3n4Q2fqWY73UuVysQejcljE5tWqbwvxouieBU05Q/FNk4iiyYLCzDNDQzORH46mrbRSUZ4U8jWm/kFhAyIQveaO8t5rvxuDACfubOXXeiXr8a0hs= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1728264298; c=relaxed/simple; bh=etMS4ksedaC5t40wm3xvIHtyMMS1+QhxCXZeiCycsME=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=aso+2nvBANHboC0ME942BSuOIFV+pzJOETz6mQBVFRm+9IHGUnizvuJD/T8aBxcIwBl36/c8rG2lUN0zy39emd0JYyEDN5k3G4uRa0NzHolNfK7JXYEL2wf0JOw3iou3Yr/C+x7qajr3DBL3XIAuHBGuySgl6CXxRjm0iIt1FRo= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=eTK5Oci8; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="eTK5Oci8" Received: by smtp.kernel.org (Postfix) with ESMTPSA id D9792C4CEC5; Mon, 7 Oct 2024 01:24:57 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1728264298; bh=etMS4ksedaC5t40wm3xvIHtyMMS1+QhxCXZeiCycsME=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=eTK5Oci8VBRytUcq5Di68aFacGeiZsnqGWBDG7wbqPGpTxk8EhnlNHAh4UqwrdXGw jARd2JGbkvQ1C1pzpuQnmuudjcZndEvBardFR+431M5hLjuuGYDGzZVvI575mD/r3H iYMsK4zFNytjYGmdHJDp86oD0D0TJvclZw0SG247wH97j0BflrtuF7RDNqhPd3vpOk Op1LkS1gGE7c0rt58oiozQKJ4yItfJ0yPfNnhGZdets0QSM7QuLqiJpKfcegb/65hd AE6CIRsXnQF89VdtbP8FlTcVoDu+4TslXPXMOSMpwEn8yb8IttZdLE8NeG3c7YhJfB pFDPmoPwVlEWA== From: Eric Biggers To: linux-crypto@vger.kernel.org Cc: x86@kernel.org, Ondrej Mosnacek Subject: [PATCH 08/10] crypto: x86/aegis128 - take advantage of block-aligned len Date: Sun, 6 Oct 2024 18:24:28 -0700 Message-ID: <20241007012430.163606-9-ebiggers@kernel.org> X-Mailer: git-send-email 2.46.2 In-Reply-To: <20241007012430.163606-1-ebiggers@kernel.org> References: <20241007012430.163606-1-ebiggers@kernel.org> Precedence: bulk X-Mailing-List: linux-crypto@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Eric Biggers Update a caller of aegis128_aesni_ad() to round down the length to a block boundary. After that, aegis128_aesni_ad(), aegis128_aesni_enc(), and aegis128_aesni_dec() are only passed whole blocks. Update the assembly code to take advantage of that, which eliminates some unneeded instructions. For aegis128_aesni_enc() and aegis128_aesni_dec(), the length is also always nonzero, so stop checking for zero length. Signed-off-by: Eric Biggers --- arch/x86/crypto/aegis128-aesni-asm.S | 37 +++++++++++---------------- arch/x86/crypto/aegis128-aesni-glue.c | 4 +-- 2 files changed, 17 insertions(+), 24 deletions(-) diff --git a/arch/x86/crypto/aegis128-aesni-asm.S b/arch/x86/crypto/aegis128-aesni-asm.S index b5c7abc9a0d4..583e4515e1f1 100644 --- a/arch/x86/crypto/aegis128-aesni-asm.S +++ b/arch/x86/crypto/aegis128-aesni-asm.S @@ -188,19 +188,21 @@ SYM_FUNC_START(aegis128_aesni_init) SYM_FUNC_END(aegis128_aesni_init) /* * void aegis128_aesni_ad(struct aegis_state *state, const u8 *data, * unsigned int len); + * + * len must be a multiple of 16. */ SYM_FUNC_START(aegis128_aesni_ad) .set STATEP, %rdi .set SRC, %rsi .set LEN, %edx FRAME_BEGIN - cmp $0x10, LEN - jb .Lad_out + test LEN, LEN + jz .Lad_out /* load the state: */ movdqu 0x00(STATEP), STATE0 movdqu 0x10(STATEP), STATE1 movdqu 0x20(STATEP), STATE2 @@ -211,40 +213,35 @@ SYM_FUNC_START(aegis128_aesni_ad) .Lad_loop: movdqu 0x00(SRC), MSG aegis128_update pxor MSG, STATE4 sub $0x10, LEN - cmp $0x10, LEN - jl .Lad_out_1 + jz .Lad_out_1 movdqu 0x10(SRC), MSG aegis128_update pxor MSG, STATE3 sub $0x10, LEN - cmp $0x10, LEN - jl .Lad_out_2 + jz .Lad_out_2 movdqu 0x20(SRC), MSG aegis128_update pxor MSG, STATE2 sub $0x10, LEN - cmp $0x10, LEN - jl .Lad_out_3 + jz .Lad_out_3 movdqu 0x30(SRC), MSG aegis128_update pxor MSG, STATE1 sub $0x10, LEN - cmp $0x10, LEN - jl .Lad_out_4 + jz .Lad_out_4 movdqu 0x40(SRC), MSG aegis128_update pxor MSG, STATE0 sub $0x10, LEN - cmp $0x10, LEN - jl .Lad_out_0 + jz .Lad_out_0 add $0x50, SRC jmp .Lad_loop /* store the state: */ @@ -310,28 +307,26 @@ SYM_FUNC_END(aegis128_aesni_ad) aegis128_update pxor MSG, \s4 sub $0x10, LEN - cmp $0x10, LEN - jl .Lenc_out_\i + jz .Lenc_out_\i .endm /* * void aegis128_aesni_enc(struct aegis_state *state, const u8 *src, u8 *dst, * unsigned int len); + * + * len must be nonzero and a multiple of 16. */ SYM_FUNC_START(aegis128_aesni_enc) .set STATEP, %rdi .set SRC, %rsi .set DST, %rdx .set LEN, %ecx FRAME_BEGIN - cmp $0x10, LEN - jb .Lenc_out - /* load the state: */ movdqu 0x00(STATEP), STATE0 movdqu 0x10(STATEP), STATE1 movdqu 0x20(STATEP), STATE2 movdqu 0x30(STATEP), STATE3 @@ -457,28 +452,26 @@ SYM_FUNC_END(aegis128_aesni_enc_tail) aegis128_update pxor MSG, \s4 sub $0x10, LEN - cmp $0x10, LEN - jl .Ldec_out_\i + jz .Ldec_out_\i .endm /* * void aegis128_aesni_dec(struct aegis_state *state, const u8 *src, u8 *dst, * unsigned int len); + * + * len must be nonzero and a multiple of 16. */ SYM_FUNC_START(aegis128_aesni_dec) .set STATEP, %rdi .set SRC, %rsi .set DST, %rdx .set LEN, %ecx FRAME_BEGIN - cmp $0x10, LEN - jb .Ldec_out - /* load the state: */ movdqu 0x00(STATEP), STATE0 movdqu 0x10(STATEP), STATE1 movdqu 0x20(STATEP), STATE2 movdqu 0x30(STATEP), STATE3 diff --git a/arch/x86/crypto/aegis128-aesni-glue.c b/arch/x86/crypto/aegis128-aesni-glue.c index 739d92c85790..32a42a7dcd3b 100644 --- a/arch/x86/crypto/aegis128-aesni-glue.c +++ b/arch/x86/crypto/aegis128-aesni-glue.c @@ -85,12 +85,12 @@ static void crypto_aegis128_aesni_process_ad( pos = 0; left -= fill; src += fill; } - aegis128_aesni_ad(state, src, left); - + aegis128_aesni_ad(state, src, + left & ~(AEGIS128_BLOCK_SIZE - 1)); src += left & ~(AEGIS128_BLOCK_SIZE - 1); left &= AEGIS128_BLOCK_SIZE - 1; } memcpy(buf.bytes + pos, src, left); From patchwork Mon Oct 7 01:24:29 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Eric Biggers X-Patchwork-Id: 834158 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 63BE8168C7 for ; Mon, 7 Oct 2024 01:24:58 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1728264298; cv=none; b=PI9xx0ySvJrcUushmRVplfJuj69crsoa8Vb2UYTsVn/EtqCylyEwDKPwKYwghQ/cRV7/kDstxszki1vomKMj1oDqw6iG4gnmi1pMlI/cxruufa+EZgjfNkbjbdfki9OBbe4AUeoG/ceGlNrdYu7WQXCxmnFRrAwznkWGn9oV6g0= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1728264298; c=relaxed/simple; bh=gInMbYGUdgv9AowtvjI8ndZgLAUiybRpck554YHWjbE=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=srTH1+geEkGgDw7DodwMc2tVYdkfRDB8VmadOL7wbYiejFlgH6RUAwL474hpoqEZ28JUlyL778opNZ8w9q2OKa6Kg0BNpIRMbm7Q1HQaNRnKgxkQqPypISP6L5Mz/8H651kcFLY7v0lsDGcSlA+RoX2aUoi5gFJrJOYhtIjEkTE= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=Ke7NYULm; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="Ke7NYULm" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 2A558C4CECF; Mon, 7 Oct 2024 01:24:58 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1728264298; bh=gInMbYGUdgv9AowtvjI8ndZgLAUiybRpck554YHWjbE=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=Ke7NYULmpi8xONzgyh3hrw04wQ06gkVa9k4pGfC05LA+b7KEXV/eR3rUjkQP7Kvfl GLYyes23gW6eEqLHCi5NHCk1hiy0AeuFmAelLsA93Ykwiq3lC5QMAPl9HbYhtvSeG0 9mciIe/fAtIR3WsP5JSjwx0KoHJq70HAz0ePaO8G8UlljE5t4vxQgI+DvSeQfCooNo VMjCmPDb5B5w64kpb80LrYO7IL5jGVRIyqB8ZZBg/NbzXoDa9glb3w8iU3TtPKW7cB xOYfO5VzyvmLa29l8+jY7+87/a3b+M9pSKIWO4yB482DJnnW/KCldopR8ojTzpD0LW dt9lR/32zMzYA== From: Eric Biggers To: linux-crypto@vger.kernel.org Cc: x86@kernel.org, Ondrej Mosnacek Subject: [PATCH 09/10] crypto: x86/aegis128 - remove unneeded FRAME_BEGIN and FRAME_END Date: Sun, 6 Oct 2024 18:24:29 -0700 Message-ID: <20241007012430.163606-10-ebiggers@kernel.org> X-Mailer: git-send-email 2.46.2 In-Reply-To: <20241007012430.163606-1-ebiggers@kernel.org> References: <20241007012430.163606-1-ebiggers@kernel.org> Precedence: bulk X-Mailing-List: linux-crypto@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Eric Biggers Stop using FRAME_BEGIN and FRAME_END in the AEGIS assembly functions, since all these functions are now leaf functions. This eliminates some unnecessary instructions. Signed-off-by: Eric Biggers --- arch/x86/crypto/aegis128-aesni-asm.S | 34 ---------------------------- 1 file changed, 34 deletions(-) diff --git a/arch/x86/crypto/aegis128-aesni-asm.S b/arch/x86/crypto/aegis128-aesni-asm.S index 583e4515e1f1..e025c6bfadbd 100644 --- a/arch/x86/crypto/aegis128-aesni-asm.S +++ b/arch/x86/crypto/aegis128-aesni-asm.S @@ -6,11 +6,10 @@ * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved. * Copyright 2024 Google LLC */ #include -#include #define STATE0 %xmm0 #define STATE1 %xmm1 #define STATE2 %xmm2 #define STATE3 %xmm3 @@ -144,11 +143,10 @@ */ SYM_FUNC_START(aegis128_aesni_init) .set STATEP, %rdi .set KEYP, %rsi .set IVP, %rdx - FRAME_BEGIN /* load IV: */ movdqu (IVP), T1 /* load key: */ @@ -180,12 +178,10 @@ SYM_FUNC_START(aegis128_aesni_init) movdqu STATE0, 0x00(STATEP) movdqu STATE1, 0x10(STATEP) movdqu STATE2, 0x20(STATEP) movdqu STATE3, 0x30(STATEP) movdqu STATE4, 0x40(STATEP) - - FRAME_END RET SYM_FUNC_END(aegis128_aesni_init) /* * void aegis128_aesni_ad(struct aegis_state *state, const u8 *data, @@ -195,11 +191,10 @@ SYM_FUNC_END(aegis128_aesni_init) */ SYM_FUNC_START(aegis128_aesni_ad) .set STATEP, %rdi .set SRC, %rsi .set LEN, %edx - FRAME_BEGIN test LEN, LEN jz .Lad_out /* load the state: */ @@ -249,51 +244,45 @@ SYM_FUNC_START(aegis128_aesni_ad) movdqu STATE0, 0x00(STATEP) movdqu STATE1, 0x10(STATEP) movdqu STATE2, 0x20(STATEP) movdqu STATE3, 0x30(STATEP) movdqu STATE4, 0x40(STATEP) - FRAME_END RET .Lad_out_1: movdqu STATE4, 0x00(STATEP) movdqu STATE0, 0x10(STATEP) movdqu STATE1, 0x20(STATEP) movdqu STATE2, 0x30(STATEP) movdqu STATE3, 0x40(STATEP) - FRAME_END RET .Lad_out_2: movdqu STATE3, 0x00(STATEP) movdqu STATE4, 0x10(STATEP) movdqu STATE0, 0x20(STATEP) movdqu STATE1, 0x30(STATEP) movdqu STATE2, 0x40(STATEP) - FRAME_END RET .Lad_out_3: movdqu STATE2, 0x00(STATEP) movdqu STATE3, 0x10(STATEP) movdqu STATE4, 0x20(STATEP) movdqu STATE0, 0x30(STATEP) movdqu STATE1, 0x40(STATEP) - FRAME_END RET .Lad_out_4: movdqu STATE1, 0x00(STATEP) movdqu STATE2, 0x10(STATEP) movdqu STATE3, 0x20(STATEP) movdqu STATE4, 0x30(STATEP) movdqu STATE0, 0x40(STATEP) - FRAME_END RET .Lad_out: - FRAME_END RET SYM_FUNC_END(aegis128_aesni_ad) .macro encrypt_block s0 s1 s2 s3 s4 i movdqu (\i * 0x10)(SRC), MSG @@ -321,11 +310,10 @@ SYM_FUNC_END(aegis128_aesni_ad) SYM_FUNC_START(aegis128_aesni_enc) .set STATEP, %rdi .set SRC, %rsi .set DST, %rdx .set LEN, %ecx - FRAME_BEGIN /* load the state: */ movdqu 0x00(STATEP), STATE0 movdqu 0x10(STATEP), STATE1 movdqu 0x20(STATEP), STATE2 @@ -349,51 +337,45 @@ SYM_FUNC_START(aegis128_aesni_enc) movdqu STATE4, 0x00(STATEP) movdqu STATE0, 0x10(STATEP) movdqu STATE1, 0x20(STATEP) movdqu STATE2, 0x30(STATEP) movdqu STATE3, 0x40(STATEP) - FRAME_END RET .Lenc_out_1: movdqu STATE3, 0x00(STATEP) movdqu STATE4, 0x10(STATEP) movdqu STATE0, 0x20(STATEP) movdqu STATE1, 0x30(STATEP) movdqu STATE2, 0x40(STATEP) - FRAME_END RET .Lenc_out_2: movdqu STATE2, 0x00(STATEP) movdqu STATE3, 0x10(STATEP) movdqu STATE4, 0x20(STATEP) movdqu STATE0, 0x30(STATEP) movdqu STATE1, 0x40(STATEP) - FRAME_END RET .Lenc_out_3: movdqu STATE1, 0x00(STATEP) movdqu STATE2, 0x10(STATEP) movdqu STATE3, 0x20(STATEP) movdqu STATE4, 0x30(STATEP) movdqu STATE0, 0x40(STATEP) - FRAME_END RET .Lenc_out_4: movdqu STATE0, 0x00(STATEP) movdqu STATE1, 0x10(STATEP) movdqu STATE2, 0x20(STATEP) movdqu STATE3, 0x30(STATEP) movdqu STATE4, 0x40(STATEP) - FRAME_END RET .Lenc_out: - FRAME_END RET SYM_FUNC_END(aegis128_aesni_enc) /* * void aegis128_aesni_enc_tail(struct aegis_state *state, const u8 *src, @@ -402,11 +384,10 @@ SYM_FUNC_END(aegis128_aesni_enc) SYM_FUNC_START(aegis128_aesni_enc_tail) .set STATEP, %rdi .set SRC, %rsi .set DST, %rdx .set LEN, %ecx /* {load,store}_partial rely on this being %ecx */ - FRAME_BEGIN /* load the state: */ movdqu 0x00(STATEP), STATE0 movdqu 0x10(STATEP), STATE1 movdqu 0x20(STATEP), STATE2 @@ -434,12 +415,10 @@ SYM_FUNC_START(aegis128_aesni_enc_tail) movdqu STATE4, 0x00(STATEP) movdqu STATE0, 0x10(STATEP) movdqu STATE1, 0x20(STATEP) movdqu STATE2, 0x30(STATEP) movdqu STATE3, 0x40(STATEP) - - FRAME_END RET SYM_FUNC_END(aegis128_aesni_enc_tail) .macro decrypt_block s0 s1 s2 s3 s4 i movdqu (\i * 0x10)(SRC), MSG @@ -466,11 +445,10 @@ SYM_FUNC_END(aegis128_aesni_enc_tail) SYM_FUNC_START(aegis128_aesni_dec) .set STATEP, %rdi .set SRC, %rsi .set DST, %rdx .set LEN, %ecx - FRAME_BEGIN /* load the state: */ movdqu 0x00(STATEP), STATE0 movdqu 0x10(STATEP), STATE1 movdqu 0x20(STATEP), STATE2 @@ -494,51 +472,45 @@ SYM_FUNC_START(aegis128_aesni_dec) movdqu STATE4, 0x00(STATEP) movdqu STATE0, 0x10(STATEP) movdqu STATE1, 0x20(STATEP) movdqu STATE2, 0x30(STATEP) movdqu STATE3, 0x40(STATEP) - FRAME_END RET .Ldec_out_1: movdqu STATE3, 0x00(STATEP) movdqu STATE4, 0x10(STATEP) movdqu STATE0, 0x20(STATEP) movdqu STATE1, 0x30(STATEP) movdqu STATE2, 0x40(STATEP) - FRAME_END RET .Ldec_out_2: movdqu STATE2, 0x00(STATEP) movdqu STATE3, 0x10(STATEP) movdqu STATE4, 0x20(STATEP) movdqu STATE0, 0x30(STATEP) movdqu STATE1, 0x40(STATEP) - FRAME_END RET .Ldec_out_3: movdqu STATE1, 0x00(STATEP) movdqu STATE2, 0x10(STATEP) movdqu STATE3, 0x20(STATEP) movdqu STATE4, 0x30(STATEP) movdqu STATE0, 0x40(STATEP) - FRAME_END RET .Ldec_out_4: movdqu STATE0, 0x00(STATEP) movdqu STATE1, 0x10(STATEP) movdqu STATE2, 0x20(STATEP) movdqu STATE3, 0x30(STATEP) movdqu STATE4, 0x40(STATEP) - FRAME_END RET .Ldec_out: - FRAME_END RET SYM_FUNC_END(aegis128_aesni_dec) /* * void aegis128_aesni_dec_tail(struct aegis_state *state, const u8 *src, @@ -547,11 +519,10 @@ SYM_FUNC_END(aegis128_aesni_dec) SYM_FUNC_START(aegis128_aesni_dec_tail) .set STATEP, %rdi .set SRC, %rsi .set DST, %rdx .set LEN, %ecx /* {load,store}_partial rely on this being %ecx */ - FRAME_BEGIN /* load the state: */ movdqu 0x00(STATEP), STATE0 movdqu 0x10(STATEP), STATE1 movdqu 0x20(STATEP), STATE2 @@ -584,12 +555,10 @@ SYM_FUNC_START(aegis128_aesni_dec_tail) movdqu STATE4, 0x00(STATEP) movdqu STATE0, 0x10(STATEP) movdqu STATE1, 0x20(STATEP) movdqu STATE2, 0x30(STATEP) movdqu STATE3, 0x40(STATEP) - - FRAME_END RET SYM_FUNC_END(aegis128_aesni_dec_tail) /* * void aegis128_aesni_final(struct aegis_state *state, @@ -599,11 +568,10 @@ SYM_FUNC_END(aegis128_aesni_dec_tail) SYM_FUNC_START(aegis128_aesni_final) .set STATEP, %rdi .set TAG_XOR, %rsi .set ASSOCLEN, %edx .set CRYPTLEN, %ecx - FRAME_BEGIN /* load the state: */ movdqu 0x00(STATEP), STATE0 movdqu 0x10(STATEP), STATE1 movdqu 0x20(STATEP), STATE2 @@ -634,9 +602,7 @@ SYM_FUNC_START(aegis128_aesni_final) pxor STATE2, MSG pxor STATE3, MSG pxor STATE4, MSG movdqu MSG, (TAG_XOR) - - FRAME_END RET SYM_FUNC_END(aegis128_aesni_final) From patchwork Mon Oct 7 01:24:30 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Eric Biggers X-Patchwork-Id: 833398 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id B8D58175AE for ; Mon, 7 Oct 2024 01:24:58 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1728264298; cv=none; b=V/yaTTv8HIdIuC/bn3o+H7GcDwLRf/1c4qBu3MXAMaWMAMD62mNwIU/IzeZHm+FxEpvko5pxAK23c+cgY/44pQMrm34DmKxeJPII6whx0a5wlROq2nKu69Fv8DjcYAzcT0GoV8hY8CPwT+VikBF7olwovmoJsp8wAdRCQGesyrg= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1728264298; c=relaxed/simple; bh=fSfzw890ze1jClqifVSPYJ9E3yn1ff9x7i5uiQyp+/I=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=L+ncOaphzUR74phUlWKkuMuNqbAYNA9M86stdPlvf3wvjp4FmBfPuRtqCO+LpZC/3TS0AQn7d6m4PrxDFkB061edp0lREjAw0DPdNrhML+t/iZ45P19arT1z7MYKmw0gmiESc+yWc8iDpBzjD1pD6MOtisYJpxSrvJmMn+B/fWM= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=akOx/DcL; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="akOx/DcL" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 6EFC7C4CED2; Mon, 7 Oct 2024 01:24:58 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1728264298; bh=fSfzw890ze1jClqifVSPYJ9E3yn1ff9x7i5uiQyp+/I=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=akOx/DcLTl59EkqSuD9tZsXpeDCMmMU8QQDPpovnnGBl7Ffe2sdFFo3s9o5koAzbH qPMnWlnH3FBManKdYgecsywxrFH7OCQ9/A+O/2uz/U41yGbFjbSHO903H8a58AAxTD WD7cHFIP1kvJgue+cShvrSN69p1U6Lkyuc3HpEguMCtZ86r+Owx/10ifXXLAjOFGVU cN1Kmt15Pi6/pcx1YY8eZXZsIITWlwbyiyDzx2xM/LOArmuPtMB2rUGuVC5cQd3AmL RszFeVsfuYUpA8l0td9mLzlEgZcBOV3GYUiBAyPSQ24g/A2XqnuedEy9p+VSw6Lq63 T0tpLSjkMkYJQ== From: Eric Biggers To: linux-crypto@vger.kernel.org Cc: x86@kernel.org, Ondrej Mosnacek Subject: [PATCH 10/10] crypto: x86/aegis128 - remove unneeded RETs Date: Sun, 6 Oct 2024 18:24:30 -0700 Message-ID: <20241007012430.163606-11-ebiggers@kernel.org> X-Mailer: git-send-email 2.46.2 In-Reply-To: <20241007012430.163606-1-ebiggers@kernel.org> References: <20241007012430.163606-1-ebiggers@kernel.org> Precedence: bulk X-Mailing-List: linux-crypto@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Eric Biggers Remove returns that are immediately followed by another return. Signed-off-by: Eric Biggers --- arch/x86/crypto/aegis128-aesni-asm.S | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/x86/crypto/aegis128-aesni-asm.S b/arch/x86/crypto/aegis128-aesni-asm.S index e025c6bfadbd..c899948d24c9 100644 --- a/arch/x86/crypto/aegis128-aesni-asm.S +++ b/arch/x86/crypto/aegis128-aesni-asm.S @@ -276,12 +276,10 @@ SYM_FUNC_START(aegis128_aesni_ad) movdqu STATE1, 0x00(STATEP) movdqu STATE2, 0x10(STATEP) movdqu STATE3, 0x20(STATEP) movdqu STATE4, 0x30(STATEP) movdqu STATE0, 0x40(STATEP) - RET - .Lad_out: RET SYM_FUNC_END(aegis128_aesni_ad) .macro encrypt_block s0 s1 s2 s3 s4 i @@ -369,12 +367,10 @@ SYM_FUNC_START(aegis128_aesni_enc) movdqu STATE0, 0x00(STATEP) movdqu STATE1, 0x10(STATEP) movdqu STATE2, 0x20(STATEP) movdqu STATE3, 0x30(STATEP) movdqu STATE4, 0x40(STATEP) - RET - .Lenc_out: RET SYM_FUNC_END(aegis128_aesni_enc) /* @@ -504,12 +500,10 @@ SYM_FUNC_START(aegis128_aesni_dec) movdqu STATE0, 0x00(STATEP) movdqu STATE1, 0x10(STATEP) movdqu STATE2, 0x20(STATEP) movdqu STATE3, 0x30(STATEP) movdqu STATE4, 0x40(STATEP) - RET - .Ldec_out: RET SYM_FUNC_END(aegis128_aesni_dec) /*