Message ID | 20230603023426.1064431-5-richard.henderson@linaro.org |
---|---|
State | New |
Headers | show |
Series | crypto: Provide aes-round.h and host accel | expand |
On Sat, 3 Jun 2023 at 04:34, Richard Henderson <richard.henderson@linaro.org> wrote: > > Start adding infrastructure for accelerating guest AES. > Begin with a SubBytes + ShiftRows primitive. > > Signed-off-by: Richard Henderson <richard.henderson@linaro.org> > --- > host/include/generic/host/aes-round.h | 15 +++++++++ > include/crypto/aes-round.h | 41 +++++++++++++++++++++++ > crypto/aes.c | 47 +++++++++++++++++++++++++++ > 3 files changed, 103 insertions(+) > create mode 100644 host/include/generic/host/aes-round.h > create mode 100644 include/crypto/aes-round.h > > diff --git a/host/include/generic/host/aes-round.h b/host/include/generic/host/aes-round.h > new file mode 100644 > index 0000000000..598242c603 > --- /dev/null > +++ b/host/include/generic/host/aes-round.h > @@ -0,0 +1,15 @@ > +/* > + * No host specific aes acceleration. > + * SPDX-License-Identifier: GPL-2.0-or-later > + */ > + > +#ifndef HOST_AES_ROUND_H > +#define HOST_AES_ROUND_H > + > +#define HAVE_AES_ACCEL false > +#define ATTR_AES_ACCEL > + > +void aesenc_SB_SR_accel(AESState *, const AESState *, bool) > + QEMU_ERROR("unsupported accel"); > + > +#endif > diff --git a/include/crypto/aes-round.h b/include/crypto/aes-round.h > new file mode 100644 > index 0000000000..784e1daee6 > --- /dev/null > +++ b/include/crypto/aes-round.h > @@ -0,0 +1,41 @@ > +/* > + * SPDX-License-Identifier: GPL-2.0-or-later > + * AES round fragments, generic version > + * > + * Copyright (C) 2023 Linaro, Ltd. > + */ > + > +#ifndef CRYPTO_AES_ROUND_H > +#define CRYPTO_AES_ROUND_H > + > +/* Hosts with acceleration will usually need a 16-byte vector type. */ > +typedef uint8_t AESStateVec __attribute__((vector_size(16))); > + > +typedef union { > + uint8_t b[16]; > + uint32_t w[4]; > + uint64_t d[4]; > + AESStateVec v; > +} AESState; > + > +#include "host/aes-round.h" > + > +/* > + * Perform SubBytes + ShiftRows. 
> + */ > + > +void aesenc_SB_SR_gen(AESState *ret, const AESState *st); > +void aesenc_SB_SR_genrev(AESState *ret, const AESState *st); > + > +static inline void aesenc_SB_SR(AESState *r, const AESState *st, bool be) > +{ > + if (HAVE_AES_ACCEL) { > + aesenc_SB_SR_accel(r, st, be); > + } else if (HOST_BIG_ENDIAN == be) { > + aesenc_SB_SR_gen(r, st); > + } else { > + aesenc_SB_SR_genrev(r, st); > + } > +} > + > +#endif /* CRYPTO_AES_ROUND_H */ > diff --git a/crypto/aes.c b/crypto/aes.c > index 1309a13e91..708838315a 100644 > --- a/crypto/aes.c > +++ b/crypto/aes.c > @@ -29,6 +29,7 @@ > */ > #include "qemu/osdep.h" > #include "crypto/aes.h" > +#include "crypto/aes-round.h" > > typedef uint32_t u32; > typedef uint8_t u8; > @@ -1251,6 +1252,52 @@ static const u32 rcon[] = { > 0x1B000000, 0x36000000, /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */ > }; > > +/* Perform SubBytes + ShiftRows. */ > +static inline void > +aesenc_SB_SR_swap(AESState *r, const AESState *st, bool swap) > +{ > + const int swap_b = swap ? 15 : 0; > + uint8_t t; > + > + /* These four indexes are not swizzled. */ > + r->b[swap_b ^ 0x0] = AES_sbox[st->b[swap_b ^ AES_SH_0]]; > + r->b[swap_b ^ 0x4] = AES_sbox[st->b[swap_b ^ AES_SH_4]]; > + r->b[swap_b ^ 0x8] = AES_sbox[st->b[swap_b ^ AES_SH_8]]; > + r->b[swap_b ^ 0xc] = AES_sbox[st->b[swap_b ^ AES_SH_C]]; > + > + /* Otherwise, break cycles. */ > + This is only needed if r == st, right? 
> + t = AES_sbox[st->b[swap_b ^ AES_SH_D]]; > + r->b[swap_b ^ 0x1] = AES_sbox[st->b[swap_b ^ AES_SH_1]]; > + r->b[swap_b ^ 0x5] = AES_sbox[st->b[swap_b ^ AES_SH_5]]; > + r->b[swap_b ^ 0x9] = AES_sbox[st->b[swap_b ^ AES_SH_9]]; > + r->b[swap_b ^ 0xd] = t; > + > + t = AES_sbox[st->b[swap_b ^ AES_SH_A]]; > + r->b[swap_b ^ 0x2] = AES_sbox[st->b[swap_b ^ AES_SH_2]]; > + r->b[swap_b ^ 0xa] = t; > + > + t = AES_sbox[st->b[swap_b ^ AES_SH_E]]; > + r->b[swap_b ^ 0x6] = AES_sbox[st->b[swap_b ^ AES_SH_6]]; > + r->b[swap_b ^ 0xe] = t; > + > + t = AES_sbox[st->b[swap_b ^ AES_SH_7]]; > + r->b[swap_b ^ 0x3] = AES_sbox[st->b[swap_b ^ AES_SH_3]]; > + r->b[swap_b ^ 0xf] = AES_sbox[st->b[swap_b ^ AES_SH_F]]; > + r->b[swap_b ^ 0xb] = AES_sbox[st->b[swap_b ^ AES_SH_B]]; > + r->b[swap_b ^ 0x7] = t; > +} > + > +void aesenc_SB_SR_gen(AESState *r, const AESState *st) > +{ > + aesenc_SB_SR_swap(r, st, false); > +} > + > +void aesenc_SB_SR_genrev(AESState *r, const AESState *st) > +{ > + aesenc_SB_SR_swap(r, st, true); > +} > + > /** > * Expand the cipher key into the encryption key schedule. > */ > -- > 2.34.1 >
On 6/3/23 06:15, Ard Biesheuvel wrote: >> diff --git a/crypto/aes.c b/crypto/aes.c >> index 1309a13e91..708838315a 100644 >> --- a/crypto/aes.c >> +++ b/crypto/aes.c >> @@ -29,6 +29,7 @@ >> */ >> #include "qemu/osdep.h" >> #include "crypto/aes.h" >> +#include "crypto/aes-round.h" >> >> typedef uint32_t u32; >> typedef uint8_t u8; >> @@ -1251,6 +1252,52 @@ static const u32 rcon[] = { >> 0x1B000000, 0x36000000, /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */ >> }; >> >> +/* Perform SubBytes + ShiftRows. */ >> +static inline void >> +aesenc_SB_SR_swap(AESState *r, const AESState *st, bool swap) >> +{ >> + const int swap_b = swap ? 15 : 0; >> + uint8_t t; >> + >> + /* These four indexes are not swizzled. */ >> + r->b[swap_b ^ 0x0] = AES_sbox[st->b[swap_b ^ AES_SH_0]]; >> + r->b[swap_b ^ 0x4] = AES_sbox[st->b[swap_b ^ AES_SH_4]]; >> + r->b[swap_b ^ 0x8] = AES_sbox[st->b[swap_b ^ AES_SH_8]]; >> + r->b[swap_b ^ 0xc] = AES_sbox[st->b[swap_b ^ AES_SH_C]]; >> + >> + /* Otherwise, break cycles. */ >> + > > This is only needed if r == st, right? Yes. This is, perhaps, where using symbolic AES_SH_X while assuming knowledge of the value does not aid understanding. 
r~ > >> + t = AES_sbox[st->b[swap_b ^ AES_SH_D]]; >> + r->b[swap_b ^ 0x1] = AES_sbox[st->b[swap_b ^ AES_SH_1]]; >> + r->b[swap_b ^ 0x5] = AES_sbox[st->b[swap_b ^ AES_SH_5]]; >> + r->b[swap_b ^ 0x9] = AES_sbox[st->b[swap_b ^ AES_SH_9]]; >> + r->b[swap_b ^ 0xd] = t; >> + >> + t = AES_sbox[st->b[swap_b ^ AES_SH_A]]; >> + r->b[swap_b ^ 0x2] = AES_sbox[st->b[swap_b ^ AES_SH_2]]; >> + r->b[swap_b ^ 0xa] = t; >> + >> + t = AES_sbox[st->b[swap_b ^ AES_SH_E]]; >> + r->b[swap_b ^ 0x6] = AES_sbox[st->b[swap_b ^ AES_SH_6]]; >> + r->b[swap_b ^ 0xe] = t; >> + >> + t = AES_sbox[st->b[swap_b ^ AES_SH_7]]; >> + r->b[swap_b ^ 0x3] = AES_sbox[st->b[swap_b ^ AES_SH_3]]; >> + r->b[swap_b ^ 0xf] = AES_sbox[st->b[swap_b ^ AES_SH_F]]; >> + r->b[swap_b ^ 0xb] = AES_sbox[st->b[swap_b ^ AES_SH_B]]; >> + r->b[swap_b ^ 0x7] = t; >> +} >> + >> +void aesenc_SB_SR_gen(AESState *r, const AESState *st) >> +{ >> + aesenc_SB_SR_swap(r, st, false); >> +} >> + >> +void aesenc_SB_SR_genrev(AESState *r, const AESState *st) >> +{ >> + aesenc_SB_SR_swap(r, st, true); >> +} >> + >> /** >> * Expand the cipher key into the encryption key schedule. >> */ >> -- >> 2.34.1 >>
diff --git a/host/include/generic/host/aes-round.h b/host/include/generic/host/aes-round.h new file mode 100644 index 0000000000..598242c603 --- /dev/null +++ b/host/include/generic/host/aes-round.h @@ -0,0 +1,15 @@ +/* + * No host specific aes acceleration. + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef HOST_AES_ROUND_H +#define HOST_AES_ROUND_H + +#define HAVE_AES_ACCEL false +#define ATTR_AES_ACCEL + +void aesenc_SB_SR_accel(AESState *, const AESState *, bool) + QEMU_ERROR("unsupported accel"); + +#endif diff --git a/include/crypto/aes-round.h b/include/crypto/aes-round.h new file mode 100644 index 0000000000..784e1daee6 --- /dev/null +++ b/include/crypto/aes-round.h @@ -0,0 +1,41 @@ +/* + * SPDX-License-Identifier: GPL-2.0-or-later + * AES round fragments, generic version + * + * Copyright (C) 2023 Linaro, Ltd. + */ + +#ifndef CRYPTO_AES_ROUND_H +#define CRYPTO_AES_ROUND_H + +/* Hosts with acceleration will usually need a 16-byte vector type. */ +typedef uint8_t AESStateVec __attribute__((vector_size(16))); + +typedef union { + uint8_t b[16]; + uint32_t w[4]; + uint64_t d[4]; + AESStateVec v; +} AESState; + +#include "host/aes-round.h" + +/* + * Perform SubBytes + ShiftRows. 
+ */ + +void aesenc_SB_SR_gen(AESState *ret, const AESState *st); +void aesenc_SB_SR_genrev(AESState *ret, const AESState *st); + +static inline void aesenc_SB_SR(AESState *r, const AESState *st, bool be) +{ + if (HAVE_AES_ACCEL) { + aesenc_SB_SR_accel(r, st, be); + } else if (HOST_BIG_ENDIAN == be) { + aesenc_SB_SR_gen(r, st); + } else { + aesenc_SB_SR_genrev(r, st); + } +} + +#endif /* CRYPTO_AES_ROUND_H */ diff --git a/crypto/aes.c b/crypto/aes.c index 1309a13e91..708838315a 100644 --- a/crypto/aes.c +++ b/crypto/aes.c @@ -29,6 +29,7 @@ */ #include "qemu/osdep.h" #include "crypto/aes.h" +#include "crypto/aes-round.h" typedef uint32_t u32; typedef uint8_t u8; @@ -1251,6 +1252,52 @@ static const u32 rcon[] = { 0x1B000000, 0x36000000, /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */ }; +/* Perform SubBytes + ShiftRows. */ +static inline void +aesenc_SB_SR_swap(AESState *r, const AESState *st, bool swap) +{ + const int swap_b = swap ? 15 : 0; + uint8_t t; + + /* These four indexes are not swizzled. */ + r->b[swap_b ^ 0x0] = AES_sbox[st->b[swap_b ^ AES_SH_0]]; + r->b[swap_b ^ 0x4] = AES_sbox[st->b[swap_b ^ AES_SH_4]]; + r->b[swap_b ^ 0x8] = AES_sbox[st->b[swap_b ^ AES_SH_8]]; + r->b[swap_b ^ 0xc] = AES_sbox[st->b[swap_b ^ AES_SH_C]]; + + /* Otherwise, break cycles. 
*/ + + t = AES_sbox[st->b[swap_b ^ AES_SH_D]]; + r->b[swap_b ^ 0x1] = AES_sbox[st->b[swap_b ^ AES_SH_1]]; + r->b[swap_b ^ 0x5] = AES_sbox[st->b[swap_b ^ AES_SH_5]]; + r->b[swap_b ^ 0x9] = AES_sbox[st->b[swap_b ^ AES_SH_9]]; + r->b[swap_b ^ 0xd] = t; + + t = AES_sbox[st->b[swap_b ^ AES_SH_A]]; + r->b[swap_b ^ 0x2] = AES_sbox[st->b[swap_b ^ AES_SH_2]]; + r->b[swap_b ^ 0xa] = t; + + t = AES_sbox[st->b[swap_b ^ AES_SH_E]]; + r->b[swap_b ^ 0x6] = AES_sbox[st->b[swap_b ^ AES_SH_6]]; + r->b[swap_b ^ 0xe] = t; + + t = AES_sbox[st->b[swap_b ^ AES_SH_7]]; + r->b[swap_b ^ 0x3] = AES_sbox[st->b[swap_b ^ AES_SH_3]]; + r->b[swap_b ^ 0xf] = AES_sbox[st->b[swap_b ^ AES_SH_F]]; + r->b[swap_b ^ 0xb] = AES_sbox[st->b[swap_b ^ AES_SH_B]]; + r->b[swap_b ^ 0x7] = t; +} + +void aesenc_SB_SR_gen(AESState *r, const AESState *st) +{ + aesenc_SB_SR_swap(r, st, false); +} + +void aesenc_SB_SR_genrev(AESState *r, const AESState *st) +{ + aesenc_SB_SR_swap(r, st, true); +} + /** * Expand the cipher key into the encryption key schedule. */
Start adding infrastructure for accelerating guest AES. Begin with a SubBytes + ShiftRows primitive. Signed-off-by: Richard Henderson <richard.henderson@linaro.org> --- host/include/generic/host/aes-round.h | 15 +++++++++ include/crypto/aes-round.h | 41 +++++++++++++++++++++++ crypto/aes.c | 47 +++++++++++++++++++++++++++ 3 files changed, 103 insertions(+) create mode 100644 host/include/generic/host/aes-round.h create mode 100644 include/crypto/aes-round.h