Message ID | 20241025191454.72616-4-ebiggers@kernel.org |
---|---|
State | New |
Headers | show |
Series | Wire up CRC32 library functions to arch-optimized code | expand |
On Fri, 25 Oct 2024 at 21:15, Eric Biggers <ebiggers@kernel.org> wrote: > > From: Eric Biggers <ebiggers@google.com> > > Make the CRC32 library export some flags that indicate which CRC32 > functions are actually executing optimized code at runtime. Set these > correctly from the architectures that implement the CRC32 functions. > > This will be used to determine whether the crc32[c]-$arch shash > algorithms should be registered in the crypto API. btrfs could also > start using these flags instead of the hack that it currently uses where > it parses the crypto_shash_driver_name. > > Signed-off-by: Eric Biggers <ebiggers@google.com> > --- > arch/arm64/lib/crc32-glue.c | 15 +++++++++++++++ > arch/riscv/lib/crc32-riscv.c | 15 +++++++++++++++ > include/linux/crc32.h | 15 +++++++++++++++ > lib/crc32.c | 5 +++++ > 4 files changed, 50 insertions(+) > ... > diff --git a/include/linux/crc32.h b/include/linux/crc32.h > index 58c632533b08..bf26d454b60d 100644 > --- a/include/linux/crc32.h > +++ b/include/linux/crc32.h > @@ -35,10 +35,25 @@ static inline u32 __pure __crc32c_le(u32 crc, const u8 *p, size_t len) > if (IS_ENABLED(CONFIG_CRC32_ARCH)) > return crc32c_le_arch(crc, p, len); > return crc32c_le_base(crc, p, len); > } > > +/* > + * crc32_optimizations contains flags that indicate which CRC32 library > + * functions are using architecture-specific optimizations. Unlike > + * IS_ENABLED(CONFIG_CRC32_ARCH) it takes into account the different CRC32 > + * variants and also whether any needed CPU features are available at runtime. > + */ > +#define CRC32_LE_OPTIMIZATION BIT(0) /* crc32_le() is optimized */ > +#define CRC32_BE_OPTIMIZATION BIT(1) /* crc32_be() is optimized */ > +#define CRC32C_OPTIMIZATION BIT(2) /* __crc32c_le() is optimized */ > +#if IS_ENABLED(CONFIG_CRC32_ARCH) > +extern u32 crc32_optimizations; > +#else > +#define crc32_optimizations 0 > +#endif > + Wouldn't it be cleaner to add a new library function for this, instead of using a global variable? > /** > * crc32_le_combine - Combine two crc32 check values into one. For two > * sequences of bytes, seq1 and seq2 with lengths len1 > * and len2, crc32_le() check values were calculated > * for each, crc1 and crc2. > diff --git a/lib/crc32.c b/lib/crc32.c > index 47151624332e..194de73f30f8 100644 > --- a/lib/crc32.c > +++ b/lib/crc32.c > @@ -336,5 +336,10 @@ u32 __pure crc32_be_base(u32 crc, const u8 *p, size_t len) > { > return crc32_be_generic(crc, p, len, crc32table_be, CRC32_POLY_BE); > } > #endif > EXPORT_SYMBOL(crc32_be_base); > + > +#if IS_ENABLED(CONFIG_CRC32_ARCH) > +u32 crc32_optimizations; > +EXPORT_SYMBOL(crc32_optimizations); > +#endif > -- > 2.47.0 > >
On Fri, 25 Oct 2024 at 23:32, Eric Biggers <ebiggers@kernel.org> wrote: > > On Fri, Oct 25, 2024 at 10:32:14PM +0200, Ard Biesheuvel wrote: > > On Fri, 25 Oct 2024 at 21:15, Eric Biggers <ebiggers@kernel.org> wrote: > > > > > > From: Eric Biggers <ebiggers@google.com> > > > > > > Make the CRC32 library export some flags that indicate which CRC32 > > > functions are actually executing optimized code at runtime. Set these > > > correctly from the architectures that implement the CRC32 functions. > > > > > > This will be used to determine whether the crc32[c]-$arch shash > > > algorithms should be registered in the crypto API. btrfs could also > > > start using these flags instead of the hack that it currently uses where > > > it parses the crypto_shash_driver_name. > > > > > > Signed-off-by: Eric Biggers <ebiggers@google.com> > > > --- > > > arch/arm64/lib/crc32-glue.c | 15 +++++++++++++++ > > > arch/riscv/lib/crc32-riscv.c | 15 +++++++++++++++ > > > include/linux/crc32.h | 15 +++++++++++++++ > > > lib/crc32.c | 5 +++++ > > > 4 files changed, 50 insertions(+) > > > > > ... > > > diff --git a/include/linux/crc32.h b/include/linux/crc32.h > > > index 58c632533b08..bf26d454b60d 100644 > > > --- a/include/linux/crc32.h > > > +++ b/include/linux/crc32.h > > > @@ -35,10 +35,25 @@ static inline u32 __pure __crc32c_le(u32 crc, const u8 *p, size_t len) > > > if (IS_ENABLED(CONFIG_CRC32_ARCH)) > > > return crc32c_le_arch(crc, p, len); > > > return crc32c_le_base(crc, p, len); > > > } > > > > > > +/* > > > + * crc32_optimizations contains flags that indicate which CRC32 library > > > + * functions are using architecture-specific optimizations. Unlike > > > + * IS_ENABLED(CONFIG_CRC32_ARCH) it takes into account the different CRC32 > > > + * variants and also whether any needed CPU features are available at runtime. > > > + */ > > > +#define CRC32_LE_OPTIMIZATION BIT(0) /* crc32_le() is optimized */ > > > +#define CRC32_BE_OPTIMIZATION BIT(1) /* crc32_be() is optimized */ > > > +#define CRC32C_OPTIMIZATION BIT(2) /* __crc32c_le() is optimized */ > > > +#if IS_ENABLED(CONFIG_CRC32_ARCH) > > > +extern u32 crc32_optimizations; > > > +#else > > > +#define crc32_optimizations 0 > > > +#endif > > > + > > > > Wouldn't it be cleaner to add a new library function for this, instead > > of using a global variable? > > The architecture crc32 modules need to be able to write to this. There could be > a setter function and a getter function, but just using a variable is simpler. > If we just add 'u32 crc32_optimizations()', there is no need for those modules to have init/exit hooks, the only thing they need to export is this routine. Or perhaps it could even be a static inline, with the right plumbing of header files. At least on arm64, static inline u32 crc32_optimizations() { if (!alternative_have_const_cap_likely(ARM64_HAS_CRC32)) return 0; return CRC32_LE_OPTIMIZATION | CRC32_BE_OPTIMIZATION | CRC32C_OPTIMIZATION; } should be all we need.
On Fri, Oct 25, 2024 at 11:37:45PM +0200, Ard Biesheuvel wrote: > On Fri, 25 Oct 2024 at 23:32, Eric Biggers <ebiggers@kernel.org> wrote: > > > > On Fri, Oct 25, 2024 at 10:32:14PM +0200, Ard Biesheuvel wrote: > > > On Fri, 25 Oct 2024 at 21:15, Eric Biggers <ebiggers@kernel.org> wrote: > > > > > > > > From: Eric Biggers <ebiggers@google.com> > > > > > > > > Make the CRC32 library export some flags that indicate which CRC32 > > > > functions are actually executing optimized code at runtime. Set these > > > > correctly from the architectures that implement the CRC32 functions. > > > > > > > > This will be used to determine whether the crc32[c]-$arch shash > > > > algorithms should be registered in the crypto API. btrfs could also > > > > start using these flags instead of the hack that it currently uses where > > > > it parses the crypto_shash_driver_name. > > > > > > > > Signed-off-by: Eric Biggers <ebiggers@google.com> > > > > --- > > > > arch/arm64/lib/crc32-glue.c | 15 +++++++++++++++ > > > > arch/riscv/lib/crc32-riscv.c | 15 +++++++++++++++ > > > > include/linux/crc32.h | 15 +++++++++++++++ > > > > lib/crc32.c | 5 +++++ > > > > 4 files changed, 50 insertions(+) > > > > > > > ... > > > > diff --git a/include/linux/crc32.h b/include/linux/crc32.h > > > > index 58c632533b08..bf26d454b60d 100644 > > > > --- a/include/linux/crc32.h > > > > +++ b/include/linux/crc32.h > > > > @@ -35,10 +35,25 @@ static inline u32 __pure __crc32c_le(u32 crc, const u8 *p, size_t len) > > > > if (IS_ENABLED(CONFIG_CRC32_ARCH)) > > > > return crc32c_le_arch(crc, p, len); > > > > return crc32c_le_base(crc, p, len); > > > > } > > > > > > > > +/* > > > > + * crc32_optimizations contains flags that indicate which CRC32 library > > > > + * functions are using architecture-specific optimizations. Unlike > > > > + * IS_ENABLED(CONFIG_CRC32_ARCH) it takes into account the different CRC32 > > > > + * variants and also whether any needed CPU features are available at runtime. > > > > + */ > > > > +#define CRC32_LE_OPTIMIZATION BIT(0) /* crc32_le() is optimized */ > > > > +#define CRC32_BE_OPTIMIZATION BIT(1) /* crc32_be() is optimized */ > > > > +#define CRC32C_OPTIMIZATION BIT(2) /* __crc32c_le() is optimized */ > > > > +#if IS_ENABLED(CONFIG_CRC32_ARCH) > > > > +extern u32 crc32_optimizations; > > > > +#else > > > > +#define crc32_optimizations 0 > > > > +#endif > > > > + > > > > > > Wouldn't it be cleaner to add a new library function for this, instead > > > of using a global variable? > > > > The architecture crc32 modules need to be able to write to this. There could be > > a setter function and a getter function, but just using a variable is simpler. > > > > If we just add 'u32 crc32_optimizations()', there is no need for those > modules to have init/exit hooks, the only thing they need to export is > this routine. > > Or perhaps it could even be a static inline, with the right plumbing > of header files. At least on arm64, > > static inline u32 crc32_optimizations() { > if (!alternative_have_const_cap_likely(ARM64_HAS_CRC32)) > return 0; > return CRC32_LE_OPTIMIZATION | CRC32_BE_OPTIMIZATION | CRC32C_OPTIMIZATION; > } > > should be all we need. In 7 of the 9 affected arches, I already have a module_init function that checks the CPU features in order to set up static keys. (arm64 and riscv already used alternatives patching, so I kept that.) It's slightly convenient to set these flags at the same time, but yes the above solution would work too. - Eric
diff --git a/arch/arm64/lib/crc32-glue.c b/arch/arm64/lib/crc32-glue.c index d7f6e1cbf0d2..16f2b7c04294 100644 --- a/arch/arm64/lib/crc32-glue.c +++ b/arch/arm64/lib/crc32-glue.c @@ -83,7 +83,22 @@ u32 __pure crc32_be_arch(u32 crc, const u8 *p, size_t len) return crc32_be_arm64(crc, p, len); } EXPORT_SYMBOL(crc32_be_arch); +static int __init crc32_arm64_init(void) +{ + if (cpus_have_cap(ARM64_HAS_CRC32)) + crc32_optimizations = CRC32_LE_OPTIMIZATION | + CRC32_BE_OPTIMIZATION | + CRC32C_OPTIMIZATION; + return 0; +} +arch_initcall(crc32_arm64_init); + +static void __exit crc32_arm64_exit(void) +{ +} +module_exit(crc32_arm64_exit); + MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("arm64-optimized CRC32 functions"); diff --git a/arch/riscv/lib/crc32-riscv.c b/arch/riscv/lib/crc32-riscv.c index a3ff7db2a1ce..a61c13d89364 100644 --- a/arch/riscv/lib/crc32-riscv.c +++ b/arch/riscv/lib/crc32-riscv.c @@ -295,7 +295,22 @@ u32 __pure crc32_be_arch(u32 crc, const u8 *p, size_t len) legacy: return crc32_be_base(crc, p, len); } EXPORT_SYMBOL(crc32_be_arch); +static int __init crc32_riscv_init(void) +{ + if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC)) + crc32_optimizations = CRC32_LE_OPTIMIZATION | + CRC32_BE_OPTIMIZATION | + CRC32C_OPTIMIZATION; + return 0; +} +arch_initcall(crc32_riscv_init); + +static void __exit crc32_riscv_exit(void) +{ +} +module_exit(crc32_riscv_exit); + MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Accelerated CRC32 implementation with Zbc extension"); diff --git a/include/linux/crc32.h b/include/linux/crc32.h index 58c632533b08..bf26d454b60d 100644 --- a/include/linux/crc32.h +++ b/include/linux/crc32.h @@ -35,10 +35,25 @@ static inline u32 __pure __crc32c_le(u32 crc, const u8 *p, size_t len) if (IS_ENABLED(CONFIG_CRC32_ARCH)) return crc32c_le_arch(crc, p, len); return crc32c_le_base(crc, p, len); } +/* + * crc32_optimizations contains flags that indicate which CRC32 library + * functions are using architecture-specific optimizations. Unlike + * IS_ENABLED(CONFIG_CRC32_ARCH) it takes into account the different CRC32 + * variants and also whether any needed CPU features are available at runtime. + */ +#define CRC32_LE_OPTIMIZATION BIT(0) /* crc32_le() is optimized */ +#define CRC32_BE_OPTIMIZATION BIT(1) /* crc32_be() is optimized */ +#define CRC32C_OPTIMIZATION BIT(2) /* __crc32c_le() is optimized */ +#if IS_ENABLED(CONFIG_CRC32_ARCH) +extern u32 crc32_optimizations; +#else +#define crc32_optimizations 0 +#endif + /** * crc32_le_combine - Combine two crc32 check values into one. For two * sequences of bytes, seq1 and seq2 with lengths len1 * and len2, crc32_le() check values were calculated * for each, crc1 and crc2. diff --git a/lib/crc32.c b/lib/crc32.c index 47151624332e..194de73f30f8 100644 --- a/lib/crc32.c +++ b/lib/crc32.c @@ -336,5 +336,10 @@ u32 __pure crc32_be_base(u32 crc, const u8 *p, size_t len) { return crc32_be_generic(crc, p, len, crc32table_be, CRC32_POLY_BE); } #endif EXPORT_SYMBOL(crc32_be_base); + +#if IS_ENABLED(CONFIG_CRC32_ARCH) +u32 crc32_optimizations; +EXPORT_SYMBOL(crc32_optimizations); +#endif