[PATCHv6,02/16] x86/asm: Introduce inline memcpy and memset

Message ID 20250620135325.3300848-3-kirill.shutemov@linux.intel.com
Series: x86: Enable Linear Address Space Separation support

Commit Message

Kirill A. Shutemov June 20, 2025, 1:53 p.m. UTC
Extract memcpy and memset functions from copy_user_generic() and
__clear_user().

They can be used as guaranteed-inline memcpy and memset in place of the
GCC builtins whenever necessary. LASS requires them to handle
text_poke(), where the copy must not become an out-of-line call while
user access is open.
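
For illustration only (poke_copy() below is hypothetical, not part of
this patch): under LASS, text_poke()'s temporary mapping lives at a
user-range address, so the write has to sit between stac() and clac(),
and the copy must stay inline rather than becoming the library call the
compiler may emit for __builtin_memcpy():

	static void poke_copy(void *dst, const void *src, size_t len)
	{
		stac();				/* open the user access window */
		__inline_memcpy(dst, src, len);	/* never leaves the window */
		clac();				/* close it again */
	}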

Originally-by: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/all/20241029184840.GJ14555@noisy.programming.kicks-ass.net/
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
 arch/x86/include/asm/string.h     | 45 +++++++++++++++++++++++++++++++
 arch/x86/include/asm/uaccess_64.h | 37 +++++++------------------
 arch/x86/lib/clear_page_64.S      | 10 +++++--
 3 files changed, 62 insertions(+), 30 deletions(-)
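
A note on the new helpers' return convention: the exception fixup lands
after the string operation with %rcx holding the number of unprocessed
bytes (0 on success), and each helper returns the original destination
plus that count. Callers recover the residue by subtracting the
starting pointer, as copy_user_generic() and __clear_user() below do.
A hypothetical wrapper, shown only to illustrate the convention:

	static unsigned long copy_residue(void *dst, const void *src,
					  unsigned long len)
	{
		void *ret = __inline_memcpy(dst, src, len);

		/* 0 on success, otherwise the number of uncopied bytes */
		return ret - dst;
	}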

Patch

diff --git a/arch/x86/include/asm/string.h b/arch/x86/include/asm/string.h
index c3c2c1914d65..d75e965d1ce4 100644
--- a/arch/x86/include/asm/string.h
+++ b/arch/x86/include/asm/string.h
@@ -1,6 +1,51 @@ 
 /* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_STRING_H
+#define _ASM_X86_STRING_H
+
+#include <asm/asm.h>
+#include <asm/alternative.h>
+
 #ifdef CONFIG_X86_32
 # include <asm/string_32.h>
 #else
 # include <asm/string_64.h>
 #endif
+
+#ifdef CONFIG_X86_64
+#define ALT_64(orig, alt, feat) ALTERNATIVE(orig, alt, feat)
+#else
+#define ALT_64(orig, alt, feat) orig
+#endif
+
+static __always_inline void *__inline_memcpy(void *to, const void *from, size_t len)
+{
+	void *ret = to;
+
+	asm volatile("1:\n\t"
+		     ALT_64("rep movsb",
+			    "call rep_movs_alternative", ALT_NOT(X86_FEATURE_FSRM))
+		     "2:\n\t"
+		     _ASM_EXTABLE_UA(1b, 2b)
+		     :"+c" (len), "+D" (to), "+S" (from), ASM_CALL_CONSTRAINT
+		     : : "memory", _ASM_AX);
+
+	return ret + len;
+}
+
+static __always_inline void *__inline_memset(void *addr, int v, size_t len)
+{
+	void *ret = addr;
+
+	asm volatile("1:\n\t"
+		     ALT_64("rep stosb",
+			    "call rep_stos_alternative", ALT_NOT(X86_FEATURE_FSRS))
+		     "2:\n\t"
+		     _ASM_EXTABLE_UA(1b, 2b)
+		     : "+c" (len), "+D" (addr), ASM_CALL_CONSTRAINT
+		     : "a" ((uint8_t)v)
+		     : "memory", _ASM_SI);
+
+	return ret + len;
+}
+
+#endif /* _ASM_X86_STRING_H */
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index c8a5ae35c871..30cc318eb3ed 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -13,6 +13,7 @@ 
 #include <asm/page.h>
 #include <asm/percpu.h>
 #include <asm/runtime-const.h>
+#include <asm/string.h>
 
 /*
  * Virtual variable: there's no actual backing store for this,
@@ -118,21 +119,12 @@  rep_movs_alternative(void *to, const void *from, unsigned len);
 static __always_inline __must_check unsigned long
 copy_user_generic(void *to, const void *from, unsigned long len)
 {
+	void *ret;
+
 	stac();
-	/*
-	 * If CPU has FSRM feature, use 'rep movs'.
-	 * Otherwise, use rep_movs_alternative.
-	 */
-	asm volatile(
-		"1:\n\t"
-		ALTERNATIVE("rep movsb",
-			    "call rep_movs_alternative", ALT_NOT(X86_FEATURE_FSRM))
-		"2:\n"
-		_ASM_EXTABLE_UA(1b, 2b)
-		:"+c" (len), "+D" (to), "+S" (from), ASM_CALL_CONSTRAINT
-		: : "memory", "rax");
+	ret = __inline_memcpy(to, from, len);
 	clac();
-	return len;
+	return ret - to;
 }
 
 static __always_inline __must_check unsigned long
@@ -178,25 +170,14 @@  rep_stos_alternative(void __user *addr, unsigned long len);
 
 static __always_inline __must_check unsigned long __clear_user(void __user *addr, unsigned long size)
 {
+	void *ret;
+
 	might_fault();
 	stac();
-
-	/*
-	 * No memory constraint because it doesn't change any memory gcc
-	 * knows about.
-	 */
-	asm volatile(
-		"1:\n\t"
-		ALTERNATIVE("rep stosb",
-			    "call rep_stos_alternative", ALT_NOT(X86_FEATURE_FSRS))
-		"2:\n"
-	       _ASM_EXTABLE_UA(1b, 2b)
-	       : "+c" (size), "+D" (addr), ASM_CALL_CONSTRAINT
-	       : "a" (0));
-
+	ret = __inline_memset((__force void *)addr, 0, size);
 	clac();
 
-	return size;
+	return ret - (__force void *)addr;
 }
 
 static __always_inline unsigned long clear_user(void __user *to, unsigned long n)
diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S
index a508e4a8c66a..ca94828def62 100644
--- a/arch/x86/lib/clear_page_64.S
+++ b/arch/x86/lib/clear_page_64.S
@@ -55,17 +55,23 @@  SYM_FUNC_END(clear_page_erms)
 EXPORT_SYMBOL_GPL(clear_page_erms)
 
 /*
- * Default clear user-space.
+ * Default memset.
  * Input:
  * rdi destination
+ * rsi scratch
  * rcx count
- * rax is zero
+ * al is value
  *
  * Output:
  * rcx: uncleared bytes or 0 if successful.
  */
 SYM_FUNC_START(rep_stos_alternative)
 	ANNOTATE_NOENDBR
+
+	movzbq %al, %rsi
+	movabs $0x0101010101010101, %rax
+	imulq %rsi, %rax	/* broadcast %al to all 8 bytes of %rax */
+
 	cmpq $64,%rcx
 	jae .Lunrolled
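
The three instructions added at the top of rep_stos_alternative()
broadcast the memset byte from %al into all eight bytes of %rax before
the unrolled store loop: multiplying a byte by 0x0101010101010101
replicates it into every byte lane, and the two-operand imulq keeps the
product in %rax without touching %rdx, so %rsi stays the only scratch
register the header comment promises. A C rendering of the trick
(illustration only):

	static inline unsigned long broadcast_byte(unsigned char v)
	{
		/* e.g. 0xab -> 0xabababababababab */
		return (unsigned long)v * 0x0101010101010101UL;
	}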