
[RFC,7/7] mm: zswap: Use acomp virtual address interface

Message ID 153c340a52090f2ff82f8f066203186a932d3f99.1740651138.git.herbert@gondor.apana.org.au
State: New
Series: [1/7] crypto: iaa - Test the correct request flag

Commit Message

Herbert Xu Feb. 27, 2025, 10:15 a.m. UTC
Use the acomp virtual address interface.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 mm/zswap.c | 23 ++++++++---------------
 1 file changed, 8 insertions(+), 15 deletions(-)

Comments

Herbert Xu Feb. 28, 2025, 8:13 a.m. UTC | #1
On Thu, Feb 27, 2025 at 09:43:52PM +0000, Yosry Ahmed wrote:
>
> If we cannot pass in highmem addresses then the problem is not solved.
> Thanks for pointing this out.

Oh I didn't realise this even existed.

It's much better to handle this in the Crypto API, where the copy
can be done only when it's needed for DMA.

I'll respin this.

Thanks,
Herbert Xu Feb. 28, 2025, 9:54 a.m. UTC | #2
On Fri, Feb 28, 2025 at 04:13:08PM +0800, Herbert Xu wrote:
>
> I'll respin this.

FWIW this is what the interface looks like.  Does it look OK?
Longer term, hardware offload drivers should handle these non-DMA
pointers directly by having their own buffers.  For the time being
I'm simply redirecting these to a software fallback.
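
Roughly, the two setters share a shape (illustrative prototypes only,
matching the usage in the diff below; nothing here is final):

	/* Buffers are ordinary linear kernel memory, safe for device DMA. */
	void acomp_request_set_virt(struct acomp_req *req, const u8 *src,
				    u8 *dst, unsigned int slen,
				    unsigned int dlen);

	/* Same, but the buffers must not be used for device DMA; offload
	 * drivers bounce into their own buffers, or the request falls
	 * back to a software implementation.
	 */
	void acomp_request_set_nondma(struct acomp_req *req, const u8 *src,
				      u8 *dst, unsigned int slen,
				      unsigned int dlen);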

diff --git a/mm/zswap.c b/mm/zswap.c
index 2b5a2398a9be..2fd241c65f80 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -994,30 +994,16 @@ static void zswap_decompress(struct zswap_entry *entry, struct folio *folio)
 
 	acomp_ctx = acomp_ctx_get_cpu_lock(entry->pool);
 	src = zpool_map_handle(zpool, entry->handle, ZPOOL_MM_RO);
-	/*
-	 * If zpool_map_handle is atomic, we cannot reliably utilize its mapped buffer
-	 * to do crypto_acomp_decompress() which might sleep. In such cases, we must
-	 * resort to copying the buffer to a temporary one.
-	 * Meanwhile, zpool_map_handle() might return a non-linearly mapped buffer,
-	 * such as a kmap address of high memory or even ever a vmap address.
-	 * However, sg_init_one is only equipped to handle linearly mapped low memory.
-	 * In such cases, we also must copy the buffer to a temporary and lowmem one.
-	 */
-	if ((acomp_ctx->is_sleepable && !zpool_can_sleep_mapped(zpool)) ||
-	    !virt_addr_valid(src)) {
-		memcpy(acomp_ctx->buffer, src, entry->length);
-		src = acomp_ctx->buffer;
-		zpool_unmap_handle(zpool, entry->handle);
-	}
-
 	dst = kmap_local_folio(folio, 0);
-	acomp_request_set_virt(acomp_ctx->req, src, dst, entry->length, PAGE_SIZE);
+	if (!zpool_can_sleep_mapped(zpool) || !virt_addr_valid(src))
+		acomp_request_set_nondma(acomp_ctx->req, src, dst, entry->length, PAGE_SIZE);
+	else
+		acomp_request_set_virt(acomp_ctx->req, src, dst, entry->length, PAGE_SIZE);
 	BUG_ON(crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait));
 	kunmap_local(dst);
 	BUG_ON(acomp_ctx->req->dlen != PAGE_SIZE);
 
-	if (src != acomp_ctx->buffer)
-		zpool_unmap_handle(zpool, entry->handle);
+	zpool_unmap_handle(zpool, entry->handle);
 	acomp_ctx_put_unlock(acomp_ctx);
 }
Yosry Ahmed Feb. 28, 2025, 3:56 p.m. UTC | #3
On Fri, Feb 28, 2025 at 05:54:53PM +0800, Herbert Xu wrote:
> On Fri, Feb 28, 2025 at 04:13:08PM +0800, Herbert Xu wrote:
> >
> > I'll respin this.
> 
> FWIW this is what the interface looks like.  Does it look OK?
> Longer term hardware offload drivers should handle these non-DMA
> pointers directly by having their own buffers.  For the time being
> I'm simply redirecting these to a software fallback.
> 
> diff --git a/mm/zswap.c b/mm/zswap.c
> index 2b5a2398a9be..2fd241c65f80 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -994,30 +994,16 @@ static void zswap_decompress(struct zswap_entry *entry, struct folio *folio)
>  
>  	acomp_ctx = acomp_ctx_get_cpu_lock(entry->pool);
>  	src = zpool_map_handle(zpool, entry->handle, ZPOOL_MM_RO);
> -	/*
> -	 * If zpool_map_handle is atomic, we cannot reliably utilize its mapped buffer
> -	 * to do crypto_acomp_decompress() which might sleep. In such cases, we must
> -	 * resort to copying the buffer to a temporary one.
> -	 * Meanwhile, zpool_map_handle() might return a non-linearly mapped buffer,
> -	 * such as a kmap address of high memory or even ever a vmap address.
> -	 * However, sg_init_one is only equipped to handle linearly mapped low memory.
> -	 * In such cases, we also must copy the buffer to a temporary and lowmem one.
> -	 */
> -	if ((acomp_ctx->is_sleepable && !zpool_can_sleep_mapped(zpool)) ||
> -	    !virt_addr_valid(src)) {
> -		memcpy(acomp_ctx->buffer, src, entry->length);
> -		src = acomp_ctx->buffer;
> -		zpool_unmap_handle(zpool, entry->handle);
> -	}
> -
>  	dst = kmap_local_folio(folio, 0);
> -	acomp_request_set_virt(acomp_ctx->req, src, dst, entry->length, PAGE_SIZE);
> +	if (!zpool_can_sleep_mapped(zpool) || !virt_addr_valid(src))

Why is the acomp_ctx->is_sleepable check no longer needed?

Also, the zpool_can_sleep_mapped() cases will go away soon-ish, so I was
kinda hoping that the !virt_addr_valid() case goes away too and is
handled internally in the crypto library. Right now the problem is that
virt_to_page() is used to get the underlying page, which doesn't work
for kmap addresses.

> +		acomp_request_set_nondma(acomp_ctx->req, src, dst, entry->length, PAGE_SIZE);
> +	else
> +		acomp_request_set_virt(acomp_ctx->req, src, dst, entry->length, PAGE_SIZE);
>  	BUG_ON(crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait));
>  	kunmap_local(dst);
>  	BUG_ON(acomp_ctx->req->dlen != PAGE_SIZE);
>  
> -	if (src != acomp_ctx->buffer)
> -		zpool_unmap_handle(zpool, entry->handle);
> +	zpool_unmap_handle(zpool, entry->handle);
>  	acomp_ctx_put_unlock(acomp_ctx);
>  }
> 
> -- 
> Email: Herbert Xu <herbert@gondor.apana.org.au>
> Home Page: http://gondor.apana.org.au/~herbert/
> PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
Herbert Xu March 1, 2025, 6:36 a.m. UTC | #4
On Fri, Feb 28, 2025 at 03:56:42PM +0000, Yosry Ahmed wrote:
> 
> Why is the acomp_ctx->is_sleepable check no longer needed?

Because the Crypto API knows which driver uses DMA, there is
no need to check that here.  All the API needs is for you to tell
it whether the memory can undergo DMA.
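
Caller-side, that contract is just a choice between the two setters
(sketch; mem_is_dma_safe stands in for whatever the caller knows about
the buffer's origin):

	if (mem_is_dma_safe)
		acomp_request_set_virt(req, src, dst, slen, dlen);
	else
		acomp_request_set_nondma(req, src, dst, slen, dlen);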

> Also, the zpool_can_sleep_mapped() cases will go away soon-ish, so I was
> kinda hoping that the !virt_addr_valid() case goes away too and is
> handled internally in the crypto library. Right now the problem is that
> virt_to_page() is used to get the underlying page, which doesn't work
> for kmap addresses.

Actually this check looks buggy.  The issue is that there is no
way in general to tell whether a given pointer can be used for
DMA or not.  You have to go to the source (e.g., is it SLAB,
vmalloc, or something else) to make that determination.
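
To illustrate (not a real check; classifying the pointer alone cannot
prove DMA-safety, ptr is a placeholder):

	if (is_vmalloc_addr(ptr)) {
		/* vmalloc/vmap: virtually contiguous only, and
		 * virt_to_page() must not be used on it */
	} else if (virt_addr_valid(ptr)) {
		/* linear map: virt_to_page() works, but the memory may
		 * still be off-limits for DMA (e.g. a task stack) */
	} else {
		/* kmap alias of highmem etc.: no valid struct page can
		 * be derived from the pointer */
	}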

So there is no simple check that the Crypto API can do to determine
this.  We have to rely on you to tell us whether it's OK to do DMA.
Otherwise the assumption must be that it is not safe to do DMA and
a copy is always required.

Now of course this is all irrelevant if you use software crypto
that does not involve DMA.  So regardless of whether you can do
DMA or not, if you're going through the software path it will just
use that memory as is without any extra copies.

The difference only comes into play if your request is routed to
hardware offload.

In fact it looks like this check and fallback path is wrong to
begin with.  It's perfectly OK to do DMA to high memory, assuming
that the device can handle it.  And if not, the device would need
to take care of it anyway, since an SG list can always live in
highmem.
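
That's because an SG entry carries the struct page rather than a
kernel virtual address, so a highmem page needs no special casing
(sketch, with page being any page, including highmem):

	struct scatterlist sg;

	sg_init_table(&sg, 1);
	sg_set_page(&sg, page, PAGE_SIZE, 0);	/* fine even if PageHighMem(page) */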

I thought this was a lot more complicated and you had some weird
arbitrary pointer from an unknown source.  But if it's just highmem
I can get rid of the memcpy for you right now.

Cheers,
Herbert Xu March 1, 2025, 7:34 a.m. UTC | #5
On Thu, Feb 27, 2025 at 10:38:47AM -0800, Eric Biggers wrote:
>
> Well, unfortunately this patchset still uses sg_init_one() on the virtual
> address, so AFAICS it doesn't work with arbitrary virtual addresses.

Right, you can't do sg_init_one on highmem memory.  But the problem
here is that this pointer (the source for decompression) should
never have been linear to start with.

Had it been an SG list, things would have just worked.

In fact, digging deeper reveals even more reasons why it should be
non-linear.  The object that we're decompressing is often split over
two physical pages.  In order to produce a linear pointer, the
zsmalloc code is actually forcing a copy (incidentally, that's
why an extra copy is being added in zswap because the zsmalloc
copy is done with a per-CPU buffer that cannot be used with async).

Had this been done with SG lists, none of these issues would have
existed and things would just work whether you're doing software
compression or hardware offload.
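
For instance, an object straddling two physical pages could be
described to the API with no copy at all (sketch; page0, page1, off
and len are placeholders for what zsmalloc knows internally):

	struct scatterlist sg[2];

	sg_init_table(sg, 2);
	sg_set_page(&sg[0], page0, PAGE_SIZE - off, off);
	sg_set_page(&sg[1], page1, len - (PAGE_SIZE - off), 0);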

So I'm withdrawing my acomp patch-set because the premise was wrong.
The only user for acomp actually wants SG lists.

Cheers,

Patch

diff --git a/mm/zswap.c b/mm/zswap.c
index 6504174fbc6a..2b5a2398a9be 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -925,27 +925,20 @@  static bool zswap_compress(struct page *page, struct zswap_entry *entry,
 			   struct zswap_pool *pool)
 {
 	struct crypto_acomp_ctx *acomp_ctx;
-	struct scatterlist input, output;
 	int comp_ret = 0, alloc_ret = 0;
 	unsigned int dlen = PAGE_SIZE;
 	unsigned long handle;
 	struct zpool *zpool;
+	const u8 *src;
 	char *buf;
 	gfp_t gfp;
 	u8 *dst;
 
 	acomp_ctx = acomp_ctx_get_cpu_lock(pool);
+	src = kmap_local_page(page);
 	dst = acomp_ctx->buffer;
-	sg_init_table(&input, 1);
-	sg_set_page(&input, page, PAGE_SIZE, 0);
 
-	/*
-	 * We need PAGE_SIZE * 2 here since there maybe over-compression case,
-	 * and hardware-accelerators may won't check the dst buffer size, so
-	 * giving the dst buffer with enough length to avoid buffer overflow.
-	 */
-	sg_init_one(&output, dst, PAGE_SIZE * 2);
-	acomp_request_set_params(acomp_ctx->req, &input, &output, PAGE_SIZE, dlen);
+	acomp_request_set_virt(acomp_ctx->req, src, dst, PAGE_SIZE, dlen);
 
 	/*
 	 * it maybe looks a little bit silly that we send an asynchronous request,
@@ -960,6 +953,7 @@  static bool zswap_compress(struct page *page, struct zswap_entry *entry,
 	 * acomp instance, so multiple threads can do (de)compression in parallel.
 	 */
 	comp_ret = crypto_wait_req(crypto_acomp_compress(acomp_ctx->req), &acomp_ctx->wait);
+	kunmap_local(src);
 	dlen = acomp_ctx->req->dlen;
 	if (comp_ret)
 		goto unlock;
@@ -994,9 +988,9 @@  static bool zswap_compress(struct page *page, struct zswap_entry *entry,
 static void zswap_decompress(struct zswap_entry *entry, struct folio *folio)
 {
 	struct zpool *zpool = entry->pool->zpool;
-	struct scatterlist input, output;
 	struct crypto_acomp_ctx *acomp_ctx;
 	u8 *src;
+	u8 *dst;
 
 	acomp_ctx = acomp_ctx_get_cpu_lock(entry->pool);
 	src = zpool_map_handle(zpool, entry->handle, ZPOOL_MM_RO);
@@ -1016,11 +1010,10 @@  static void zswap_decompress(struct zswap_entry *entry, struct folio *folio)
 		zpool_unmap_handle(zpool, entry->handle);
 	}
 
-	sg_init_one(&input, src, entry->length);
-	sg_init_table(&output, 1);
-	sg_set_folio(&output, folio, PAGE_SIZE, 0);
-	acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, PAGE_SIZE);
+	dst = kmap_local_folio(folio, 0);
+	acomp_request_set_virt(acomp_ctx->req, src, dst, entry->length, PAGE_SIZE);
 	BUG_ON(crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait));
+	kunmap_local(dst);
 	BUG_ON(acomp_ctx->req->dlen != PAGE_SIZE);
 
 	if (src != acomp_ctx->buffer)