[RFC,6/6] btrfs: zlib: add support for zlib-deflate through acomp

Message ID 20240426110941.5456-7-giovanni.cabiddu@intel.com
State New
Series btrfs: offload zlib-deflate to accelerators

Commit Message

Giovanni Cabiddu April 26, 2024, 10:54 a.m. UTC
From: Weigang Li <weigang.li@intel.com>

Add support for zlib compression and decompression through the acomp API.
Input pages are gathered into a scatterlist and sent to acomp as a single
request. Since acomp is asynchronous, the calling thread is put to sleep,
freeing up the CPU, and is woken up by the acomp callback once the
operation completes.
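
The sleep/wake-up is handled with the stock crypto_wait_req() /
crypto_req_done() helpers. Stripped of page handling and fallback logic,
the core of the request path added by this patch is roughly the following
sketch (the function name and scatterlist parameters are illustrative
only):

    /* Needs <crypto/acompress.h> and <linux/scatterlist.h>. */
    static int zlib_acomp_compress_sg(struct scatterlist *in_sgl,
                                      struct scatterlist *out_sgl,
                                      unsigned int slen, unsigned int dlen,
                                      unsigned long *out_len)
    {
        struct crypto_acomp *tfm;
        struct acomp_req *req;
        struct crypto_wait wait;
        int ret;

        crypto_init_wait(&wait);

        tfm = crypto_alloc_acomp("zlib-deflate", 0, 0);
        if (IS_ERR(tfm))
            return PTR_ERR(tfm);

        req = acomp_request_alloc(tfm);
        if (!req) {
            crypto_free_acomp(tfm);
            return -ENOMEM;
        }

        acomp_request_set_params(req, in_sgl, out_sgl, slen, dlen);
        acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
                                   crypto_req_done, &wait);

        /* Asynchronous submit; crypto_wait_req() puts the caller to sleep
         * until crypto_req_done() runs from the acomp completion callback. */
        ret = crypto_wait_req(crypto_acomp_compress(req), &wait);
        if (!ret)
            *out_len = req->dlen;

        acomp_request_free(req);
        crypto_free_acomp(tfm);
        return ret;
    }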

This patch does not change the BTRFS on-disk format, which means that files
compressed by hardware engines can be decompressed by the zlib software
library, and vice versa.

Limitations:
  * The implementation always tries to use an acomp, even if only
    zlib-deflate-scomp is present (a possible mitigation is sketched
    below this list)
  * Acomp does not provide a way to select a compression level
  * Acomp is an asynchronous API, but it is used here synchronously
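
One possible mitigation for the first limitation, not implemented here,
would be to restrict the algorithm lookup to asynchronous implementations
via the crypto API type/mask flags, on the assumption that hardware offload
drivers register their algorithms with CRYPTO_ALG_ASYNC set while the scomp
wrapper does not:

    /* Sketch only: stay on the software zlib path unless a truly
     * asynchronous (likely hardware-backed) acomp is registered. */
    if (!crypto_has_acomp("zlib-deflate", CRYPTO_ALG_ASYNC, CRYPTO_ALG_ASYNC))
        return -ENOENT;

    tfm = crypto_alloc_acomp("zlib-deflate", CRYPTO_ALG_ASYNC,
                             CRYPTO_ALG_ASYNC);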

Signed-off-by: Weigang Li <weigang.li@intel.com>
---
 fs/btrfs/zlib.c | 216 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 216 insertions(+)

Comments

Herbert Xu May 3, 2024, 10:04 a.m. UTC | #1
On Mon, Apr 29, 2024 at 04:21:46PM +0100, Cabiddu, Giovanni wrote:
>
> @Herbert, do you have any objection if we add the compression level to
> the acomp tfm and we add an API to set it? Example:
> 
>     tfm = crypto_alloc_acomp("deflate", 0, 0);
>     acomp_set_level(tfm, compression_level);

Yes, I think that's fine. I'd make it a more generic interface and model
it after setkey, so that you can set any parameter for the given
algorithm.
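
Purely as an illustration of what such a setkey-style hook could look like
(these helpers do not exist in the kernel today; names and signatures are
hypothetical):

    /* Hypothetical generic "set parameters" hook on the acomp tfm,
     * modelled after setkey: the blob is interpreted by the algorithm. */
    int crypto_acomp_setparams(struct crypto_acomp *tfm,
                               const void *params, unsigned int paramslen);

    /* A deflate-specific convenience wrapper could then be: */
    static inline int acomp_set_level(struct crypto_acomp *tfm, int level)
    {
        return crypto_acomp_setparams(tfm, &level, sizeof(level));
    }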

Cheers,
David Sterba May 12, 2025, 5:52 p.m. UTC | #2
On Wed, May 07, 2025 at 09:19:14PM -0700, Eric Biggers wrote:
> On Wed, May 07, 2025 at 02:17:54PM +0200, David Sterba wrote:
> > Anyway, assuming there will be a maintained, packaged in distros and
> > user friendly tool to tweak the linux crypto subsystem I agree we don't
> > have to do it in the filesystem or other subsystems.
> 
> I don't think there ever will be.  NETLINK_CRYPTO is obscure and hardly anyone
> uses it.  The kernel's generic crypto infrastructure is also really cumbersome
> to use, so the trend in the kernel overall has been a move away from the generic
> crypto infrastructure and towards straightforward library APIs (e.g.
> lib/crypto/) that just do the right thing with no configuration needed.

Ok, so on one hand the recommendation is to use an obscure tool and
interface, and on the other hand the kernel is moving towards library APIs.
I don't mind using the library approach, as long as it provides automatic
selection of the fastest implementation available (it could even be an
extra API call, e.g. at mount time).

> btrfs already uses the compression library APIs.  Considering how disastrous
> crypto_acomp has been so far when other people tried to use it, most likely the
> right decision will be to keep using the library APIs for the vast majority of
> btrfs users, and have an alternative code path that uses crypto_acomp only when
> hardware offload is actually being used.

No problem with that. I once had a prototype for async checksumming, and
using ahash was indeed cumbersome, with the mempools and request handling
needed to avoid deadlocks.

> There may also be a way to rework things so that the compression library APIs
> can use hardware offload, in which case the crypto API would play no role at
> all.  I understand the Zstandard library in userspace can use Intel QAT as an
> external sequence provider, so it's been proven that this can be done.

As long as the switch to the library or to QAT can be done in a wrapper, I
don't mind.
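
For reference, the userspace mechanism mentioned above is zstd's
experimental "external sequence producer" API (exposed under
ZSTD_STATIC_LINKING_ONLY in recent libzstd releases); the QAT plugin
supplies the producer callback. A rough sketch, with a placeholder producer
that always defers to the software compressor, assuming the experimental
parameter names are available in the linked libzstd:

    #define ZSTD_STATIC_LINKING_ONLY
    #include <zstd.h>

    /* Placeholder: a real provider (e.g. the QAT zstd plugin) would fill
     * outSeqs with sequences found by the accelerator.  Returning
     * ZSTD_SEQUENCE_PRODUCER_ERROR makes zstd fall back to its internal
     * block compressor (fallback has to be enabled, see below). */
    static size_t dummy_seq_producer(void *state, ZSTD_Sequence *outSeqs,
                                     size_t outSeqsCapacity, const void *src,
                                     size_t srcSize, const void *dict,
                                     size_t dictSize, int compressionLevel,
                                     size_t windowSize)
    {
        (void)state; (void)outSeqs; (void)outSeqsCapacity;
        (void)src; (void)srcSize; (void)dict; (void)dictSize;
        (void)compressionLevel; (void)windowSize;
        return ZSTD_SEQUENCE_PRODUCER_ERROR;
    }

    static size_t compress_with_producer(void *dst, size_t dstCap,
                                         const void *src, size_t srcSize)
    {
        ZSTD_CCtx *cctx = ZSTD_createCCtx();
        size_t ret;

        ZSTD_registerSequenceProducer(cctx, NULL, dummy_seq_producer);
        /* Fall back to software when the producer declines or fails. */
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableSeqProducerFallback, 1);
        ret = ZSTD_compress2(cctx, dst, dstCap, src, srcSize);
        ZSTD_freeCCtx(cctx);
        return ret;
    }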

> BTW, I also have to wonder why this patchset is proposing accelerating zlib
> instead of Zstandard.  Zstandard is a much more modern algorithm.

I think the plan is to support zstd as well, but the QAT integration should
be simpler for zlib. I'd like to see some up-to-date code first to get a
better idea of what and how.

Patch

diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
index e5b3f2003896..b5bbb8c97244 100644
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -18,6 +18,8 @@ 
 #include <linux/pagemap.h>
 #include <linux/bio.h>
 #include <linux/refcount.h>
+#include <crypto/acompress.h>
+#include <linux/scatterlist.h>
 #include "compression.h"
 
 /* workspace buffer size for s390 zlib hardware support */
@@ -33,6 +35,201 @@  struct workspace {
 
 static struct workspace_manager wsm;
 
+static int acomp_comp_pages(struct address_space *mapping, u64 start,
+			    unsigned long len, struct page **pages,
+			    unsigned long *out_pages,
+			    unsigned long *total_in,
+			    unsigned long *total_out)
+{
+	unsigned int nr_src_pages = 0, nr_dst_pages = 0, nr_pages = 0;
+	struct sg_table in_sg = { 0 }, out_sg = { 0 };
+	struct page *in_page, *out_page, **in_pages;
+	struct crypto_acomp *tfm = NULL;
+	struct acomp_req *req = NULL;
+	struct crypto_wait wait;
+	int ret, i;
+
+	nr_src_pages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	in_pages = kcalloc(nr_src_pages, sizeof(struct page *), GFP_KERNEL);
+	if (!in_pages) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	for (i = 0; i < nr_src_pages; i++) {
+		in_page = find_get_page(mapping, start >> PAGE_SHIFT);
+		out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+		if (!in_page || !out_page) {
+			ret = -ENOMEM;
+			goto out;
+		}
+		in_pages[i] = in_page;
+		pages[i] = out_page;
+		nr_dst_pages += 1;
+		start += PAGE_SIZE;
+	}
+
+	ret = sg_alloc_table_from_pages(&in_sg, in_pages, nr_src_pages, 0,
+					nr_src_pages << PAGE_SHIFT, GFP_KERNEL);
+	if (ret)
+		goto out;
+
+	ret = sg_alloc_table_from_pages(&out_sg, pages, nr_dst_pages, 0,
+					nr_dst_pages << PAGE_SHIFT, GFP_KERNEL);
+	if (ret)
+		goto out;
+
+	crypto_init_wait(&wait);
+	tfm = crypto_alloc_acomp("zlib-deflate", 0, 0);
+	if (IS_ERR(tfm)) {
+		ret = PTR_ERR(tfm);
+		goto out;
+	}
+
+	req = acomp_request_alloc(tfm);
+	if (!req) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	acomp_request_set_params(req, in_sg.sgl, out_sg.sgl, len,
+				 nr_dst_pages << PAGE_SHIFT);
+	acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+				   crypto_req_done, &wait);
+
+	ret = crypto_wait_req(crypto_acomp_compress(req), &wait);
+	if (ret)
+		goto out;
+
+	*total_in = len;
+	*total_out = req->dlen;
+	nr_pages = (*total_out + PAGE_SIZE - 1) >> PAGE_SHIFT;
+
+out:
+	sg_free_table(&in_sg);
+	sg_free_table(&out_sg);
+
+	if (in_pages) {
+		for (i = 0; i < nr_src_pages; i++)
+			put_page(in_pages[i]);
+		kfree(in_pages);
+	}
+
+	/* Free unused output pages */
+	for (i = nr_pages; i < nr_dst_pages; i++)
+		put_page(pages[i]);
+
+	if (req)
+		acomp_request_free(req);
+
+	if (tfm)
+		crypto_free_acomp(tfm);
+
+	*out_pages = nr_pages;
+
+	return ret;
+}
+
+static int acomp_zlib_decomp_bio(struct page **in_pages,
+				 struct compressed_bio *cb, size_t srclen,
+				 unsigned long total_pages_in)
+{
+	unsigned int nr_dst_pages = BTRFS_MAX_COMPRESSED_PAGES;
+	struct sg_table in_sg = { 0 }, out_sg = { 0 };
+	struct bio *orig_bio = &cb->orig_bbio->bio;
+	char *data_out = NULL, *bv_buf = NULL;
+	int copy_len = 0, bytes_left = 0;
+	struct crypto_acomp *tfm = NULL;
+	struct page **out_pages = NULL;
+	struct acomp_req *req = NULL;
+	struct crypto_wait wait;
+	struct bio_vec bvec;
+	int ret, i = 0;
+
+	ret = sg_alloc_table_from_pages(&in_sg, in_pages, total_pages_in,
+					0, srclen, GFP_KERNEL);
+	if (ret)
+		goto out;
+
+	out_pages = kcalloc(nr_dst_pages, sizeof(struct page *), GFP_KERNEL);
+	if (!out_pages) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	for (i = 0; i < nr_dst_pages; i++) {
+		out_pages[i] = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+		if (!out_pages[i]) {
+			ret = -ENOMEM;
+			goto out;
+		}
+	}
+
+	ret = sg_alloc_table_from_pages(&out_sg, out_pages, nr_dst_pages, 0,
+					nr_dst_pages << PAGE_SHIFT, GFP_KERNEL);
+	if (ret)
+		goto out;
+
+	crypto_init_wait(&wait);
+	tfm = crypto_alloc_acomp("zlib-deflate", 0, 0);
+	if (IS_ERR(tfm)) {
+		ret = PTR_ERR(tfm);
+		goto out;
+	}
+
+	req = acomp_request_alloc(tfm);
+	if (!req) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	acomp_request_set_params(req, in_sg.sgl, out_sg.sgl, srclen,
+				 nr_dst_pages << PAGE_SHIFT);
+	acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+				   crypto_req_done, &wait);
+
+	ret = crypto_wait_req(crypto_acomp_decompress(req), &wait);
+	if (ret)
+		goto out;
+
+	/* Copy decompressed buffer to bio pages */
+	bytes_left = req->dlen;
+	for (i = 0; i < nr_dst_pages; i++) {
+		copy_len = bytes_left > PAGE_SIZE ? PAGE_SIZE : bytes_left;
+		data_out = kmap_local_page(out_pages[i]);
+
+		bvec = bio_iter_iovec(orig_bio, orig_bio->bi_iter);
+		bv_buf = kmap_local_page(bvec.bv_page);
+		memcpy(bv_buf, data_out, copy_len);
+		kunmap_local(bv_buf);
+
+		bio_advance(orig_bio, copy_len);
+		if (!orig_bio->bi_iter.bi_size)
+			break;
+		bytes_left -= copy_len;
+		if (bytes_left <= 0)
+			break;
+	}
+out:
+	sg_free_table(&in_sg);
+	sg_free_table(&out_sg);
+
+	if (out_pages) {
+		for (i = 0; i < nr_dst_pages; i++) {
+			if (out_pages[i])
+				put_page(out_pages[i]);
+		}
+		kfree(out_pages);
+	}
+
+	if (req)
+		acomp_request_free(req);
+	if (tfm)
+		crypto_free_acomp(tfm);
+
+	return ret;
+}
+
 struct list_head *zlib_get_workspace(unsigned int level)
 {
 	struct list_head *ws = btrfs_get_workspace(BTRFS_COMPRESS_ZLIB, level);
@@ -108,6 +305,15 @@  int zlib_compress_pages(struct list_head *ws, struct address_space *mapping,
 	unsigned long nr_dest_pages = *out_pages;
 	const unsigned long max_out = nr_dest_pages * PAGE_SIZE;
 
+	if (crypto_has_acomp("zlib-deflate", 0, 0)) {
+		ret = acomp_comp_pages(mapping, start, len, pages, out_pages,
+				       total_in, total_out);
+		if (!ret)
+			return ret;
+
+		pr_warn("BTRFS: acomp compression failed: ret = %d\n", ret);
+		/* Fallback to SW implementation if HW compression failed */
+	}
 	*out_pages = 0;
 	*total_out = 0;
 	*total_in = 0;
@@ -281,6 +487,16 @@  int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
 	unsigned long buf_start;
 	struct page **pages_in = cb->compressed_pages;
 
+	if (crypto_has_acomp("zlib-deflate", 0, 0)) {
+		ret = acomp_zlib_decomp_bio(pages_in, cb, srclen,
+					    total_pages_in);
+		if (!ret)
+			return ret;
+
+		pr_warn("BTRFS: acomp decompression failed, ret=%d\n", ret);
+		/* Fallback to SW implementation if HW decompression failed */
+	}
+
 	data_in = kmap_local_page(pages_in[page_in_index]);
 	workspace->strm.next_in = data_in;
 	workspace->strm.avail_in = min_t(size_t, srclen, PAGE_SIZE);