From patchwork Wed Apr 30 20:52:47 2025
X-Patchwork-Submitter: "Sridhar, Kanchana P"
X-Patchwork-Id: 886551
From: Kanchana P Sridhar
To: linux-kernel@vger.kernel.org, linux-mm@kvack.org, hannes@cmpxchg.org, yosry.ahmed@linux.dev, nphamcs@gmail.com, chengming.zhou@linux.dev, usamaarif642@gmail.com, ryan.roberts@arm.com, 21cnbao@gmail.com, ying.huang@linux.alibaba.com, akpm@linux-foundation.org, linux-crypto@vger.kernel.org, herbert@gondor.apana.org.au, davem@davemloft.net, clabbe@baylibre.com, ardb@kernel.org,
ebiggers@google.com, surenb@google.com, kristen.c.accardi@intel.com Cc: wajdi.k.feghali@intel.com, vinodh.gopal@intel.com, kanchana.p.sridhar@intel.com Subject: [PATCH v9 01/19] crypto: acomp - Remove request chaining Date: Wed, 30 Apr 2025 13:52:47 -0700 Message-Id: <20250430205305.22844-2-kanchana.p.sridhar@intel.com> X-Mailer: git-send-email 2.27.0 In-Reply-To: <20250430205305.22844-1-kanchana.p.sridhar@intel.com> References: <20250430205305.22844-1-kanchana.p.sridhar@intel.com> Precedence: bulk X-Mailing-List: linux-crypto@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Request chaining requires the user to do too much book keeping. Remove it from acomp. Signed-off-by: Herbert Xu --- crypto/acompress.c | 117 ++++++++-------------------- crypto/scompress.c | 18 +---- include/crypto/acompress.h | 14 ---- include/crypto/internal/acompress.h | 5 -- 4 files changed, 35 insertions(+), 119 deletions(-) diff --git a/crypto/acompress.c b/crypto/acompress.c index f7a3fbe5447e..82fb3c04e68f 100644 --- a/crypto/acompress.c +++ b/crypto/acompress.c @@ -161,7 +161,6 @@ static void acomp_save_req(struct acomp_req *req, crypto_completion_t cplt) state->data = req->base.data; req->base.complete = cplt; req->base.data = state; - state->req0 = req; } static void acomp_restore_req(struct acomp_req *req) @@ -172,23 +171,20 @@ static void acomp_restore_req(struct acomp_req *req) req->base.data = state->data; } -static void acomp_reqchain_virt(struct acomp_req_chain *state, int err) +static void acomp_reqchain_virt(struct acomp_req *req) { - struct acomp_req *req = state->cur; + struct acomp_req_chain *state = &req->chain; unsigned int slen = req->slen; unsigned int dlen = req->dlen; - req->base.err = err; - state = &req->chain; - if (state->flags & CRYPTO_ACOMP_REQ_SRC_VIRT) acomp_request_set_src_dma(req, state->src, slen); else if (state->flags & CRYPTO_ACOMP_REQ_SRC_FOLIO) - acomp_request_set_src_folio(req, state->sfolio, state->soff, slen); + acomp_request_set_src_folio(req, state->sfolio, req->soff, slen); if (state->flags & CRYPTO_ACOMP_REQ_DST_VIRT) acomp_request_set_dst_dma(req, state->dst, dlen); else if (state->flags & CRYPTO_ACOMP_REQ_DST_FOLIO) - acomp_request_set_dst_folio(req, state->dfolio, state->doff, dlen); + acomp_request_set_dst_folio(req, state->dfolio, req->doff, dlen); } static void acomp_virt_to_sg(struct acomp_req *req) @@ -213,7 +209,6 @@ static void acomp_virt_to_sg(struct acomp_req *req) size_t off = req->soff; state->sfolio = folio; - state->soff = off; sg_init_table(&state->ssg, 1); sg_set_page(&state->ssg, folio_page(folio, off / PAGE_SIZE), slen, off % PAGE_SIZE); @@ -233,7 +228,6 @@ static void acomp_virt_to_sg(struct acomp_req *req) size_t off = req->doff; state->dfolio = folio; - state->doff = off; sg_init_table(&state->dsg, 1); sg_set_page(&state->dsg, folio_page(folio, off / PAGE_SIZE), dlen, off % PAGE_SIZE); @@ -241,8 +235,7 @@ static void acomp_virt_to_sg(struct acomp_req *req) } } -static int acomp_do_nondma(struct acomp_req_chain *state, - struct acomp_req *req) +static int acomp_do_nondma(struct acomp_req *req, bool comp) { u32 keep = CRYPTO_ACOMP_REQ_SRC_VIRT | CRYPTO_ACOMP_REQ_SRC_NONDMA | @@ -259,7 +252,7 @@ static int acomp_do_nondma(struct acomp_req_chain *state, fbreq->slen = req->slen; fbreq->dlen = req->dlen; - if (state->op == crypto_acomp_reqtfm(req)->compress) + if (comp) err = crypto_acomp_compress(fbreq); else err = crypto_acomp_decompress(fbreq); @@ -268,114 +261,70 @@ static int acomp_do_nondma(struct acomp_req_chain 
*state, return err; } -static int acomp_do_one_req(struct acomp_req_chain *state, - struct acomp_req *req) +static int acomp_do_one_req(struct acomp_req *req, bool comp) { - state->cur = req; - if (acomp_request_isnondma(req)) - return acomp_do_nondma(state, req); + return acomp_do_nondma(req, comp); acomp_virt_to_sg(req); - return state->op(req); + return comp ? crypto_acomp_reqtfm(req)->compress(req) : + crypto_acomp_reqtfm(req)->decompress(req); } -static int acomp_reqchain_finish(struct acomp_req *req0, int err, u32 mask) +static int acomp_reqchain_finish(struct acomp_req *req, int err) { - struct acomp_req_chain *state = req0->base.data; - struct acomp_req *req = state->cur; - struct acomp_req *n; - - acomp_reqchain_virt(state, err); - - if (req != req0) - list_add_tail(&req->base.list, &req0->base.list); - - list_for_each_entry_safe(req, n, &state->head, base.list) { - list_del_init(&req->base.list); - - req->base.flags &= mask; - req->base.complete = acomp_reqchain_done; - req->base.data = state; - - err = acomp_do_one_req(state, req); - - if (err == -EINPROGRESS) { - if (!list_empty(&state->head)) - err = -EBUSY; - goto out; - } - - if (err == -EBUSY) - goto out; - - acomp_reqchain_virt(state, err); - list_add_tail(&req->base.list, &req0->base.list); - } - - acomp_restore_req(req0); - -out: + acomp_reqchain_virt(req); + acomp_restore_req(req); return err; } static void acomp_reqchain_done(void *data, int err) { - struct acomp_req_chain *state = data; - crypto_completion_t compl = state->compl; + struct acomp_req *req = data; + crypto_completion_t compl; - data = state->data; + compl = req->chain.compl; + data = req->chain.data; - if (err == -EINPROGRESS) { - if (!list_empty(&state->head)) - return; + if (err == -EINPROGRESS) goto notify; - } - err = acomp_reqchain_finish(state->req0, err, - CRYPTO_TFM_REQ_MAY_BACKLOG); - if (err == -EBUSY) - return; + err = acomp_reqchain_finish(req, err); notify: compl(data, err); } -static int acomp_do_req_chain(struct acomp_req *req, - int (*op)(struct acomp_req *req)) +static int acomp_do_req_chain(struct acomp_req *req, bool comp) { - struct crypto_acomp *tfm = crypto_acomp_reqtfm(req); - struct acomp_req_chain *state; int err; - if (crypto_acomp_req_chain(tfm) || - (!acomp_request_chained(req) && acomp_request_issg(req))) - return op(req); - acomp_save_req(req, acomp_reqchain_done); - state = req->base.data; - state->op = op; - state->src = NULL; - INIT_LIST_HEAD(&state->head); - list_splice_init(&req->base.list, &state->head); - - err = acomp_do_one_req(state, req); + err = acomp_do_one_req(req, comp); if (err == -EBUSY || err == -EINPROGRESS) - return -EBUSY; + return err; - return acomp_reqchain_finish(req, err, ~0); + return acomp_reqchain_finish(req, err); } int crypto_acomp_compress(struct acomp_req *req) { - return acomp_do_req_chain(req, crypto_acomp_reqtfm(req)->compress); + struct crypto_acomp *tfm = crypto_acomp_reqtfm(req); + + if (crypto_acomp_req_chain(tfm) || acomp_request_issg(req)) + crypto_acomp_reqtfm(req)->compress(req); + return acomp_do_req_chain(req, true); } EXPORT_SYMBOL_GPL(crypto_acomp_compress); int crypto_acomp_decompress(struct acomp_req *req) { - return acomp_do_req_chain(req, crypto_acomp_reqtfm(req)->decompress); + struct crypto_acomp *tfm = crypto_acomp_reqtfm(req); + + if (crypto_acomp_req_chain(tfm) || acomp_request_issg(req)) + crypto_acomp_reqtfm(req)->decompress(req); + return acomp_do_req_chain(req, false); } EXPORT_SYMBOL_GPL(crypto_acomp_decompress); diff --git a/crypto/scompress.c 
b/crypto/scompress.c index 5762fcc63b51..c1ce12564299 100644 --- a/crypto/scompress.c +++ b/crypto/scompress.c @@ -293,28 +293,14 @@ static int scomp_acomp_comp_decomp(struct acomp_req *req, int dir) return ret; } -static int scomp_acomp_chain(struct acomp_req *req, int dir) -{ - struct acomp_req *r2; - int err; - - err = scomp_acomp_comp_decomp(req, dir); - req->base.err = err; - - list_for_each_entry(r2, &req->base.list, base.list) - r2->base.err = scomp_acomp_comp_decomp(r2, dir); - - return err; -} - static int scomp_acomp_compress(struct acomp_req *req) { - return scomp_acomp_chain(req, 1); + return scomp_acomp_comp_decomp(req, 1); } static int scomp_acomp_decompress(struct acomp_req *req) { - return scomp_acomp_chain(req, 0); + return scomp_acomp_comp_decomp(req, 0); } static void crypto_exit_scomp_ops_async(struct crypto_tfm *tfm) diff --git a/include/crypto/acompress.h b/include/crypto/acompress.h index c497c73baf13..267d557daeb1 100644 --- a/include/crypto/acompress.h +++ b/include/crypto/acompress.h @@ -52,10 +52,6 @@ struct acomp_req; struct folio; struct acomp_req_chain { - struct list_head head; - struct acomp_req *req0; - struct acomp_req *cur; - int (*op)(struct acomp_req *req); crypto_completion_t compl; void *data; struct scatterlist ssg; @@ -68,8 +64,6 @@ struct acomp_req_chain { u8 *dst; struct folio *dfolio; }; - size_t soff; - size_t doff; u32 flags; }; @@ -349,8 +343,6 @@ static inline void acomp_request_set_callback(struct acomp_req *req, req->base.data = data; req->base.flags &= keep; req->base.flags |= flgs & ~keep; - - crypto_reqchain_init(&req->base); } /** @@ -558,12 +550,6 @@ static inline void acomp_request_set_dst_folio(struct acomp_req *req, req->base.flags |= CRYPTO_ACOMP_REQ_DST_FOLIO; } -static inline void acomp_request_chain(struct acomp_req *req, - struct acomp_req *head) -{ - crypto_request_chain(&req->base, &head->base); -} - /** * crypto_acomp_compress() -- Invoke asynchronous compress operation * diff --git a/include/crypto/internal/acompress.h b/include/crypto/internal/acompress.h index aaf59f3236fa..b69d818d7e68 100644 --- a/include/crypto/internal/acompress.h +++ b/include/crypto/internal/acompress.h @@ -98,11 +98,6 @@ void crypto_unregister_acomp(struct acomp_alg *alg); int crypto_register_acomps(struct acomp_alg *algs, int count); void crypto_unregister_acomps(struct acomp_alg *algs, int count); -static inline bool acomp_request_chained(struct acomp_req *req) -{ - return crypto_request_chained(&req->base); -} - static inline bool acomp_request_issg(struct acomp_req *req) { return !(req->base.flags & (CRYPTO_ACOMP_REQ_SRC_VIRT | From patchwork Wed Apr 30 20:52:48 2025 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Sridhar, Kanchana P" X-Patchwork-Id: 886550 Received: from mgamail.intel.com (mgamail.intel.com [198.175.65.15]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id BE63D2983E9; Wed, 30 Apr 2025 20:53:10 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=198.175.65.15 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1746046392; cv=none; b=SC1MO8lE2YmZexkL9kkX9OmcvPywmcy3uiySJLppIBip6TQ9loYXECLWViY13SagByGeIeQWOlNKG9Oo0H7vN7xqN49Kf2x2bv1hMe7VgrtqI9dyHmNznxpuSSdD3XdcSiaPkRa9B0i/40DafHpkvnt0WQEMR38kZxM8J5paLn8= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1746046392; 
From: Kanchana P Sridhar
To: linux-kernel@vger.kernel.org, linux-mm@kvack.org, hannes@cmpxchg.org, yosry.ahmed@linux.dev, nphamcs@gmail.com, chengming.zhou@linux.dev, usamaarif642@gmail.com, ryan.roberts@arm.com, 21cnbao@gmail.com, ying.huang@linux.alibaba.com, akpm@linux-foundation.org, linux-crypto@vger.kernel.org, herbert@gondor.apana.org.au, davem@davemloft.net, clabbe@baylibre.com, ardb@kernel.org, ebiggers@google.com, surenb@google.com, kristen.c.accardi@intel.com
Cc: wajdi.k.feghali@intel.com, vinodh.gopal@intel.com, kanchana.p.sridhar@intel.com
Subject: [PATCH v9 02/19] crypto: acomp - Reinstate non-chained crypto_acomp_[de]compress().
Date: Wed, 30 Apr 2025 13:52:48 -0700
Message-Id: <20250430205305.22844-3-kanchana.p.sridhar@intel.com>
In-Reply-To: <20250430205305.22844-1-kanchana.p.sridhar@intel.com>
References: <20250430205305.22844-1-kanchana.p.sridhar@intel.com>
This reverts the request chaining implementations of crypto_acomp_[de]compress() introduced in commit b67a02600372 ("crypto: acomp - Add request chaining and virtual addresses"), since request chaining was subsequently removed from acomp in commit 64929fe8c0a4 ("crypto: acomp - Remove request chaining"). This patch restores the implementations of crypto_acomp_[de]compress() from prior to commit b67a02600372. Signed-off-by: Kanchana P Sridhar --- crypto/acompress.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/crypto/acompress.c b/crypto/acompress.c index 82fb3c04e68f..d08e0fe8cd9e 100644 --- a/crypto/acompress.c +++ b/crypto/acompress.c @@ -310,21 +310,13 @@ static int acomp_do_req_chain(struct acomp_req *req, bool comp) int crypto_acomp_compress(struct acomp_req *req) { - struct crypto_acomp *tfm = crypto_acomp_reqtfm(req); - - if (crypto_acomp_req_chain(tfm) || acomp_request_issg(req)) - crypto_acomp_reqtfm(req)->compress(req); - return acomp_do_req_chain(req, true); + return crypto_acomp_reqtfm(req)->compress(req); } EXPORT_SYMBOL_GPL(crypto_acomp_compress); int crypto_acomp_decompress(struct acomp_req *req) { - struct crypto_acomp *tfm = crypto_acomp_reqtfm(req); - - if (crypto_acomp_req_chain(tfm) || acomp_request_issg(req)) - crypto_acomp_reqtfm(req)->decompress(req); - return acomp_do_req_chain(req, false); + return crypto_acomp_reqtfm(req)->decompress(req); } EXPORT_SYMBOL_GPL(crypto_acomp_decompress);
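For context, the calling convention that the two patches above leave in place is the classic one-request-at-a-time acomp flow, the same pattern the reverted testmgr code in the next patch goes back to. Below is a minimal illustrative sketch (not part of any patch in this series) of a single synchronous compression call; the algorithm name "deflate" and the function name example_compress_once() are placeholders chosen for the example:

#include <crypto/acompress.h>
#include <linux/scatterlist.h>

static int example_compress_once(const void *in, unsigned int ilen,
				 void *out, unsigned int *dlen)
{
	struct crypto_acomp *tfm;
	struct acomp_req *req;
	struct scatterlist src, dst;
	struct crypto_wait wait;
	int ret;

	/* "deflate" is only an example algorithm name. */
	tfm = crypto_alloc_acomp("deflate", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	req = acomp_request_alloc(tfm);
	if (!req) {
		crypto_free_acomp(tfm);
		return -ENOMEM;
	}

	sg_init_one(&src, in, ilen);
	sg_init_one(&dst, out, *dlen);

	crypto_init_wait(&wait);
	acomp_request_set_params(req, &src, &dst, ilen, *dlen);
	acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
				   crypto_req_done, &wait);

	/* With chaining gone, this maps 1:1 onto the tfm's ->compress(). */
	ret = crypto_wait_req(crypto_acomp_compress(req), &wait);
	if (!ret)
		*dlen = req->dlen;

	acomp_request_free(req);
	crypto_free_acomp(tfm);
	return ret;
}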
From patchwork Wed Apr 30 20:52:49 2025
X-Patchwork-Submitter: "Sridhar, Kanchana P"
X-Patchwork-Id: 886549
From: Kanchana P Sridhar
To: linux-kernel@vger.kernel.org, linux-mm@kvack.org, hannes@cmpxchg.org, yosry.ahmed@linux.dev, nphamcs@gmail.com, chengming.zhou@linux.dev, usamaarif642@gmail.com, ryan.roberts@arm.com, 21cnbao@gmail.com, ying.huang@linux.alibaba.com, akpm@linux-foundation.org, linux-crypto@vger.kernel.org, herbert@gondor.apana.org.au, davem@davemloft.net, clabbe@baylibre.com, ardb@kernel.org, ebiggers@google.com, surenb@google.com, kristen.c.accardi@intel.com
Cc: wajdi.k.feghali@intel.com, vinodh.gopal@intel.com, kanchana.p.sridhar@intel.com
Subject: [PATCH v9 03/19] Revert "crypto: testmgr - Add multibuffer acomp testing"
Date: Wed, 30 Apr 2025 13:52:49 -0700
Message-Id: <20250430205305.22844-4-kanchana.p.sridhar@intel.com>
In-Reply-To: <20250430205305.22844-1-kanchana.p.sridhar@intel.com>
References: <20250430205305.22844-1-kanchana.p.sridhar@intel.com>
This reverts commit 99585c2192cb1ce212876e82ef01d1c98c7f4699. Remove the acomp multibuffer tests so that the interface can be redesigned. Signed-off-by: Herbert Xu --- crypto/testmgr.c | 147 +++++++++++++++++++++-------------------- 1 file changed, 64 insertions(+), 83 deletions(-) diff --git a/crypto/testmgr.c b/crypto/testmgr.c index abd609d4c8ef..82977ea25db3 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -58,9 +58,6 @@ module_param(fuzz_iterations, uint, 0644); MODULE_PARM_DESC(fuzz_iterations, "number of fuzz test iterations"); #endif -/* Multibuffer is unlimited. Set arbitrary limit for testing.
*/ -#define MAX_MB_MSGS 16 - #ifdef CONFIG_CRYPTO_MANAGER_DISABLE_TESTS /* a perfect nop */ @@ -3329,48 +3326,27 @@ static int test_acomp(struct crypto_acomp *tfm, int ctcount, int dtcount) { const char *algo = crypto_tfm_alg_driver_name(crypto_acomp_tfm(tfm)); - struct scatterlist *src = NULL, *dst = NULL; - struct acomp_req *reqs[MAX_MB_MSGS] = {}; - char *decomp_out[MAX_MB_MSGS] = {}; - char *output[MAX_MB_MSGS] = {}; - struct crypto_wait wait; - struct acomp_req *req; - int ret = -ENOMEM; unsigned int i; + char *output, *decomp_out; + int ret; + struct scatterlist src, dst; + struct acomp_req *req; + struct crypto_wait wait; - src = kmalloc_array(MAX_MB_MSGS, sizeof(*src), GFP_KERNEL); - if (!src) - goto out; - dst = kmalloc_array(MAX_MB_MSGS, sizeof(*dst), GFP_KERNEL); - if (!dst) - goto out; - - for (i = 0; i < MAX_MB_MSGS; i++) { - reqs[i] = acomp_request_alloc(tfm); - if (!reqs[i]) - goto out; - - acomp_request_set_callback(reqs[i], - CRYPTO_TFM_REQ_MAY_SLEEP | - CRYPTO_TFM_REQ_MAY_BACKLOG, - crypto_req_done, &wait); - if (i) - acomp_request_chain(reqs[i], reqs[0]); - - output[i] = kmalloc(COMP_BUF_SIZE, GFP_KERNEL); - if (!output[i]) - goto out; + output = kmalloc(COMP_BUF_SIZE, GFP_KERNEL); + if (!output) + return -ENOMEM; - decomp_out[i] = kmalloc(COMP_BUF_SIZE, GFP_KERNEL); - if (!decomp_out[i]) - goto out; + decomp_out = kmalloc(COMP_BUF_SIZE, GFP_KERNEL); + if (!decomp_out) { + kfree(output); + return -ENOMEM; } for (i = 0; i < ctcount; i++) { unsigned int dlen = COMP_BUF_SIZE; int ilen = ctemplate[i].inlen; void *input_vec; - int j; input_vec = kmemdup(ctemplate[i].input, ilen, GFP_KERNEL); if (!input_vec) { @@ -3378,61 +3354,70 @@ static int test_acomp(struct crypto_acomp *tfm, goto out; } + memset(output, 0, dlen); crypto_init_wait(&wait); - sg_init_one(src, input_vec, ilen); + sg_init_one(&src, input_vec, ilen); + sg_init_one(&dst, output, dlen); - for (j = 0; j < MAX_MB_MSGS; j++) { - sg_init_one(dst + j, output[j], dlen); - acomp_request_set_params(reqs[j], src, dst + j, ilen, dlen); + req = acomp_request_alloc(tfm); + if (!req) { + pr_err("alg: acomp: request alloc failed for %s\n", + algo); + kfree(input_vec); + ret = -ENOMEM; + goto out; } - req = reqs[0]; + acomp_request_set_params(req, &src, &dst, ilen, dlen); + acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, + crypto_req_done, &wait); + ret = crypto_wait_req(crypto_acomp_compress(req), &wait); if (ret) { pr_err("alg: acomp: compression failed on test %d for %s: ret=%d\n", i + 1, algo, -ret); kfree(input_vec); + acomp_request_free(req); goto out; } ilen = req->dlen; dlen = COMP_BUF_SIZE; + sg_init_one(&src, output, ilen); + sg_init_one(&dst, decomp_out, dlen); crypto_init_wait(&wait); - for (j = 0; j < MAX_MB_MSGS; j++) { - sg_init_one(src + j, output[j], ilen); - sg_init_one(dst + j, decomp_out[j], dlen); - acomp_request_set_params(reqs[j], src + j, dst + j, ilen, dlen); - } - - crypto_wait_req(crypto_acomp_decompress(req), &wait); - for (j = 0; j < MAX_MB_MSGS; j++) { - ret = reqs[j]->base.err; - if (ret) { - pr_err("alg: acomp: compression failed on test %d (%d) for %s: ret=%d\n", - i + 1, j, algo, -ret); - kfree(input_vec); - goto out; - } + acomp_request_set_params(req, &src, &dst, ilen, dlen); - if (reqs[j]->dlen != ctemplate[i].inlen) { - pr_err("alg: acomp: Compression test %d (%d) failed for %s: output len = %d\n", - i + 1, j, algo, reqs[j]->dlen); - ret = -EINVAL; - kfree(input_vec); - goto out; - } + ret = crypto_wait_req(crypto_acomp_decompress(req), &wait); + if (ret) { + pr_err("alg: 
acomp: compression failed on test %d for %s: ret=%d\n", + i + 1, algo, -ret); + kfree(input_vec); + acomp_request_free(req); + goto out; + } - if (memcmp(input_vec, decomp_out[j], reqs[j]->dlen)) { - pr_err("alg: acomp: Compression test %d (%d) failed for %s\n", - i + 1, j, algo); - hexdump(output[j], reqs[j]->dlen); - ret = -EINVAL; - kfree(input_vec); - goto out; - } + if (req->dlen != ctemplate[i].inlen) { + pr_err("alg: acomp: Compression test %d failed for %s: output len = %d\n", + i + 1, algo, req->dlen); + ret = -EINVAL; + kfree(input_vec); + acomp_request_free(req); + goto out; + } + + if (memcmp(input_vec, decomp_out, req->dlen)) { + pr_err("alg: acomp: Compression test %d failed for %s\n", + i + 1, algo); + hexdump(output, req->dlen); + ret = -EINVAL; + kfree(input_vec); + acomp_request_free(req); + goto out; } kfree(input_vec); + acomp_request_free(req); } for (i = 0; i < dtcount; i++) { @@ -3446,9 +3431,10 @@ static int test_acomp(struct crypto_acomp *tfm, goto out; } + memset(output, 0, dlen); crypto_init_wait(&wait); - sg_init_one(src, input_vec, ilen); - sg_init_one(dst, output[0], dlen); + sg_init_one(&src, input_vec, ilen); + sg_init_one(&dst, output, dlen); req = acomp_request_alloc(tfm); if (!req) { @@ -3459,7 +3445,7 @@ static int test_acomp(struct crypto_acomp *tfm, goto out; } - acomp_request_set_params(req, src, dst, ilen, dlen); + acomp_request_set_params(req, &src, &dst, ilen, dlen); acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, crypto_req_done, &wait); @@ -3481,10 +3467,10 @@ static int test_acomp(struct crypto_acomp *tfm, goto out; } - if (memcmp(output[0], dtemplate[i].output, req->dlen)) { + if (memcmp(output, dtemplate[i].output, req->dlen)) { pr_err("alg: acomp: Decompression test %d failed for %s\n", i + 1, algo); - hexdump(output[0], req->dlen); + hexdump(output, req->dlen); ret = -EINVAL; kfree(input_vec); acomp_request_free(req); @@ -3498,13 +3484,8 @@ static int test_acomp(struct crypto_acomp *tfm, ret = 0; out: - acomp_request_free(reqs[0]); - for (i = 0; i < MAX_MB_MSGS; i++) { - kfree(output[i]); - kfree(decomp_out[i]); - } - kfree(dst); - kfree(src); + kfree(decomp_out); + kfree(output); return ret; } From patchwork Wed Apr 30 20:52:50 2025 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Sridhar, Kanchana P" X-Patchwork-Id: 886170 Received: from mgamail.intel.com (mgamail.intel.com [198.175.65.15]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 8A1D82C0318; Wed, 30 Apr 2025 20:53:11 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=198.175.65.15 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1746046393; cv=none; b=urPpGuD4dwAtDO5Ih+jbLjAxiO1ZqE9J3mgtopEfsj5vcfK/rrigS6fvwCpC3/v5OmzqSfWm3uYHvpeJeK1rQR/T92u97+5aXFo5MRv+bupLaKX/Ec0W7YOUVU7/CmurUGfAz7ZV0XUeNOxhVb9YHsXdDrclruKzvGQu2li8q5Y= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1746046393; c=relaxed/simple; bh=sJFxWyA6AJF6CcAZ5xFoncDGNwqA2S5ppE8eb6ZDAWI=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=ZLXmgUXhMGfL6mwooeUaB1O8P/lCCffpA/sGF01m5CRq0N3kplSZJPqwqZeoIaCH2RP9MNaDPWPz/SJ6cC+wi1xo6x8g++qUhRjzkr6Djd2UUE60351sE6XzW49JZR0VqkWy9ZsXp8u5+vkWsXZ5h35mPl8WYvTKrisLwfrL3ZA= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=intel.com; 
From: Kanchana P Sridhar
To: linux-kernel@vger.kernel.org, linux-mm@kvack.org, hannes@cmpxchg.org, yosry.ahmed@linux.dev, nphamcs@gmail.com, chengming.zhou@linux.dev, usamaarif642@gmail.com, ryan.roberts@arm.com, 21cnbao@gmail.com, ying.huang@linux.alibaba.com, akpm@linux-foundation.org, linux-crypto@vger.kernel.org, herbert@gondor.apana.org.au, davem@davemloft.net, clabbe@baylibre.com, ardb@kernel.org, ebiggers@google.com, surenb@google.com, kristen.c.accardi@intel.com
Cc: wajdi.k.feghali@intel.com, vinodh.gopal@intel.com, kanchana.p.sridhar@intel.com
Subject: [PATCH v9 04/19] crypto: scomp - Fix off-by-one bug when calculating last page
Date: Wed, 30 Apr 2025 13:52:50 -0700
Message-Id: <20250430205305.22844-5-kanchana.p.sridhar@intel.com>
In-Reply-To: <20250430205305.22844-1-kanchana.p.sridhar@intel.com>
References: <20250430205305.22844-1-kanchana.p.sridhar@intel.com>
Fix off-by-one bug in the last page calculation for src and dst.
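As an illustration of the bug (editor's example, not part of the original commit message): take PAGE_SIZE = 4096 with soff = 0 and slen = 4096, i.e. a source buffer that exactly fills page 0. The old calculation

    n = slen / PAGE_SIZE;                                /* = 1, already past the last page */
    n += (offset_in_page(slen) + soff - 1) / PAGE_SIZE;  /* (0 + 0 - 1), can wrap if unsigned */

does not yield the index of the last page the buffer actually touches, whereas the corrected form

    n = (slen - 1) / PAGE_SIZE;                          /* = 0 */
    n += (offset_in_page(slen - 1) + soff) / PAGE_SIZE;  /* = (4095 + 0) / 4096 = 0 */

gives n = 0, the true last-page index, and never subtracts one from an offset sum that may already be zero.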
Reported-by: Nhat Pham Fixes: 2d3553ecb4e3 ("crypto: scomp - Remove support for some non-trivial SG lists") Signed-off-by: Herbert Xu --- crypto/scompress.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/crypto/scompress.c b/crypto/scompress.c index c1ce12564299..1ed52b9740c5 100644 --- a/crypto/scompress.c +++ b/crypto/scompress.c @@ -215,8 +215,8 @@ static int scomp_acomp_comp_decomp(struct acomp_req *req, int dir) spage = nth_page(spage, soff / PAGE_SIZE); soff = offset_in_page(soff); - n = slen / PAGE_SIZE; - n += (offset_in_page(slen) + soff - 1) / PAGE_SIZE; + n = (slen - 1) / PAGE_SIZE; + n += (offset_in_page(slen - 1) + soff) / PAGE_SIZE; if (PageHighMem(nth_page(spage, n)) && size_add(soff, slen) > PAGE_SIZE) break; @@ -243,9 +243,9 @@ static int scomp_acomp_comp_decomp(struct acomp_req *req, int dir) dpage = nth_page(dpage, doff / PAGE_SIZE); doff = offset_in_page(doff); - n = dlen / PAGE_SIZE; - n += (offset_in_page(dlen) + doff - 1) / PAGE_SIZE; - if (PageHighMem(dpage + n) && + n = (dlen - 1) / PAGE_SIZE; + n += (offset_in_page(dlen - 1) + doff) / PAGE_SIZE; + if (PageHighMem(nth_page(dpage, n)) && size_add(doff, dlen) > PAGE_SIZE) break; dst = kmap_local_page(dpage) + doff; From patchwork Wed Apr 30 20:52:51 2025 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Sridhar, Kanchana P" X-Patchwork-Id: 886169 Received: from mgamail.intel.com (mgamail.intel.com [198.175.65.15]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id ACF062C17A0; Wed, 30 Apr 2025 20:53:12 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=198.175.65.15 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1746046395; cv=none; b=IkCw+/SidqHeSW+3aFqyh14sm1NV3oPHmPQ2Ybd9q3i5IdPTiZdNUnaSwgLWdul8xqSztdgyixshlXng4eG35PDmaKstj+GGAdey78zPg9zBAFfEnb3OXSd9nvKD3SWKlVO5blcac3jnzN/vEfkmBKywqNbPgu26aWgyj5ORIjc= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1746046395; c=relaxed/simple; bh=vA9TTJwa3s6XVaktr+zThiUrzAKTNXc8bVs3R+ljuY8=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=uFhrx3eS4SDwNP7ieubSv1InnMM61WdMl5MkrcXRUDyabfg1Zt/dtxpah9L6oqIdD0GR4pd0d5iQLw+AMuMBpERfQ4SgQtyKJO3ZGd3KbFsxYStfp2Wi2086vOaMd9ggzPVtJDeYe7qR+krJzRXOYtC9NS8nVuX2TbXpp97s2sY= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=intel.com; spf=pass smtp.mailfrom=intel.com; dkim=pass (2048-bit key) header.d=intel.com header.i=@intel.com header.b=nPYHCY4M; arc=none smtp.client-ip=198.175.65.15 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=intel.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=intel.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=intel.com header.i=@intel.com header.b="nPYHCY4M" DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=intel.com; i=@intel.com; q=dns/txt; s=Intel; t=1746046393; x=1777582393; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=vA9TTJwa3s6XVaktr+zThiUrzAKTNXc8bVs3R+ljuY8=; b=nPYHCY4MJwKuzkIhQ5tn112G8DBSni2EclNpz8/UED6gfATfpEN9Pkaf XGoaJ/sBvl9oIUup6qU34jwRkBWTQQi33yqiTLfSXsgekSp691quPZRRN GfllIO60jxRlnPQlnKFxG0ay7mINAqY81tEN4Hhvj/M47JvmxUJqtkkM8 
From: Kanchana P Sridhar
To: linux-kernel@vger.kernel.org, linux-mm@kvack.org, hannes@cmpxchg.org, yosry.ahmed@linux.dev, nphamcs@gmail.com, chengming.zhou@linux.dev, usamaarif642@gmail.com, ryan.roberts@arm.com, 21cnbao@gmail.com, ying.huang@linux.alibaba.com, akpm@linux-foundation.org, linux-crypto@vger.kernel.org, herbert@gondor.apana.org.au, davem@davemloft.net, clabbe@baylibre.com, ardb@kernel.org, ebiggers@google.com, surenb@google.com, kristen.c.accardi@intel.com
Cc: wajdi.k.feghali@intel.com, vinodh.gopal@intel.com, kanchana.p.sridhar@intel.com
Subject: [PATCH v9 05/19] crypto: iaa - Re-organize the iaa_crypto driver code.
Date: Wed, 30 Apr 2025 13:52:51 -0700
Message-Id: <20250430205305.22844-6-kanchana.p.sridhar@intel.com>
In-Reply-To: <20250430205305.22844-1-kanchana.p.sridhar@intel.com>
References: <20250430205305.22844-1-kanchana.p.sridhar@intel.com>
This patch merely reorganizes the code in iaa_crypto_main.c so that the functions are consolidated into logically related sub-sections of code, without requiring forward declarations. This is expected to make the code more maintainable and to make it easier to replace functional layers and/or add new features. Signed-off-by: Kanchana P Sridhar --- drivers/crypto/intel/iaa/iaa_crypto_main.c | 678 +++++++++++---------- 1 file changed, 348 insertions(+), 330 deletions(-) diff --git a/drivers/crypto/intel/iaa/iaa_crypto_main.c b/drivers/crypto/intel/iaa/iaa_crypto_main.c index 09d9589f2d68..4900f9c72600 100644 --- a/drivers/crypto/intel/iaa/iaa_crypto_main.c +++ b/drivers/crypto/intel/iaa/iaa_crypto_main.c @@ -24,6 +24,10 @@ #define IAA_ALG_PRIORITY 300 +/************************************** + * Driver internal global variables.
+ **************************************/ + /* number of iaa instances probed */ static unsigned int nr_iaa; static unsigned int nr_cpus; @@ -36,54 +40,6 @@ static unsigned int cpus_per_iaa; /* Per-cpu lookup table for balanced wqs */ static struct wq_table_entry __percpu *wq_table; -static struct idxd_wq *wq_table_next_wq(int cpu) -{ - struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu); - - if (++entry->cur_wq >= entry->n_wqs) - entry->cur_wq = 0; - - if (!entry->wqs[entry->cur_wq]) - return NULL; - - pr_debug("%s: returning wq at idx %d (iaa wq %d.%d) from cpu %d\n", __func__, - entry->cur_wq, entry->wqs[entry->cur_wq]->idxd->id, - entry->wqs[entry->cur_wq]->id, cpu); - - return entry->wqs[entry->cur_wq]; -} - -static void wq_table_add(int cpu, struct idxd_wq *wq) -{ - struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu); - - if (WARN_ON(entry->n_wqs == entry->max_wqs)) - return; - - entry->wqs[entry->n_wqs++] = wq; - - pr_debug("%s: added iaa wq %d.%d to idx %d of cpu %d\n", __func__, - entry->wqs[entry->n_wqs - 1]->idxd->id, - entry->wqs[entry->n_wqs - 1]->id, entry->n_wqs - 1, cpu); -} - -static void wq_table_free_entry(int cpu) -{ - struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu); - - kfree(entry->wqs); - memset(entry, 0, sizeof(*entry)); -} - -static void wq_table_clear_entry(int cpu) -{ - struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu); - - entry->n_wqs = 0; - entry->cur_wq = 0; - memset(entry->wqs, 0, entry->max_wqs * sizeof(struct idxd_wq *)); -} - LIST_HEAD(iaa_devices); DEFINE_MUTEX(iaa_devices_lock); @@ -91,36 +47,11 @@ DEFINE_MUTEX(iaa_devices_lock); static bool iaa_crypto_enabled; static bool iaa_crypto_registered; +static struct iaa_compression_mode *iaa_compression_modes[IAA_COMP_MODES_MAX]; + /* Verify results of IAA compress or not */ static bool iaa_verify_compress = true; -static ssize_t verify_compress_show(struct device_driver *driver, char *buf) -{ - return sprintf(buf, "%d\n", iaa_verify_compress); -} - -static ssize_t verify_compress_store(struct device_driver *driver, - const char *buf, size_t count) -{ - int ret = -EBUSY; - - mutex_lock(&iaa_devices_lock); - - if (iaa_crypto_enabled) - goto out; - - ret = kstrtobool(buf, &iaa_verify_compress); - if (ret) - goto out; - - ret = count; -out: - mutex_unlock(&iaa_devices_lock); - - return ret; -} -static DRIVER_ATTR_RW(verify_compress); - /* * The iaa crypto driver supports three 'sync' methods determining how * compressions and decompressions are performed: @@ -155,6 +86,37 @@ static bool async_mode; /* Use interrupts */ static bool use_irq; +/************************************************** + * Driver attributes along with get/set functions. 
+ **************************************************/ + +static ssize_t verify_compress_show(struct device_driver *driver, char *buf) +{ + return sprintf(buf, "%d\n", iaa_verify_compress); +} + +static ssize_t verify_compress_store(struct device_driver *driver, + const char *buf, size_t count) +{ + int ret = -EBUSY; + + mutex_lock(&iaa_devices_lock); + + if (iaa_crypto_enabled) + goto out; + + ret = kstrtobool(buf, &iaa_verify_compress); + if (ret) + goto out; + + ret = count; +out: + mutex_unlock(&iaa_devices_lock); + + return ret; +} +static DRIVER_ATTR_RW(verify_compress); + /** * set_iaa_sync_mode - Set IAA sync mode * @name: The name of the sync mode @@ -217,7 +179,9 @@ static ssize_t sync_mode_store(struct device_driver *driver, } static DRIVER_ATTR_RW(sync_mode); -static struct iaa_compression_mode *iaa_compression_modes[IAA_COMP_MODES_MAX]; +/**************************** + * Driver compression modes. + ****************************/ static int find_empty_iaa_compression_mode(void) { @@ -409,11 +373,6 @@ static void free_device_compression_mode(struct iaa_device *iaa_device, IDXD_OP_FLAG_WR_SRC2_AECS_COMP | \ IDXD_OP_FLAG_AECS_RW_TGLS) -static int check_completion(struct device *dev, - struct iax_completion_record *comp, - bool compress, - bool only_once); - static int init_device_compression_mode(struct iaa_device *iaa_device, struct iaa_compression_mode *mode, int idx, struct idxd_wq *wq) @@ -500,6 +459,11 @@ static void remove_device_compression_modes(struct iaa_device *iaa_device) } } +/*********************************************************** + * Functions for use in crypto probe and remove interfaces: + * allocate/init/query/deallocate devices/wqs. + ***********************************************************/ + static struct iaa_device *iaa_device_alloc(void) { struct iaa_device *iaa_device; @@ -513,18 +477,6 @@ static struct iaa_device *iaa_device_alloc(void) return iaa_device; } -static bool iaa_has_wq(struct iaa_device *iaa_device, struct idxd_wq *wq) -{ - struct iaa_wq *iaa_wq; - - list_for_each_entry(iaa_wq, &iaa_device->wqs, list) { - if (iaa_wq->wq == wq) - return true; - } - - return false; -} - static struct iaa_device *add_iaa_device(struct idxd_device *idxd) { struct iaa_device *iaa_device; @@ -560,6 +512,27 @@ static void del_iaa_device(struct iaa_device *iaa_device) nr_iaa--; } +static void free_iaa_device(struct iaa_device *iaa_device) +{ + if (!iaa_device) + return; + + remove_device_compression_modes(iaa_device); + kfree(iaa_device); +} + +static bool iaa_has_wq(struct iaa_device *iaa_device, struct idxd_wq *wq) +{ + struct iaa_wq *iaa_wq; + + list_for_each_entry(iaa_wq, &iaa_device->wqs, list) { + if (iaa_wq->wq == wq) + return true; + } + + return false; +} + static int add_iaa_wq(struct iaa_device *iaa_device, struct idxd_wq *wq, struct iaa_wq **new_wq) { @@ -612,23 +585,23 @@ static void del_iaa_wq(struct iaa_device *iaa_device, struct idxd_wq *wq) } } -static void clear_wq_table(void) +static void remove_iaa_wq(struct idxd_wq *wq) { - int cpu; - - for (cpu = 0; cpu < nr_cpus; cpu++) - wq_table_clear_entry(cpu); - - pr_debug("cleared wq table\n"); -} + struct iaa_device *iaa_device; -static void free_iaa_device(struct iaa_device *iaa_device) -{ - if (!iaa_device) - return; + list_for_each_entry(iaa_device, &iaa_devices, list) { + if (iaa_has_wq(iaa_device, wq)) { + del_iaa_wq(iaa_device, wq); + break; + } + } - remove_device_compression_modes(iaa_device); - kfree(iaa_device); + if (nr_iaa) { + cpus_per_iaa = (nr_nodes * nr_cpus_per_node) / nr_iaa; + if 
(!cpus_per_iaa) + cpus_per_iaa = 1; + } else + cpus_per_iaa = 1; } static void __free_iaa_wq(struct iaa_wq *iaa_wq) @@ -655,6 +628,75 @@ static void free_iaa_wq(struct iaa_wq *iaa_wq) idxd_wq_set_private(wq, NULL); } +static int save_iaa_wq(struct idxd_wq *wq) +{ + struct iaa_device *iaa_device, *found = NULL; + struct idxd_device *idxd; + struct pci_dev *pdev; + struct device *dev; + int ret = 0; + + list_for_each_entry(iaa_device, &iaa_devices, list) { + if (iaa_device->idxd == wq->idxd) { + idxd = iaa_device->idxd; + pdev = idxd->pdev; + dev = &pdev->dev; + /* + * Check to see that we don't already have this wq. + * Shouldn't happen but we don't control probing. + */ + if (iaa_has_wq(iaa_device, wq)) { + dev_dbg(dev, "same wq probed multiple times for iaa_device %p\n", + iaa_device); + goto out; + } + + found = iaa_device; + + ret = add_iaa_wq(iaa_device, wq, NULL); + if (ret) + goto out; + + break; + } + } + + if (!found) { + struct iaa_device *new_device; + struct iaa_wq *new_wq; + + new_device = add_iaa_device(wq->idxd); + if (!new_device) { + ret = -ENOMEM; + goto out; + } + + ret = add_iaa_wq(new_device, wq, &new_wq); + if (ret) { + del_iaa_device(new_device); + free_iaa_device(new_device); + goto out; + } + + ret = init_iaa_device(new_device, new_wq); + if (ret) { + del_iaa_wq(new_device, new_wq->wq); + del_iaa_device(new_device); + free_iaa_wq(new_wq); + goto out; + } + } + + if (WARN_ON(nr_iaa == 0)) + return -EINVAL; + + cpus_per_iaa = (nr_nodes * nr_cpus_per_node) / nr_iaa; + if (!cpus_per_iaa) + cpus_per_iaa = 1; +out: + return 0; +} + static int iaa_wq_get(struct idxd_wq *wq) { struct idxd_device *idxd = wq->idxd; @@ -702,6 +744,37 @@ static int iaa_wq_put(struct idxd_wq *wq) return ret; } +/*************************************************************** + * Mapping IAA devices and wqs to cores with per-cpu wq_tables. + ***************************************************************/ + +static void wq_table_free_entry(int cpu) +{ + struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu); + + kfree(entry->wqs); + memset(entry, 0, sizeof(*entry)); +} + +static void wq_table_clear_entry(int cpu) +{ + struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu); + + entry->n_wqs = 0; + entry->cur_wq = 0; + memset(entry->wqs, 0, entry->max_wqs * sizeof(struct idxd_wq *)); +} + +static void clear_wq_table(void) +{ + int cpu; + + for (cpu = 0; cpu < nr_cpus; cpu++) + wq_table_clear_entry(cpu); + + pr_debug("cleared wq table\n"); +} + static void free_wq_table(void) { int cpu; @@ -739,92 +812,18 @@ static int alloc_wq_table(int max_wqs) return 0; } -static int save_iaa_wq(struct idxd_wq *wq) +static void wq_table_add(int cpu, struct idxd_wq *wq) { - struct iaa_device *iaa_device, *found = NULL; - struct idxd_device *idxd; - struct pci_dev *pdev; - struct device *dev; - int ret = 0; - - list_for_each_entry(iaa_device, &iaa_devices, list) { - if (iaa_device->idxd == wq->idxd) { - idxd = iaa_device->idxd; - pdev = idxd->pdev; - dev = &pdev->dev; - /* - * Check to see that we don't already have this wq. - * Shouldn't happen but we don't control probing. 
- */ - if (iaa_has_wq(iaa_device, wq)) { - dev_dbg(dev, "same wq probed multiple times for iaa_device %p\n", - iaa_device); - goto out; - } - - found = iaa_device; - - ret = add_iaa_wq(iaa_device, wq, NULL); - if (ret) - goto out; - - break; - } - } - - if (!found) { - struct iaa_device *new_device; - struct iaa_wq *new_wq; - - new_device = add_iaa_device(wq->idxd); - if (!new_device) { - ret = -ENOMEM; - goto out; - } - - ret = add_iaa_wq(new_device, wq, &new_wq); - if (ret) { - del_iaa_device(new_device); - free_iaa_device(new_device); - goto out; - } - - ret = init_iaa_device(new_device, new_wq); - if (ret) { - del_iaa_wq(new_device, new_wq->wq); - del_iaa_device(new_device); - free_iaa_wq(new_wq); - goto out; - } - } - - if (WARN_ON(nr_iaa == 0)) - return -EINVAL; - - cpus_per_iaa = (nr_nodes * nr_cpus_per_node) / nr_iaa; - if (!cpus_per_iaa) - cpus_per_iaa = 1; -out: - return 0; -} + struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu); -static void remove_iaa_wq(struct idxd_wq *wq) -{ - struct iaa_device *iaa_device; + if (WARN_ON(entry->n_wqs == entry->max_wqs)) + return; - list_for_each_entry(iaa_device, &iaa_devices, list) { - if (iaa_has_wq(iaa_device, wq)) { - del_iaa_wq(iaa_device, wq); - break; - } - } + entry->wqs[entry->n_wqs++] = wq; - if (nr_iaa) { - cpus_per_iaa = (nr_nodes * nr_cpus_per_node) / nr_iaa; - if (!cpus_per_iaa) - cpus_per_iaa = 1; - } else - cpus_per_iaa = 1; + pr_debug("%s: added iaa wq %d.%d to idx %d of cpu %d\n", __func__, + entry->wqs[entry->n_wqs - 1]->idxd->id, + entry->wqs[entry->n_wqs - 1]->id, entry->n_wqs - 1, cpu); } static int wq_table_add_wqs(int iaa, int cpu) @@ -937,6 +936,47 @@ static void rebalance_wq_table(void) } } +/*************************************************************** + * Assign work-queues for driver ops using per-cpu wq_tables. + ***************************************************************/ + +static struct idxd_wq *wq_table_next_wq(int cpu) +{ + struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu); + + if (++entry->cur_wq >= entry->n_wqs) + entry->cur_wq = 0; + + if (!entry->wqs[entry->cur_wq]) + return NULL; + + pr_debug("%s: returning wq at idx %d (iaa wq %d.%d) from cpu %d\n", __func__, + entry->cur_wq, entry->wqs[entry->cur_wq]->idxd->id, + entry->wqs[entry->cur_wq]->id, cpu); + + return entry->wqs[entry->cur_wq]; +} + +/************************************************* + * Core iaa_crypto compress/decompress functions. 
+ *************************************************/ + +static int deflate_generic_decompress(struct acomp_req *req) +{ + ACOMP_REQUEST_ON_STACK(fbreq, crypto_acomp_reqtfm(req)); + int ret; + + acomp_request_set_callback(fbreq, 0, NULL, NULL); + acomp_request_set_params(fbreq, req->src, req->dst, req->slen, + req->dlen); + ret = crypto_acomp_decompress(fbreq); + req->dlen = fbreq->dlen; + + update_total_sw_decomp_calls(); + + return ret; +} + static inline int check_completion(struct device *dev, struct iax_completion_record *comp, bool compress, @@ -997,31 +1037,132 @@ static inline int check_completion(struct device *dev, return ret; } -static int deflate_generic_decompress(struct acomp_req *req) +static int iaa_remap_for_verify(struct device *dev, struct iaa_wq *iaa_wq, + struct acomp_req *req, + dma_addr_t *src_addr, dma_addr_t *dst_addr) { - ACOMP_REQUEST_ON_STACK(fbreq, crypto_acomp_reqtfm(req)); - int ret; + int ret = 0; + int nr_sgs; - acomp_request_set_callback(fbreq, 0, NULL, NULL); - acomp_request_set_params(fbreq, req->src, req->dst, req->slen, - req->dlen); - ret = crypto_acomp_decompress(fbreq); - req->dlen = fbreq->dlen; + dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE); + dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE); - update_total_sw_decomp_calls(); + nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_FROM_DEVICE); + if (nr_sgs <= 0 || nr_sgs > 1) { + dev_dbg(dev, "verify: couldn't map src sg for iaa device %d," + " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id, + iaa_wq->wq->id, ret); + ret = -EIO; + goto out; + } + *src_addr = sg_dma_address(req->src); + dev_dbg(dev, "verify: dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p," + " req->slen %d, sg_dma_len(sg) %d\n", *src_addr, nr_sgs, + req->src, req->slen, sg_dma_len(req->src)); + nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_TO_DEVICE); + if (nr_sgs <= 0 || nr_sgs > 1) { + dev_dbg(dev, "verify: couldn't map dst sg for iaa device %d," + " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id, + iaa_wq->wq->id, ret); + ret = -EIO; + dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_FROM_DEVICE); + goto out; + } + *dst_addr = sg_dma_address(req->dst); + dev_dbg(dev, "verify: dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p," + " req->dlen %d, sg_dma_len(sg) %d\n", *dst_addr, nr_sgs, + req->dst, req->dlen, sg_dma_len(req->dst)); +out: return ret; } -static int iaa_remap_for_verify(struct device *dev, struct iaa_wq *iaa_wq, - struct acomp_req *req, - dma_addr_t *src_addr, dma_addr_t *dst_addr); - static int iaa_compress_verify(struct crypto_tfm *tfm, struct acomp_req *req, struct idxd_wq *wq, dma_addr_t src_addr, unsigned int slen, dma_addr_t dst_addr, unsigned int *dlen, - u32 compression_crc); + u32 compression_crc) +{ + struct iaa_device_compression_mode *active_compression_mode; + struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm); + struct iaa_device *iaa_device; + struct idxd_desc *idxd_desc; + struct iax_hw_desc *desc; + struct idxd_device *idxd; + struct iaa_wq *iaa_wq; + struct pci_dev *pdev; + struct device *dev; + int ret = 0; + + iaa_wq = idxd_wq_get_private(wq); + iaa_device = iaa_wq->iaa_device; + idxd = iaa_device->idxd; + pdev = idxd->pdev; + dev = &pdev->dev; + + active_compression_mode = get_iaa_device_compression_mode(iaa_device, ctx->mode); + + idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK); + if (IS_ERR(idxd_desc)) { + dev_dbg(dev, "idxd descriptor allocation failed\n"); + dev_dbg(dev, "iaa compress failed: ret=%ld\n", + PTR_ERR(idxd_desc)); + 
return PTR_ERR(idxd_desc); + } + desc = idxd_desc->iax_hw; + + /* Verify (optional) - decompress and check crc, suppress dest write */ + + desc->flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR | IDXD_OP_FLAG_CC; + desc->opcode = IAX_OPCODE_DECOMPRESS; + desc->decompr_flags = IAA_DECOMP_FLAGS | IAA_DECOMP_SUPPRESS_OUTPUT; + desc->priv = 0; + + desc->src1_addr = (u64)dst_addr; + desc->src1_size = *dlen; + desc->dst_addr = (u64)src_addr; + desc->max_dst_size = slen; + desc->completion_addr = idxd_desc->compl_dma; + + dev_dbg(dev, "(verify) compression mode %s," + " desc->src1_addr %llx, desc->src1_size %d," + " desc->dst_addr %llx, desc->max_dst_size %d," + " desc->src2_addr %llx, desc->src2_size %d\n", + active_compression_mode->name, + desc->src1_addr, desc->src1_size, desc->dst_addr, + desc->max_dst_size, desc->src2_addr, desc->src2_size); + + ret = idxd_submit_desc(wq, idxd_desc); + if (ret) { + dev_dbg(dev, "submit_desc (verify) failed ret=%d\n", ret); + goto err; + } + + ret = check_completion(dev, idxd_desc->iax_completion, false, false); + if (ret) { + dev_dbg(dev, "(verify) check_completion failed ret=%d\n", ret); + goto err; + } + + if (compression_crc != idxd_desc->iax_completion->crc) { + ret = -EINVAL; + dev_dbg(dev, "(verify) iaa comp/decomp crc mismatch:" + " comp=0x%x, decomp=0x%x\n", compression_crc, + idxd_desc->iax_completion->crc); + print_hex_dump(KERN_INFO, "cmp-rec: ", DUMP_PREFIX_OFFSET, + 8, 1, idxd_desc->iax_completion, 64, 0); + goto err; + } + + idxd_free_desc(wq, idxd_desc); +out: + return ret; +err: + idxd_free_desc(wq, idxd_desc); + dev_dbg(dev, "iaa compress failed: ret=%d\n", ret); + + goto out; +} static void iaa_desc_complete(struct idxd_desc *idxd_desc, enum idxd_complete_type comp_type, @@ -1239,133 +1380,6 @@ static int iaa_compress(struct crypto_tfm *tfm, struct acomp_req *req, goto out; } -static int iaa_remap_for_verify(struct device *dev, struct iaa_wq *iaa_wq, - struct acomp_req *req, - dma_addr_t *src_addr, dma_addr_t *dst_addr) -{ - int ret = 0; - int nr_sgs; - - dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE); - dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE); - - nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_FROM_DEVICE); - if (nr_sgs <= 0 || nr_sgs > 1) { - dev_dbg(dev, "verify: couldn't map src sg for iaa device %d," - " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id, - iaa_wq->wq->id, ret); - ret = -EIO; - goto out; - } - *src_addr = sg_dma_address(req->src); - dev_dbg(dev, "verify: dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p," - " req->slen %d, sg_dma_len(sg) %d\n", *src_addr, nr_sgs, - req->src, req->slen, sg_dma_len(req->src)); - - nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_TO_DEVICE); - if (nr_sgs <= 0 || nr_sgs > 1) { - dev_dbg(dev, "verify: couldn't map dst sg for iaa device %d," - " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id, - iaa_wq->wq->id, ret); - ret = -EIO; - dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_FROM_DEVICE); - goto out; - } - *dst_addr = sg_dma_address(req->dst); - dev_dbg(dev, "verify: dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p," - " req->dlen %d, sg_dma_len(sg) %d\n", *dst_addr, nr_sgs, - req->dst, req->dlen, sg_dma_len(req->dst)); -out: - return ret; -} - -static int iaa_compress_verify(struct crypto_tfm *tfm, struct acomp_req *req, - struct idxd_wq *wq, - dma_addr_t src_addr, unsigned int slen, - dma_addr_t dst_addr, unsigned int *dlen, - u32 compression_crc) -{ - struct iaa_device_compression_mode *active_compression_mode; - struct 
iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm); - struct iaa_device *iaa_device; - struct idxd_desc *idxd_desc; - struct iax_hw_desc *desc; - struct idxd_device *idxd; - struct iaa_wq *iaa_wq; - struct pci_dev *pdev; - struct device *dev; - int ret = 0; - - iaa_wq = idxd_wq_get_private(wq); - iaa_device = iaa_wq->iaa_device; - idxd = iaa_device->idxd; - pdev = idxd->pdev; - dev = &pdev->dev; - - active_compression_mode = get_iaa_device_compression_mode(iaa_device, ctx->mode); - - idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK); - if (IS_ERR(idxd_desc)) { - dev_dbg(dev, "idxd descriptor allocation failed\n"); - dev_dbg(dev, "iaa compress failed: ret=%ld\n", - PTR_ERR(idxd_desc)); - return PTR_ERR(idxd_desc); - } - desc = idxd_desc->iax_hw; - - /* Verify (optional) - decompress and check crc, suppress dest write */ - - desc->flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR | IDXD_OP_FLAG_CC; - desc->opcode = IAX_OPCODE_DECOMPRESS; - desc->decompr_flags = IAA_DECOMP_FLAGS | IAA_DECOMP_SUPPRESS_OUTPUT; - desc->priv = 0; - - desc->src1_addr = (u64)dst_addr; - desc->src1_size = *dlen; - desc->dst_addr = (u64)src_addr; - desc->max_dst_size = slen; - desc->completion_addr = idxd_desc->compl_dma; - - dev_dbg(dev, "(verify) compression mode %s," - " desc->src1_addr %llx, desc->src1_size %d," - " desc->dst_addr %llx, desc->max_dst_size %d," - " desc->src2_addr %llx, desc->src2_size %d\n", - active_compression_mode->name, - desc->src1_addr, desc->src1_size, desc->dst_addr, - desc->max_dst_size, desc->src2_addr, desc->src2_size); - - ret = idxd_submit_desc(wq, idxd_desc); - if (ret) { - dev_dbg(dev, "submit_desc (verify) failed ret=%d\n", ret); - goto err; - } - - ret = check_completion(dev, idxd_desc->iax_completion, false, false); - if (ret) { - dev_dbg(dev, "(verify) check_completion failed ret=%d\n", ret); - goto err; - } - - if (compression_crc != idxd_desc->iax_completion->crc) { - ret = -EINVAL; - dev_dbg(dev, "(verify) iaa comp/decomp crc mismatch:" - " comp=0x%x, decomp=0x%x\n", compression_crc, - idxd_desc->iax_completion->crc); - print_hex_dump(KERN_INFO, "cmp-rec: ", DUMP_PREFIX_OFFSET, - 8, 1, idxd_desc->iax_completion, 64, 0); - goto err; - } - - idxd_free_desc(wq, idxd_desc); -out: - return ret; -err: - idxd_free_desc(wq, idxd_desc); - dev_dbg(dev, "iaa compress failed: ret=%d\n", ret); - - goto out; -} - static int iaa_decompress(struct crypto_tfm *tfm, struct acomp_req *req, struct idxd_wq *wq, dma_addr_t src_addr, unsigned int slen, @@ -1678,6 +1692,10 @@ static void compression_ctx_init(struct iaa_compression_ctx *ctx) ctx->use_irq = use_irq; } +/********************************************* + * Interfaces to crypto_alg and crypto_acomp. 
+ *********************************************/ + static int iaa_comp_init_fixed(struct crypto_acomp *acomp_tfm) { struct crypto_tfm *tfm = crypto_acomp_tfm(acomp_tfm); From patchwork Wed Apr 30 20:52:52 2025 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Sridhar, Kanchana P" X-Patchwork-Id: 886168 Received: from mgamail.intel.com (mgamail.intel.com [198.175.65.15]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 36FBA2C1E32; Wed, 30 Apr 2025 20:53:13 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=198.175.65.15 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1746046396; cv=none; b=XXspskxTDZxPS22Pxpz6sSJs264eL2Xxbq7Lw/Ub0vXCcLp60L3lQrj5lb0vhSfKBGMmKCwVCUJXdvUrETa9zCJNDm31kGgIRjPqNWeExpf7w78Hmi1L80JbsJJyUSHcZqCNdI2+FmN09I9Pn/Qi7KiJmGJvfz9E/He3MoSsLGA= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1746046396; c=relaxed/simple; bh=MbuiqNMzPGhfNU0RY+0KuNOw+Jwiy5UKfNS8THN4Ohk=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=FIAqV4UQ4aIdiBySN3yeZAxrIlku46h1nqGF1uGkzDeZ5lPcs5s6zztDuK472cPzzZkXdQrWvqODYxLmtgGRhU3LHqMmtD/e5ECpdtKiLk3UAzAwl9SABRwXM7i1w3GWxbanjqOC0ozBjqmOGP5vUvY/Ftda4MO+xZlNTV3dDm4= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=intel.com; spf=pass smtp.mailfrom=intel.com; dkim=pass (2048-bit key) header.d=intel.com header.i=@intel.com header.b=TPRiJ5uv; arc=none smtp.client-ip=198.175.65.15 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=intel.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=intel.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=intel.com header.i=@intel.com header.b="TPRiJ5uv" DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=intel.com; i=@intel.com; q=dns/txt; s=Intel; t=1746046394; x=1777582394; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=MbuiqNMzPGhfNU0RY+0KuNOw+Jwiy5UKfNS8THN4Ohk=; b=TPRiJ5uvwlpRSDHJL0t2EhhpfQP/k9LvN6W+IdzjZgs5rtqt0RrpeMI/ nVwrUsmwI7utE7QM2kNX88mq5o22BdlCG4ntu0R7mJO9wtkQAXTHZ6lvH ai+20zvNBnGV1M4c2bSdz/+OSp3qeDZ5np3Pbxkf5arYNENVL4JkCnfzH hzfXM19Fb1JQVpqJ8FX+D7EpUjTC0otJD/gOwx5Z4gzNiUWB+tbeVxZW9 9RoFtBk59HUBZWv19vWXyjyINv9hxU6orTTF1kVWsKqrpkcnKQiViB2XT k657RpbCCZj+eR+GjPiipIA3MdAqsoQdlZ5ng/flHIdEvtVH8ZClXrl2L w==; X-CSE-ConnectionGUID: ZGOQq4cIR7CSjc9KyOHenA== X-CSE-MsgGUID: H0ZWd69cQI6xgfMMSfkMfQ== X-IronPort-AV: E=McAfee;i="6700,10204,11419"; a="51388554" X-IronPort-AV: E=Sophos;i="6.15,252,1739865600"; d="scan'208";a="51388554" Received: from fmviesa003.fm.intel.com ([10.60.135.143]) by orvoesa107.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 30 Apr 2025 13:53:12 -0700 X-CSE-ConnectionGUID: wO+Zmcr9RlqeSQNTJEk3mg== X-CSE-MsgGUID: 54RBVjfnSNq/pZWdtlztlw== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="6.15,252,1739865600"; d="scan'208";a="138248899" Received: from jf5300-b11a338t.jf.intel.com ([10.242.51.115]) by fmviesa003.fm.intel.com with ESMTP; 30 Apr 2025 13:53:11 -0700 From: Kanchana P Sridhar To: linux-kernel@vger.kernel.org, linux-mm@kvack.org, hannes@cmpxchg.org, yosry.ahmed@linux.dev, nphamcs@gmail.com, chengming.zhou@linux.dev, usamaarif642@gmail.com, ryan.roberts@arm.com, 21cnbao@gmail.com, 
ying.huang@linux.alibaba.com, akpm@linux-foundation.org, linux-crypto@vger.kernel.org, herbert@gondor.apana.org.au, davem@davemloft.net, clabbe@baylibre.com, ardb@kernel.org, ebiggers@google.com, surenb@google.com, kristen.c.accardi@intel.com Cc: wajdi.k.feghali@intel.com, vinodh.gopal@intel.com, kanchana.p.sridhar@intel.com Subject: [PATCH v9 06/19] crypto: iaa - New architecture for IAA device WQ comp/decomp usage & core mapping. Date: Wed, 30 Apr 2025 13:52:52 -0700 Message-Id: <20250430205305.22844-7-kanchana.p.sridhar@intel.com> X-Mailer: git-send-email 2.27.0 In-Reply-To: <20250430205305.22844-1-kanchana.p.sridhar@intel.com> References: <20250430205305.22844-1-kanchana.p.sridhar@intel.com> Precedence: bulk X-Mailing-List: linux-crypto@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 This patch re-architects the iaa_crypto driver in two aspects: A) Map IAA devices/wqs to cores based on packages instead of NUMA. B) The WQ rebalancing algorithm that is invoked as WQs are discovered/deleted has been made very general and flexible so that the user can control exactly how IAA WQs are used. Description/motivation for (A): =============================== This patch modifies the algorithm for mapping available IAA devices and WQs to cores based on packages instead of NUMA nodes. This leads to a more realistic mapping of IAA devices as compression/decompression resources for a package, rather than for a NUMA node. This also resolves problems that were observed during internal validation on Intel Granite Rapids platforms with many more NUMA nodes than packages: for such cases, the earlier NUMA based allocation caused some IAAs to be over-subscribed and some to not be utilized at all. As a result of this change from NUMA to packages, some of the core functions used by the iaa_crypto driver's "probe" and "remove" API have been re-written. The new infrastructure maintains a static mapping of wqs per IAA device, in the "struct iaa_device" itself. The earlier implementation would allocate memory per-cpu for this data, which never changes once the IAA devices/wqs have been initialized. Two main outcomes from this new iaa_crypto driver infrastructure are: 1) Resolves "task blocked for more than x seconds" errors observed during internal validation on Intel systems with the earlier NUMA node based mappings, which was root-caused to the non-optimal IAA-to-core mappings described earlier. 2) Results in a NUM_THREADS factor reduction in memory footprint cost of initializing IAA devices/wqs, due to eliminating the per-cpu copies of each IAA device's wqs. On a 384 cores Intel Granite Rapids server with 8 IAA devices, this saves 140MiB. An auxiliary change included in this patch is that the driver's "nr_iaa", "nr_iaa_per_package" and "cpus_per_iaa" global variables are made atomic, because iaa_crypto_probe() and iaa_crypto_remove() change the values of these variables asynchronously and concurrently as wqs get added/deleted and rebalance_wq_table() is called. This change allows the rebalance_wq_table() code to see consistent values of the number of IAA devices. Description/motivation for (B): =============================== This builds upon the package-based driver infrastructure, to provide more flexibility in using particular WQs for compress-only or decompress-only jobs. 
It also introduces the notion of using all the IAA devices on a package as resources that are shared by all cores on the package: this significantly improves batching (to be added in subsequent patches) latency and compress/decompress throughput. sysfs driver parameters provide configurability of these features.

Two main concepts are introduced as part of the rebalancing changes:

1) An IAA WQ can be used for specific ops, which determines a WQ "type" for the iaa_crypto driver to submit compress/decompress jobs:

   - compress only
   - decompress only
   - generic, i.e., for both compresses and decompresses

   The WQ type is decided based on the number of WQs configured for a given IAA device, and the new "g_comp_wqs_per_iaa" driver parameter.

2) An IAA WQ can be mapped to cores using either of the following balancing techniques:

   a) Shared by all cores on a package. The iaa_crypto driver will dispatch compress/decompress jobs to all WQs of the same type, across all IAA devices on the package:

      - IAA compress jobs will be distributed to all same-package IAA compress-only/generic WQs.
      - IAA decompress jobs will be distributed to all same-package IAA decompress-only/generic WQs.

   b) Handles compress/decompress jobs only from "mapped cores", i.e., the cores derived by evenly dividing the number of IAAs among the number of cores, per package.

Server setups that are moderately to highly contended can benefit from (2.a). When the mix of workloads running on a system needs high compress throughput, and has relatively lower decompress activity, (2.b) might be more optimal.

These approaches can be accomplished with the following new iaa_crypto driver parameters. These parameters are global settings and will apply to all IAAs on a package, interpreted in the context of the number of WQs configured per IAA device.

g_comp_wqs_per_iaa:
===================

Number of compress-only WQs (default is 0).

If the IAA device has more than "g_comp_wqs_per_iaa" WQs configured, the last "g_comp_wqs_per_iaa" number of WQs will be considered as "compress only". The remaining WQs will be considered as "decomp only".

If the device has fewer WQs than "g_comp_wqs_per_iaa", all the device's WQs will be considered "generic", i.e., the driver will submit compress and decompress jobs to all the WQs configured for the device.

For example, if an IAA "X" has 2 WQs, this will set up 1 decompress WQ and 1 compress WQ:

  echo 1 > /sys/bus/dsa/drivers/crypto/g_comp_wqs_per_iaa

  wqX.0: decompress jobs only.
  wqX.1: compress jobs only.

This setting would typically benefit workloads that see a high level of compress and decompress activity.

If an IAA has 1 WQ, that WQ will be considered "generic": the driver will submit compress and decompress jobs to the same WQ (this is independent of the "g_comp_wqs_per_iaa" setting):

  wqX.0: compress and decompress jobs.

This would typically benefit workloads that see significant cold memory being reclaimed, and consequently, high swapout and low swapin activity.

distribute_comps:
=================

Distribute compressions to all IAAs on package (default is Y).

Assuming the WQ type has been established as compress-only/decompress-only/generic, this setting will determine if the driver will distribute compress jobs to all IAAs on a package (default behavior) or not.

If this is turned off, the driver will dispatch compress jobs to a given IAA "compression enabled" WQ only from cores that are mapped to that IAA using an algorithm that evenly distributes IAAs per package to cores per package.
For example, on a Sapphire Rapids server with 56 physical cores and 4 IAAs per package, with Hyperthreading, 28 logical cores will be assigned to each IAA. With the "distribute_comps" driver parameter turned off, the driver will send compress jobs only to its assigned IAA device.

Enabling "distribute_comps" would typically benefit workloads in terms of batch compress latency and throughput.

distribute_decomps:
===================

Distribute decompressions to all IAAs on package (default is Y).

Assuming the WQ type has been established as compress-only/decompress-only/generic, this setting will determine if the driver will distribute decompress jobs to all IAAs on a package (default behavior) or not.

Enabling "distribute_decomps" would typically benefit workloads that see a high level of compress and decompress activity, especially p99 decompress latency.

Recommended settings for best compress/decompress latency, throughput and hence memory savings for a moderately contended server:

  2 WQs per IAA
  g_comp_wqs_per_iaa = 1 (separate WQ for comps/decomps per IAA)
  distribute_decomps = Y
  distribute_comps = Y

For a system that has only 1 IAA device enabled on a given package, the recommended settings are:

  1 WQ per IAA
  g_comp_wqs_per_iaa = 0 (same WQ for comps/decomps)
  distribute_decomps = N
  distribute_comps = N

Examples:
=========

For a Sapphire Rapids server with 2 packages, 56 cores and 4 IAAs per package, each IAA has 2 WQs, and these settings are in effect:

  echo 1 > /sys/bus/dsa/drivers/crypto/g_comp_wqs_per_iaa
  echo 1 > /sys/bus/dsa/drivers/crypto/distribute_comps
  echo 0 > /sys/bus/dsa/drivers/crypto/distribute_decomps

  wqX.0: decompress jobs only.
  wqX.1: compress jobs only.

Compress jobs from all cores on package-0 will be distributed in round-robin manner to [iax1, iax3, iax5, iax7]'s wqX.1, to maximize compression throughput/latency/memory savings:

  wq1.1
  wq3.1
  wq5.1
  wq7.1

Likewise, compress jobs from all cores on package-1 will be distributed in round-robin manner to [iax9, iax11, iax13, iax15]'s wqX.1, to maximize compression throughput/latency/memory savings for workloads running on package-1:

  wq9.1
  wq11.1
  wq13.1
  wq15.1

Decompress jobs will be submitted from mapped logical cores only, as follows:

package-0:

  CPU  0-13,112-125   14-27,126-139   28-41,140-153   42-55,154-167
  IAA: iax1            iax3            iax5            iax7
  WQ:  wq1.0           wq3.0           wq5.0           wq7.0

package-1:

  CPU  56-69,168-181   70-83,182-195   84-97,196-209   98-111,210-223
  IAA: iax9            iax11           iax13           iax15
  WQ:  wq9.0           wq11.0          wq13.0          wq15.0

IAA WQs can be configured using higher level scripts as described in Documentation/driver-api/crypto/iaa/iaa-crypto.rst. This documentation has been updated for the above new parameters.

Signed-off-by: Kanchana P Sridhar --- .../driver-api/crypto/iaa/iaa-crypto.rst | 134 +++ drivers/crypto/intel/iaa/iaa_crypto.h | 18 +- drivers/crypto/intel/iaa/iaa_crypto_main.c | 817 ++++++++++++++---- 3 files changed, 805 insertions(+), 164 deletions(-) diff --git a/Documentation/driver-api/crypto/iaa/iaa-crypto.rst b/Documentation/driver-api/crypto/iaa/iaa-crypto.rst index 8e50b900d51c..949bfa1ef624 100644 --- a/Documentation/driver-api/crypto/iaa/iaa-crypto.rst +++ b/Documentation/driver-api/crypto/iaa/iaa-crypto.rst @@ -290,6 +290,140 @@ The available attributes are: 'sync' mode. This is to ensure correct iaa_crypto behavior until true async polling without interrupts is enabled in iaa_crypto. + - g_comp_wqs_per_iaa + + Number of compress-only WQs (default is 0).
+ + If the IAA device has more than "g_comp_wqs_per_iaa" WQs configured, + the last "g_comp_wqs_per_iaa" number of WQs will be considered as + "compress only". The remaining WQs will be considered as "decomp only". + + If the device has fewer WQs than "g_comp_wqs_per_iaa", all the + device's WQs will be considered "generic", i.e., the driver will + submit compress and decompress jobs to all the WQs configured for the + device. + + For e.g., if an IAA "X" has 2 WQs, this will set up 1 decompress WQ and + 1 compress WQ:: + + echo 1 > /sys/bus/dsa/drivers/crypto/g_comp_wqs_per_iaa + + wqX.0: decompress jobs only. + wqX.1: compress jobs only. + + This setting would typically benefit workloads that see a high + level of compress and decompress activity. + + If an IAA has 1 WQ, that WQ will be considered "generic": the driver + will submit compress and decompress jobs to the same WQ (this is + independent of the "g_comp_wqs_per_iaa" setting): + + wqX.0: compress and decompress jobs. + + This would typically benefit workloads that see significant cold + memory being reclaimed, and consequently, high swapout and low swapin + activity. + + - distribute_comps + + Distribute compressions to all IAAs on package (default is Y). + + Assuming the WQ type has been established as + compress-only/decompress-only/generic, this setting will determine if + the driver will distribute compress jobs to all IAAs on a package + (default behavior) or not. + + If this is turned off, the driver will dispatch compress jobs to a + given IAA "compression enabled" WQ only from cores that are mapped to + that IAA using an algorithm that evenly distributes IAAs per package + to cores per package. For e.g., on a Sapphire Rapids server with + 56-physical-cores and 4 IAAs per package, with Hyperthreading, 28 + logical cores will be assigned to each IAA. With the + "distribute_comps" driver parameter turned off, the driver will send + compress jobs only to it's assigned IAA device. + + Enabling "distribute_comps" would typically benefit workloads in + terms of batch compress latency and throughput. + + - distribute_decomps + + Distribute decompressions to all IAAs on package (default is Y). + + Assuming the WQ type has been established as + compress-only/decompress-only/generic, this setting will determine if + the driver will distribute decompress jobs to all IAAs on a package + (default behavior) or not. + + Enabling "distribute_decomps" would typically benefit workloads that + see a high level of compress and decompress activity, especially + p99 decompress latency. + + Recommended settings for best compress/decompress latency, throughput + and hence memory savings for a moderately contended server that + has more than 1 IAA device enabled on a given package: + + 2 WQs per IAA + g_comp_wqs_per_iaa = 1 (separate WQ for comps/decomps per IAA) + distribute_decomps = Y + distribute_comps = Y + + For a system that has only 1 IAA device enabled on a given package, + the recommended settings are: + + 1 WQ per IAA + g_comp_wqs_per_iaa = 0 (same WQ for comps/decomps) + distribute_decomps = N + distribute_comps = N + + Examples: + + For a Sapphire Rapids server with 2 packages, 56 cores and 4 IAAs per + package, each IAA has 2 WQs, and these settings are in effect:: + + echo 1 > /sys/bus/dsa/drivers/crypto/g_comp_wqs_per_iaa + echo 1 > /sys/bus/dsa/drivers/crypto/distribute_comps + echo 0 > /sys/bus/dsa/drivers/crypto/distribute_decomps + + This enables the following behavior: + + wqX.0: decompress jobs only. 
+ wqX.1: compress jobs only. + + Compress jobs from all cores on package-0 will be distributed in + round-robin manner to [iax1, iax3, iax5, iax7]'s wqX.1, to maximize + compression throughput/latency/memory savings: + + wq1.1 + wq3.1 + wq5.1 + wq7.1 + + Likewise, compress jobs from all cores on package-1 will be + distributed in round-robin manner to [iax9, iax11, iax13, iax15]'s + wqX.1, to maximize compression throughput/latency/memory savings for + workloads running on package-1: + + wq9.1 + wq11.1 + wq13.1 + wq15.1 + + Decompress jobs will be submitted from mapped logical cores only, as + follows: + + package-0: + + CPU 0-13,112-125 14-27,126-139 28-41,140-153 42-55,154-167 + IAA: iax1 iax3 iax5 iax7 + WQ: wq1.0 wq3.0 wq5.0 wq7.0 + + package-1: + + CPU 56-69,168-181 70-83,182-195 84-97,196-209 98-111,210-223 + IAA: iax9 iax11 iax13 iax15 + WQ: wq9.0 wq11.0 wq13.0 wq15.0 + + .. _iaa_default_config: IAA Default Configuration diff --git a/drivers/crypto/intel/iaa/iaa_crypto.h b/drivers/crypto/intel/iaa/iaa_crypto.h index 56985e395263..549ac98a9366 100644 --- a/drivers/crypto/intel/iaa/iaa_crypto.h +++ b/drivers/crypto/intel/iaa/iaa_crypto.h @@ -46,6 +46,7 @@ struct iaa_wq { struct idxd_wq *wq; int ref; bool remove; + bool mapped; struct iaa_device *iaa_device; @@ -63,6 +64,13 @@ struct iaa_device_compression_mode { dma_addr_t aecs_comp_table_dma_addr; }; +struct wq_table_entry { + struct idxd_wq **wqs; + unsigned int max_wqs; + unsigned int n_wqs; + unsigned int cur_wq; +}; + /* Representation of IAA device with wqs, populated by probe */ struct iaa_device { struct list_head list; @@ -73,19 +81,15 @@ struct iaa_device { int n_wq; struct list_head wqs; + struct wq_table_entry *generic_wq_table; + struct wq_table_entry *comp_wq_table; + atomic64_t comp_calls; atomic64_t comp_bytes; atomic64_t decomp_calls; atomic64_t decomp_bytes; }; -struct wq_table_entry { - struct idxd_wq **wqs; - int max_wqs; - int n_wqs; - int cur_wq; -}; - #define IAA_AECS_ALIGN 32 /* diff --git a/drivers/crypto/intel/iaa/iaa_crypto_main.c b/drivers/crypto/intel/iaa/iaa_crypto_main.c index 4900f9c72600..2f2dc6987cc6 100644 --- a/drivers/crypto/intel/iaa/iaa_crypto_main.c +++ b/drivers/crypto/intel/iaa/iaa_crypto_main.c @@ -23,32 +23,65 @@ #define pr_fmt(fmt) "idxd: " IDXD_SUBDRIVER_NAME ": " fmt #define IAA_ALG_PRIORITY 300 +#define MAX_PKG_IAA 8 +#define MAX_IAA_WQ 8 /************************************** * Driver internal global variables. **************************************/ /* number of iaa instances probed */ -static unsigned int nr_iaa; +static atomic_t nr_iaa = ATOMIC_INIT(0); static unsigned int nr_cpus; -static unsigned int nr_nodes; -static unsigned int nr_cpus_per_node; +static unsigned int nr_packages; +static unsigned int nr_cpus_per_package; +static atomic_t nr_iaa_per_package = ATOMIC_INIT(0); /* Number of physical cpus sharing each iaa instance */ -static unsigned int cpus_per_iaa; +static atomic_t cpus_per_iaa = ATOMIC_INIT(0); -/* Per-cpu lookup table for balanced wqs */ -static struct wq_table_entry __percpu *wq_table; +/* Per-cpu lookup table for decomp wqs. */ +static struct wq_table_entry __percpu *cpu_decomp_wqs = NULL; + +/* Per-cpu lookup table for comp wqs. */ +static struct wq_table_entry __percpu *cpu_comp_wqs = NULL; + +/* All decomp wqs from IAAs on a package. */ +static struct wq_table_entry **pkg_global_decomp_wqs = NULL; +/* All comp wqs from IAAs on a package. 
*/ +static struct wq_table_entry **pkg_global_comp_wqs = NULL; + +static struct idxd_wq *first_wq_found = NULL; LIST_HEAD(iaa_devices); DEFINE_MUTEX(iaa_devices_lock); +DEFINE_MUTEX(first_wq_found_lock); /* If enabled, IAA hw crypto algos are registered, unavailable otherwise */ static bool iaa_crypto_enabled; static bool iaa_crypto_registered; +/* + * We use the atomic iaa_device_registration_done to know if the + * crypto testmgr has been started, and the device has been + * registered. Until this is done, the first WQ probed will be + * assigned to the per-CPU comp/decomp wq tables. + * With the new dynamic package-level rebalancing of WQs being + * discovered asynchronously and concurrently with tests + * triggered from device registration, this is needed to + * determine when it is safe for the rebalancing of decomp/comp + * WQs to de-allocate the per-package WQs and re-allocate them + * based on the latest number of IAA devices and WQs. + */ +static atomic_t iaa_device_registration_done = ATOMIC_INIT(0); static struct iaa_compression_mode *iaa_compression_modes[IAA_COMP_MODES_MAX]; +/* Distribute decompressions across all IAAs on the package. */ +static bool iaa_distribute_decomps = true; + +/* Distribute compressions across all IAAs on the package. */ +static bool iaa_distribute_comps = true; + /* Verify results of IAA compress or not */ static bool iaa_verify_compress = true; @@ -86,6 +119,9 @@ static bool async_mode; /* Use interrupts */ static bool use_irq; +/* Number of compress-only wqs per iaa*/ +static int g_comp_wqs_per_iaa = 0; + /************************************************** * Driver attributes along with get/set functions. **************************************************/ @@ -179,6 +215,87 @@ static ssize_t sync_mode_store(struct device_driver *driver, } static DRIVER_ATTR_RW(sync_mode); +static ssize_t g_comp_wqs_per_iaa_show(struct device_driver *driver, char *buf) +{ + return sprintf(buf, "%d\n", g_comp_wqs_per_iaa); +} + +static ssize_t g_comp_wqs_per_iaa_store(struct device_driver *driver, + const char *buf, size_t count) +{ + int ret = -EBUSY; + + mutex_lock(&iaa_devices_lock); + + if (iaa_crypto_enabled) + goto out; + + ret = kstrtoint(buf, 10, &g_comp_wqs_per_iaa); + if (ret) + goto out; + + ret = count; +out: + mutex_unlock(&iaa_devices_lock); + + return ret; +} +static DRIVER_ATTR_RW(g_comp_wqs_per_iaa); + +static ssize_t distribute_decomps_show(struct device_driver *driver, char *buf) +{ + return sprintf(buf, "%d\n", iaa_distribute_decomps); +} + +static ssize_t distribute_decomps_store(struct device_driver *driver, + const char *buf, size_t count) +{ + int ret = -EBUSY; + + mutex_lock(&iaa_devices_lock); + + if (iaa_crypto_enabled) + goto out; + + ret = kstrtobool(buf, &iaa_distribute_decomps); + if (ret) + goto out; + + ret = count; +out: + mutex_unlock(&iaa_devices_lock); + + return ret; +} +static DRIVER_ATTR_RW(distribute_decomps); + +static ssize_t distribute_comps_show(struct device_driver *driver, char *buf) +{ + return sprintf(buf, "%d\n", iaa_distribute_comps); +} + +static ssize_t distribute_comps_store(struct device_driver *driver, + const char *buf, size_t count) +{ + int ret = -EBUSY; + + mutex_lock(&iaa_devices_lock); + + if (iaa_crypto_enabled) + goto out; + + ret = kstrtobool(buf, &iaa_distribute_comps); + if (ret) + goto out; + + ret = count; +out: + mutex_unlock(&iaa_devices_lock); + + return ret; +} +static DRIVER_ATTR_RW(distribute_comps); + /**************************** * Driver compression modes. 
****************************/ @@ -464,32 +581,81 @@ static void remove_device_compression_modes(struct iaa_device *iaa_device) * allocate/init/query/deallocate devices/wqs. ***********************************************************/ -static struct iaa_device *iaa_device_alloc(void) +static struct iaa_device *iaa_device_alloc(struct idxd_device *idxd) { struct iaa_device *iaa_device; + struct wq_table_entry *wqt; iaa_device = kzalloc(sizeof(*iaa_device), GFP_KERNEL); if (!iaa_device) - return NULL; + goto err; + + iaa_device->idxd = idxd; + + /* IAA device's generic/decomp wqs. */ + iaa_device->generic_wq_table = kzalloc(sizeof(struct wq_table_entry), GFP_KERNEL); + if (!iaa_device->generic_wq_table) + goto err; + + wqt = iaa_device->generic_wq_table; + + wqt->wqs = kzalloc(iaa_device->idxd->max_wqs * sizeof(struct idxd_wq *), GFP_KERNEL); + if (!wqt->wqs) + goto err; + + wqt->max_wqs = iaa_device->idxd->max_wqs; + wqt->n_wqs = 0; + + /* + * IAA device's comp wqs (optional). If the device has more than + * "g_comp_wqs_per_iaa" WQs configured, the last "g_comp_wqs_per_iaa" + * number of WQs will be considered as "comp only". The remaining + * WQs will be considered as "decomp only". + * If the device has fewer WQs than "g_comp_wqs_per_iaa", all the + * device's WQs will be considered "generic", i.e., cores can submit + * comp and decomp jobs to all the WQs configured for the device. + */ + iaa_device->comp_wq_table = kzalloc(sizeof(struct wq_table_entry), GFP_KERNEL); + if (!iaa_device->comp_wq_table) + goto err; + + wqt = iaa_device->comp_wq_table; + + wqt->wqs = kzalloc(iaa_device->idxd->max_wqs * sizeof(struct idxd_wq *), GFP_KERNEL); + if (!wqt->wqs) + goto err; + + wqt->max_wqs = iaa_device->idxd->max_wqs; + wqt->n_wqs = 0; INIT_LIST_HEAD(&iaa_device->wqs); return iaa_device; + +err: + if (iaa_device) { + if (iaa_device->generic_wq_table) { + kfree(iaa_device->generic_wq_table->wqs); + kfree(iaa_device->generic_wq_table); + } + kfree(iaa_device->comp_wq_table); + kfree(iaa_device); + } + + return NULL; } static struct iaa_device *add_iaa_device(struct idxd_device *idxd) { struct iaa_device *iaa_device; - iaa_device = iaa_device_alloc(); + iaa_device = iaa_device_alloc(idxd); if (!iaa_device) return NULL; - iaa_device->idxd = idxd; - list_add_tail(&iaa_device->list, &iaa_devices); - nr_iaa++; + atomic_inc(&nr_iaa); return iaa_device; } @@ -509,7 +675,7 @@ static void del_iaa_device(struct iaa_device *iaa_device) { list_del(&iaa_device->list); - nr_iaa--; + atomic_dec(&nr_iaa); } static void free_iaa_device(struct iaa_device *iaa_device) @@ -518,6 +684,17 @@ static void free_iaa_device(struct iaa_device *iaa_device) return; remove_device_compression_modes(iaa_device); + + if (iaa_device->generic_wq_table) { + kfree(iaa_device->generic_wq_table->wqs); + kfree(iaa_device->generic_wq_table); + } + + if (iaa_device->comp_wq_table) { + kfree(iaa_device->comp_wq_table->wqs); + kfree(iaa_device->comp_wq_table); + } + kfree(iaa_device); } @@ -548,6 +725,7 @@ static int add_iaa_wq(struct iaa_device *iaa_device, struct idxd_wq *wq, iaa_wq->wq = wq; iaa_wq->iaa_device = iaa_device; idxd_wq_set_private(wq, iaa_wq); + iaa_wq->mapped = false; list_add_tail(&iaa_wq->list, &iaa_device->wqs); @@ -576,7 +754,7 @@ static void del_iaa_wq(struct iaa_device *iaa_device, struct idxd_wq *wq) dev_dbg(dev, "removed wq %d from iaa_device %d, n_wq %d, nr_iaa %d\n", wq->id, iaa_device->idxd->id, - iaa_device->n_wq, nr_iaa); + iaa_device->n_wq, atomic_read(&nr_iaa)); if (iaa_device->n_wq == 0) 
del_iaa_device(iaa_device); @@ -588,6 +766,7 @@ static void del_iaa_wq(struct iaa_device *iaa_device, struct idxd_wq *wq) static void remove_iaa_wq(struct idxd_wq *wq) { struct iaa_device *iaa_device; + unsigned int num_pkg_iaa = 0; list_for_each_entry(iaa_device, &iaa_devices, list) { if (iaa_has_wq(iaa_device, wq)) { @@ -596,12 +775,20 @@ static void remove_iaa_wq(struct idxd_wq *wq) } } - if (nr_iaa) { - cpus_per_iaa = (nr_nodes * nr_cpus_per_node) / nr_iaa; - if (!cpus_per_iaa) - cpus_per_iaa = 1; - } else - cpus_per_iaa = 1; + if (atomic_read(&nr_iaa)) { + atomic_set(&cpus_per_iaa, (nr_packages * nr_cpus_per_package) / atomic_read(&nr_iaa)); + if (!atomic_read(&cpus_per_iaa)) + atomic_set(&cpus_per_iaa, 1); + + num_pkg_iaa = atomic_read(&nr_iaa) / nr_packages; + if (!num_pkg_iaa) + num_pkg_iaa = 1; + } else { + atomic_set(&cpus_per_iaa, 1); + num_pkg_iaa = 1; + } + + atomic_set(&nr_iaa_per_package, num_pkg_iaa); } static void __free_iaa_wq(struct iaa_wq *iaa_wq) @@ -635,6 +822,7 @@ static int save_iaa_wq(struct idxd_wq *wq) struct pci_dev *pdev; struct device *dev; int ret = 0; + unsigned int num_pkg_iaa = 0; list_for_each_entry(iaa_device, &iaa_devices, list) { if (iaa_device->idxd == wq->idxd) { @@ -687,12 +875,19 @@ static int save_iaa_wq(struct idxd_wq *wq) } } - if (WARN_ON(nr_iaa == 0)) + if (WARN_ON(atomic_read(&nr_iaa) == 0)) return -EINVAL; - cpus_per_iaa = (nr_nodes * nr_cpus_per_node) / nr_iaa; - if (!cpus_per_iaa) - cpus_per_iaa = 1; + atomic_set(&cpus_per_iaa, (nr_packages * nr_cpus_per_package) / atomic_read(&nr_iaa)); + if (!atomic_read(&cpus_per_iaa)) + atomic_set(&cpus_per_iaa, 1); + + num_pkg_iaa = atomic_read(&nr_iaa) / nr_packages; + if (!num_pkg_iaa) + num_pkg_iaa = 1; + + atomic_set(&nr_iaa_per_package, num_pkg_iaa); + out: return 0; } @@ -748,105 +943,284 @@ static int iaa_wq_put(struct idxd_wq *wq) * Mapping IAA devices and wqs to cores with per-cpu wq_tables. ***************************************************************/ -static void wq_table_free_entry(int cpu) +/* + * Given a cpu, find the closest IAA instance. 
+ */ +static inline int cpu_to_iaa(int cpu) { - struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu); + int package_id, base_iaa, iaa = 0; + + if (!nr_packages || !atomic_read(&nr_iaa_per_package) || !atomic_read(&nr_iaa)) + return -1; + + package_id = topology_logical_package_id(cpu); + base_iaa = package_id * atomic_read(&nr_iaa_per_package); + iaa = base_iaa + ((cpu % nr_cpus_per_package) / atomic_read(&cpus_per_iaa)); - kfree(entry->wqs); - memset(entry, 0, sizeof(*entry)); + pr_debug("cpu = %d, package_id = %d, base_iaa = %d, iaa = %d", + cpu, package_id, base_iaa, iaa); + + if (iaa >= 0 && iaa < atomic_read(&nr_iaa)) + return iaa; + + return (atomic_read(&nr_iaa) - 1); } -static void wq_table_clear_entry(int cpu) +static void free_wq_tables(void) { - struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu); + if (cpu_decomp_wqs) { + free_percpu(cpu_decomp_wqs); + cpu_decomp_wqs = NULL; + } - entry->n_wqs = 0; - entry->cur_wq = 0; - memset(entry->wqs, 0, entry->max_wqs * sizeof(struct idxd_wq *)); + if (cpu_comp_wqs) { + free_percpu(cpu_comp_wqs); + cpu_comp_wqs = NULL; + } + + pr_debug("freed comp/decomp wq tables\n"); } -static void clear_wq_table(void) +static void pkg_global_wqs_dealloc(void) { - int cpu; + int i; - for (cpu = 0; cpu < nr_cpus; cpu++) - wq_table_clear_entry(cpu); + if (pkg_global_decomp_wqs) { + for (i = 0; i < nr_packages; ++i) { + kfree(pkg_global_decomp_wqs[i]->wqs); + kfree(pkg_global_decomp_wqs[i]); + } + kfree(pkg_global_decomp_wqs); + pkg_global_decomp_wqs = NULL; + } - pr_debug("cleared wq table\n"); + if (pkg_global_comp_wqs) { + for (i = 0; i < nr_packages; ++i) { + kfree(pkg_global_comp_wqs[i]->wqs); + kfree(pkg_global_comp_wqs[i]); + } + kfree(pkg_global_comp_wqs); + pkg_global_comp_wqs = NULL; + } } -static void free_wq_table(void) +static bool pkg_global_wqs_alloc(void) { - int cpu; + int i; + + pkg_global_decomp_wqs = kcalloc(nr_packages, sizeof(*pkg_global_decomp_wqs), GFP_KERNEL); + if (!pkg_global_decomp_wqs) + return false; + + for (i = 0; i < nr_packages; ++i) { + pkg_global_decomp_wqs[i] = kzalloc(sizeof(struct wq_table_entry), GFP_KERNEL); + if (!pkg_global_decomp_wqs[i]) + goto err; + + pkg_global_decomp_wqs[i]->wqs = kcalloc(MAX_PKG_IAA * MAX_IAA_WQ, sizeof(struct idxd_wq *), GFP_KERNEL); + if (!pkg_global_decomp_wqs[i]->wqs) + goto err; + + pkg_global_decomp_wqs[i]->max_wqs = MAX_PKG_IAA * MAX_IAA_WQ; + } + + pkg_global_comp_wqs = kcalloc(nr_packages, sizeof(*pkg_global_comp_wqs), GFP_KERNEL); + if (!pkg_global_comp_wqs) + goto err; + + for (i = 0; i < nr_packages; ++i) { + pkg_global_comp_wqs[i] = kzalloc(sizeof(struct wq_table_entry), GFP_KERNEL); + if (!pkg_global_comp_wqs[i]) + goto err; + + pkg_global_comp_wqs[i]->wqs = kcalloc(MAX_PKG_IAA * MAX_IAA_WQ, sizeof(struct idxd_wq *), GFP_KERNEL); + if (!pkg_global_comp_wqs[i]->wqs) + goto err; - for (cpu = 0; cpu < nr_cpus; cpu++) - wq_table_free_entry(cpu); + pkg_global_comp_wqs[i]->max_wqs = MAX_PKG_IAA * MAX_IAA_WQ; + } - free_percpu(wq_table); + return true; - pr_debug("freed wq table\n"); +err: + pkg_global_wqs_dealloc(); + return false; } static int alloc_wq_table(int max_wqs) { - struct wq_table_entry *entry; - int cpu; - - wq_table = alloc_percpu(struct wq_table_entry); - if (!wq_table) + cpu_decomp_wqs = alloc_percpu_gfp(struct wq_table_entry, GFP_KERNEL | __GFP_ZERO); + if (!cpu_decomp_wqs) return -ENOMEM; - for (cpu = 0; cpu < nr_cpus; cpu++) { - entry = per_cpu_ptr(wq_table, cpu); - entry->wqs = kcalloc(max_wqs, sizeof(struct wq *), GFP_KERNEL); - if (!entry->wqs) { - 
free_wq_table(); - return -ENOMEM; - } + cpu_comp_wqs = alloc_percpu_gfp(struct wq_table_entry, GFP_KERNEL | __GFP_ZERO); + if (!cpu_comp_wqs) + goto err; - entry->max_wqs = max_wqs; - } + if (!pkg_global_wqs_alloc()) + goto err; pr_debug("initialized wq table\n"); return 0; + +err: + free_wq_tables(); + return -ENOMEM; } -static void wq_table_add(int cpu, struct idxd_wq *wq) +/* + * The caller should have established that device_iaa_wqs is not empty, + * i.e., every IAA device in "iaa_devices" has at least one WQ. + */ +static void add_device_wqs_to_wq_table(struct wq_table_entry *dst_wq_table, + struct wq_table_entry *device_wq_table) { - struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu); + int i; + + for (i = 0; i < device_wq_table->n_wqs; ++i) + dst_wq_table->wqs[dst_wq_table->n_wqs++] = device_wq_table->wqs[i]; +} - if (WARN_ON(entry->n_wqs == entry->max_wqs)) +static bool reinit_pkg_global_wqs(bool comp) +{ + int cur_iaa = 0, pkg = 0; + struct iaa_device *iaa_device; + struct wq_table_entry **pkg_wqs = comp ? pkg_global_comp_wqs : pkg_global_decomp_wqs; + + for (pkg = 0; pkg < nr_packages; ++pkg) + pkg_wqs[pkg]->n_wqs = 0; + + pkg = 0; + +one_iaa_special_case: + /* Re-initialize per-package wqs. */ + list_for_each_entry(iaa_device, &iaa_devices, list) { + struct wq_table_entry *device_wq_table = comp ? + ((iaa_device->comp_wq_table->n_wqs > 0) ? + iaa_device->comp_wq_table : iaa_device->generic_wq_table) : + iaa_device->generic_wq_table; + + if (pkg_wqs[pkg]->n_wqs + device_wq_table->n_wqs > pkg_wqs[pkg]->max_wqs) { + pkg_wqs[pkg]->wqs = krealloc(pkg_wqs[pkg]->wqs, + ksize(pkg_wqs[pkg]->wqs) + + max((MAX_PKG_IAA * MAX_IAA_WQ), iaa_device->n_wq) * sizeof(struct idxd_wq *), + GFP_KERNEL | __GFP_ZERO); + if (!pkg_wqs[pkg]->wqs) + return false; + + pkg_wqs[pkg]->max_wqs = ksize(pkg_wqs[pkg]->wqs)/sizeof(struct idxd_wq *); + } + + add_device_wqs_to_wq_table(pkg_wqs[pkg], device_wq_table); + + pr_info("pkg_global_%s_wqs[%d] has %u n_wqs %u max_wqs", + (comp ? "comp" : "decomp"), pkg, pkg_wqs[pkg]->n_wqs, pkg_wqs[pkg]->max_wqs); + + if (++cur_iaa == atomic_read(&nr_iaa_per_package)) { + if (++pkg == nr_packages) + break; + cur_iaa = 0; + if (atomic_read(&nr_iaa) == 1) + goto one_iaa_special_case; + } + } + + return true; +} + +static void create_cpu_wq_table(int cpu, struct wq_table_entry *wq_table, bool comp) +{ + struct wq_table_entry *entry = comp ? + per_cpu_ptr(cpu_comp_wqs, cpu) : + per_cpu_ptr(cpu_decomp_wqs, cpu); + + if (!iaa_crypto_enabled || !atomic_read(&iaa_device_registration_done)) { + mutex_lock(&first_wq_found_lock); + + BUG_ON(!first_wq_found && !wq_table->n_wqs); + + if (!first_wq_found) + first_wq_found = wq_table->wqs[0]; + + mutex_unlock(&first_wq_found_lock); + + entry->wqs = &first_wq_found; + entry->max_wqs = 1; + entry->n_wqs = 1; + entry->cur_wq = 0; + pr_info("%s: cpu %d: added %u first_wq_found for %s wqs up to wq %d.%d\n", __func__, + cpu, entry->n_wqs, comp ? "comp":"decomp", + entry->wqs[entry->n_wqs - 1]->idxd->id, + entry->wqs[entry->n_wqs - 1]->id); return; + } + + entry->wqs = wq_table->wqs; + entry->max_wqs = wq_table->max_wqs; + entry->n_wqs = wq_table->n_wqs; + entry->cur_wq = 0; + + if (entry->n_wqs) + pr_info("%s: cpu %d: added %u iaa %s wqs up to wq %d.%d: entry->max_wqs = %u\n", __func__, + cpu, entry->n_wqs, comp ? "comp":"decomp", + entry->wqs[entry->n_wqs - 1]->idxd->id, entry->wqs[entry->n_wqs - 1]->id, + entry->max_wqs); +} + +static void set_cpu_wq_table_start_wq(int cpu, bool comp) +{ + struct wq_table_entry *entry = comp ? 
+ per_cpu_ptr(cpu_comp_wqs, cpu) : + per_cpu_ptr(cpu_decomp_wqs, cpu); + unsigned int num_pkg_iaa = atomic_read(&nr_iaa_per_package); - entry->wqs[entry->n_wqs++] = wq; + int start_wq = (entry->n_wqs / num_pkg_iaa) * (cpu_to_iaa(cpu) % num_pkg_iaa); - pr_debug("%s: added iaa wq %d.%d to idx %d of cpu %d\n", __func__, - entry->wqs[entry->n_wqs - 1]->idxd->id, - entry->wqs[entry->n_wqs - 1]->id, entry->n_wqs - 1, cpu); + if ((start_wq >= 0) && (start_wq < entry->n_wqs)) + entry->cur_wq = start_wq; } -static int wq_table_add_wqs(int iaa, int cpu) +static void create_cpu_wq_table_from_pkg_wqs(bool comp) +{ + int cpu; + + /* + * All CPU on the same package share the same "package global" + * [de]comp_wqs. + */ + for (cpu = 0; cpu < nr_cpus; cpu += nr_cpus_per_package) { + int package_id = topology_logical_package_id(cpu); + struct wq_table_entry *pkg_wq_table = comp ? + ((pkg_global_comp_wqs[package_id]->n_wqs > 0) ? + pkg_global_comp_wqs[package_id] : pkg_global_decomp_wqs[package_id]) + : pkg_global_decomp_wqs[package_id]; + int pkg_cpu; + + for (pkg_cpu = cpu; pkg_cpu < cpu + nr_cpus_per_package; ++pkg_cpu) { + /* Initialize decomp/comp wq_table for CPU. */ + create_cpu_wq_table(pkg_cpu, pkg_wq_table, comp); + /* Stagger the starting WQ in the package WQ table, for each CPU. */ + set_cpu_wq_table_start_wq(pkg_cpu, comp); + } + } +} + +static int add_mapped_device_wq_table_for_cpu(int iaa, int cpu, bool comp) { struct iaa_device *iaa_device, *found_device = NULL; - int ret = 0, cur_iaa = 0, n_wqs_added = 0; - struct idxd_device *idxd; - struct iaa_wq *iaa_wq; - struct pci_dev *pdev; - struct device *dev; + struct wq_table_entry *device_wq_table; + int ret = 0, cur_iaa = 0; list_for_each_entry(iaa_device, &iaa_devices, list) { - idxd = iaa_device->idxd; - pdev = idxd->pdev; - dev = &pdev->dev; - if (cur_iaa != iaa) { cur_iaa++; continue; } found_device = iaa_device; - dev_dbg(dev, "getting wq from iaa_device %d, cur_iaa %d\n", + dev_dbg(&found_device->idxd->pdev->dev, + "getting wq from iaa_device %d, cur_iaa %d\n", found_device->idxd->id, cur_iaa); break; } @@ -861,100 +1235,176 @@ static int wq_table_add_wqs(int iaa, int cpu) } cur_iaa = 0; - idxd = found_device->idxd; - pdev = idxd->pdev; - dev = &pdev->dev; - dev_dbg(dev, "getting wq from only iaa_device %d, cur_iaa %d\n", + dev_dbg(&found_device->idxd->pdev->dev, + "getting wq from only iaa_device %d, cur_iaa %d\n", found_device->idxd->id, cur_iaa); } - list_for_each_entry(iaa_wq, &found_device->wqs, list) { - wq_table_add(cpu, iaa_wq->wq); - pr_debug("rebalance: added wq for cpu=%d: iaa wq %d.%d\n", - cpu, iaa_wq->wq->idxd->id, iaa_wq->wq->id); - n_wqs_added++; - } + device_wq_table = comp ? + ((found_device->comp_wq_table->n_wqs > 0) ? + found_device->comp_wq_table : found_device->generic_wq_table) : + found_device->generic_wq_table; + + create_cpu_wq_table(cpu, device_wq_table, comp); - if (!n_wqs_added) { - pr_debug("couldn't find any iaa wqs!\n"); - ret = -EINVAL; - goto out; - } out: return ret; } -/* - * Rebalance the wq table so that given a cpu, it's easy to find the - * closest IAA instance. The idea is to try to choose the most - * appropriate IAA instance for a caller and spread available - * workqueues around to clients. 
- */ -static void rebalance_wq_table(void) +static void create_cpu_wq_table_from_mapped_device(bool comp) { - const struct cpumask *node_cpus; - int node, cpu, iaa = -1; + int cpu, iaa; - if (nr_iaa == 0) - return; + for (cpu = 0; cpu < nr_cpus; cpu++) { + iaa = cpu_to_iaa(cpu); + pr_debug("rebalance: cpu=%d iaa=%d\n", cpu, iaa); - pr_debug("rebalance: nr_nodes=%d, nr_cpus %d, nr_iaa %d, cpus_per_iaa %d\n", - nr_nodes, nr_cpus, nr_iaa, cpus_per_iaa); + if (WARN_ON(iaa == -1)) { + pr_debug("rebalance (cpu_to_iaa(%d)) failed!\n", cpu); + return; + } - clear_wq_table(); + if (WARN_ON(add_mapped_device_wq_table_for_cpu(iaa, cpu, comp))) { + pr_debug("could not add any wqs of iaa %d to cpu %d!\n", iaa, cpu); + return; + } + } +} - if (nr_iaa == 1) { - for (cpu = 0; cpu < nr_cpus; cpu++) { - if (WARN_ON(wq_table_add_wqs(0, cpu))) { - pr_debug("could not add any wqs for iaa 0 to cpu %d!\n", cpu); - return; - } +static int map_iaa_device_wqs(struct iaa_device *iaa_device) +{ + struct wq_table_entry *generic, *for_comps; + int ret = 0, n_wqs_added = 0; + struct iaa_wq *iaa_wq; + + generic = iaa_device->generic_wq_table; + for_comps = iaa_device->comp_wq_table; + + list_for_each_entry(iaa_wq, &iaa_device->wqs, list) { + if (iaa_wq->mapped && ++n_wqs_added) + continue; + + pr_debug("iaa_device %px: processing wq %d.%d\n", iaa_device, iaa_device->idxd->id, iaa_wq->wq->id); + + if ((!n_wqs_added || ((n_wqs_added + g_comp_wqs_per_iaa) < iaa_device->n_wq)) && + (generic->n_wqs < generic->max_wqs)) { + + generic->wqs[generic->n_wqs++] = iaa_wq->wq; + pr_debug("iaa_device %px: added decomp wq %d.%d\n", iaa_device, iaa_device->idxd->id, iaa_wq->wq->id); + } else { + if (WARN_ON(for_comps->n_wqs == for_comps->max_wqs)) + break; + + for_comps->wqs[for_comps->n_wqs++] = iaa_wq->wq; + pr_debug("iaa_device %px: added comp wq %d.%d\n", iaa_device, iaa_device->idxd->id, iaa_wq->wq->id); } - return; + iaa_wq->mapped = true; + ++n_wqs_added; } - for_each_node_with_cpus(node) { - node_cpus = cpumask_of_node(node); + if (!n_wqs_added && !iaa_device->n_wq) { + pr_debug("iaa_device %d: couldn't find any iaa wqs!\n", iaa_device->idxd->id); + ret = -EINVAL; + } - for (cpu = 0; cpu < cpumask_weight(node_cpus); cpu++) { - int node_cpu = cpumask_nth(cpu, node_cpus); + return ret; +} - if (WARN_ON(node_cpu >= nr_cpu_ids)) { - pr_debug("node_cpu %d doesn't exist!\n", node_cpu); - return; - } +static void map_iaa_devices(void) +{ + struct iaa_device *iaa_device; - if ((cpu % cpus_per_iaa) == 0) - iaa++; + list_for_each_entry(iaa_device, &iaa_devices, list) { + BUG_ON(map_iaa_device_wqs(iaa_device)); + } +} - if (WARN_ON(wq_table_add_wqs(iaa, node_cpu))) { - pr_debug("could not add any wqs for iaa %d to cpu %d!\n", iaa, cpu); - return; - } - } +/* + * Rebalance the per-cpu wq table based on available IAA devices/WQs. + * Three driver parameters control how this algorithm works: + * + * - g_comp_wqs_per_iaa: + * + * If multiple WQs are configured for a given device, this setting determines + * the number of WQs to be used as "compress only" WQs. The remaining WQs will + * be used as "decompress only WQs". + * Note that the comp WQ can be the same as the decomp WQ, for e.g., if + * g_comp_wqs_per_iaa is 0 (regardless of the # of available WQs per device), or, + * if there is only 1 WQ configured for a device (regardless of + * g_comp_wqs_per_iaa). 
+ * + * - distribute_decomps, distribute_comps: + * + * If this is enabled, all [de]comp WQs found from the IAA devices on a + * package, will be aggregated into pkg_global_[de]comp_wqs, then assigned to + * each CPU on the package. + */ +static bool rebalance_wq_table(void) +{ + if (atomic_read(&nr_iaa) == 0) + return true; + + map_iaa_devices(); + + pr_info("rebalance: nr_packages=%d, nr_cpus %d, nr_iaa %d, nr_iaa_per_package %d, cpus_per_iaa %d\n", + nr_packages, nr_cpus, atomic_read(&nr_iaa), + atomic_read(&nr_iaa_per_package), atomic_read(&cpus_per_iaa)); + + if (iaa_distribute_decomps) { + /* Each CPU uses all IAA devices on package for decomps. */ + if (!reinit_pkg_global_wqs(false)) + return false; + create_cpu_wq_table_from_pkg_wqs(false); + } else { + /* + * Each CPU uses the decomp WQ on the mapped IAA device using + * a balanced mapping of cores to IAA. + */ + create_cpu_wq_table_from_mapped_device(false); + } + + if (iaa_distribute_comps) { + /* Each CPU uses all IAA devices on package for comps. */ + if (!reinit_pkg_global_wqs(true)) + return false; + create_cpu_wq_table_from_pkg_wqs(true); + } else { + /* + * Each CPU uses the comp WQ on the mapped IAA device using + * a balanced mapping of cores to IAA. + */ + create_cpu_wq_table_from_mapped_device(true); } + + pr_debug("Finished rebalance decomp/comp wqs."); + return true; } /*************************************************************** * Assign work-queues for driver ops using per-cpu wq_tables. ***************************************************************/ -static struct idxd_wq *wq_table_next_wq(int cpu) +static struct idxd_wq *decomp_wq_table_next_wq(int cpu) { - struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu); + struct wq_table_entry *entry = per_cpu_ptr(cpu_decomp_wqs, cpu); + struct idxd_wq *wq = entry->wqs[entry->cur_wq]; - if (++entry->cur_wq >= entry->n_wqs) + if (++entry->cur_wq == entry->n_wqs) entry->cur_wq = 0; - if (!entry->wqs[entry->cur_wq]) - return NULL; + return wq; +} - pr_debug("%s: returning wq at idx %d (iaa wq %d.%d) from cpu %d\n", __func__, - entry->cur_wq, entry->wqs[entry->cur_wq]->idxd->id, - entry->wqs[entry->cur_wq]->id, cpu); +static struct idxd_wq *comp_wq_table_next_wq(int cpu) +{ + struct wq_table_entry *entry = per_cpu_ptr(cpu_comp_wqs, cpu); + struct idxd_wq *wq = entry->wqs[entry->cur_wq]; + + if (++entry->cur_wq == entry->n_wqs) + entry->cur_wq = 0; - return entry->wqs[entry->cur_wq]; + return wq; } /************************************************* @@ -1527,7 +1977,7 @@ static int iaa_comp_acompress(struct acomp_req *req) } cpu = get_cpu(); - wq = wq_table_next_wq(cpu); + wq = comp_wq_table_next_wq(cpu); put_cpu(); if (!wq) { pr_debug("no wq configured for cpu=%d\n", cpu); @@ -1625,7 +2075,7 @@ static int iaa_comp_adecompress(struct acomp_req *req) } cpu = get_cpu(); - wq = wq_table_next_wq(cpu); + wq = decomp_wq_table_next_wq(cpu); put_cpu(); if (!wq) { pr_debug("no wq configured for cpu=%d\n", cpu); @@ -1728,17 +2178,20 @@ static int iaa_register_compression_device(void) ret = crypto_register_acomp(&iaa_acomp_fixed_deflate); if (ret) { + atomic_set(&iaa_device_registration_done, 0); pr_err("deflate algorithm acomp fixed registration failed (%d)\n", ret); goto out; } iaa_crypto_registered = true; + atomic_set(&iaa_device_registration_done, 1); out: return ret; } static int iaa_unregister_compression_device(void) { + atomic_set(&iaa_device_registration_done, 0); if (iaa_crypto_registered) crypto_unregister_acomp(&iaa_acomp_fixed_deflate); @@ -1760,10 +2213,13 @@ static 
int iaa_crypto_probe(struct idxd_dev *idxd_dev) if (data->type != IDXD_TYPE_IAX) return -ENODEV; + mutex_lock(&iaa_devices_lock); + mutex_lock(&wq->wq_lock); if (idxd_wq_get_private(wq)) { mutex_unlock(&wq->wq_lock); + mutex_unlock(&iaa_devices_lock); return -EBUSY; } @@ -1785,8 +2241,6 @@ static int iaa_crypto_probe(struct idxd_dev *idxd_dev) goto err; } - mutex_lock(&iaa_devices_lock); - if (list_empty(&iaa_devices)) { ret = alloc_wq_table(wq->idxd->max_wqs); if (ret) @@ -1798,7 +2252,10 @@ static int iaa_crypto_probe(struct idxd_dev *idxd_dev) if (ret) goto err_save; - rebalance_wq_table(); + if (!rebalance_wq_table()) { + dev_dbg(dev, "iaa_crypto_probe: IAA rebalancing device wq tables failed\n"); + goto err_register; + } if (first_wq) { iaa_crypto_enabled = true; @@ -1808,14 +2265,22 @@ static int iaa_crypto_probe(struct idxd_dev *idxd_dev) dev_dbg(dev, "IAA compression device registration failed\n"); goto err_register; } + + BUG_ON(!atomic_read(&iaa_device_registration_done)); + if (!rebalance_wq_table()) { + iaa_crypto_enabled = false; + dev_dbg(dev, "iaa_crypto_probe: Rerun after registration: IAA rebalancing device wq tables failed\n"); + goto err_register; + } + try_module_get(THIS_MODULE); pr_info("iaa_crypto now ENABLED\n"); } - mutex_unlock(&iaa_devices_lock); out: mutex_unlock(&wq->wq_lock); + mutex_unlock(&iaa_devices_lock); return ret; @@ -1824,9 +2289,8 @@ static int iaa_crypto_probe(struct idxd_dev *idxd_dev) free_iaa_wq(idxd_wq_get_private(wq)); err_save: if (first_wq) - free_wq_table(); + free_wq_tables(); err_alloc: - mutex_unlock(&iaa_devices_lock); idxd_drv_disable_wq(wq); err: wq->type = IDXD_WQT_NONE; @@ -1843,8 +2307,8 @@ static void iaa_crypto_remove(struct idxd_dev *idxd_dev) idxd_wq_quiesce(wq); - mutex_lock(&wq->wq_lock); mutex_lock(&iaa_devices_lock); + mutex_lock(&wq->wq_lock); remove_iaa_wq(wq); @@ -1870,18 +2334,26 @@ static void iaa_crypto_remove(struct idxd_dev *idxd_dev) } idxd_drv_disable_wq(wq); - rebalance_wq_table(); - if (nr_iaa == 0) { + if (!rebalance_wq_table()) { + pr_debug("iaa_crypto_remove: IAA rebalancing device wq tables failed\n"); + iaa_crypto_enabled = false; + } + + if (atomic_read(&nr_iaa) == 0) { iaa_crypto_enabled = false; - free_wq_table(); + atomic_set(&iaa_device_registration_done, 0); + pkg_global_wqs_dealloc(); + free_wq_tables(); + BUG_ON(!list_empty(&iaa_devices)); + INIT_LIST_HEAD(&iaa_devices); module_put(THIS_MODULE); pr_info("iaa_crypto now DISABLED\n"); } out: - mutex_unlock(&iaa_devices_lock); mutex_unlock(&wq->wq_lock); + mutex_unlock(&iaa_devices_lock); } static enum idxd_dev_type dev_types[] = { @@ -1900,16 +2372,11 @@ static struct idxd_device_driver iaa_crypto_driver = { static int __init iaa_crypto_init_module(void) { int ret = 0; - int node; + INIT_LIST_HEAD(&iaa_devices); nr_cpus = num_possible_cpus(); - for_each_node_with_cpus(node) - nr_nodes++; - if (!nr_nodes) { - pr_err("IAA couldn't find any nodes with cpus\n"); - return -ENODEV; - } - nr_cpus_per_node = nr_cpus / nr_nodes; + nr_cpus_per_package = topology_num_cores_per_package(); + nr_packages = topology_max_packages(); ret = iaa_aecs_init_fixed(); if (ret < 0) { @@ -1923,6 +2390,27 @@ static int __init iaa_crypto_init_module(void) goto err_driver_reg; } + ret = driver_create_file(&iaa_crypto_driver.drv, + &driver_attr_g_comp_wqs_per_iaa); + if (ret) { + pr_debug("IAA g_comp_wqs_per_iaa attr creation failed\n"); + goto err_g_comp_wqs_per_iaa_attr_create; + } + + ret = driver_create_file(&iaa_crypto_driver.drv, + &driver_attr_distribute_decomps); + if 
(ret) { + pr_debug("IAA distribute_decomps attr creation failed\n"); + goto err_distribute_decomps_attr_create; + } + + ret = driver_create_file(&iaa_crypto_driver.drv, + &driver_attr_distribute_comps); + if (ret) { + pr_debug("IAA distribute_comps attr creation failed\n"); + goto err_distribute_comps_attr_create; + } + ret = driver_create_file(&iaa_crypto_driver.drv, &driver_attr_verify_compress); if (ret) { @@ -1948,6 +2436,15 @@ static int __init iaa_crypto_init_module(void) driver_remove_file(&iaa_crypto_driver.drv, &driver_attr_verify_compress); err_verify_attr_create: + driver_remove_file(&iaa_crypto_driver.drv, + &driver_attr_distribute_comps); +err_distribute_comps_attr_create: + driver_remove_file(&iaa_crypto_driver.drv, + &driver_attr_distribute_decomps); +err_distribute_decomps_attr_create: + driver_remove_file(&iaa_crypto_driver.drv, + &driver_attr_g_comp_wqs_per_iaa); +err_g_comp_wqs_per_iaa_attr_create: idxd_driver_unregister(&iaa_crypto_driver); err_driver_reg: iaa_aecs_cleanup_fixed(); @@ -1966,6 +2463,12 @@ static void __exit iaa_crypto_cleanup_module(void) &driver_attr_sync_mode); driver_remove_file(&iaa_crypto_driver.drv, &driver_attr_verify_compress); + driver_remove_file(&iaa_crypto_driver.drv, + &driver_attr_distribute_comps); + driver_remove_file(&iaa_crypto_driver.drv, + &driver_attr_distribute_decomps); + driver_remove_file(&iaa_crypto_driver.drv, + &driver_attr_g_comp_wqs_per_iaa); idxd_driver_unregister(&iaa_crypto_driver); iaa_aecs_cleanup_fixed(); From patchwork Wed Apr 30 20:52:53 2025 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Sridhar, Kanchana P" X-Patchwork-Id: 886548 Received: from mgamail.intel.com (mgamail.intel.com [198.175.65.15]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 7E3852C2AA1; Wed, 30 Apr 2025 20:53:13 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=198.175.65.15 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1746046395; cv=none; b=gpFdDmvAEKhaRAaeBoZazGQntS6suHP4X1cx4HWJxTO4Z5Sb4wufAu6PdkB7C8hjtx4Ak49k5sKImHJRK444nXQBW3XI19QBPhN3G7P3fXO7UltKSmPcRt6LAtFQj59PW38etMtkw/5p79R8tlKBOhBG2XlQjeBg8M+ycCLggvg= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1746046395; c=relaxed/simple; bh=WKyBT52U1L3mEjci9iHKwXn3qQ9NOODMueb2KPC9q4M=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=mod9sQzW8twpoarWDWUz+CHekWEZTcunC4sh9arhfQzirxDGfX9Gqg1OjoomYkkV+daF6trzIfEYnPfUx+u2k7Vhl3OH4XTkBmE29GXz+M1bLtdTKQXsRWPG/pRqLPIzYfVbcObh9zkK885BfdQE+lf87eIZQhvlt6itpOgtFnk= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=intel.com; spf=pass smtp.mailfrom=intel.com; dkim=pass (2048-bit key) header.d=intel.com header.i=@intel.com header.b=cj0McV2+; arc=none smtp.client-ip=198.175.65.15 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=intel.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=intel.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=intel.com header.i=@intel.com header.b="cj0McV2+" DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=intel.com; i=@intel.com; q=dns/txt; s=Intel; t=1746046394; x=1777582394; h=from:to:cc:subject:date:message-id:in-reply-to: 
references:mime-version:content-transfer-encoding; bh=WKyBT52U1L3mEjci9iHKwXn3qQ9NOODMueb2KPC9q4M=; b=cj0McV2+9cpQdrY63VcC+t6wsOM4F2jo2zpLvprIujNIm+pnPaI7USXI gbgdWq/VuMuy6JYifTBIXouGy0wJ1gc+8F7QkPqShx3yekDWKPq3k4/tf 5n8Glkicd/MRcvRHcyTyfE6lT2n3mz/GbA+SygHksVlXbE0qw6WGPwhCd SAY3Zo5nPqG5/ngZPZVu/5qlKWfGk9o1uA3YLB88slwXgZjQIgFfGIHHW ekHTVlSeU2dN+jgTcW64qto+mkmCiQRUv7U59GJacha55qJ2HE7JZOEqc E0Hv8d/XjsOwrWISQqA+jb835dKcq9dPT4VmwbErHhiZpxCzGtw2MC78F w==; X-CSE-ConnectionGUID: TFgtEG7zREq/rHFil0yGnw== X-CSE-MsgGUID: ezCmCxaMQVqsA6hOg4aPTA== X-IronPort-AV: E=McAfee;i="6700,10204,11419"; a="51388571" X-IronPort-AV: E=Sophos;i="6.15,252,1739865600"; d="scan'208";a="51388571" Received: from fmviesa003.fm.intel.com ([10.60.135.143]) by orvoesa107.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 30 Apr 2025 13:53:13 -0700 X-CSE-ConnectionGUID: zRmnVO5iRN+pmqNzjQ2kwg== X-CSE-MsgGUID: C+FBmurUTvCXJDo87pycMQ== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="6.15,252,1739865600"; d="scan'208";a="138248904" Received: from jf5300-b11a338t.jf.intel.com ([10.242.51.115]) by fmviesa003.fm.intel.com with ESMTP; 30 Apr 2025 13:53:12 -0700 From: Kanchana P Sridhar To: linux-kernel@vger.kernel.org, linux-mm@kvack.org, hannes@cmpxchg.org, yosry.ahmed@linux.dev, nphamcs@gmail.com, chengming.zhou@linux.dev, usamaarif642@gmail.com, ryan.roberts@arm.com, 21cnbao@gmail.com, ying.huang@linux.alibaba.com, akpm@linux-foundation.org, linux-crypto@vger.kernel.org, herbert@gondor.apana.org.au, davem@davemloft.net, clabbe@baylibre.com, ardb@kernel.org, ebiggers@google.com, surenb@google.com, kristen.c.accardi@intel.com Cc: wajdi.k.feghali@intel.com, vinodh.gopal@intel.com, kanchana.p.sridhar@intel.com Subject: [PATCH v9 07/19] crypto: iaa - Define and use req->data instead of req->base.data. Date: Wed, 30 Apr 2025 13:52:53 -0700 Message-Id: <20250430205305.22844-8-kanchana.p.sridhar@intel.com> X-Mailer: git-send-email 2.27.0 In-Reply-To: <20250430205305.22844-1-kanchana.p.sridhar@intel.com> References: <20250430205305.22844-1-kanchana.p.sridhar@intel.com> Precedence: bulk X-Mailing-List: linux-crypto@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Since req->base.data is for the user and not for the driver, we define a "void *data" in struct acomp_req for use by driver code. At present, iaa_crypto saves the "struct idxd_desc *idxd_desc" that is allocated in iaa_[de]compress(), in req->data. When batching is introduced in subsequent patches, we will need to support an async "submit-poll" mechanism to achieve parallelism using IAA hardware. To accomplish this, we will submit the descriptors for each request in the batch in iaa_[de]compress(), and return -EINPROGRESS. The polling function will retrieve the descriptor from req->data to check the request's completion status. 
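To make the intended flow concrete, here is a minimal sketch of the submit-poll hand-off via req->data (illustrative only: example_submit()/example_poll() are hypothetical helpers, not functions added by this patch; the real submit and poll paths arrive in later patches in this series):

static int example_submit(struct acomp_req *req, struct idxd_wq *wq,
                          struct idxd_desc *idxd_desc)
{
        int ret;

        /* Driver-private hand-off to the poller; req->base.data stays untouched. */
        req->data = idxd_desc;

        ret = idxd_submit_desc(wq, idxd_desc);
        if (ret)
                return ret;

        /* Tell the caller to poll for completion instead of sleeping. */
        return -EINPROGRESS;
}

static int example_poll(struct acomp_req *req, struct device *dev)
{
        struct idxd_desc *idxd_desc = req->data;
        int ret;

        if (!idxd_desc)
                return -EAGAIN;

        /* check_completion() returns -EAGAIN while the job is still in flight. */
        ret = check_completion(dev, idxd_desc->iax_completion, true, true);
        if (ret == -EAGAIN)
                return ret;

        if (!ret)
                req->dlen = idxd_desc->iax_completion->output_size;

        idxd_free_desc(idxd_desc->wq, idxd_desc);
        return ret;
}

The actual poll path added later in this series additionally updates stats, optionally verifies compressed output, and unmaps the DMA scatterlists before freeing the descriptor.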
Signed-off-by: Kanchana P Sridhar --- drivers/crypto/intel/iaa/iaa_crypto_main.c | 12 +++++++----- include/crypto/acompress.h | 2 ++ 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/crypto/intel/iaa/iaa_crypto_main.c b/drivers/crypto/intel/iaa/iaa_crypto_main.c index 2f2dc6987cc6..0b821b8b4264 100644 --- a/drivers/crypto/intel/iaa/iaa_crypto_main.c +++ b/drivers/crypto/intel/iaa/iaa_crypto_main.c @@ -1718,7 +1718,7 @@ static void iaa_desc_complete(struct idxd_desc *idxd_desc, iaa_wq_put(idxd_desc->wq); } -static int iaa_compress(struct crypto_tfm *tfm, struct acomp_req *req, +static int iaa_compress(struct crypto_tfm *tfm, struct acomp_req *req, struct idxd_wq *wq, dma_addr_t src_addr, unsigned int slen, dma_addr_t dst_addr, unsigned int *dlen, @@ -1778,8 +1778,9 @@ static int iaa_compress(struct crypto_tfm *tfm, struct acomp_req *req, " src_addr %llx, dst_addr %llx\n", __func__, active_compression_mode->name, src_addr, dst_addr); - } else if (ctx->async_mode) - req->base.data = idxd_desc; + } else if (ctx->async_mode) { + req->data = idxd_desc; + } dev_dbg(dev, "%s: compression mode %s," " desc->src1_addr %llx, desc->src1_size %d," @@ -1889,8 +1890,9 @@ static int iaa_decompress(struct crypto_tfm *tfm, struct acomp_req *req, " src_addr %llx, dst_addr %llx\n", __func__, active_compression_mode->name, src_addr, dst_addr); - } else if (ctx->async_mode && !disable_async) - req->base.data = idxd_desc; + } else if (ctx->async_mode && !disable_async) { + req->data = idxd_desc; + } dev_dbg(dev, "%s: decompression mode %s," " desc->src1_addr %llx, desc->src1_size %d," diff --git a/include/crypto/acompress.h b/include/crypto/acompress.h index 267d557daeb1..01389fd7055f 100644 --- a/include/crypto/acompress.h +++ b/include/crypto/acompress.h @@ -81,6 +81,7 @@ struct acomp_req_chain { * @doff: Destination folio offset * @slen: Size of the input buffer * @dlen: Size of the output buffer and number of bytes produced + * @data: Private API code data, do not use * @chain: Private API code data, do not use * @__ctx: Start of private context data */ @@ -101,6 +102,7 @@ struct acomp_req { unsigned int slen; unsigned int dlen; + void *data; struct acomp_req_chain chain; void *__ctx[] CRYPTO_MINALIGN_ATTR; From patchwork Wed Apr 30 20:52:54 2025 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Sridhar, Kanchana P" X-Patchwork-Id: 886167 Received: from mgamail.intel.com (mgamail.intel.com [198.175.65.15]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 31AF82D0AB3; Wed, 30 Apr 2025 20:53:15 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=198.175.65.15 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1746046397; cv=none; b=rAEMJFEOgsH400HO0b87vzKFEE38bdNxgqFvufmDbFVwJugzP+yizkeB58bsKuZpogAcpdCfJWGiTFDz0xUKJzN0QukGJTeS4QitEikuVZx/48PdbgCH/V4k0CcfJ9XrAEqh057ijJFiVpi+gO/Wpev1ZXOBbqlckViO0sRSeqk= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1746046397; c=relaxed/simple; bh=+XwXzM2JtnnapecA98UfJihsUQ5zhZpmnmCn8je5v3o=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=caFuIt+BUrT16MOU9naiwMt204yZntlZizMVlM14IxYdfHD1yutoLjefhOFIrG0BmMKHTwlzK+KNJEiguIzzzB0QYUSpVLsckGfJrvRyBdBi+ae/JnvzvbL/g5MwzwAR7v5ND5Uh/xeSCfIImwWubg+nxVqdx/hUl3yUNHXsxsI= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; 
dmarc=pass (p=none dis=none) header.from=intel.com; spf=pass smtp.mailfrom=intel.com; dkim=pass (2048-bit key) header.d=intel.com header.i=@intel.com header.b=BfZhTKfR; arc=none smtp.client-ip=198.175.65.15 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=intel.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=intel.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=intel.com header.i=@intel.com header.b="BfZhTKfR" DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=intel.com; i=@intel.com; q=dns/txt; s=Intel; t=1746046396; x=1777582396; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=+XwXzM2JtnnapecA98UfJihsUQ5zhZpmnmCn8je5v3o=; b=BfZhTKfROAVv6rV18O/mFZ76O2nWxLlbbWlV80BV90JyRi6QWFP7qlHk 9fiuLq3kRhaF5F/FrlLpejxv08uTsI8k/8uWKRUoBIhSAqWEWAXnoTFdW 7/fC5g4FzBq3g/7m8Ml2Mu8X+w3vtEfEKWZQ+/NTxT1KxdM43eBb6Mr2+ YW2HuYb1l+Ll1R2quUHc0YC4iLPp3RlbyMyErWVwG2IrN+CzWeogQvmg7 VfYTUQRMRliJAh6sYGZxn1y+qL3foXlqG+BokiG/We0A+ht4NJ8Ifyq2n N/WGB2PTXkg9rAJrWS+OdqT0ak2iNIev8M30lxYJKZHTz8wDa8w0e+0bW w==; X-CSE-ConnectionGUID: ppg8/AVDS0eEEDaNKBNy2Q== X-CSE-MsgGUID: 60rrxhygSl68oA/y2W5Mjw== X-IronPort-AV: E=McAfee;i="6700,10204,11419"; a="51388588" X-IronPort-AV: E=Sophos;i="6.15,252,1739865600"; d="scan'208";a="51388588" Received: from fmviesa003.fm.intel.com ([10.60.135.143]) by orvoesa107.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 30 Apr 2025 13:53:14 -0700 X-CSE-ConnectionGUID: 870q6KrASIK9pvCl5N4/vQ== X-CSE-MsgGUID: 29LiYuNiSZWPJLUImXw8+A== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="6.15,252,1739865600"; d="scan'208";a="138248909" Received: from jf5300-b11a338t.jf.intel.com ([10.242.51.115]) by fmviesa003.fm.intel.com with ESMTP; 30 Apr 2025 13:53:13 -0700 From: Kanchana P Sridhar To: linux-kernel@vger.kernel.org, linux-mm@kvack.org, hannes@cmpxchg.org, yosry.ahmed@linux.dev, nphamcs@gmail.com, chengming.zhou@linux.dev, usamaarif642@gmail.com, ryan.roberts@arm.com, 21cnbao@gmail.com, ying.huang@linux.alibaba.com, akpm@linux-foundation.org, linux-crypto@vger.kernel.org, herbert@gondor.apana.org.au, davem@davemloft.net, clabbe@baylibre.com, ardb@kernel.org, ebiggers@google.com, surenb@google.com, kristen.c.accardi@intel.com Cc: wajdi.k.feghali@intel.com, vinodh.gopal@intel.com, kanchana.p.sridhar@intel.com Subject: [PATCH v9 08/19] crypto: iaa - Descriptor allocation timeouts with mitigations in iaa_crypto. Date: Wed, 30 Apr 2025 13:52:54 -0700 Message-Id: <20250430205305.22844-9-kanchana.p.sridhar@intel.com> X-Mailer: git-send-email 2.27.0 In-Reply-To: <20250430205305.22844-1-kanchana.p.sridhar@intel.com> References: <20250430205305.22844-1-kanchana.p.sridhar@intel.com> Precedence: bulk X-Mailing-List: linux-crypto@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 This patch modifies the descriptor allocation from blocking to non-blocking with bounded retries or "timeouts". This is necessary to prevent task blocked errors in high contention scenarios, for instance, when the platform has only 1 IAA device enabled. With 1 IAA device enabled per package on a dual-package Sapphire Rapids with 56 cores/package, there are 112 logical cores mapped to this single IAA device. In this scenario, the task blocked errors can occur because idxd_alloc_desc() is called with IDXD_OP_BLOCK. With batching, multiple descriptors will need to be allocated per batch. 
Any process that is able to do so, can cause contention for allocating descriptors for all other processes that share the use of the same sbitmap_queue. Under IDXD_OP_BLOCK, this can cause compress/decompress jobs to stall in stress test scenarios (e.g. zswap_store() of 2M folios). In order to make the iaa_crypto driver be more fail-safe, this commit implements the following: 1) Change compress/decompress descriptor allocations to be non-blocking with retries ("timeouts"). 2) Return compress error to zswap if descriptor allocation with timeouts fails during compress ops. zswap_store() will return an error and the folio gets stored in the backing swap device. 3) Fallback to software decompress if descriptor allocation with timeouts fails during decompress ops. With these fixes, there are no task blocked errors seen under stress testing conditions, and no performance degradation observed. This patch also simplifies the success/error return paths in iaa_[de]compress() and iaa_compress_verify(). Signed-off-by: Kanchana P Sridhar --- drivers/crypto/intel/iaa/iaa_crypto.h | 3 + drivers/crypto/intel/iaa/iaa_crypto_main.c | 84 ++++++++++++---------- 2 files changed, 48 insertions(+), 39 deletions(-) diff --git a/drivers/crypto/intel/iaa/iaa_crypto.h b/drivers/crypto/intel/iaa/iaa_crypto.h index 549ac98a9366..b4a94da2c315 100644 --- a/drivers/crypto/intel/iaa/iaa_crypto.h +++ b/drivers/crypto/intel/iaa/iaa_crypto.h @@ -21,6 +21,9 @@ #define IAA_COMPLETION_TIMEOUT 1000000 +#define IAA_ALLOC_DESC_COMP_TIMEOUT 1000 +#define IAA_ALLOC_DESC_DECOMP_TIMEOUT 500 + #define IAA_ANALYTICS_ERROR 0x0a #define IAA_ERROR_DECOMP_BUF_OVERFLOW 0x0b #define IAA_ERROR_COMP_BUF_OVERFLOW 0x19 diff --git a/drivers/crypto/intel/iaa/iaa_crypto_main.c b/drivers/crypto/intel/iaa/iaa_crypto_main.c index 0b821b8b4264..7dab340c4a34 100644 --- a/drivers/crypto/intel/iaa/iaa_crypto_main.c +++ b/drivers/crypto/intel/iaa/iaa_crypto_main.c @@ -1416,6 +1416,7 @@ static int deflate_generic_decompress(struct acomp_req *req) ACOMP_REQUEST_ON_STACK(fbreq, crypto_acomp_reqtfm(req)); int ret; + req->dlen = PAGE_SIZE; acomp_request_set_callback(fbreq, 0, NULL, NULL); acomp_request_set_params(fbreq, req->src, req->dst, req->slen, req->dlen); @@ -1536,7 +1537,8 @@ static int iaa_compress_verify(struct crypto_tfm *tfm, struct acomp_req *req, struct iaa_device_compression_mode *active_compression_mode; struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm); struct iaa_device *iaa_device; - struct idxd_desc *idxd_desc; + struct idxd_desc *idxd_desc = ERR_PTR(-EAGAIN); + u16 alloc_desc_retries = 0; struct iax_hw_desc *desc; struct idxd_device *idxd; struct iaa_wq *iaa_wq; @@ -1552,7 +1554,11 @@ static int iaa_compress_verify(struct crypto_tfm *tfm, struct acomp_req *req, active_compression_mode = get_iaa_device_compression_mode(iaa_device, ctx->mode); - idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK); + while ((idxd_desc == ERR_PTR(-EAGAIN)) && (alloc_desc_retries++ < IAA_ALLOC_DESC_DECOMP_TIMEOUT)) { + idxd_desc = idxd_alloc_desc(wq, IDXD_OP_NONBLOCK); + cpu_relax(); + } + if (IS_ERR(idxd_desc)) { dev_dbg(dev, "idxd descriptor allocation failed\n"); dev_dbg(dev, "iaa compress failed: ret=%ld\n", @@ -1604,14 +1610,10 @@ static int iaa_compress_verify(struct crypto_tfm *tfm, struct acomp_req *req, goto err; } - idxd_free_desc(wq, idxd_desc); -out: - return ret; err: idxd_free_desc(wq, idxd_desc); - dev_dbg(dev, "iaa compress failed: ret=%d\n", ret); - goto out; + return ret; } static void iaa_desc_complete(struct idxd_desc *idxd_desc, @@ -1727,7 
+1729,8 @@ static int iaa_compress(struct crypto_tfm *tfm, struct acomp_req *req, struct iaa_device_compression_mode *active_compression_mode; struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm); struct iaa_device *iaa_device; - struct idxd_desc *idxd_desc; + struct idxd_desc *idxd_desc = ERR_PTR(-EAGAIN); + u16 alloc_desc_retries = 0; struct iax_hw_desc *desc; struct idxd_device *idxd; struct iaa_wq *iaa_wq; @@ -1743,7 +1746,11 @@ static int iaa_compress(struct crypto_tfm *tfm, struct acomp_req *req, active_compression_mode = get_iaa_device_compression_mode(iaa_device, ctx->mode); - idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK); + while ((idxd_desc == ERR_PTR(-EAGAIN)) && (alloc_desc_retries++ < IAA_ALLOC_DESC_COMP_TIMEOUT)) { + idxd_desc = idxd_alloc_desc(wq, IDXD_OP_NONBLOCK); + cpu_relax(); + } + if (IS_ERR(idxd_desc)) { dev_dbg(dev, "idxd descriptor allocation failed\n"); dev_dbg(dev, "iaa compress failed: ret=%ld\n", PTR_ERR(idxd_desc)); @@ -1820,15 +1827,10 @@ static int iaa_compress(struct crypto_tfm *tfm, struct acomp_req *req, *compression_crc = idxd_desc->iax_completion->crc; - if (!ctx->async_mode) - idxd_free_desc(wq, idxd_desc); -out: - return ret; err: idxd_free_desc(wq, idxd_desc); - dev_dbg(dev, "iaa compress failed: ret=%d\n", ret); - - goto out; +out: + return ret; } static int iaa_decompress(struct crypto_tfm *tfm, struct acomp_req *req, @@ -1840,7 +1842,8 @@ static int iaa_decompress(struct crypto_tfm *tfm, struct acomp_req *req, struct iaa_device_compression_mode *active_compression_mode; struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm); struct iaa_device *iaa_device; - struct idxd_desc *idxd_desc; + struct idxd_desc *idxd_desc = ERR_PTR(-EAGAIN); + u16 alloc_desc_retries = 0; struct iax_hw_desc *desc; struct idxd_device *idxd; struct iaa_wq *iaa_wq; @@ -1856,12 +1859,18 @@ static int iaa_decompress(struct crypto_tfm *tfm, struct acomp_req *req, active_compression_mode = get_iaa_device_compression_mode(iaa_device, ctx->mode); - idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK); + while ((idxd_desc == ERR_PTR(-EAGAIN)) && (alloc_desc_retries++ < IAA_ALLOC_DESC_DECOMP_TIMEOUT)) { + idxd_desc = idxd_alloc_desc(wq, IDXD_OP_NONBLOCK); + cpu_relax(); + } + if (IS_ERR(idxd_desc)) { dev_dbg(dev, "idxd descriptor allocation failed\n"); dev_dbg(dev, "iaa decompress failed: ret=%ld\n", PTR_ERR(idxd_desc)); - return PTR_ERR(idxd_desc); + ret = PTR_ERR(idxd_desc); + idxd_desc = NULL; + goto fallback_software_decomp; } desc = idxd_desc->iax_hw; @@ -1905,7 +1914,7 @@ static int iaa_decompress(struct crypto_tfm *tfm, struct acomp_req *req, ret = idxd_submit_desc(wq, idxd_desc); if (ret) { dev_dbg(dev, "submit_desc failed ret=%d\n", ret); - goto err; + goto fallback_software_decomp; } /* Update stats */ @@ -1919,40 +1928,37 @@ static int iaa_decompress(struct crypto_tfm *tfm, struct acomp_req *req, } ret = check_completion(dev, idxd_desc->iax_completion, false, false); + +fallback_software_decomp: if (ret) { - dev_dbg(dev, "%s: check_completion failed ret=%d\n", __func__, ret); - if (idxd_desc->iax_completion->status == IAA_ANALYTICS_ERROR) { + dev_dbg(dev, "%s: desc allocation/submission/check_completion failed ret=%d\n", __func__, ret); + if (idxd_desc && idxd_desc->iax_completion->status == IAA_ANALYTICS_ERROR) { pr_warn("%s: falling back to deflate-generic decompress, " "analytics error code %x\n", __func__, idxd_desc->iax_completion->error_code); - ret = deflate_generic_decompress(req); - if (ret) { - dev_dbg(dev, "%s: deflate-generic failed ret=%d\n", - __func__, ret); - 
goto err; - } - } else { + } + + ret = deflate_generic_decompress(req); + + if (ret) { + pr_err("%s: iaa decompress failed: deflate-generic fallback to software decompress error ret=%d\n", __func__, ret); goto err; } } else { req->dlen = idxd_desc->iax_completion->output_size; + + /* Update stats */ + update_total_decomp_bytes_in(slen); + update_wq_decomp_bytes(wq, slen); } *dlen = req->dlen; - if (!ctx->async_mode || disable_async) +err: + if (idxd_desc) idxd_free_desc(wq, idxd_desc); - - /* Update stats */ - update_total_decomp_bytes_in(slen); - update_wq_decomp_bytes(wq, slen); out: return ret; -err: - idxd_free_desc(wq, idxd_desc); - dev_dbg(dev, "iaa decompress failed: ret=%d\n", ret); - - goto out; } static int iaa_comp_acompress(struct acomp_req *req) From patchwork Wed Apr 30 20:52:55 2025 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Sridhar, Kanchana P" X-Patchwork-Id: 886547 Received: from mgamail.intel.com (mgamail.intel.com [198.175.65.15]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 691DA2D0ABF; Wed, 30 Apr 2025 20:53:15 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=198.175.65.15 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1746046397; cv=none; b=Gn3mVtkJN79vFV2r3AEbon/To8YpgAxaMTcpWo2YFfN1GRGIjYfNAHNX4urkd174HzgL9v/wGNmYf3p+F4wP7cgFLkqPIVF48iD8JfVLUYGPPZTDL6OFjFamUp9EgdgXv0oUyjIUV7flqg1LjXXCJhT5SGUa+OeroZH+dBd3jIs= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1746046397; c=relaxed/simple; bh=ZmGjviUpJ+c+Is95YFL01k3D2zJB4AA9yt94lFpXChk=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=G/g0mvb8TeX2J0cqACGzu2Zh4g8iWvFnACGvPgW0IHgUtumc/06iyvsOXECK0t6p7/bioNJFvD6etngcJqHgTHz/xxJIMsoaT+HH8497aaG5XC36X3QeRlR1U9iabp4/IQYYjkxk47LUtunlsJ+NQ5h79ozE66nS1q731KDYKgM= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=intel.com; spf=pass smtp.mailfrom=intel.com; dkim=pass (2048-bit key) header.d=intel.com header.i=@intel.com header.b=FVZqyck6; arc=none smtp.client-ip=198.175.65.15 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=intel.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=intel.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=intel.com header.i=@intel.com header.b="FVZqyck6" DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=intel.com; i=@intel.com; q=dns/txt; s=Intel; t=1746046396; x=1777582396; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=ZmGjviUpJ+c+Is95YFL01k3D2zJB4AA9yt94lFpXChk=; b=FVZqyck61gZrjpNvcriGyDXUOwVPjPAYT/IQpFU1Q/3Xp+SAO7bSRV39 dytOcmDbfDQdai91JxP9AWISuYVt6Tb+b8s/p5l9Js/bKfE7j2tYvrpoL 6IfsXWisSpjNruII9oRXTsXytomt1utFvDDi2p5zErl2L75ZtF30VX7ec X9lZUf1VBDQ9X4zthaoml9Bq8DYVK2e1IkXRBpnBH9oimNtDHGFjF00kp hDgXw76YP8w7nElEM4ICGENOrk3jJhwEdX36cwmCsozGkOdpWXO/TDCsy 1jfccKRYOJyj1mcI943MOZUK0229DRy1IayqeFsJXopmZKmohxVVgfI+7 g==; X-CSE-ConnectionGUID: GsjnCkdUTRu59NuVN7L2zQ== X-CSE-MsgGUID: BsnQYkbNS426u3XJWPounQ== X-IronPort-AV: E=McAfee;i="6700,10204,11419"; a="51388600" X-IronPort-AV: E=Sophos;i="6.15,252,1739865600"; d="scan'208";a="51388600" Received: from fmviesa003.fm.intel.com ([10.60.135.143]) by orvoesa107.jf.intel.com with 
ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 30 Apr 2025 13:53:15 -0700
X-CSE-ConnectionGUID: WuvCvmlHRBWlt04qC1yzXw==
X-CSE-MsgGUID: qe7sYLJxThKdFJCF6h2u1w==
X-ExtLoop1: 1
X-IronPort-AV: E=Sophos;i="6.15,252,1739865600"; d="scan'208";a="138248912"
Received: from jf5300-b11a338t.jf.intel.com ([10.242.51.115]) by fmviesa003.fm.intel.com with ESMTP; 30 Apr 2025 13:53:13 -0700
From: Kanchana P Sridhar
To: linux-kernel@vger.kernel.org, linux-mm@kvack.org, hannes@cmpxchg.org, yosry.ahmed@linux.dev, nphamcs@gmail.com, chengming.zhou@linux.dev, usamaarif642@gmail.com, ryan.roberts@arm.com, 21cnbao@gmail.com, ying.huang@linux.alibaba.com, akpm@linux-foundation.org, linux-crypto@vger.kernel.org, herbert@gondor.apana.org.au, davem@davemloft.net, clabbe@baylibre.com, ardb@kernel.org, ebiggers@google.com, surenb@google.com, kristen.c.accardi@intel.com
Cc: wajdi.k.feghali@intel.com, vinodh.gopal@intel.com, kanchana.p.sridhar@intel.com
Subject: [PATCH v9 09/19] crypto: iaa - CRYPTO_ACOMP_REQ_POLL acomp_req flag for sequential vs. parallel.
Date: Wed, 30 Apr 2025 13:52:55 -0700
Message-Id: <20250430205305.22844-10-kanchana.p.sridhar@intel.com>
X-Mailer: git-send-email 2.27.0
In-Reply-To: <20250430205305.22844-1-kanchana.p.sridhar@intel.com>
References: <20250430205305.22844-1-kanchana.p.sridhar@intel.com>
Precedence: bulk
X-Mailing-List: linux-crypto@vger.kernel.org
List-Id: List-Subscribe: List-Unsubscribe:
MIME-Version: 1.0

This commit allows kernel users of iaa_crypto, such as zswap, to invoke the crypto_acomp_compress() API in fully synchronous mode for non-batching use cases (i.e., today's status quo), where zswap calls crypto_wait_req(crypto_acomp_compress(req), wait); and to non-intrusively invoke the fully asynchronous batch compress/decompress API that will be introduced in subsequent patches. Both use cases need to reuse the same code paths in the driver to interface with the hardware: the CRYPTO_ACOMP_REQ_POLL flag allows this shared code to determine whether an acomp_req should be processed synchronously or asynchronously. The idea is to simplify the crypto_acomp sequential/batching interfaces for use by zswap.

Thus, regardless of the iaa_crypto driver's 'sync_mode' setting, the driver can still be forced into synchronous mode by clearing the CRYPTO_ACOMP_REQ_POLL flag in req->base.flags (the default, which supports zswap's sequential use cases today).

IAA batching functionality will be implemented in subsequent patches, which will set the CRYPTO_ACOMP_REQ_POLL flag for the acomp_reqs in a batch. This enables the iaa_crypto driver to implement true async "submit-polling" for parallel compressions and decompressions in the IAA hardware accelerator.

In other words, all three of the following need to be true for a request to be processed in fully async submit-poll mode:

1) async_mode should be "true"
2) use_irq should be "false"
3) req->base.flags & CRYPTO_ACOMP_REQ_POLL should be "true"

Subsequent patches will:

- Set (1) and (2) as iaa_crypto defaults once async submit-poll is implemented.
- Enable (3) for iaa_crypto batching, and clear the CRYPTO_ACOMP_REQ_POLL flags before exiting from the batching routines, since the assumption is that the acomp_reqs are created/managed by a higher-level kernel user such as zswap and are reused for both sequential and batching use cases from zswap's perspective.

This patch also removes "disable_async" from iaa_decompress().
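For reference, the three conditions above amount to a single driver-side gate. A condensed, illustrative sketch follows (the helper name is hypothetical; the driver open-codes the equivalent checks in its submit paths):

static __always_inline bool example_use_submit_poll(struct iaa_compression_ctx *ctx,
                                                    struct acomp_req *req)
{
        /*
         * Fully async submit-poll only when the driver is in non-irq async
         * mode and the caller has opted in via CRYPTO_ACOMP_REQ_POLL.
         */
        return ctx->async_mode && !ctx->use_irq &&
               (req->base.flags & CRYPTO_ACOMP_REQ_POLL);
}

Callers that never set the flag (for example, zswap's existing crypto_wait_req(crypto_acomp_compress(req), wait) usage) therefore retain fully synchronous behavior regardless of the driver's sync_mode setting.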
Signed-off-by: Kanchana P Sridhar --- drivers/crypto/intel/iaa/iaa_crypto_main.c | 15 +++++++-------- include/crypto/acompress.h | 6 ++++++ 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/crypto/intel/iaa/iaa_crypto_main.c b/drivers/crypto/intel/iaa/iaa_crypto_main.c index 7dab340c4a34..52fe68606f4d 100644 --- a/drivers/crypto/intel/iaa/iaa_crypto_main.c +++ b/drivers/crypto/intel/iaa/iaa_crypto_main.c @@ -1785,7 +1785,7 @@ static int iaa_compress(struct crypto_tfm *tfm, struct acomp_req *req, " src_addr %llx, dst_addr %llx\n", __func__, active_compression_mode->name, src_addr, dst_addr); - } else if (ctx->async_mode) { + } else if (ctx->async_mode && (req->base.flags & CRYPTO_ACOMP_REQ_POLL)) { req->data = idxd_desc; } @@ -1807,7 +1807,7 @@ static int iaa_compress(struct crypto_tfm *tfm, struct acomp_req *req, update_total_comp_calls(); update_wq_comp_calls(wq); - if (ctx->async_mode) { + if (ctx->async_mode && (req->base.flags & CRYPTO_ACOMP_REQ_POLL)) { ret = -EINPROGRESS; dev_dbg(dev, "%s: returning -EINPROGRESS\n", __func__); goto out; @@ -1836,8 +1836,7 @@ static int iaa_compress(struct crypto_tfm *tfm, struct acomp_req *req, static int iaa_decompress(struct crypto_tfm *tfm, struct acomp_req *req, struct idxd_wq *wq, dma_addr_t src_addr, unsigned int slen, - dma_addr_t dst_addr, unsigned int *dlen, - bool disable_async) + dma_addr_t dst_addr, unsigned int *dlen) { struct iaa_device_compression_mode *active_compression_mode; struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm); @@ -1886,7 +1885,7 @@ static int iaa_decompress(struct crypto_tfm *tfm, struct acomp_req *req, desc->src1_size = slen; desc->completion_addr = idxd_desc->compl_dma; - if (ctx->use_irq && !disable_async) { + if (ctx->use_irq) { desc->flags |= IDXD_OP_FLAG_RCI; idxd_desc->crypto.req = req; @@ -1899,7 +1898,7 @@ static int iaa_decompress(struct crypto_tfm *tfm, struct acomp_req *req, " src_addr %llx, dst_addr %llx\n", __func__, active_compression_mode->name, src_addr, dst_addr); - } else if (ctx->async_mode && !disable_async) { + } else if (ctx->async_mode && (req->base.flags & CRYPTO_ACOMP_REQ_POLL)) { req->data = idxd_desc; } @@ -1921,7 +1920,7 @@ static int iaa_decompress(struct crypto_tfm *tfm, struct acomp_req *req, update_total_decomp_calls(); update_wq_decomp_calls(wq); - if (ctx->async_mode && !disable_async) { + if (ctx->async_mode && (req->base.flags & CRYPTO_ACOMP_REQ_POLL)) { ret = -EINPROGRESS; dev_dbg(dev, "%s: returning -EINPROGRESS\n", __func__); goto out; @@ -2127,7 +2126,7 @@ static int iaa_comp_adecompress(struct acomp_req *req) req->dst, req->dlen, sg_dma_len(req->dst)); ret = iaa_decompress(tfm, req, wq, src_addr, req->slen, - dst_addr, &req->dlen, false); + dst_addr, &req->dlen); if (ret == -EINPROGRESS) return ret; diff --git a/include/crypto/acompress.h b/include/crypto/acompress.h index 01389fd7055f..939e51d122b0 100644 --- a/include/crypto/acompress.h +++ b/include/crypto/acompress.h @@ -20,6 +20,12 @@ #include #include +/* + * If set, the driver must have a way to submit the req, then + * poll its completion status for success/error. + */ +#define CRYPTO_ACOMP_REQ_POLL 0x00000001 + /* Set this bit if source is virtual address instead of SG list. 
*/ #define CRYPTO_ACOMP_REQ_SRC_VIRT 0x00000002 From patchwork Wed Apr 30 20:52:56 2025 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Sridhar, Kanchana P" X-Patchwork-Id: 886546 Received: from mgamail.intel.com (mgamail.intel.com [198.175.65.15]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 060A42D110A; Wed, 30 Apr 2025 20:53:17 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=198.175.65.15 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1746046399; cv=none; b=FX/1Ua75MOugR0JrSYMeiB8I1J6lbuIoaUShGs6bMGafd7QEOiNimmUmpjOYzMk2iE4TuDqYMEmBCZ/4z/5R3uUYbWQWW+8CS4kmTflfWRwMBLmnHgg0+Vle8v1aw1cDLAQhXuALO/gQ8paLGMCEJ0E0duRO1hVcwoOjE5CAAkw= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1746046399; c=relaxed/simple; bh=cw35xIKM3GQ7IYN44nqMVxUAoNpG2Du37jUAD6lMlcs=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=kqO763J3AA3ptKlxA0k6H7QX7Cc07H0Hb/vyAWgVNF5S242msQOTmffGCt5Wo8opI7hgUMOYDZynbd2XBdw1PNdKM84uJsFLPEPu1GHvnO5894wajwoUmOFHoi373Z+IKUnrIUxZO8YCmXDCDfgsBc9nqCEYXurl/Bb7TGy2gKs= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=intel.com; spf=pass smtp.mailfrom=intel.com; dkim=pass (2048-bit key) header.d=intel.com header.i=@intel.com header.b=jLIWPHHy; arc=none smtp.client-ip=198.175.65.15 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=intel.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=intel.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=intel.com header.i=@intel.com header.b="jLIWPHHy" DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=intel.com; i=@intel.com; q=dns/txt; s=Intel; t=1746046397; x=1777582397; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=cw35xIKM3GQ7IYN44nqMVxUAoNpG2Du37jUAD6lMlcs=; b=jLIWPHHyAIRk+Hy6zGg/xU/Fjbf6/4nEfk4+Z4Ej1k8Ph5ElN5zvGFJt zQy9TOrfyo2gULOCP2XW+IKl+QBQ8sZ/tpi9dFrvtvPxGEWGnid2/A4tx mh2YudDFRca+7PD7gGMVX09765ING5imsrUhxI2kVRX1DMUEuSUxduSjL rCFvwyjavHfB/bPXFvwCJf4BmA73Hi7pE3K12sagFgRUYmNSnwdxKd7Th zedOrgRE1DiBadrIexndQxHQAZzyZy+25DOSFMFm1/z8Ubv9oIDZIVQ2W pltCZlX/NWKyNV22AhjnITbVthNBGl1pAY7vZrsO8ByTIb/BUQGjTrtoa w==; X-CSE-ConnectionGUID: RoG6lHofTBu06Z/Ms4x+0Q== X-CSE-MsgGUID: FkGHmILHT2+omEnjqbG9bQ== X-IronPort-AV: E=McAfee;i="6700,10204,11419"; a="51388617" X-IronPort-AV: E=Sophos;i="6.15,252,1739865600"; d="scan'208";a="51388617" Received: from fmviesa003.fm.intel.com ([10.60.135.143]) by orvoesa107.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 30 Apr 2025 13:53:16 -0700 X-CSE-ConnectionGUID: C9ygkhUwTwOT2d3kS0vE7w== X-CSE-MsgGUID: uvHeYmr0TfapwvOC9nZ26g== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="6.15,252,1739865600"; d="scan'208";a="138248918" Received: from jf5300-b11a338t.jf.intel.com ([10.242.51.115]) by fmviesa003.fm.intel.com with ESMTP; 30 Apr 2025 13:53:14 -0700 From: Kanchana P Sridhar To: linux-kernel@vger.kernel.org, linux-mm@kvack.org, hannes@cmpxchg.org, yosry.ahmed@linux.dev, nphamcs@gmail.com, chengming.zhou@linux.dev, usamaarif642@gmail.com, ryan.roberts@arm.com, 21cnbao@gmail.com, ying.huang@linux.alibaba.com, akpm@linux-foundation.org, linux-crypto@vger.kernel.org, herbert@gondor.apana.org.au, 
davem@davemloft.net, clabbe@baylibre.com, ardb@kernel.org, ebiggers@google.com, surenb@google.com, kristen.c.accardi@intel.com Cc: wajdi.k.feghali@intel.com, vinodh.gopal@intel.com, kanchana.p.sridhar@intel.com Subject: [PATCH v9 10/19] crypto: acomp - New interfaces to facilitate batching support in acomp & drivers. Date: Wed, 30 Apr 2025 13:52:56 -0700 Message-Id: <20250430205305.22844-11-kanchana.p.sridhar@intel.com> X-Mailer: git-send-email 2.27.0 In-Reply-To: <20250430205305.22844-1-kanchana.p.sridhar@intel.com> References: <20250430205305.22844-1-kanchana.p.sridhar@intel.com> Precedence: bulk X-Mailing-List: linux-crypto@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 This commit adds get_batch_size(), batch_compress() and batch_decompress() interfaces to: struct acomp_alg struct crypto_acomp A crypto_acomp compression algorithm that supports batching of compressions and decompressions must provide implementations for these API, so that a higher level module based on crypto_acomp, such as zswap, can allocate resources for submitting multiple compress/decompress jobs that can be batched, and invoke batching of [de]compressions. A new helper function acomp_has_async_batching() can be invoked to query if a crypto_acomp has registered these batching interfaces. Further, zswap can invoke the newly added "crypto_acomp_batch_size()" API to query the maximum number of requests that can be batch [de]compressed. crypto_acomp_batch_size() returns 1 if the acomp has not provided an implementation for get_batch_size(). Based on this, zswap can use the minimum of any zswap-specific upper limits for batch-size and the compressor's max batch-size, to allocate batching resources. This allows the iaa_crypto Intel IAA driver to register implementations for the get_batch_size(), batch_compress() and batch_decompress() acomp_alg interfaces, that can subsequently be invoked from zswap to compress/decompress pages in parallel in the IAA hardware accelerator to improve swapout/swapin performance, through these newly added corresponding crypto_acomp API: crypto_acomp_batch_size() crypto_acomp_batch_compress() crypto_acomp_batch_decompress() Signed-off-by: Kanchana P Sridhar --- crypto/acompress.c | 3 + include/crypto/acompress.h | 107 ++++++++++++++++++++++++++++ include/crypto/internal/acompress.h | 20 ++++++ 3 files changed, 130 insertions(+) diff --git a/crypto/acompress.c b/crypto/acompress.c index d08e0fe8cd9e..c7cca5596acf 100644 --- a/crypto/acompress.c +++ b/crypto/acompress.c @@ -95,6 +95,9 @@ static int crypto_acomp_init_tfm(struct crypto_tfm *tfm) acomp->compress = alg->compress; acomp->decompress = alg->decompress; + acomp->get_batch_size = alg->get_batch_size; + acomp->batch_compress = alg->batch_compress; + acomp->batch_decompress = alg->batch_decompress; acomp->reqsize = alg->reqsize; acomp->base.exit = crypto_acomp_exit_tfm; diff --git a/include/crypto/acompress.h b/include/crypto/acompress.h index 939e51d122b0..e50f3e71ba58 100644 --- a/include/crypto/acompress.h +++ b/include/crypto/acompress.h @@ -120,6 +120,10 @@ struct acomp_req { * * @compress: Function performs a compress operation * @decompress: Function performs a de-compress operation + * @get_batch_size: Maximum batch-size for batching compress/decompress + * operations. + * @batch_compress: Function performs a batch compress operation. + * @batch_decompress: Function performs a batch decompress operation. 
* @reqsize: Context size for (de)compression requests * @fb: Synchronous fallback tfm * @base: Common crypto API algorithm data structure @@ -127,6 +131,22 @@ struct acomp_req { struct crypto_acomp { int (*compress)(struct acomp_req *req); int (*decompress)(struct acomp_req *req); + unsigned int (*get_batch_size)(void); + bool (*batch_compress)( + struct acomp_req *reqs[], + struct page *pages[], + u8 *dsts[], + unsigned int dlens[], + int errors[], + int nr_reqs); + bool (*batch_decompress)( + struct acomp_req *reqs[], + u8 *srcs[], + struct page *pages[], + unsigned int slens[], + unsigned int dlens[], + int errors[], + int nr_reqs); unsigned int reqsize; struct crypto_acomp *fb; struct crypto_tfm base; @@ -224,6 +244,13 @@ static inline bool acomp_is_async(struct crypto_acomp *tfm) CRYPTO_ALG_ASYNC; } +static inline bool acomp_has_async_batching(struct crypto_acomp *tfm) +{ + return (acomp_is_async(tfm) && + (crypto_comp_alg_common(tfm)->base.cra_flags & CRYPTO_ALG_TYPE_ACOMPRESS) && + tfm->get_batch_size && tfm->batch_compress && tfm->batch_decompress); +} + static inline struct crypto_acomp *crypto_acomp_reqtfm(struct acomp_req *req) { return __crypto_acomp_tfm(req->base.tfm); @@ -595,4 +622,84 @@ static inline struct acomp_req *acomp_request_on_stack_init( return req; } +/** + * crypto_acomp_batch_size() -- Get the algorithm's batch size + * + * Function returns the algorithm's batch size for batching operations + * + * @tfm: ACOMPRESS tfm handle allocated with crypto_alloc_acomp() + * + * Return: crypto_acomp's batch size. + */ +static inline unsigned int crypto_acomp_batch_size(struct crypto_acomp *tfm) +{ + if (acomp_has_async_batching(tfm)) + return tfm->get_batch_size(); + + return 1; +} + +/** + * crypto_acomp_batch_compress() -- Invoke asynchronous compress of a batch + * of requests. + * + * @reqs: @nr_reqs asynchronous compress requests. + * @pages: Pages to be compressed by IAA. + * @dsts: Pre-allocated destination buffers to store results of compression. + * Each element of @dsts must be of size "PAGE_SIZE * 2". + * @dlens: Will contain the compressed lengths for @pages. + * @errors: zero on successful compression of the corresponding + * req, or error code in case of error. + * @nr_reqs: The number of requests in @reqs, up to IAA_CRYPTO_MAX_BATCH_SIZE, + * to be compressed. + * + * Returns true if all compress requests in the batch complete successfully, + * false otherwise. + */ +static inline bool crypto_acomp_batch_compress( + struct acomp_req *reqs[], + struct page *pages[], + u8 *dsts[], + unsigned int dlens[], + int errors[], + int nr_reqs) +{ + struct crypto_acomp *tfm = crypto_acomp_reqtfm(reqs[0]); + + return tfm->batch_compress(reqs, pages, dsts, dlens, errors, nr_reqs); +} + +/** + * crypto_acomp_batch_decompress() -- Invoke asynchronous decompress of a batch + * of requests. + * + * @reqs: @nr_reqs asynchronous decompress requests. + * @srcs: Source buffers to to be decompressed. + * @pages: Destination pages corresponding to @srcs. + * @slens: Buffer lengths for @srcs. + * @dlens: Will contain the decompressed lengths for @srcs. + * For batch decompressions, the caller must enforce additional + * semantics such as, BUG_ON(dlens[i] != PAGE_SIZE) assertions. + * @errors: zero on successful decompression of the corresponding + * req, or error code in case of error. + * @nr_reqs: The number of requests in @reqs, up to IAA_CRYPTO_MAX_BATCH_SIZE, + * to be decompressed. 
+ * + * Returns true if all decompress requests in the batch complete successfully, + * false otherwise. + */ +static inline bool crypto_acomp_batch_decompress( + struct acomp_req *reqs[], + u8 *srcs[], + struct page *pages[], + unsigned int slens[], + unsigned int dlens[], + int errors[], + int nr_reqs) +{ + struct crypto_acomp *tfm = crypto_acomp_reqtfm(reqs[0]); + + return tfm->batch_decompress(reqs, srcs, pages, slens, dlens, errors, nr_reqs); +} + #endif diff --git a/include/crypto/internal/acompress.h b/include/crypto/internal/acompress.h index b69d818d7e68..891e40831af8 100644 --- a/include/crypto/internal/acompress.h +++ b/include/crypto/internal/acompress.h @@ -23,6 +23,10 @@ * * @compress: Function performs a compress operation * @decompress: Function performs a de-compress operation + * @get_batch_size: Maximum batch-size for batching compress/decompress + * operations. + * @batch_compress: Function performs a batch compress operation. + * @batch_decompress: Function performs a batch decompress operation. * @init: Initialize the cryptographic transformation object. * This function is used to initialize the cryptographic * transformation object. This function is called only once at @@ -43,6 +47,22 @@ struct acomp_alg { int (*compress)(struct acomp_req *req); int (*decompress)(struct acomp_req *req); + unsigned int (*get_batch_size)(void); + bool (*batch_compress)( + struct acomp_req *reqs[], + struct page *pages[], + u8 *dsts[], + unsigned int dlens[], + int errors[], + int nr_reqs); + bool (*batch_decompress)( + struct acomp_req *reqs[], + u8 *srcs[], + struct page *pages[], + unsigned int slens[], + unsigned int dlens[], + int errors[], + int nr_reqs); int (*init)(struct crypto_acomp *tfm); void (*exit)(struct crypto_acomp *tfm); From patchwork Wed Apr 30 20:52:57 2025 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Sridhar, Kanchana P" X-Patchwork-Id: 886165 Received: from mgamail.intel.com (mgamail.intel.com [198.175.65.15]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 8C0D72D111F; Wed, 30 Apr 2025 20:53:17 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=198.175.65.15 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1746046401; cv=none; b=m9V0PLSnI1xa3OJEQ+10aqPiCXatRWHokJtgsENRpQ6NmlqtID7sQNJUMEvH0ZAD+8uKZ3Ci1ef6FIbm8VslW/ojdJ+/LLRSqu3pvN0GqfN7053fRbXVRti6j6DOqLnXiycqZVz3swX1f2lpvMJYvE4y/Zm2Q3mr+ciDcTUR38w= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1746046401; c=relaxed/simple; bh=UyA9sOUzeFWM/IfsI80doZFP3YbMuXrREZWQXOgHADM=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=g6P0AmQs6NKkdxLKWV4QNvkQkm2wpMjtDhFKw7Qhvxd+gMR93sOaao6b+4IyexOCOoNc6tnUOHKejwPaWmRLGqTguSSRD4HtI00ZcJg+TQZCVwtMynr0+2+7REm8v0m8+p0UBdUz3miEmpy0uLYyZ++eyUQovQSiBReeSKLYZZI= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=intel.com; spf=pass smtp.mailfrom=intel.com; dkim=pass (2048-bit key) header.d=intel.com header.i=@intel.com header.b=kGrg7xaD; arc=none smtp.client-ip=198.175.65.15 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=intel.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=intel.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) 
header.d=intel.com header.i=@intel.com header.b="kGrg7xaD" DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=intel.com; i=@intel.com; q=dns/txt; s=Intel; t=1746046398; x=1777582398; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=UyA9sOUzeFWM/IfsI80doZFP3YbMuXrREZWQXOgHADM=; b=kGrg7xaDJBodwcXj/svSPBJsRQHwPa4o3N8k7d1aSafOd2pFQmNqqVHQ uHz3jEioZIsOMaL5qkTOOj2UANdaGKgvsSToZWNc4eNK6JmvthrnN0sbk KKkNCZfB0YVaXHjdxLSzCfeRnhYPQqFXriEHr64gpGJELPH5fLqf2IhXI js8KH/qufuh33mi6KwKsizxug/tvcTu4Tf1jGjYt5B45XYG1QZGV5nqpA tR3/x9C8mZ6rNZ7AX4y2jdGBtl7Kj+t377ZcLlGpcFUP76CQYIByTebZO g3kCJQ727QVkSC2cs6n0u+E8Mv2y40L5oWzMwAg65SH6cZmRdORS47pj/ w==; X-CSE-ConnectionGUID: NHiRKsroTv6S8sppHVJx9A== X-CSE-MsgGUID: 4IXlaN9RThSwSVj9Xr1Ofw== X-IronPort-AV: E=McAfee;i="6700,10204,11419"; a="51388631" X-IronPort-AV: E=Sophos;i="6.15,252,1739865600"; d="scan'208";a="51388631" Received: from fmviesa003.fm.intel.com ([10.60.135.143]) by orvoesa107.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 30 Apr 2025 13:53:17 -0700 X-CSE-ConnectionGUID: znWercD3TMKvHWRAkFah0g== X-CSE-MsgGUID: M4wTsymcSvGKGkDGi8d6nA== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="6.15,252,1739865600"; d="scan'208";a="138248922" Received: from jf5300-b11a338t.jf.intel.com ([10.242.51.115]) by fmviesa003.fm.intel.com with ESMTP; 30 Apr 2025 13:53:15 -0700 From: Kanchana P Sridhar To: linux-kernel@vger.kernel.org, linux-mm@kvack.org, hannes@cmpxchg.org, yosry.ahmed@linux.dev, nphamcs@gmail.com, chengming.zhou@linux.dev, usamaarif642@gmail.com, ryan.roberts@arm.com, 21cnbao@gmail.com, ying.huang@linux.alibaba.com, akpm@linux-foundation.org, linux-crypto@vger.kernel.org, herbert@gondor.apana.org.au, davem@davemloft.net, clabbe@baylibre.com, ardb@kernel.org, ebiggers@google.com, surenb@google.com, kristen.c.accardi@intel.com Cc: wajdi.k.feghali@intel.com, vinodh.gopal@intel.com, kanchana.p.sridhar@intel.com Subject: [PATCH v9 11/19] crypto: iaa - Implement crypto_acomp batching interfaces for Intel IAA. Date: Wed, 30 Apr 2025 13:52:57 -0700 Message-Id: <20250430205305.22844-12-kanchana.p.sridhar@intel.com> X-Mailer: git-send-email 2.27.0 In-Reply-To: <20250430205305.22844-1-kanchana.p.sridhar@intel.com> References: <20250430205305.22844-1-kanchana.p.sridhar@intel.com> Precedence: bulk X-Mailing-List: linux-crypto@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 iaa_crypto implements the new crypto_acomp get_batch_size() interface that returns an iaa_driver specific constant, IAA_CRYPTO_MAX_BATCH_SIZE (set to 8U currently). This patch also provides the iaa_crypto driver implementations for the newly added crypto_acomp batch_compress() and batch_decompress() interfaces. This allows swap modules such as zswap to allocate required batching resources and then invoke fully asynchronous batch parallel compression/decompression of pages on systems with Intel IAA, by invoking these crypto API, respectively: crypto_acomp_batch_size(...); crypto_acomp_batch_compress(...); crypto_acomp_batch_decompress(...); This enables zswap compress batching code to be developed in a manner similar to the current single-page synchronous calls to: crypto_acomp_compress(...); thereby, facilitating encapsulated and modular hand-off between the kernel mm/zswap code and the crypto_acomp layer. 
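As an illustration of the intended calling convention from a swap module, here is a hedged, zswap-like sketch (the function, its parameters, and its fallback policy are hypothetical and not part of this patch):

#include <crypto/acompress.h>

static bool example_batch_compress(struct crypto_acomp *tfm,
                                   struct acomp_req *reqs[],
                                   struct page *pages[],
                                   u8 *dsts[],
                                   unsigned int dlens[],
                                   int errors[],
                                   unsigned int nr_pages)
{
        /* crypto_acomp_batch_size() returns 1 if batching is unsupported. */
        unsigned int max_batch = crypto_acomp_batch_size(tfm);
        unsigned int nr_reqs = min(nr_pages, max_batch);

        if (nr_reqs < 2)
                return false;   /* fall back to crypto_acomp_compress() */

        /* On false, inspect errors[i]; on true, dlens[i] holds each result. */
        return crypto_acomp_batch_compress(reqs, pages, dsts, dlens,
                                           errors, nr_reqs);
}

This mirrors the guidance above: the caller sizes its batching resources using the minimum of its own upper limit and the compressor's reported maximum batch size, and falls back to the existing single-request API when batching is unavailable.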
Suggested-by: Yosry Ahmed Signed-off-by: Kanchana P Sridhar --- drivers/crypto/intel/iaa/iaa_crypto.h | 9 + drivers/crypto/intel/iaa/iaa_crypto_main.c | 288 +++++++++++++++++++++ 2 files changed, 297 insertions(+) diff --git a/drivers/crypto/intel/iaa/iaa_crypto.h b/drivers/crypto/intel/iaa/iaa_crypto.h index b4a94da2c315..90ce336879f1 100644 --- a/drivers/crypto/intel/iaa/iaa_crypto.h +++ b/drivers/crypto/intel/iaa/iaa_crypto.h @@ -42,6 +42,15 @@ IAA_DECOMP_CHECK_FOR_EOB | \ IAA_DECOMP_STOP_ON_EOB) +/* + * The maximum compress/decompress batch size for IAA's implementation of + * the crypto_acomp batch_compress() and batch_decompress() interfaces. + * The IAA compression algorithms should provide the crypto_acomp + * get_batch_size() interface through a function that returns this + * constant. + */ +#define IAA_CRYPTO_MAX_BATCH_SIZE 8U + /* Representation of IAA workqueue */ struct iaa_wq { struct list_head list; diff --git a/drivers/crypto/intel/iaa/iaa_crypto_main.c b/drivers/crypto/intel/iaa/iaa_crypto_main.c index 52fe68606f4d..d577f555d6ab 100644 --- a/drivers/crypto/intel/iaa/iaa_crypto_main.c +++ b/drivers/crypto/intel/iaa/iaa_crypto_main.c @@ -2149,6 +2149,291 @@ static void compression_ctx_init(struct iaa_compression_ctx *ctx) ctx->use_irq = use_irq; } +static __always_inline unsigned int iaa_comp_get_batch_size(void) +{ + return IAA_CRYPTO_MAX_BATCH_SIZE; +} + +static int iaa_comp_poll(struct acomp_req *req) +{ + struct idxd_desc *idxd_desc; + struct idxd_device *idxd; + struct iaa_wq *iaa_wq; + struct pci_dev *pdev; + struct device *dev; + struct idxd_wq *wq; + bool compress_op; + int ret; + + idxd_desc = req->data; + if (!idxd_desc) + return -EAGAIN; + + compress_op = (idxd_desc->iax_hw->opcode == IAX_OPCODE_COMPRESS); + wq = idxd_desc->wq; + iaa_wq = idxd_wq_get_private(wq); + idxd = iaa_wq->iaa_device->idxd; + pdev = idxd->pdev; + dev = &pdev->dev; + + ret = check_completion(dev, idxd_desc->iax_completion, compress_op, true); + if (ret == -EAGAIN) + return ret; + if (ret) + goto out; + + req->dlen = idxd_desc->iax_completion->output_size; + + /* Update stats */ + if (compress_op) { + update_total_comp_bytes_out(req->dlen); + update_wq_comp_bytes(wq, req->dlen); + } else { + update_total_decomp_bytes_in(req->slen); + update_wq_decomp_bytes(wq, req->slen); + } + + if (iaa_verify_compress && (idxd_desc->iax_hw->opcode == IAX_OPCODE_COMPRESS)) { + struct crypto_tfm *tfm = req->base.tfm; + dma_addr_t src_addr, dst_addr; + u32 compression_crc; + + compression_crc = idxd_desc->iax_completion->crc; + + dma_sync_sg_for_device(dev, req->dst, 1, DMA_FROM_DEVICE); + dma_sync_sg_for_device(dev, req->src, 1, DMA_TO_DEVICE); + + src_addr = sg_dma_address(req->src); + dst_addr = sg_dma_address(req->dst); + + ret = iaa_compress_verify(tfm, req, wq, src_addr, req->slen, + dst_addr, &req->dlen, compression_crc); + } +out: + /* caller doesn't call crypto_wait_req, so no acomp_request_complete() */ + + dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE); + dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE); + + idxd_free_desc(idxd_desc->wq, idxd_desc); + + dev_dbg(dev, "%s: returning ret=%d\n", __func__, ret); + + return ret; +} + +static __always_inline void iaa_set_req_poll( + struct acomp_req *reqs[], + int nr_reqs, + bool set_flag) +{ + int i; + + for (i = 0; i < nr_reqs; ++i) { + set_flag ? 
(reqs[i]->base.flags |= CRYPTO_ACOMP_REQ_POLL) : + (reqs[i]->base.flags &= ~CRYPTO_ACOMP_REQ_POLL); + } +} + +/** + * This API provides IAA compress batching functionality for use by swap + * modules. + * + * @reqs: @nr_reqs asynchronous compress requests. + * @pages: Pages to be compressed by IAA. + * @dsts: Pre-allocated destination buffers to store results of IAA + * compression. Each element of @dsts must be of size "PAGE_SIZE * 2". + * @dlens: Will contain the compressed lengths. + * @errors: zero on successful compression of the corresponding + * req, or error code in case of error. + * @nr_reqs: The number of requests, up to IAA_CRYPTO_MAX_BATCH_SIZE, + * to be compressed. + * + * Returns true if all compress requests in the batch complete successfully, + * false otherwise. + */ +static bool iaa_comp_acompress_batch( + struct acomp_req *reqs[], + struct page *pages[], + u8 *dsts[], + unsigned int dlens[], + int errors[], + int nr_reqs) +{ + struct scatterlist inputs[IAA_CRYPTO_MAX_BATCH_SIZE]; + struct scatterlist outputs[IAA_CRYPTO_MAX_BATCH_SIZE]; + bool compressions_done = false; + int i, err = 0; + + BUG_ON(nr_reqs > IAA_CRYPTO_MAX_BATCH_SIZE); + + iaa_set_req_poll(reqs, nr_reqs, true); + + /* + * Prepare and submit the batch of acomp_reqs to IAA. IAA will process + * these compress jobs in parallel. + */ + for (i = 0; i < nr_reqs; ++i) { + sg_init_table(&inputs[i], 1); + sg_set_page(&inputs[i], pages[i], PAGE_SIZE, 0); + + /* + * We need PAGE_SIZE * 2 here since there maybe over-compression case, + * and hardware-accelerators may won't check the dst buffer size, so + * giving the dst buffer with enough length to avoid buffer overflow. + */ + sg_init_one(&outputs[i], dsts[i], PAGE_SIZE * 2); + acomp_request_set_params(reqs[i], &inputs[i], + &outputs[i], PAGE_SIZE, PAGE_SIZE); + + errors[i] = iaa_comp_acompress(reqs[i]); + + if (errors[i] != -EINPROGRESS) { + errors[i] = -EINVAL; + err = -EINVAL; + } else { + errors[i] = -EAGAIN; + } + } + + /* + * Asynchronously poll for and process IAA compress job completions. + */ + while (!compressions_done) { + compressions_done = true; + + for (i = 0; i < nr_reqs; ++i) { + /* + * Skip, if the compression has already completed + * successfully or with an error. + */ + if (errors[i] != -EAGAIN) + continue; + + errors[i] = iaa_comp_poll(reqs[i]); + + if (errors[i]) { + if (errors[i] == -EAGAIN) + compressions_done = false; + else + err = -EINVAL; + } else { + dlens[i] = reqs[i]->dlen; + } + } + } + + /* + * For the same 'reqs[]' to be usable by + * iaa_comp_acompress()/iaa_comp_adecompress(), + * clear the CRYPTO_ACOMP_REQ_POLL bit on all acomp_reqs. + */ + iaa_set_req_poll(reqs, nr_reqs, false); + + return !err; +} + +/** + * This API provides IAA decompress batching functionality for use by swap + * modules. + * + * @reqs: @nr_reqs asynchronous decompress requests. + * @srcs: The src buffers to be decompressed by IAA. + * @pages: The pages to store the decompressed buffers. + * @slens: Compressed lengths of @srcs. + * @dlens: Will contain the decompressed lengths. + * @errors: zero on successful compression of the corresponding + * req, or error code in case of error. + * @nr_reqs: The number of pages, up to IAA_CRYPTO_MAX_BATCH_SIZE, + * to be decompressed. + * + * Returns true if all decompress requests complete successfully, + * false otherwise. 
+ */ +static bool iaa_comp_adecompress_batch( + struct acomp_req *reqs[], + u8 *srcs[], + struct page *pages[], + unsigned int slens[], + unsigned int dlens[], + int errors[], + int nr_reqs) +{ + struct scatterlist inputs[IAA_CRYPTO_MAX_BATCH_SIZE]; + struct scatterlist outputs[IAA_CRYPTO_MAX_BATCH_SIZE]; + bool decompressions_done = false; + int i, err = 0; + + BUG_ON(nr_reqs > IAA_CRYPTO_MAX_BATCH_SIZE); + + iaa_set_req_poll(reqs, nr_reqs, true); + + /* + * Prepare and submit the batch of acomp_reqs to IAA. IAA will process + * these decompress jobs in parallel. + */ + for (i = 0; i < nr_reqs; ++i) { + sg_init_one(&inputs[i], srcs[i], slens[i]); + sg_init_table(&outputs[i], 1); + sg_set_page(&outputs[i], pages[i], PAGE_SIZE, 0); + acomp_request_set_params(reqs[i], &inputs[i], + &outputs[i], slens[i], PAGE_SIZE); + + errors[i] = iaa_comp_adecompress(reqs[i]); + + /* + * If it failed desc allocation/submission, errors[i] can + * be 0 or error value from software decompress. + */ + if (errors[i] != -EINPROGRESS) { + errors[i] = -EINVAL; + err = -EINVAL; + } else { + errors[i] = -EAGAIN; + } + } + + /* + * Asynchronously poll for and process IAA decompress job completions. + */ + while (!decompressions_done) { + decompressions_done = true; + + for (i = 0; i < nr_reqs; ++i) { + /* + * Skip, if the decompression has already completed + * successfully or with an error. + */ + if (errors[i] != -EAGAIN) + continue; + + errors[i] = iaa_comp_poll(reqs[i]); + + if (errors[i]) { + if (errors[i] == -EAGAIN) + decompressions_done = false; + else + err = -EINVAL; + } else { + /* + * For batch decompressions, the caller should + * check @errors and handle dlens[i] != PAGE_SIZE. + */ + dlens[i] = reqs[i]->dlen; + } + } + } + + /* + * For the same 'reqs[]' to be usable by + * iaa_comp_acompress()/iaa_comp_adecompress(), + * clear the CRYPTO_ACOMP_REQ_POLL bit on all acomp_reqs. + */ + iaa_set_req_poll(reqs, nr_reqs, false); + + return !err; +} + /********************************************* * Interfaces to crypto_alg and crypto_acomp. 
*********************************************/ @@ -2169,6 +2454,9 @@ static struct acomp_alg iaa_acomp_fixed_deflate = { .init = iaa_comp_init_fixed, .compress = iaa_comp_acompress, .decompress = iaa_comp_adecompress, + .get_batch_size = iaa_comp_get_batch_size, + .batch_compress = iaa_comp_acompress_batch, + .batch_decompress = iaa_comp_adecompress_batch, .base = { .cra_name = "deflate", .cra_driver_name = "deflate-iaa", From patchwork Wed Apr 30 20:52:58 2025 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Sridhar, Kanchana P" X-Patchwork-Id: 886166 Received: from mgamail.intel.com (mgamail.intel.com [198.175.65.15]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id B244A2D1127; Wed, 30 Apr 2025 20:53:17 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=198.175.65.15 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1746046399; cv=none; b=TIxYyfEutfNLmMKC2WQTdBlMA7TidkYKzssfxlD0800Ew3XuXvEWZJ01tcEFgq6xQdxV2OwjR8GIWYW7ZVOzkCKgZILnoTR4WsA1Fq7igL+k8+3xwMUdIPEJ8/OaG2XOUL5moOj3RzPZRALiM3Pbz2XrDIUXkhXdxcF+2YMTLKk= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1746046399; c=relaxed/simple; bh=zk8J++ATRu4+E5dh9QqEjlY39chTwGmrGHh2DWD3/3c=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=kSdNa0X0sy20zFVAO/rmh+4z8XxebIs95dlJR9ZxxmvgiF9w5pfYj4wtGJfiA+h0VwRoNHPfLgOoiLunHx3ohlhbR/YL3NWYnBn4Rku3hWKDuZJUX7J+5M2JNte1ExT7w+76D2TRJOtc6O6g6TkdvCJ2RFNRVHYUlFh8BBSs7E8= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=intel.com; spf=pass smtp.mailfrom=intel.com; dkim=pass (2048-bit key) header.d=intel.com header.i=@intel.com header.b=aO8B0ric; arc=none smtp.client-ip=198.175.65.15 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=intel.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=intel.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=intel.com header.i=@intel.com header.b="aO8B0ric" DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=intel.com; i=@intel.com; q=dns/txt; s=Intel; t=1746046398; x=1777582398; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=zk8J++ATRu4+E5dh9QqEjlY39chTwGmrGHh2DWD3/3c=; b=aO8B0ricOuhURpiI6ZfzFoOl6JZkSRtBlg6sy5aS+5ax3XA87X5t6v9h tbW1IoTY3bSMHbILMrtQQTzXZN1+SrqccdqrTO5lmI49UqJ/o1pUttAOz mTO1xHfdUK2e7sAF+Y3GE2oQ3M5wu2bp1ndOh6OR6Udzr9ofMPjg8caMV dmhCvQmjK06i5eBTzJZr+XKZFyj0xCjCVFUXkAUMa+kOT4pHEL2gFVSgb yENAUp6mWogA6fjGBq+v2jd6Ellu9BVX89/K7fBJYRHoy0JJ2K7C2ImzP 1oFpzirVA4hMlQYcuyDi+mqmWB1u11K4jicQt1gq9oYRIFwX+lioDwolw A==; X-CSE-ConnectionGUID: 4r+CZNc2SuiTt8wTCkopRw== X-CSE-MsgGUID: wKc7y5flQnuDl0G3X5ynGw== X-IronPort-AV: E=McAfee;i="6700,10204,11419"; a="51388646" X-IronPort-AV: E=Sophos;i="6.15,252,1739865600"; d="scan'208";a="51388646" Received: from fmviesa003.fm.intel.com ([10.60.135.143]) by orvoesa107.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 30 Apr 2025 13:53:18 -0700 X-CSE-ConnectionGUID: RR89VcatQsOSby4NeHe0+Q== X-CSE-MsgGUID: DxvEq8k1TQys3M29WInqYA== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="6.15,252,1739865600"; d="scan'208";a="138248926" Received: from jf5300-b11a338t.jf.intel.com ([10.242.51.115]) by fmviesa003.fm.intel.com with ESMTP; 30 Apr 2025 
13:53:16 -0700 From: Kanchana P Sridhar To: linux-kernel@vger.kernel.org, linux-mm@kvack.org, hannes@cmpxchg.org, yosry.ahmed@linux.dev, nphamcs@gmail.com, chengming.zhou@linux.dev, usamaarif642@gmail.com, ryan.roberts@arm.com, 21cnbao@gmail.com, ying.huang@linux.alibaba.com, akpm@linux-foundation.org, linux-crypto@vger.kernel.org, herbert@gondor.apana.org.au, davem@davemloft.net, clabbe@baylibre.com, ardb@kernel.org, ebiggers@google.com, surenb@google.com, kristen.c.accardi@intel.com Cc: wajdi.k.feghali@intel.com, vinodh.gopal@intel.com, kanchana.p.sridhar@intel.com Subject: [PATCH v9 12/19] crypto: iaa - Enable async mode and make it the default. Date: Wed, 30 Apr 2025 13:52:58 -0700 Message-Id: <20250430205305.22844-13-kanchana.p.sridhar@intel.com> X-Mailer: git-send-email 2.27.0 In-Reply-To: <20250430205305.22844-1-kanchana.p.sridhar@intel.com> References: <20250430205305.22844-1-kanchana.p.sridhar@intel.com> Precedence: bulk X-Mailing-List: linux-crypto@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 This patch enables the 'async' sync_mode in the driver. Further, it sets the default sync_mode to 'async', which makes it easier for IAA hardware acceleration in the iaa_crypto driver to be loaded by default in the most efficient/recommended 'async' mode for parallel compressions/decompressions, namely, asynchronous submission of descriptors, followed by polling for job completions. Earlier, the "sync" mode used to be the default. The iaa_crypto driver documentation has been updated with these changes. This way, anyone who wants to use IAA for zswap/zram can do so after building the kernel, and without having to go through these steps to use async mode: 1) disable all the IAA device/wq bindings that happen at boot time 2) rmmod iaa_crypto 3) modprobe iaa_crypto 4) echo async > /sys/bus/dsa/drivers/crypto/sync_mode 5) re-run initialization of the IAA devices and wqs Signed-off-by: Kanchana P Sridhar --- Documentation/driver-api/crypto/iaa/iaa-crypto.rst | 11 ++--------- drivers/crypto/intel/iaa/iaa_crypto_main.c | 4 ++-- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/Documentation/driver-api/crypto/iaa/iaa-crypto.rst b/Documentation/driver-api/crypto/iaa/iaa-crypto.rst index 949bfa1ef624..8e0e98d50972 100644 --- a/Documentation/driver-api/crypto/iaa/iaa-crypto.rst +++ b/Documentation/driver-api/crypto/iaa/iaa-crypto.rst @@ -272,7 +272,7 @@ The available attributes are: echo async_irq > /sys/bus/dsa/drivers/crypto/sync_mode Async mode without interrupts (caller must poll) can be enabled by - writing 'async' to it (please see Caveat):: + writing 'async' to it:: echo async > /sys/bus/dsa/drivers/crypto/sync_mode @@ -281,14 +281,7 @@ The available attributes are: echo sync > /sys/bus/dsa/drivers/crypto/sync_mode - The default mode is 'sync'. - - Caveat: since the only mechanism that iaa_crypto currently implements - for async polling without interrupts is via the 'sync' mode as - described earlier, writing 'async' to - '/sys/bus/dsa/drivers/crypto/sync_mode' will internally enable the - 'sync' mode. This is to ensure correct iaa_crypto behavior until true - async polling without interrupts is enabled in iaa_crypto. + The default mode is 'async'. 
- g_comp_wqs_per_iaa diff --git a/drivers/crypto/intel/iaa/iaa_crypto_main.c b/drivers/crypto/intel/iaa/iaa_crypto_main.c index d577f555d6ab..cfd4f5ead67b 100644 --- a/drivers/crypto/intel/iaa/iaa_crypto_main.c +++ b/drivers/crypto/intel/iaa/iaa_crypto_main.c @@ -115,7 +115,7 @@ static bool iaa_verify_compress = true; */ /* Use async mode */ -static bool async_mode; +static bool async_mode = true; /* Use interrupts */ static bool use_irq; @@ -169,7 +169,7 @@ static int set_iaa_sync_mode(const char *name) async_mode = false; use_irq = false; } else if (sysfs_streq(name, "async")) { - async_mode = false; + async_mode = true; use_irq = false; } else if (sysfs_streq(name, "async_irq")) { async_mode = true; From patchwork Wed Apr 30 20:52:59 2025 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Sridhar, Kanchana P" X-Patchwork-Id: 886545 Received: from mgamail.intel.com (mgamail.intel.com [198.175.65.15]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 51EF12BD93C; Wed, 30 Apr 2025 20:53:19 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=198.175.65.15 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1746046401; cv=none; b=aw2QRhd84aDQW7RYj1W0bkTfnPiDTFgMiN/NehVYAt+tng6gEwAr8Qz0nWHIrGnU7sw/nnt767E1mkbjINpw79LGtCq2HY5UBb+YRBpq2pcNkNhPt5fOk8KdaihqK5u+XWD9Z9WgvXPWqtaeYv/nMFbptCKW1sjPFB7Hv33dl/o= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1746046401; c=relaxed/simple; bh=kMvINlBJY0QU3fOT+sYo/6tn6u6i0LjWeSaNDcOydOE=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=Mv5ZhYG4SeWXXQguKAnCUrY2LpI9K5zsnKiDyxvI/FEPx8p4zke3saMyOYDio/KXnoDkzefBSP9D07ski5NMl9C99Ocp4C10Zx/8BHgG9HDlJSHBPmZn4y+CBzgulYcoPl60it27f/xHipl/DdXrTMh1GYMoZ/On/uEkuPVYphk= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=intel.com; spf=pass smtp.mailfrom=intel.com; dkim=pass (2048-bit key) header.d=intel.com header.i=@intel.com header.b=lEsyDr+b; arc=none smtp.client-ip=198.175.65.15 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=intel.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=intel.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=intel.com header.i=@intel.com header.b="lEsyDr+b" DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=intel.com; i=@intel.com; q=dns/txt; s=Intel; t=1746046400; x=1777582400; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=kMvINlBJY0QU3fOT+sYo/6tn6u6i0LjWeSaNDcOydOE=; b=lEsyDr+bOFNt41KlTr1Ffvg2kLm2X08l8C3yyhwKMZ2IMWxqkHvdy7P5 i0j8axhA534m0UkT9mi8UYzbZr1N44NT5KeO3Gt8gpfujFx5APHpHKJcB M4bBj/Qp03oyseIoAU7j+Zsg16F8GZZMQ5yD3Nuqn30qB4k/n8pLwuAbG cBAJeZjg2BqQMHeAm8SyWKZR4QrmipaHmdz9dy91l0JItIAFxS9TMSIxI i3CgbJ7ikClZTlhf5nhkAgSKPIJbZ253x4T3On4lPY/x+vbN1Xo9mwo8M MzL7vDSexUhYKZBYbvaFvNjh5zyqP+1Fmorx2bDuAaJJkL+dAyLSawGpk A==; X-CSE-ConnectionGUID: CCWbk5vhS+ajGxUJcRSzrg== X-CSE-MsgGUID: HSrz+qdiR1GxROP1BcGJQg== X-IronPort-AV: E=McAfee;i="6700,10204,11419"; a="51388659" X-IronPort-AV: E=Sophos;i="6.15,252,1739865600"; d="scan'208";a="51388659" Received: from fmviesa003.fm.intel.com ([10.60.135.143]) by orvoesa107.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 30 Apr 2025 13:53:18 -0700 
X-CSE-ConnectionGUID: FAB0Yim3ScWNBgAAQuz5mg== X-CSE-MsgGUID: YqO5pxEATxO4SaOFddgc5g== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="6.15,252,1739865600"; d="scan'208";a="138248929" Received: from jf5300-b11a338t.jf.intel.com ([10.242.51.115]) by fmviesa003.fm.intel.com with ESMTP; 30 Apr 2025 13:53:17 -0700 From: Kanchana P Sridhar To: linux-kernel@vger.kernel.org, linux-mm@kvack.org, hannes@cmpxchg.org, yosry.ahmed@linux.dev, nphamcs@gmail.com, chengming.zhou@linux.dev, usamaarif642@gmail.com, ryan.roberts@arm.com, 21cnbao@gmail.com, ying.huang@linux.alibaba.com, akpm@linux-foundation.org, linux-crypto@vger.kernel.org, herbert@gondor.apana.org.au, davem@davemloft.net, clabbe@baylibre.com, ardb@kernel.org, ebiggers@google.com, surenb@google.com, kristen.c.accardi@intel.com Cc: wajdi.k.feghali@intel.com, vinodh.gopal@intel.com, kanchana.p.sridhar@intel.com Subject: [PATCH v9 13/19] crypto: iaa - Disable iaa_verify_compress by default. Date: Wed, 30 Apr 2025 13:52:59 -0700 Message-Id: <20250430205305.22844-14-kanchana.p.sridhar@intel.com> X-Mailer: git-send-email 2.27.0 In-Reply-To: <20250430205305.22844-1-kanchana.p.sridhar@intel.com> References: <20250430205305.22844-1-kanchana.p.sridhar@intel.com> Precedence: bulk X-Mailing-List: linux-crypto@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 This patch makes it easier for IAA hardware acceleration in the iaa_crypto driver to be loaded by default with "iaa_verify_compress" disabled, to facilitate performance comparisons with software compressors (which also do not run compress verification by default). Earlier, iaa_crypto compress verification used to be enabled by default. The iaa_crypto driver documentation has been updated with this change. With this patch, if users want to enable compress verification, they can do so with these steps: 1) disable all the IAA device/wq bindings that happen at boot time 2) rmmod iaa_crypto 3) modprobe iaa_crypto 4) echo 1 > /sys/bus/dsa/drivers/crypto/verify_compress 5) re-run initialization of the IAA devices and wqs Signed-off-by: Kanchana P Sridhar --- Documentation/driver-api/crypto/iaa/iaa-crypto.rst | 2 +- drivers/crypto/intel/iaa/iaa_crypto_main.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/driver-api/crypto/iaa/iaa-crypto.rst b/Documentation/driver-api/crypto/iaa/iaa-crypto.rst index 8e0e98d50972..bc5912f22ae1 100644 --- a/Documentation/driver-api/crypto/iaa/iaa-crypto.rst +++ b/Documentation/driver-api/crypto/iaa/iaa-crypto.rst @@ -239,7 +239,7 @@ The available attributes are: echo 0 > /sys/bus/dsa/drivers/crypto/verify_compress - The default setting is '1' - verify all compresses. + The default setting is '0' - to not verify compresses. 
- sync_mode diff --git a/drivers/crypto/intel/iaa/iaa_crypto_main.c b/drivers/crypto/intel/iaa/iaa_crypto_main.c index cfd4f5ead67b..815b5d718625 100644 --- a/drivers/crypto/intel/iaa/iaa_crypto_main.c +++ b/drivers/crypto/intel/iaa/iaa_crypto_main.c @@ -83,7 +83,7 @@ static bool iaa_distribute_decomps = true; static bool iaa_distribute_comps = true; /* Verify results of IAA compress or not */ -static bool iaa_verify_compress = true; +static bool iaa_verify_compress = false; /* * The iaa crypto driver supports three 'sync' methods determining how From patchwork Wed Apr 30 20:53:00 2025 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Sridhar, Kanchana P" X-Patchwork-Id: 886544 Received: from mgamail.intel.com (mgamail.intel.com [198.175.65.15]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id B9BE52D2680; Wed, 30 Apr 2025 20:53:19 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=198.175.65.15 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1746046401; cv=none; b=C93OoX6f6fjGtZpYIwhW0KYBXRpRNZgAhCV9EOK2vU/OD3vVxJFyV+Lx1y828UKwvtnEb2KT20WcPEhbDLRNd1jRzO90I96A/HssKip39klD9o7t+qcH9vdIOl/etIkMFF6HYDcR+2EirXmRMBYTXPifWOgrsKYDrqJ0NpIexRU= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1746046401; c=relaxed/simple; bh=l0g+LAnddWvzoTX2shNX+O/DFhTRbmW793v8KGvd668=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=SBMLRRoVbSonJFo6NXTYJctTkxgX9+yNHpZo5+ZaZyZQO6obNwRbR55rKDmC0K0uMIYPWO8tGSAM6B28XzgUu3Z4ELW/AqzzitmLIQuKW4NroZUbm+enezVZMD35JnfAkJVerWEpdlFArbiUwH2pXFzcSgmeBFurBkeeGeuDA1k= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=intel.com; spf=pass smtp.mailfrom=intel.com; dkim=pass (2048-bit key) header.d=intel.com header.i=@intel.com header.b=YU9K/3si; arc=none smtp.client-ip=198.175.65.15 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=intel.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=intel.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=intel.com header.i=@intel.com header.b="YU9K/3si" DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=intel.com; i=@intel.com; q=dns/txt; s=Intel; t=1746046400; x=1777582400; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=l0g+LAnddWvzoTX2shNX+O/DFhTRbmW793v8KGvd668=; b=YU9K/3siN6a2dT9QNRQuZcNPJEZGk88NenYWPH+5CQAh+CUX1F2/vPsN 0jQToC4M66LBp/PiW7fHXbhiCq0qsXZQzzdrkcEC+PijdcKB1yoWzwI3P CjgQ2yVlMxtU6ZM7VKx4miieZPXWr5tg594OCZgCuKh4O3eCW3I5XLlQX kUfDMAdg4vNaO4Y7vGaw/gZcO/1XCY0g7u3ktGIFkHwPObzIap1xcnMtD pNQz+WJxO6OjZCljeGAGHB3iW6CesjsOMqIeLSaZ8ydfLgyVWSa9WQChK jiQaauoxA7bTOqVehJhgUPuBT1a2hQryQqh8I0wr0OcG7jIkIg+H4o3mr Q==; X-CSE-ConnectionGUID: zAXK+rcuRhqGTRbs+5wCag== X-CSE-MsgGUID: tF6VLOHPQiC9Iy9KScurOA== X-IronPort-AV: E=McAfee;i="6700,10204,11419"; a="51388673" X-IronPort-AV: E=Sophos;i="6.15,252,1739865600"; d="scan'208";a="51388673" Received: from fmviesa003.fm.intel.com ([10.60.135.143]) by orvoesa107.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 30 Apr 2025 13:53:19 -0700 X-CSE-ConnectionGUID: rpgM/CbWRUyB15lJ5VFjDA== X-CSE-MsgGUID: GlJk7a+pSh6n3g5WrzPoNA== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="6.15,252,1739865600"; 
d="scan'208";a="138248932" Received: from jf5300-b11a338t.jf.intel.com ([10.242.51.115]) by fmviesa003.fm.intel.com with ESMTP; 30 Apr 2025 13:53:18 -0700 From: Kanchana P Sridhar To: linux-kernel@vger.kernel.org, linux-mm@kvack.org, hannes@cmpxchg.org, yosry.ahmed@linux.dev, nphamcs@gmail.com, chengming.zhou@linux.dev, usamaarif642@gmail.com, ryan.roberts@arm.com, 21cnbao@gmail.com, ying.huang@linux.alibaba.com, akpm@linux-foundation.org, linux-crypto@vger.kernel.org, herbert@gondor.apana.org.au, davem@davemloft.net, clabbe@baylibre.com, ardb@kernel.org, ebiggers@google.com, surenb@google.com, kristen.c.accardi@intel.com Cc: wajdi.k.feghali@intel.com, vinodh.gopal@intel.com, kanchana.p.sridhar@intel.com Subject: [PATCH v9 14/19] mm: zswap: Move the CPU hotplug procedures under "pool functions". Date: Wed, 30 Apr 2025 13:53:00 -0700 Message-Id: <20250430205305.22844-15-kanchana.p.sridhar@intel.com> X-Mailer: git-send-email 2.27.0 In-Reply-To: <20250430205305.22844-1-kanchana.p.sridhar@intel.com> References: <20250430205305.22844-1-kanchana.p.sridhar@intel.com> Precedence: bulk X-Mailing-List: linux-crypto@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 This patch merely moves zswap_cpu_comp_prepare() and zswap_cpu_comp_dead() to be in the "pool functions" section because these functions are invoked upon pool creation/deletion. Signed-off-by: Kanchana P Sridhar --- mm/zswap.c | 188 ++++++++++++++++++++++++++--------------------------- 1 file changed, 94 insertions(+), 94 deletions(-) diff --git a/mm/zswap.c b/mm/zswap.c index 455e9425c5f5..358dad3e612a 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -248,6 +248,100 @@ static inline struct xarray *swap_zswap_tree(swp_entry_t swp) **********************************/ static void __zswap_pool_empty(struct percpu_ref *ref); +static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node) +{ + struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node); + struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu); + struct crypto_acomp *acomp = NULL; + struct acomp_req *req = NULL; + u8 *buffer = NULL; + int ret; + + buffer = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu)); + if (!buffer) { + ret = -ENOMEM; + goto fail; + } + + acomp = crypto_alloc_acomp_node(pool->tfm_name, 0, 0, cpu_to_node(cpu)); + if (IS_ERR(acomp)) { + pr_err("could not alloc crypto acomp %s : %ld\n", + pool->tfm_name, PTR_ERR(acomp)); + ret = PTR_ERR(acomp); + goto fail; + } + + req = acomp_request_alloc(acomp); + if (!req) { + pr_err("could not alloc crypto acomp_request %s\n", + pool->tfm_name); + ret = -ENOMEM; + goto fail; + } + + /* + * Only hold the mutex after completing allocations, otherwise we may + * recurse into zswap through reclaim and attempt to hold the mutex + * again resulting in a deadlock. + */ + mutex_lock(&acomp_ctx->mutex); + crypto_init_wait(&acomp_ctx->wait); + + /* + * if the backend of acomp is async zip, crypto_req_done() will wakeup + * crypto_wait_req(); if the backend of acomp is scomp, the callback + * won't be called, crypto_wait_req() will return without blocking. 
+ */ + acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, + crypto_req_done, &acomp_ctx->wait); + + acomp_ctx->buffer = buffer; + acomp_ctx->acomp = acomp; + acomp_ctx->is_sleepable = acomp_is_async(acomp); + acomp_ctx->req = req; + mutex_unlock(&acomp_ctx->mutex); + return 0; + +fail: + if (acomp) + crypto_free_acomp(acomp); + kfree(buffer); + return ret; +} + +static int zswap_cpu_comp_dead(unsigned int cpu, struct hlist_node *node) +{ + struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node); + struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu); + struct acomp_req *req; + struct crypto_acomp *acomp; + u8 *buffer; + + if (IS_ERR_OR_NULL(acomp_ctx)) + return 0; + + mutex_lock(&acomp_ctx->mutex); + req = acomp_ctx->req; + acomp = acomp_ctx->acomp; + buffer = acomp_ctx->buffer; + acomp_ctx->req = NULL; + acomp_ctx->acomp = NULL; + acomp_ctx->buffer = NULL; + mutex_unlock(&acomp_ctx->mutex); + + /* + * Do the actual freeing after releasing the mutex to avoid subtle + * locking dependencies causing deadlocks. + */ + if (!IS_ERR_OR_NULL(req)) + acomp_request_free(req); + if (!IS_ERR_OR_NULL(acomp)) + crypto_free_acomp(acomp); + kfree(buffer); + + return 0; +} + static struct zswap_pool *zswap_pool_create(char *type, char *compressor) { struct zswap_pool *pool; @@ -818,100 +912,6 @@ static void zswap_entry_free(struct zswap_entry *entry) /********************************* * compressed storage functions **********************************/ -static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node) -{ - struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node); - struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu); - struct crypto_acomp *acomp = NULL; - struct acomp_req *req = NULL; - u8 *buffer = NULL; - int ret; - - buffer = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu)); - if (!buffer) { - ret = -ENOMEM; - goto fail; - } - - acomp = crypto_alloc_acomp_node(pool->tfm_name, 0, 0, cpu_to_node(cpu)); - if (IS_ERR(acomp)) { - pr_err("could not alloc crypto acomp %s : %ld\n", - pool->tfm_name, PTR_ERR(acomp)); - ret = PTR_ERR(acomp); - goto fail; - } - - req = acomp_request_alloc(acomp); - if (!req) { - pr_err("could not alloc crypto acomp_request %s\n", - pool->tfm_name); - ret = -ENOMEM; - goto fail; - } - - /* - * Only hold the mutex after completing allocations, otherwise we may - * recurse into zswap through reclaim and attempt to hold the mutex - * again resulting in a deadlock. - */ - mutex_lock(&acomp_ctx->mutex); - crypto_init_wait(&acomp_ctx->wait); - - /* - * if the backend of acomp is async zip, crypto_req_done() will wakeup - * crypto_wait_req(); if the backend of acomp is scomp, the callback - * won't be called, crypto_wait_req() will return without blocking. 
- */ - acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, - crypto_req_done, &acomp_ctx->wait); - - acomp_ctx->buffer = buffer; - acomp_ctx->acomp = acomp; - acomp_ctx->is_sleepable = acomp_is_async(acomp); - acomp_ctx->req = req; - mutex_unlock(&acomp_ctx->mutex); - return 0; - -fail: - if (acomp) - crypto_free_acomp(acomp); - kfree(buffer); - return ret; -} - -static int zswap_cpu_comp_dead(unsigned int cpu, struct hlist_node *node) -{ - struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node); - struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu); - struct acomp_req *req; - struct crypto_acomp *acomp; - u8 *buffer; - - if (IS_ERR_OR_NULL(acomp_ctx)) - return 0; - - mutex_lock(&acomp_ctx->mutex); - req = acomp_ctx->req; - acomp = acomp_ctx->acomp; - buffer = acomp_ctx->buffer; - acomp_ctx->req = NULL; - acomp_ctx->acomp = NULL; - acomp_ctx->buffer = NULL; - mutex_unlock(&acomp_ctx->mutex); - - /* - * Do the actual freeing after releasing the mutex to avoid subtle - * locking dependencies causing deadlocks. - */ - if (!IS_ERR_OR_NULL(req)) - acomp_request_free(req); - if (!IS_ERR_OR_NULL(acomp)) - crypto_free_acomp(acomp); - kfree(buffer); - - return 0; -} - static struct crypto_acomp_ctx *acomp_ctx_get_cpu_lock(struct zswap_pool *pool) { struct crypto_acomp_ctx *acomp_ctx; From patchwork Wed Apr 30 20:53:01 2025 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Sridhar, Kanchana P" X-Patchwork-Id: 886543 Received: from mgamail.intel.com (mgamail.intel.com [198.175.65.15]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 5AABF2D26B3; Wed, 30 Apr 2025 20:53:21 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=198.175.65.15 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1746046404; cv=none; b=aoB6MYZi6Chvnt2UTq3HhVv/W6Javo+xWd7yagz5QasDN4V/rVTi7tf9HXDtSDNKhaV5O3hQnCNMUrnfBM61uE9FmVsw5NTaxaSMrsF7XkRPS/vpertlsSDxuA6bsNIZ+POsRuyK3Pr7yvOq2JXPd4VcKSkIadJMGp13gGRFSsY= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1746046404; c=relaxed/simple; bh=AvunHsRrxVpuS1Dp2nlscgwVBY2zU8yiTAXdQyduiSQ=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=fBfTTui4FVgjTU2Kpfa4HuurTBT0YYyUT2pyi354TQ+T3m3ydBGUbeYqhaYiFIdfvDjZLVIL9djMPvXNuyDfuRHRSpn1JwS66Kb6S0pTs7NZOIQxfNR5xYGf/vgXVD7TYg11Nyccz+3v6ifPSOaMTWmW/uS2hLifDdpmGT9/jaE= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=intel.com; spf=pass smtp.mailfrom=intel.com; dkim=pass (2048-bit key) header.d=intel.com header.i=@intel.com header.b=PQlb5FOz; arc=none smtp.client-ip=198.175.65.15 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=intel.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=intel.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=intel.com header.i=@intel.com header.b="PQlb5FOz" DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=intel.com; i=@intel.com; q=dns/txt; s=Intel; t=1746046402; x=1777582402; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=AvunHsRrxVpuS1Dp2nlscgwVBY2zU8yiTAXdQyduiSQ=; b=PQlb5FOz66TYoleZeolSf7f2KRalF9SiH98PQ/slULwYyTOZ7uX/gUH8 
T1cRJOwDTR3Sz6o/dn6kk1zSnEFTT/L0VfKzh+FgOt1IpgZ8ZrXcexdJl NQi1M60JAyfWXNaRO9Jfqvw1iNAGnrK8cHomsOJW5nA8IANs371+qv4/e V87quvzegM/9iznoo7xyGtYqQhE5SwPgo+Tw0hS2gREYvxZsVbZu7qu1D KJLNkOaFG6QbfSoHpccJcqQp1SMULaGgnfO0f90MXja1lW16NStfzKbK0 Vyx6EvxYViu/u4Q79vPRMGS8posz2ndZcdH3mQNwn+BH4V88wHahDibqW A==; X-CSE-ConnectionGUID: PqNY17ZKTNaSwq379B7Y4w== X-CSE-MsgGUID: qwrwRSPCSpCHPNUmYRE1/Q== X-IronPort-AV: E=McAfee;i="6700,10204,11419"; a="51388687" X-IronPort-AV: E=Sophos;i="6.15,252,1739865600"; d="scan'208";a="51388687" Received: from fmviesa003.fm.intel.com ([10.60.135.143]) by orvoesa107.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 30 Apr 2025 13:53:20 -0700 X-CSE-ConnectionGUID: xwuqEKmXSaOdIYE2dNDueQ== X-CSE-MsgGUID: An8Du/lMTsqTa+0zQDAPhQ== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="6.15,252,1739865600"; d="scan'208";a="138248935" Received: from jf5300-b11a338t.jf.intel.com ([10.242.51.115]) by fmviesa003.fm.intel.com with ESMTP; 30 Apr 2025 13:53:19 -0700 From: Kanchana P Sridhar To: linux-kernel@vger.kernel.org, linux-mm@kvack.org, hannes@cmpxchg.org, yosry.ahmed@linux.dev, nphamcs@gmail.com, chengming.zhou@linux.dev, usamaarif642@gmail.com, ryan.roberts@arm.com, 21cnbao@gmail.com, ying.huang@linux.alibaba.com, akpm@linux-foundation.org, linux-crypto@vger.kernel.org, herbert@gondor.apana.org.au, davem@davemloft.net, clabbe@baylibre.com, ardb@kernel.org, ebiggers@google.com, surenb@google.com, kristen.c.accardi@intel.com Cc: wajdi.k.feghali@intel.com, vinodh.gopal@intel.com, kanchana.p.sridhar@intel.com Subject: [PATCH v9 15/19] mm: zswap: Per-CPU acomp_ctx resources exist from pool creation to deletion. Date: Wed, 30 Apr 2025 13:53:01 -0700 Message-Id: <20250430205305.22844-16-kanchana.p.sridhar@intel.com> X-Mailer: git-send-email 2.27.0 In-Reply-To: <20250430205305.22844-1-kanchana.p.sridhar@intel.com> References: <20250430205305.22844-1-kanchana.p.sridhar@intel.com> Precedence: bulk X-Mailing-List: linux-crypto@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 This patch simplifies the zswap_pool's per-CPU acomp_ctx resource management. Similar to the per-CPU acomp_ctx itself, the per-CPU acomp_ctx's resources' (acomp, ref, buffer) lifetime will also be from pool creation to pool deletion. These resources will persist through CPU hotplug operations. The zswap_cpu_comp_dead() teardown callback has been deleted from the call to cpuhp_setup_state_multi(CPUHP_MM_ZSWP_POOL_PREPARE). As a result, CPU offline hotplug operations will be no-ops as far as the acomp_ctx resources are concerned. The main benefit of using the CPU hotplug multi state instance startup callback to allocate the acomp_ctx resources is that it prevents the cores from being offlined until the multi state instance addition call returns. From Documentation/core-api/cpu_hotplug.rst: "The node list add/remove operations and the callback invocations are serialized against CPU hotplug operations." Furthermore, zswap_[de]compress() cannot contend with zswap_cpu_comp_prepare() because: - During pool creation/deletion, the pool is not in the zswap_pools list. - During CPU hot[un]plug, the CPU is not yet online, as Yosry pointed out. zswap_cpu_comp_prepare() will be executed on a control CPU, since CPUHP_MM_ZSWP_POOL_PREPARE is in the PREPARE section of "enum cpuhp_state". Thanks Yosry for sharing this observation! In both these cases, any recursions into zswap reclaim from zswap_cpu_comp_prepare() will be handled by the old pool. 
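As a side note, the multi state hotplug API referenced above follows a small, fixed pattern. The sketch below is a minimal, self-contained illustration of that pattern, not the zswap code itself: struct example_pool, example_cpu_prepare(), example_register() and example_pool_online() are hypothetical names, while CPUHP_MM_ZSWP_POOL_PREPARE, cpuhp_setup_state_multi(), cpuhp_state_add_instance() and cpuhp_state_remove_instance() are the kernel interfaces this series uses.

#include <linux/cpuhotplug.h>
#include <linux/list.h>

/* Hypothetical stand-in for the per-pool node embedded in struct zswap_pool. */
struct example_pool {
        struct hlist_node node;
};

/* Startup callback: invoked for each online CPU when an instance is added.
 * Since CPUHP_MM_ZSWP_POOL_PREPARE is in the PREPARE section, it runs on a
 * control CPU, not on the CPU being brought up. */
static int example_cpu_prepare(unsigned int cpu, struct hlist_node *node)
{
        /* Allocate per-CPU resources here; return 0 on success. */
        return 0;
}

static int example_register(void)
{
        /* Register the multi state with a startup callback and no teardown. */
        return cpuhp_setup_state_multi(CPUHP_MM_ZSWP_POOL_PREPARE,
                                       "mm/zswap_pool:prepare",
                                       example_cpu_prepare, NULL);
}

static int example_pool_online(struct example_pool *pool)
{
        int ret;

        /*
         * The instance add runs example_cpu_prepare() for every online CPU
         * and is serialized against CPU hotplug, so no CPU can be offlined
         * while it is in progress.
         */
        ret = cpuhp_state_add_instance(CPUHP_MM_ZSWP_POOL_PREPARE, &pool->node);

        /* Remove the instance right away; CPU offlining then stays a no-op. */
        cpuhp_state_remove_instance(CPUHP_MM_ZSWP_POOL_PREPARE, &pool->node);

        return ret;
}

Registering with a NULL teardown callback is what makes CPU offlining a no-op for these resources in the sketch, mirroring the change made by this patch.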
The above two observations enable the following simplifications:

1) zswap_cpu_comp_prepare(): CPU cannot be offlined. Reclaim cannot use
   the pool.

   Considerations for mutex init/locking and handling subsequent CPU
   hotplug online-offlines: should we lock the mutex of the current
   CPU's acomp_ctx from start to end? This does not appear to be
   required. The multi state instance add/remove operations acquire the
   "cpuhp_state_mutex" before proceeding, hence they are serialized
   against CPU hotplug operations. If the process gets migrated while
   zswap_cpu_comp_prepare() is running, it will complete on the new CPU.
   In case of failures, we pass the acomp_ctx pointer obtained at the
   start of zswap_cpu_comp_prepare() to acomp_ctx_dealloc(), which,
   again, can at most be affected by process migration. There appear to
   be no contention scenarios that might cause inconsistent values of
   the acomp_ctx's members. Hence, there is no need for
   mutex_lock(&acomp_ctx->mutex) in zswap_cpu_comp_prepare().

   Since the pool is not yet on the zswap_pools list, we don't need to
   initialize the per-CPU acomp_ctx mutex in zswap_pool_create(). This
   has been restored to occur in zswap_cpu_comp_prepare().

   zswap_cpu_comp_prepare() checks upfront if acomp_ctx->acomp is valid.
   If so, it returns success. This handles any CPU hotplug
   online-offline transitions after pool creation is done.

2) CPU offline vis-a-vis zswap ops: suppose the process is migrated to
   another CPU before the current CPU is taken offline. If
   zswap_[de]compress() holds the acomp_ctx->mutex lock of the offlined
   CPU, that mutex will be released once it completes on the new CPU.
   Since there is no teardown callback, there is no possibility of UAF.

3) Pool creation/deletion and process migration to another CPU:

   - During pool creation/deletion, the pool is not in the zswap_pools
     list, hence it cannot contend with zswap ops on that CPU. However,
     the process can get migrated.

     Pool creation --> zswap_cpu_comp_prepare() --> process migrated:
       * CPU offline: no-op.
       * zswap_cpu_comp_prepare() continues to run on the new CPU to
         finish allocating acomp_ctx resources for the offlined CPU.

     Pool deletion --> acomp_ctx_dealloc() --> process migrated:
       * CPU offline: no-op.
       * acomp_ctx_dealloc() continues to run on the new CPU to finish
         de-allocating acomp_ctx resources for the offlined CPU.

4) Pool deletion vis-a-vis CPU onlining: to prevent race conditions
   between acomp_ctx_dealloc() freeing the acomp_ctx resources and the
   initial check for a valid acomp_ctx->acomp in
   zswap_cpu_comp_prepare(), we delete the multi state instance right
   after it is added, in zswap_pool_create().

Summary of changes based on the above:
--------------------------------------

1) Zero-initialization of pool->acomp_ctx in zswap_pool_create() to
   simplify and share common code for different error handling/cleanup
   related to the acomp_ctx.

2) Remove the node list instance right after the node list add function
   call in zswap_pool_create(). This prevents race conditions between
   CPU onlining after initial pool creation, and acomp_ctx_dealloc()
   freeing the acomp_ctx resources.

3) zswap_pool_destroy() will call acomp_ctx_dealloc() to de-allocate the
   per-CPU acomp_ctx resources.

4) Changes to zswap_cpu_comp_prepare():
   a) Check if acomp_ctx->acomp is valid at the beginning and return,
      because the acomp_ctx is already initialized.
   b) Move the mutex_init to happen in this procedure, before it
      returns.
   c) All error conditions are handled by calling acomp_ctx_dealloc().

5) New procedure acomp_ctx_dealloc() for common error/cleanup code.
6) No more multi state instance teardown callback. CPU offlining is a no-op as far as acomp_ctx resources are concerned. 7) Delete acomp_ctx_get_cpu_lock()/acomp_ctx_put_unlock(). Directly call mutex_lock(&acomp_ctx->mutex)/mutex_unlock(&acomp_ctx->mutex) in zswap_[de]compress(). The per-CPU memory cost of not deleting the acomp_ctx resources upon CPU offlining, and only deleting them when the pool is destroyed, is as follows, on x86_64: IAA with batching: 64.8 KB Software compressors: 8.2 KB Signed-off-by: Kanchana P Sridhar --- mm/zswap.c | 193 +++++++++++++++++++++++++---------------------------- 1 file changed, 92 insertions(+), 101 deletions(-) diff --git a/mm/zswap.c b/mm/zswap.c index 358dad3e612a..238f92e63a22 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -248,43 +248,65 @@ static inline struct xarray *swap_zswap_tree(swp_entry_t swp) **********************************/ static void __zswap_pool_empty(struct percpu_ref *ref); +/* + * The per-cpu pool->acomp_ctx is zero-initialized on allocation. This makes + * it easy for different error conditions/cleanup related to the acomp_ctx + * to be handled by acomp_ctx_dealloc(): + * - Errors during zswap_cpu_comp_prepare(). + * - Partial success/error of cpuhp_state_add_instance() call in + * zswap_pool_create(). Only some cores could have executed + * zswap_cpu_comp_prepare(), not others. + * - Cleanup acomp_ctx resources on all cores in zswap_pool_destroy(). + */ +static void acomp_ctx_dealloc(struct crypto_acomp_ctx *acomp_ctx) +{ + if (IS_ERR_OR_NULL(acomp_ctx)) + return; + + if (!IS_ERR_OR_NULL(acomp_ctx->req)) + acomp_request_free(acomp_ctx->req); + if (!IS_ERR_OR_NULL(acomp_ctx->acomp)) + crypto_free_acomp(acomp_ctx->acomp); + kfree(acomp_ctx->buffer); +} + static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node) { struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node); struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu); - struct crypto_acomp *acomp = NULL; - struct acomp_req *req = NULL; - u8 *buffer = NULL; - int ret; + int ret = -ENOMEM; - buffer = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu)); - if (!buffer) { - ret = -ENOMEM; - goto fail; - } + /* + * The per-CPU pool->acomp_ctx is zero-initialized on allocation. + * Even though we delete the multi state instance right after successful + * addition of the instance in zswap_pool_create(), we cannot eliminate + * the possibility of the CPU going through offline-online transitions. + * If this does happen, we check if the acomp_ctx has already been + * initialized, and return. 
+ */ + if (!IS_ERR_OR_NULL(acomp_ctx->acomp)) + return 0; - acomp = crypto_alloc_acomp_node(pool->tfm_name, 0, 0, cpu_to_node(cpu)); - if (IS_ERR(acomp)) { + acomp_ctx->buffer = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu)); + if (!acomp_ctx->buffer) + return ret; + + acomp_ctx->acomp = crypto_alloc_acomp_node(pool->tfm_name, 0, 0, cpu_to_node(cpu)); + if (IS_ERR(acomp_ctx->acomp)) { pr_err("could not alloc crypto acomp %s : %ld\n", - pool->tfm_name, PTR_ERR(acomp)); - ret = PTR_ERR(acomp); + pool->tfm_name, PTR_ERR(acomp_ctx->acomp)); + ret = PTR_ERR(acomp_ctx->acomp); goto fail; } + acomp_ctx->is_sleepable = acomp_is_async(acomp_ctx->acomp); - req = acomp_request_alloc(acomp); - if (!req) { + acomp_ctx->req = acomp_request_alloc(acomp_ctx->acomp); + if (!acomp_ctx->req) { pr_err("could not alloc crypto acomp_request %s\n", pool->tfm_name); - ret = -ENOMEM; goto fail; } - /* - * Only hold the mutex after completing allocations, otherwise we may - * recurse into zswap through reclaim and attempt to hold the mutex - * again resulting in a deadlock. - */ - mutex_lock(&acomp_ctx->mutex); crypto_init_wait(&acomp_ctx->wait); /* @@ -292,56 +314,17 @@ static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node) * crypto_wait_req(); if the backend of acomp is scomp, the callback * won't be called, crypto_wait_req() will return without blocking. */ - acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, + acomp_request_set_callback(acomp_ctx->req, CRYPTO_TFM_REQ_MAY_BACKLOG, crypto_req_done, &acomp_ctx->wait); - acomp_ctx->buffer = buffer; - acomp_ctx->acomp = acomp; - acomp_ctx->is_sleepable = acomp_is_async(acomp); - acomp_ctx->req = req; - mutex_unlock(&acomp_ctx->mutex); + mutex_init(&acomp_ctx->mutex); return 0; fail: - if (acomp) - crypto_free_acomp(acomp); - kfree(buffer); + acomp_ctx_dealloc(acomp_ctx); return ret; } -static int zswap_cpu_comp_dead(unsigned int cpu, struct hlist_node *node) -{ - struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node); - struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu); - struct acomp_req *req; - struct crypto_acomp *acomp; - u8 *buffer; - - if (IS_ERR_OR_NULL(acomp_ctx)) - return 0; - - mutex_lock(&acomp_ctx->mutex); - req = acomp_ctx->req; - acomp = acomp_ctx->acomp; - buffer = acomp_ctx->buffer; - acomp_ctx->req = NULL; - acomp_ctx->acomp = NULL; - acomp_ctx->buffer = NULL; - mutex_unlock(&acomp_ctx->mutex); - - /* - * Do the actual freeing after releasing the mutex to avoid subtle - * locking dependencies causing deadlocks. - */ - if (!IS_ERR_OR_NULL(req)) - acomp_request_free(req); - if (!IS_ERR_OR_NULL(acomp)) - crypto_free_acomp(acomp); - kfree(buffer); - - return 0; -} - static struct zswap_pool *zswap_pool_create(char *type, char *compressor) { struct zswap_pool *pool; @@ -375,19 +358,43 @@ static struct zswap_pool *zswap_pool_create(char *type, char *compressor) strscpy(pool->tfm_name, compressor, sizeof(pool->tfm_name)); - pool->acomp_ctx = alloc_percpu(*pool->acomp_ctx); + /* Many things rely on the zero-initialization. */ + pool->acomp_ctx = alloc_percpu_gfp(*pool->acomp_ctx, + GFP_KERNEL | __GFP_ZERO); if (!pool->acomp_ctx) { pr_err("percpu alloc failed\n"); goto error; } - for_each_possible_cpu(cpu) - mutex_init(&per_cpu_ptr(pool->acomp_ctx, cpu)->mutex); - + /* + * This is serialized against CPU hotplug operations. Hence, cores + * cannot be offlined until this finishes. 
+ * In case of errors, we need to goto "ref_fail" instead of "error" + * because there is no teardown callback registered anymore, for + * cpuhp_state_add_instance() to de-allocate resources as it rolls back + * state on cores before the CPU on which error was encountered. + */ ret = cpuhp_state_add_instance(CPUHP_MM_ZSWP_POOL_PREPARE, &pool->node); + + /* + * We only needed the multi state instance add operation to invoke the + * startup callback for all cores without cores getting offlined. Since + * the acomp_ctx resources will now only be de-allocated when the pool + * is destroyed, we can safely remove the multi state instance. This + * minimizes (but does not eliminate) the possibility of + * zswap_cpu_comp_prepare() being invoked again due to a CPU + * offline-online transition. Removing the instance also prevents race + * conditions between CPU onlining after initial pool creation, and + * acomp_ctx_dealloc() freeing the acomp_ctx resources. + * Note that we delete the instance before checking the error status of + * the node list add operation because we want the instance removal even + * in case of errors in the former. + */ + cpuhp_state_remove_instance(CPUHP_MM_ZSWP_POOL_PREPARE, &pool->node); + if (ret) - goto error; + goto ref_fail; /* being the current pool takes 1 ref; this func expects the * caller to always add the new pool as the current pool @@ -403,7 +410,8 @@ static struct zswap_pool *zswap_pool_create(char *type, char *compressor) return pool; ref_fail: - cpuhp_state_remove_instance(CPUHP_MM_ZSWP_POOL_PREPARE, &pool->node); + for_each_possible_cpu(cpu) + acomp_ctx_dealloc(per_cpu_ptr(pool->acomp_ctx, cpu)); error: if (pool->acomp_ctx) free_percpu(pool->acomp_ctx); @@ -457,9 +465,13 @@ static struct zswap_pool *__zswap_pool_create_fallback(void) static void zswap_pool_destroy(struct zswap_pool *pool) { + int cpu; + zswap_pool_debug("destroying", pool); - cpuhp_state_remove_instance(CPUHP_MM_ZSWP_POOL_PREPARE, &pool->node); + for_each_possible_cpu(cpu) + acomp_ctx_dealloc(per_cpu_ptr(pool->acomp_ctx, cpu)); + free_percpu(pool->acomp_ctx); zpool_destroy_pool(pool->zpool); @@ -912,31 +924,6 @@ static void zswap_entry_free(struct zswap_entry *entry) /********************************* * compressed storage functions **********************************/ -static struct crypto_acomp_ctx *acomp_ctx_get_cpu_lock(struct zswap_pool *pool) -{ - struct crypto_acomp_ctx *acomp_ctx; - - for (;;) { - acomp_ctx = raw_cpu_ptr(pool->acomp_ctx); - mutex_lock(&acomp_ctx->mutex); - if (likely(acomp_ctx->req)) - return acomp_ctx; - /* - * It is possible that we were migrated to a different CPU after - * getting the per-CPU ctx but before the mutex was acquired. If - * the old CPU got offlined, zswap_cpu_comp_dead() could have - * already freed ctx->req (among other things) and set it to - * NULL. Just try again on the new CPU that we ended up on. 
- */ - mutex_unlock(&acomp_ctx->mutex); - } -} - -static void acomp_ctx_put_unlock(struct crypto_acomp_ctx *acomp_ctx) -{ - mutex_unlock(&acomp_ctx->mutex); -} - static bool zswap_compress(struct page *page, struct zswap_entry *entry, struct zswap_pool *pool) { @@ -949,7 +936,10 @@ static bool zswap_compress(struct page *page, struct zswap_entry *entry, gfp_t gfp; u8 *dst; - acomp_ctx = acomp_ctx_get_cpu_lock(pool); + acomp_ctx = raw_cpu_ptr(pool->acomp_ctx); + + mutex_lock(&acomp_ctx->mutex); + dst = acomp_ctx->buffer; sg_init_table(&input, 1); sg_set_page(&input, page, PAGE_SIZE, 0); @@ -997,7 +987,7 @@ static bool zswap_compress(struct page *page, struct zswap_entry *entry, else if (alloc_ret) zswap_reject_alloc_fail++; - acomp_ctx_put_unlock(acomp_ctx); + mutex_unlock(&acomp_ctx->mutex); return comp_ret == 0 && alloc_ret == 0; } @@ -1009,7 +999,8 @@ static bool zswap_decompress(struct zswap_entry *entry, struct folio *folio) int decomp_ret, dlen; u8 *src, *obj; - acomp_ctx = acomp_ctx_get_cpu_lock(entry->pool); + acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx); + mutex_lock(&acomp_ctx->mutex); obj = zpool_obj_read_begin(zpool, entry->handle, acomp_ctx->buffer); /* @@ -1033,7 +1024,7 @@ static bool zswap_decompress(struct zswap_entry *entry, struct folio *folio) dlen = acomp_ctx->req->dlen; zpool_obj_read_end(zpool, entry->handle, obj); - acomp_ctx_put_unlock(acomp_ctx); + mutex_unlock(&acomp_ctx->mutex); if (!decomp_ret && dlen == PAGE_SIZE) return true; @@ -1849,7 +1840,7 @@ static int zswap_setup(void) ret = cpuhp_setup_state_multi(CPUHP_MM_ZSWP_POOL_PREPARE, "mm/zswap_pool:prepare", zswap_cpu_comp_prepare, - zswap_cpu_comp_dead); + NULL); if (ret) goto hp_fail; From patchwork Wed Apr 30 20:53:02 2025 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Sridhar, Kanchana P" X-Patchwork-Id: 886164 Received: from mgamail.intel.com (mgamail.intel.com [198.175.65.15]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id B58382D29B0; Wed, 30 Apr 2025 20:53:21 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=198.175.65.15 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1746046403; cv=none; b=kO8FRKSFYH2k4d0W/PMurjS47JBLDgMp4MXVHiuZLUh0PXtS6UFIq7PCI+qtBLDyDKS6cUeQWCOch1/uykdl27YpWatFYFU3HzpJ5IoGTnLTbzFfE8FIhvm9JV4we/Dpsbgr0P3Y5J1QMO3pHesERrfg/hEqrexQr7wdody/Xk4= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1746046403; c=relaxed/simple; bh=0i1XAzaurCUawIwqEktmrNT/73IFm7t499dJ0rxYn9I=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=bt8+lq8Q6v8NfFqnZuXkynh5mW12JZ7krO2k/+bNOYKIqglf08j9D++gDLSPC/yi8bWrUXg3ohbDOPUe8IWUwiYkaaU4QQ43wSQPCxuSJuRIk3pXOuY2D1tCxE1OxncKmW3HQsfc0KXU5bfgCxuw4EiqhnOuBio8yC1MILUFp6U= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=intel.com; spf=pass smtp.mailfrom=intel.com; dkim=pass (2048-bit key) header.d=intel.com header.i=@intel.com header.b=DTs9XgsM; arc=none smtp.client-ip=198.175.65.15 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=intel.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=intel.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=intel.com header.i=@intel.com header.b="DTs9XgsM" DKIM-Signature: v=1; 
a=rsa-sha256; c=relaxed/simple; d=intel.com; i=@intel.com; q=dns/txt; s=Intel; t=1746046402; x=1777582402; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=0i1XAzaurCUawIwqEktmrNT/73IFm7t499dJ0rxYn9I=; b=DTs9XgsMnz/uI03y8l1+4ySXOpn+6pHsx01Z6yLu+RZeimpL3kAtiibP BXFeVTqT7wcPpx3N0It0pGQVRPknDIGOVo9D35BxCJjdlwlyCO/ni09B5 i+UFMnm5WsSg4Ox4/j62qTFnZ/tL6p3eVkpaj5KU1BqQd+cmf4N0LMXBp Y3XfP2CPZv8ks66N35v71G0p6fmUH95TRVPLD9cLhWWETuvaOnoKQAS0h ILlgo/Y6d8K4PAKb/43xAvo/ksa6XSk3yvvV3XLs88FaD1jiC/sIJiukm bYjMqdiPWqNocuw+zDV6xpyqK9K/9Dnb3fx9y7gz2vgL0mK7xI0m7dBIz A==; X-CSE-ConnectionGUID: 8bMDg+ByRDi0eCifs5RGIw== X-CSE-MsgGUID: beG8S2BHTZKWFvtribKHog== X-IronPort-AV: E=McAfee;i="6700,10204,11419"; a="51388701" X-IronPort-AV: E=Sophos;i="6.15,252,1739865600"; d="scan'208";a="51388701" Received: from fmviesa003.fm.intel.com ([10.60.135.143]) by orvoesa107.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 30 Apr 2025 13:53:21 -0700 X-CSE-ConnectionGUID: IRXs5gDkQQebuVPA0AIg0Q== X-CSE-MsgGUID: WDtUBR3bRoqtGWmSDUecWQ== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="6.15,252,1739865600"; d="scan'208";a="138248939" Received: from jf5300-b11a338t.jf.intel.com ([10.242.51.115]) by fmviesa003.fm.intel.com with ESMTP; 30 Apr 2025 13:53:19 -0700 From: Kanchana P Sridhar To: linux-kernel@vger.kernel.org, linux-mm@kvack.org, hannes@cmpxchg.org, yosry.ahmed@linux.dev, nphamcs@gmail.com, chengming.zhou@linux.dev, usamaarif642@gmail.com, ryan.roberts@arm.com, 21cnbao@gmail.com, ying.huang@linux.alibaba.com, akpm@linux-foundation.org, linux-crypto@vger.kernel.org, herbert@gondor.apana.org.au, davem@davemloft.net, clabbe@baylibre.com, ardb@kernel.org, ebiggers@google.com, surenb@google.com, kristen.c.accardi@intel.com Cc: wajdi.k.feghali@intel.com, vinodh.gopal@intel.com, kanchana.p.sridhar@intel.com Subject: [PATCH v9 16/19] mm: zswap: Consistently use IS_ERR_OR_NULL() to check acomp_ctx resources. Date: Wed, 30 Apr 2025 13:53:02 -0700 Message-Id: <20250430205305.22844-17-kanchana.p.sridhar@intel.com> X-Mailer: git-send-email 2.27.0 In-Reply-To: <20250430205305.22844-1-kanchana.p.sridhar@intel.com> References: <20250430205305.22844-1-kanchana.p.sridhar@intel.com> Precedence: bulk X-Mailing-List: linux-crypto@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 This patch uses IS_ERR_OR_NULL() in zswap_cpu_comp_prepare() to check for valid acomp/req, thereby making it consistent with acomp_ctx_dealloc(). 
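For context, IS_ERR_OR_NULL() covers both failure conventions seen in this code path: as the earlier code shows, crypto_alloc_acomp_node() reports failure as an ERR_PTR(), while acomp_request_alloc() returns NULL. The following is a hedged sketch of that normalization; example_check() is a hypothetical helper, not part of the patch.

#include <linux/err.h>

/* Hypothetical helper: accept a pointer that may be NULL or an ERR_PTR()
 * on failure, and normalize it to an errno-style return value. */
static int example_check(const void *p)
{
        if (IS_ERR_OR_NULL(p))
                return p ? PTR_ERR(p) : -ENOMEM;
        return 0;
}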
Signed-off-by: Kanchana P Sridhar --- mm/zswap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/zswap.c b/mm/zswap.c index 238f92e63a22..717835f214b2 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -292,7 +292,7 @@ static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node) return ret; acomp_ctx->acomp = crypto_alloc_acomp_node(pool->tfm_name, 0, 0, cpu_to_node(cpu)); - if (IS_ERR(acomp_ctx->acomp)) { + if (IS_ERR_OR_NULL(acomp_ctx->acomp)) { pr_err("could not alloc crypto acomp %s : %ld\n", pool->tfm_name, PTR_ERR(acomp_ctx->acomp)); ret = PTR_ERR(acomp_ctx->acomp); @@ -301,7 +301,7 @@ static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node) acomp_ctx->is_sleepable = acomp_is_async(acomp_ctx->acomp); acomp_ctx->req = acomp_request_alloc(acomp_ctx->acomp); - if (!acomp_ctx->req) { + if (IS_ERR_OR_NULL(acomp_ctx->req)) { pr_err("could not alloc crypto acomp_request %s\n", pool->tfm_name); goto fail; From patchwork Wed Apr 30 20:53:03 2025 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Sridhar, Kanchana P" X-Patchwork-Id: 886163 Received: from mgamail.intel.com (mgamail.intel.com [198.175.65.15]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id E1C442D29C7; Wed, 30 Apr 2025 20:53:21 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=198.175.65.15 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1746046404; cv=none; b=amV+Ls/9tOVeqqR5iO1W1O1p2NJHjDWJZJUqPSgT95Hww6BOxEzBVGcKXbsAgK7SW2FxQRwnRhU7tvWGyCL9ORrNkdKhV3fqIOk3qJf3rWE2Vx6n0KrpHFlc4vLIDpo06wkW4EEFuHFdF0SIILkTyeG7lf1M6Vf/+7CEBBy2r4w= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1746046404; c=relaxed/simple; bh=n8lZ8yIQmOM0rAWlBzDxJH5i5KHCUMbDeeth3crBxmU=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=m3r68lzK+7HK289LpP6zOuuXXQEDNA7+Fn6Sv1JOPq5Ghcfb17aVyiCc/VQvcJ5hazRn5Aqm3pEQGrQrOp92RqxdZDWgsHn1yIXl+GqgRHwO6mJRntSp/qAUp8uBpw3U0FkRw3wPA/fKp1/cUBAsmj/wwKOrg09fMX4XnSGNGHY= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=intel.com; spf=pass smtp.mailfrom=intel.com; dkim=pass (2048-bit key) header.d=intel.com header.i=@intel.com header.b=HRn6h7Us; arc=none smtp.client-ip=198.175.65.15 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=intel.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=intel.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=intel.com header.i=@intel.com header.b="HRn6h7Us" DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=intel.com; i=@intel.com; q=dns/txt; s=Intel; t=1746046402; x=1777582402; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=n8lZ8yIQmOM0rAWlBzDxJH5i5KHCUMbDeeth3crBxmU=; b=HRn6h7UsAi4+d2GU6ZmBYuCgEtc9Ycb6xvqqlnY5DzIHzcl+AEat0f6F Fz5c7pq3/05BDjXCHU8sxILT85qD7hx5pUubKWQPV48WddH+eseNw3SH2 2k8lczfuxAo8tz5rw43/R2R519G3V6ON7RER3LnFehB7MXVUkh6yu7quf KWO0C4rIqIt6r3XJH7QtyWT/G6I0vZIOdI34l0XlTu5yelJH7vxuUmjyf a/9bUFw8nz1++ZGScch04Ttmu9+KaEaAtfp0Rfo4PrfJ7WaAbDnUsB2Di uMMW72pDoGY2VdSLOf6LuTKRG0gUXvmtBG47AVZNJjXu8UFBN1PqHfV0B g==; X-CSE-ConnectionGUID: 525+IaFxSDKnPZO0QKNs9g== X-CSE-MsgGUID: 5skvEutJRdm8iggfqgo1AQ== X-IronPort-AV: 
E=McAfee;i="6700,10204,11419"; a="51388714" X-IronPort-AV: E=Sophos;i="6.15,252,1739865600"; d="scan'208";a="51388714" Received: from fmviesa003.fm.intel.com ([10.60.135.143]) by orvoesa107.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 30 Apr 2025 13:53:22 -0700 X-CSE-ConnectionGUID: WGk7IbFZRbmB23ATzy1ZwA== X-CSE-MsgGUID: z+HS5+OUSIi6/PcNHgYuCQ== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="6.15,252,1739865600"; d="scan'208";a="138248942" Received: from jf5300-b11a338t.jf.intel.com ([10.242.51.115]) by fmviesa003.fm.intel.com with ESMTP; 30 Apr 2025 13:53:20 -0700 From: Kanchana P Sridhar To: linux-kernel@vger.kernel.org, linux-mm@kvack.org, hannes@cmpxchg.org, yosry.ahmed@linux.dev, nphamcs@gmail.com, chengming.zhou@linux.dev, usamaarif642@gmail.com, ryan.roberts@arm.com, 21cnbao@gmail.com, ying.huang@linux.alibaba.com, akpm@linux-foundation.org, linux-crypto@vger.kernel.org, herbert@gondor.apana.org.au, davem@davemloft.net, clabbe@baylibre.com, ardb@kernel.org, ebiggers@google.com, surenb@google.com, kristen.c.accardi@intel.com Cc: wajdi.k.feghali@intel.com, vinodh.gopal@intel.com, kanchana.p.sridhar@intel.com Subject: [PATCH v9 17/19] mm: zswap: Allocate pool batching resources if the compressor supports batching. Date: Wed, 30 Apr 2025 13:53:03 -0700 Message-Id: <20250430205305.22844-18-kanchana.p.sridhar@intel.com> X-Mailer: git-send-email 2.27.0 In-Reply-To: <20250430205305.22844-1-kanchana.p.sridhar@intel.com> References: <20250430205305.22844-1-kanchana.p.sridhar@intel.com> Precedence: bulk X-Mailing-List: linux-crypto@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 This patch adds support for the per-CPU acomp_ctx to track multiple compression/decompression requests and multiple compression destination buffers. zswap_cpu_comp_prepare() will get the maximum batch-size the compressor supports. If so, it will allocate the necessary batching resources (reqs/buffers), up to ZSWAP_MAX_BATCH_SIZE, which this patch defines as 8U. This patch also adds a "u8 nr_reqs" member to "struct zswap_pool". Thanks Yosry for this suggestion. Once the pool's per-CPU acomp_ctx resources have been successfully allocated, the pool->nr_reqs is set up as the minimum of ZSWAP_MAX_BATCH_SIZE and crypto_acomp_batch_size(acomp_ctx->acomp). However, zswap does not use more than one request yet. Follow-up patches will actually utilize the multiple acomp_ctx requests/buffers for batch compression/decompression of multiple pages. The newly added ZSWAP_MAX_BATCH_SIZE limits the amount of extra memory used for batching. There is a small extra memory overhead of allocating the "reqs" and "buffers" arrays for compressors that do not support batching: On x86_64, the overhead is two pointers per-CPU (i.e. 16 bytes). Suggested-by: Yosry Ahmed Signed-off-by: Kanchana P Sridhar --- mm/zswap.c | 113 ++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 81 insertions(+), 32 deletions(-) diff --git a/mm/zswap.c b/mm/zswap.c index 717835f214b2..2273dbfd460f 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -80,6 +80,9 @@ static bool zswap_pool_reached_full; #define ZSWAP_PARAM_UNSET "" +/* Limit the batch size to limit per-CPU memory usage for reqs and buffers. 
*/ +#define ZSWAP_MAX_BATCH_SIZE 8U + static int zswap_setup(void); /* Enable/disable zswap */ @@ -145,9 +148,9 @@ bool zswap_never_enabled(void) struct crypto_acomp_ctx { struct crypto_acomp *acomp; - struct acomp_req *req; + struct acomp_req **reqs; + u8 **buffers; struct crypto_wait wait; - u8 *buffer; struct mutex mutex; bool is_sleepable; }; @@ -166,6 +169,7 @@ struct zswap_pool { struct work_struct release_work; struct hlist_node node; char tfm_name[CRYPTO_MAX_ALG_NAME]; + u8 nr_reqs; }; /* Global LRU lists shared by all zswap pools. */ @@ -258,16 +262,29 @@ static void __zswap_pool_empty(struct percpu_ref *ref); * zswap_cpu_comp_prepare(), not others. * - Cleanup acomp_ctx resources on all cores in zswap_pool_destroy(). */ -static void acomp_ctx_dealloc(struct crypto_acomp_ctx *acomp_ctx) +static void acomp_ctx_dealloc(struct crypto_acomp_ctx *acomp_ctx, u8 nr_reqs) { + u8 i; + if (IS_ERR_OR_NULL(acomp_ctx)) return; - if (!IS_ERR_OR_NULL(acomp_ctx->req)) - acomp_request_free(acomp_ctx->req); + if (acomp_ctx->reqs) { + for (i = 0; i < nr_reqs; ++i) { + if (!IS_ERR_OR_NULL(acomp_ctx->reqs[i])) + acomp_request_free(acomp_ctx->reqs[i]); + } + kfree(acomp_ctx->reqs); + } + if (!IS_ERR_OR_NULL(acomp_ctx->acomp)) crypto_free_acomp(acomp_ctx->acomp); - kfree(acomp_ctx->buffer); + + if (acomp_ctx->buffers) { + for (i = 0; i < nr_reqs; ++i) + kfree(acomp_ctx->buffers[i]); + kfree(acomp_ctx->buffers); + } } static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node) @@ -275,6 +292,7 @@ static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node) struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node); struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu); int ret = -ENOMEM; + u8 i, nr_reqs = 0; /* * The per-CPU pool->acomp_ctx is zero-initialized on allocation. @@ -287,10 +305,6 @@ static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node) if (!IS_ERR_OR_NULL(acomp_ctx->acomp)) return 0; - acomp_ctx->buffer = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu)); - if (!acomp_ctx->buffer) - return ret; - acomp_ctx->acomp = crypto_alloc_acomp_node(pool->tfm_name, 0, 0, cpu_to_node(cpu)); if (IS_ERR_OR_NULL(acomp_ctx->acomp)) { pr_err("could not alloc crypto acomp %s : %ld\n", @@ -300,33 +314,58 @@ static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node) } acomp_ctx->is_sleepable = acomp_is_async(acomp_ctx->acomp); - acomp_ctx->req = acomp_request_alloc(acomp_ctx->acomp); - if (IS_ERR_OR_NULL(acomp_ctx->req)) { - pr_err("could not alloc crypto acomp_request %s\n", - pool->tfm_name); + nr_reqs = min(ZSWAP_MAX_BATCH_SIZE, + crypto_acomp_batch_size(acomp_ctx->acomp)); + + acomp_ctx->buffers = kcalloc_node(nr_reqs, sizeof(u8 *), + GFP_KERNEL, cpu_to_node(cpu)); + if (!acomp_ctx->buffers) goto fail; + + for (i = 0; i < nr_reqs; ++i) { + acomp_ctx->buffers[i] = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, + cpu_to_node(cpu)); + if (!acomp_ctx->buffers[i]) + goto fail; } - crypto_init_wait(&acomp_ctx->wait); + acomp_ctx->reqs = kcalloc_node(nr_reqs, sizeof(struct acomp_req *), + GFP_KERNEL, cpu_to_node(cpu)); + if (!acomp_ctx->reqs) + goto fail; + + for (i = 0; i < nr_reqs; ++i) { + acomp_ctx->reqs[i] = acomp_request_alloc(acomp_ctx->acomp); + if (IS_ERR_OR_NULL(acomp_ctx->reqs[i])) { + pr_err("could not alloc crypto acomp_request reqs[%d] %s\n", + i, pool->tfm_name); + goto fail; + } + } /* + * All calls to crypto_acomp_[de]compress() from zswap will use + * acomp_ctx->reqs[0] with acomp_ctx->wait. 
* if the backend of acomp is async zip, crypto_req_done() will wakeup * crypto_wait_req(); if the backend of acomp is scomp, the callback * won't be called, crypto_wait_req() will return without blocking. */ - acomp_request_set_callback(acomp_ctx->req, CRYPTO_TFM_REQ_MAY_BACKLOG, + crypto_init_wait(&acomp_ctx->wait); + + acomp_request_set_callback(acomp_ctx->reqs[0], CRYPTO_TFM_REQ_MAY_BACKLOG, crypto_req_done, &acomp_ctx->wait); mutex_init(&acomp_ctx->mutex); return 0; fail: - acomp_ctx_dealloc(acomp_ctx); + acomp_ctx_dealloc(acomp_ctx, nr_reqs); return ret; } static struct zswap_pool *zswap_pool_create(char *type, char *compressor) { + struct crypto_acomp_ctx *acomp_ctx; struct zswap_pool *pool; char name[38]; /* 'zswap' + 32 char (max) num + \0 */ gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM; @@ -343,6 +382,7 @@ static struct zswap_pool *zswap_pool_create(char *type, char *compressor) return NULL; } + /* Many things rely on the zero-initialization. */ pool = kzalloc(sizeof(*pool), GFP_KERNEL); if (!pool) return NULL; @@ -393,9 +433,18 @@ static struct zswap_pool *zswap_pool_create(char *type, char *compressor) */ cpuhp_state_remove_instance(CPUHP_MM_ZSWP_POOL_PREPARE, &pool->node); + /* + * If there was an error in adding the multi instance state, the + * zero-initialized pool->nr_reqs value will be accurate for passing to + * acomp_ctx_dealloc(). + */ if (ret) goto ref_fail; + acomp_ctx = raw_cpu_ptr(pool->acomp_ctx); + pool->nr_reqs = min(ZSWAP_MAX_BATCH_SIZE, + crypto_acomp_batch_size(acomp_ctx->acomp)); + /* being the current pool takes 1 ref; this func expects the * caller to always add the new pool as the current pool */ @@ -411,7 +460,7 @@ static struct zswap_pool *zswap_pool_create(char *type, char *compressor) ref_fail: for_each_possible_cpu(cpu) - acomp_ctx_dealloc(per_cpu_ptr(pool->acomp_ctx, cpu)); + acomp_ctx_dealloc(per_cpu_ptr(pool->acomp_ctx, cpu), pool->nr_reqs); error: if (pool->acomp_ctx) free_percpu(pool->acomp_ctx); @@ -470,7 +519,7 @@ static void zswap_pool_destroy(struct zswap_pool *pool) zswap_pool_debug("destroying", pool); for_each_possible_cpu(cpu) - acomp_ctx_dealloc(per_cpu_ptr(pool->acomp_ctx, cpu)); + acomp_ctx_dealloc(per_cpu_ptr(pool->acomp_ctx, cpu), pool->nr_reqs); free_percpu(pool->acomp_ctx); @@ -940,7 +989,7 @@ static bool zswap_compress(struct page *page, struct zswap_entry *entry, mutex_lock(&acomp_ctx->mutex); - dst = acomp_ctx->buffer; + dst = acomp_ctx->buffers[0]; sg_init_table(&input, 1); sg_set_page(&input, page, PAGE_SIZE, 0); @@ -950,7 +999,7 @@ static bool zswap_compress(struct page *page, struct zswap_entry *entry, * giving the dst buffer with enough length to avoid buffer overflow. */ sg_init_one(&output, dst, PAGE_SIZE * 2); - acomp_request_set_params(acomp_ctx->req, &input, &output, PAGE_SIZE, dlen); + acomp_request_set_params(acomp_ctx->reqs[0], &input, &output, PAGE_SIZE, dlen); /* * it maybe looks a little bit silly that we send an asynchronous request, @@ -964,8 +1013,8 @@ static bool zswap_compress(struct page *page, struct zswap_entry *entry, * but in different threads running on different cpu, we have different * acomp instance, so multiple threads can do (de)compression in parallel. 
*/ - comp_ret = crypto_wait_req(crypto_acomp_compress(acomp_ctx->req), &acomp_ctx->wait); - dlen = acomp_ctx->req->dlen; + comp_ret = crypto_wait_req(crypto_acomp_compress(acomp_ctx->reqs[0]), &acomp_ctx->wait); + dlen = acomp_ctx->reqs[0]->dlen; if (comp_ret) goto unlock; @@ -1001,27 +1050,27 @@ static bool zswap_decompress(struct zswap_entry *entry, struct folio *folio) acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx); mutex_lock(&acomp_ctx->mutex); - obj = zpool_obj_read_begin(zpool, entry->handle, acomp_ctx->buffer); + obj = zpool_obj_read_begin(zpool, entry->handle, acomp_ctx->buffers[0]); /* * zpool_obj_read_begin() might return a kmap address of highmem when - * acomp_ctx->buffer is not used. However, sg_init_one() does not - * handle highmem addresses, so copy the object to acomp_ctx->buffer. + * acomp_ctx->buffers[0] is not used. However, sg_init_one() does not + * handle highmem addresses, so copy the object to acomp_ctx->buffers[0]. */ if (virt_addr_valid(obj)) { src = obj; } else { - WARN_ON_ONCE(obj == acomp_ctx->buffer); - memcpy(acomp_ctx->buffer, obj, entry->length); - src = acomp_ctx->buffer; + WARN_ON_ONCE(obj == acomp_ctx->buffers[0]); + memcpy(acomp_ctx->buffers[0], obj, entry->length); + src = acomp_ctx->buffers[0]; } sg_init_one(&input, src, entry->length); sg_init_table(&output, 1); sg_set_folio(&output, folio, PAGE_SIZE, 0); - acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, PAGE_SIZE); - decomp_ret = crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait); - dlen = acomp_ctx->req->dlen; + acomp_request_set_params(acomp_ctx->reqs[0], &input, &output, entry->length, PAGE_SIZE); + decomp_ret = crypto_wait_req(crypto_acomp_decompress(acomp_ctx->reqs[0]), &acomp_ctx->wait); + dlen = acomp_ctx->reqs[0]->dlen; zpool_obj_read_end(zpool, entry->handle, obj); mutex_unlock(&acomp_ctx->mutex); From patchwork Wed Apr 30 20:53:04 2025 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Sridhar, Kanchana P" X-Patchwork-Id: 886542 Received: from mgamail.intel.com (mgamail.intel.com [198.175.65.15]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id EA4D52D3A98; Wed, 30 Apr 2025 20:53:23 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=198.175.65.15 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1746046406; cv=none; b=uFP2f2iFq3S6CJAqE3cs8r+MLHkWaJBFVl5COQ3jMjqp0zmx4hkCjqvsuQu4xnGn1SXZF9xy2vmdPoHabOcC4pMrEqTfk4waiHO2wNK7Z7OzDu7OyLVXS8ZNqHrY5JRGNAm+sypbNm8mcH/sGGsSXRA9KEAPXmvl1T233pwwjfU= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1746046406; c=relaxed/simple; bh=DDaOA19vkB8fzpz/hF+6UEVXZdzAdZ5S5Yw7TXl9byM=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=E0Q6gLLCmJDaZqvddRLC1OJFLBhjWGBz3xJ7KySYMy8OpXTFmxBU0ZC4g79msjMAyB4zOks/y/dWgSP8gaEwjeX6a98f7W7/7ozjuD27b4qZMX54qJvdUWbzXvUlrAJxoZW7xrbp2rPFArkWODIngkZc1VP25Zu0ha0Qjga/Bwc= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=intel.com; spf=pass smtp.mailfrom=intel.com; dkim=pass (2048-bit key) header.d=intel.com header.i=@intel.com header.b=ObXdk1ho; arc=none smtp.client-ip=198.175.65.15 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=intel.com Authentication-Results: smtp.subspace.kernel.org; spf=pass 
smtp.mailfrom=intel.com
From: Kanchana P Sridhar
To: linux-kernel@vger.kernel.org, linux-mm@kvack.org, hannes@cmpxchg.org, yosry.ahmed@linux.dev, nphamcs@gmail.com, chengming.zhou@linux.dev, usamaarif642@gmail.com, ryan.roberts@arm.com, 21cnbao@gmail.com, ying.huang@linux.alibaba.com, akpm@linux-foundation.org, linux-crypto@vger.kernel.org, herbert@gondor.apana.org.au, davem@davemloft.net, clabbe@baylibre.com, ardb@kernel.org, ebiggers@google.com, surenb@google.com, kristen.c.accardi@intel.com
Cc: wajdi.k.feghali@intel.com, vinodh.gopal@intel.com, kanchana.p.sridhar@intel.com
Subject: [PATCH v9 18/19] mm: zswap: zswap_store() will process a folio in batches.
Date: Wed, 30 Apr 2025 13:53:04 -0700
Message-Id: <20250430205305.22844-19-kanchana.p.sridhar@intel.com>
X-Mailer: git-send-email 2.27.0
In-Reply-To: <20250430205305.22844-1-kanchana.p.sridhar@intel.com>
References: <20250430205305.22844-1-kanchana.p.sridhar@intel.com>
Precedence: bulk
X-Mailing-List: linux-crypto@vger.kernel.org
MIME-Version: 1.0

This patch modifies zswap_store() to store a batch of pages at a time, instead of one page at a time. It does this by calling a new helper, zswap_store_pages(), with "batch_size" pages. If the folio is order-0, the batch_size is 1. If zswap_store() is processing a large folio:

- If the compressor supports batching, the batch_size is pool->nr_reqs.
- If the compressor does not support batching, the batch_size is ZSWAP_MAX_BATCH_SIZE.

zswap_store_pages() performs, for all the pages in the batch, the work that zswap_store_page() previously did for a single page. It starts by allocating all the zswap entries required to store the batch. Next, it calls zswap_compress() to compress each page in the batch sequentially. Finally, it adds the batch's zswap entries to the xarray and LRU, charges zswap memory and increments the zswap stats.
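For quick reference, this is a condensed sketch of the batched store loop described above (the names mirror those introduced by this patch; it is a paraphrase of the hunks that follow, not the literal diff):

	/* In zswap_store(): pick the batch size, then store the folio in chunks. */
	batch_size = (nr_pages > 1) ? ((pool->nr_reqs > 1) ?
				       pool->nr_reqs : ZSWAP_MAX_BATCH_SIZE)
				    : 1;

	for (start = 0; start < nr_pages; start += batch_size) {
		end = min(start + batch_size, nr_pages);

		/* Allocates entries, compresses, then publishes to the xarray and LRU. */
		if (!zswap_store_pages(folio, start, end, objcg, pool))
			goto put_pool;
	}
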
The error handling and cleanup required for all failure scenarios that can occur while storing a batch in zswap are consolidated to a single "store_pages_failed" label in zswap_store_pages(). Signed-off-by: Kanchana P Sridhar --- mm/zswap.c | 199 ++++++++++++++++++++++++++++++++++------------------- 1 file changed, 130 insertions(+), 69 deletions(-) diff --git a/mm/zswap.c b/mm/zswap.c index 2273dbfd460f..1d6795704350 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -1518,81 +1518,125 @@ static void shrink_worker(struct work_struct *w) * main API **********************************/ -static bool zswap_store_page(struct page *page, - struct obj_cgroup *objcg, - struct zswap_pool *pool) +/* + * Store multiple pages in @folio, starting from the page at index @start up to + * the page at index @end-1. + */ +static bool zswap_store_pages(struct folio *folio, + long start, + long end, + struct obj_cgroup *objcg, + struct zswap_pool *pool) { - swp_entry_t page_swpentry = page_swap_entry(page); - struct zswap_entry *entry, *old; - - /* allocate entry */ - entry = zswap_entry_cache_alloc(GFP_KERNEL, page_to_nid(page)); - if (!entry) { - zswap_reject_kmemcache_fail++; - return false; - } + struct zswap_entry *entries[ZSWAP_MAX_BATCH_SIZE]; + int node_id = folio_nid(folio); + u8 i, store_fail_idx = 0, nr_pages = end - start; - if (!zswap_compress(page, entry, pool)) - goto compress_failed; + for (i = 0; i < nr_pages; ++i) { + entries[i] = zswap_entry_cache_alloc(GFP_KERNEL, node_id); - old = xa_store(swap_zswap_tree(page_swpentry), - swp_offset(page_swpentry), - entry, GFP_KERNEL); - if (xa_is_err(old)) { - int err = xa_err(old); + if (unlikely(!entries[i])) { + zswap_reject_kmemcache_fail++; + /* + * While handling this error, we only need to call + * zswap_entry_cache_free() for entries[0 .. i-1]. + */ + nr_pages = i; + goto store_pages_failed; + } - WARN_ONCE(err != -ENOMEM, "unexpected xarray error: %d\n", err); - zswap_reject_alloc_fail++; - goto store_failed; + /* + * Initialize the handle to an error value. This facilitates + * having a consolidated failure handling + * 'goto store_pages_failed' that can inspect the value of the + * handle to determine whether zpool memory needs to be + * de-allocated. + */ + entries[i]->handle = (unsigned long)ERR_PTR(-EINVAL); } - /* - * We may have had an existing entry that became stale when - * the folio was redirtied and now the new version is being - * swapped out. Get rid of the old. - */ - if (old) - zswap_entry_free(old); + for (i = 0; i < nr_pages; ++i) { + struct page *page = folio_page(folio, start + i); - /* - * The entry is successfully compressed and stored in the tree, there is - * no further possibility of failure. Grab refs to the pool and objcg, - * charge zswap memory, and increment zswap_stored_pages. - * The opposite actions will be performed by zswap_entry_free() - * when the entry is removed from the tree. - */ - zswap_pool_get(pool); - if (objcg) { - obj_cgroup_get(objcg); - obj_cgroup_charge_zswap(objcg, entry->length); + if (!zswap_compress(page, entries[i], pool)) + goto store_pages_failed; } - atomic_long_inc(&zswap_stored_pages); - /* - * We finish initializing the entry while it's already in xarray. - * This is safe because: - * - * 1. Concurrent stores and invalidations are excluded by folio lock. - * - * 2. Writeback is excluded by the entry not being on the LRU yet. - * The publishing order matters to prevent writeback from seeing - * an incoherent entry. 
- */ - entry->pool = pool; - entry->swpentry = page_swpentry; - entry->objcg = objcg; - entry->referenced = true; - if (entry->length) { - INIT_LIST_HEAD(&entry->lru); - zswap_lru_add(&zswap_list_lru, entry); + for (i = 0; i < nr_pages; ++i) { + swp_entry_t page_swpentry = page_swap_entry(folio_page(folio, start + i)); + struct zswap_entry *old, *entry = entries[i]; + + old = xa_store(swap_zswap_tree(page_swpentry), + swp_offset(page_swpentry), + entry, GFP_KERNEL); + if (unlikely(xa_is_err(old))) { + int err = xa_err(old); + + WARN_ONCE(err != -ENOMEM, "unexpected xarray error: %d\n", err); + zswap_reject_alloc_fail++; + /* + * Entries up to this point have been stored in the + * xarray. zswap_store() will erase them from the xarray + * and call zswap_entry_free(). Local cleanup in + * 'store_pages_failed' only needs to happen for + * entries from [@i to @nr_pages). + */ + store_fail_idx = i; + goto store_pages_failed; + } + + /* + * We may have had an existing entry that became stale when + * the folio was redirtied and now the new version is being + * swapped out. Get rid of the old. + */ + if (unlikely(old)) + zswap_entry_free(old); + + /* + * The entry is successfully compressed and stored in the tree, there is + * no further possibility of failure. Grab refs to the pool and objcg, + * charge zswap memory, and increment zswap_stored_pages. + * The opposite actions will be performed by zswap_entry_free() + * when the entry is removed from the tree. + */ + zswap_pool_get(pool); + if (objcg) { + obj_cgroup_get(objcg); + obj_cgroup_charge_zswap(objcg, entry->length); + } + atomic_long_inc(&zswap_stored_pages); + + /* + * We finish initializing the entry while it's already in xarray. + * This is safe because: + * + * 1. Concurrent stores and invalidations are excluded by folio lock. + * + * 2. Writeback is excluded by the entry not being on the LRU yet. + * The publishing order matters to prevent writeback from seeing + * an incoherent entry. + */ + entry->pool = pool; + entry->swpentry = page_swpentry; + entry->objcg = objcg; + entry->referenced = true; + if (likely(entry->length)) { + INIT_LIST_HEAD(&entry->lru); + zswap_lru_add(&zswap_list_lru, entry); + } } return true; -store_failed: - zpool_free(pool->zpool, entry->handle); -compress_failed: - zswap_entry_cache_free(entry); +store_pages_failed: + for (i = store_fail_idx; i < nr_pages; ++i) { + if (!IS_ERR_VALUE(entries[i]->handle)) + zpool_free(pool->zpool, entries[i]->handle); + + zswap_entry_cache_free(entries[i]); + } + return false; } @@ -1603,8 +1647,9 @@ bool zswap_store(struct folio *folio) struct obj_cgroup *objcg = NULL; struct mem_cgroup *memcg = NULL; struct zswap_pool *pool; + unsigned int batch_size; bool ret = false; - long index; + long start, end; VM_WARN_ON_ONCE(!folio_test_locked(folio)); VM_WARN_ON_ONCE(!folio_test_swapcache(folio)); @@ -1638,10 +1683,26 @@ bool zswap_store(struct folio *folio) mem_cgroup_put(memcg); } - for (index = 0; index < nr_pages; ++index) { - struct page *page = folio_page(folio, index); + /* + * If a large folio is being swapped out and the zswap compressor + * supports batching, i.e., has multiple acomp requests, the folio will + * be compressed in batches of @pool->nr_reqs. If the compressor has + * only one acomp request, the folio will be compressed in batches of + * ZSWAP_MAX_BATCH_SIZE pages, where each page in the batch is + * compressed sequentially. 
We see better performance by processing the + * folio in batches of ZSWAP_MAX_BATCH_SIZE, due to cache locality of + * working set structures such as the array of zswap_entry's for the + * batch. + */ + batch_size = (nr_pages > 1) ? ((pool->nr_reqs > 1) ? + pool->nr_reqs : ZSWAP_MAX_BATCH_SIZE) + : 1; + + /* Store the folio in batches of "batch_size" pages. */ + for (start = 0; start < nr_pages; start += batch_size) { + end = min(start + batch_size, nr_pages); - if (!zswap_store_page(page, objcg, pool)) + if (!zswap_store_pages(folio, start, end, objcg, pool)) goto put_pool; } @@ -1671,9 +1732,9 @@ bool zswap_store(struct folio *folio) struct zswap_entry *entry; struct xarray *tree; - for (index = 0; index < nr_pages; ++index) { - tree = swap_zswap_tree(swp_entry(type, offset + index)); - entry = xa_erase(tree, offset + index); + for (start = 0; start < nr_pages; ++start) { + tree = swap_zswap_tree(swp_entry(type, offset + start)); + entry = xa_erase(tree, offset + start); if (entry) zswap_entry_free(entry); } From patchwork Wed Apr 30 20:53:05 2025 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Sridhar, Kanchana P" X-Patchwork-Id: 886162 Received: from mgamail.intel.com (mgamail.intel.com [198.175.65.15]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 388D82D3F8B; Wed, 30 Apr 2025 20:53:24 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=198.175.65.15 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1746046406; cv=none; b=erS5FRW1I8iEi5b7l7mmJ4gBxy6hVdAxUTq8CvI28bHWo/hbukUxF7ZjNHg0T0ni/45PFiMPoZWauYZJolPIgYHh4bGAqzsWz8RBl96mUKqMYfBhBbmtdep4D+AfViMNWVpxaxb+obCDBuE3c24k8Yp15AgT12GUvjGEELocFnA= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1746046406; c=relaxed/simple; bh=QeU7OJJnQc8OUFHpvKhiB60Fd/8n0u/d9ngD8H0HIB4=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=nXxoExBowJSb5ap9isV/AX9LDUqjDwrKduaXTc4GMP/ronCvI5NR/J14J714lp8w5bI04wTgE2poJeRrA/ASTeFLacna+20lcSqQvGMIq6fmISIFbiNt/VRTFUx+azOhCi8PCsYITVLEjftcTQnuiCbszefJiuJ3vaFxXs6H1Zw= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=intel.com; spf=pass smtp.mailfrom=intel.com; dkim=pass (2048-bit key) header.d=intel.com header.i=@intel.com header.b=GmFUZdCn; arc=none smtp.client-ip=198.175.65.15 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=intel.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=intel.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=intel.com header.i=@intel.com header.b="GmFUZdCn" DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=intel.com; i=@intel.com; q=dns/txt; s=Intel; t=1746046405; x=1777582405; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=QeU7OJJnQc8OUFHpvKhiB60Fd/8n0u/d9ngD8H0HIB4=; b=GmFUZdCnfvdKHafYqwIDQ0S/b13sX7BBhoUX2N5sSq4HTHGIXs05ZG3c 8FakKE7yzE9ymO3xyN6REefDNsu84to/34qtCa9uFVRONn5+HScKPDDYc J6VFRvx8jJ7SLS7D5p8LHKOVBTurKzQrZ00iaPdtX1PwaM0TbpRH0loJj zOlAeN+jwTwpwkt+v234jxRujTY/O2QPhO1GnQadSDV7VHzfal7YianGf xm84uEFP+MAaFCXpRvqN8Ndli0hp3sgMBYAhzdEzJ2GdHo6ku6NCcUIgv GpN3Y9nyeTq1P1wHC+pb3rpoyWunsSozjtyp+YEislDw2qW8EPFUrg/7C w==; X-CSE-ConnectionGUID: K0KSPS6nRiG4s+XZPALdiw== X-CSE-MsgGUID: 
eGZpxKEDSzqAjvb7twpJeg==
From: Kanchana P Sridhar
To: linux-kernel@vger.kernel.org, linux-mm@kvack.org, hannes@cmpxchg.org, yosry.ahmed@linux.dev, nphamcs@gmail.com, chengming.zhou@linux.dev, usamaarif642@gmail.com, ryan.roberts@arm.com, 21cnbao@gmail.com, ying.huang@linux.alibaba.com, akpm@linux-foundation.org, linux-crypto@vger.kernel.org, herbert@gondor.apana.org.au, davem@davemloft.net, clabbe@baylibre.com, ardb@kernel.org, ebiggers@google.com, surenb@google.com, kristen.c.accardi@intel.com
Cc: wajdi.k.feghali@intel.com, vinodh.gopal@intel.com, kanchana.p.sridhar@intel.com
Subject: [PATCH v9 19/19] mm: zswap: Batched zswap_compress() with compress batching of large folios.
Date: Wed, 30 Apr 2025 13:53:05 -0700
Message-Id: <20250430205305.22844-20-kanchana.p.sridhar@intel.com>
X-Mailer: git-send-email 2.27.0
In-Reply-To: <20250430205305.22844-1-kanchana.p.sridhar@intel.com>
References: <20250430205305.22844-1-kanchana.p.sridhar@intel.com>
Precedence: bulk
X-Mailing-List: linux-crypto@vger.kernel.org
MIME-Version: 1.0

This patch introduces a new, unified implementation of zswap_compress() for compressors that do and do not support batching. This eliminates code duplication and keeps the code maintainable as compress batching is introduced.

The earlier approach in zswap_store_pages(), which called zswap_compress() sequentially on one page at a time, is replaced with this new version of zswap_compress() that accepts multiple pages to compress as a batch. If the compressor does not support batching, each page in the batch is compressed and stored sequentially. If the zswap compressor supports batching, e.g. 'deflate-iaa' on the Intel IAA hardware accelerator, the batch is compressed in parallel in hardware by calling crypto_acomp_batch_compress(), the new batch compression API added earlier in this series. If all requests in the batch are compressed without errors, the compressed buffers are then stored in zpool.

Another important change this patch makes is to the acomp_ctx mutex locking in zswap_compress(). Earlier, the mutex was only held during compression. With the new code, [un]locking the mutex per page caused regressions for software compressors when testing with usemem (30 processes) and with kernel compilation using the 'allmod' config. The regressions were more egregious when PMD folios were stored. The implementation in this commit locks/unlocks the mutex once per batch, which resolves the regression.

The use of prefetchw() for zswap entries and of likely()/unlikely() annotations prevents regressions with software compressors like zstd, and generally improves non-batching compressors' performance with the batching code by ~8%.
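As an overview of the unified path, this is a condensed paraphrase of the zswap_compress() loop in the hunk below (scatterlist setup, dlen bookkeeping, the zpool writes and the error paths are elided):

	mutex_lock(&acomp_ctx->mutex);

	/* nr_comps is 1 for non-batching compressors, pool->nr_reqs otherwise. */
	for (i = 0; i < nr_pages; i += nr_comps) {
		if (likely(nr_comps == 1)) {
			/* One page at a time: synchronous compress via reqs[0]/buffers[0]. */
			errors[i] = crypto_wait_req(crypto_acomp_compress(acomp_ctx->reqs[0]),
						    &acomp_ctx->wait);
			if (unlikely(errors[i]))
				goto compress_error;
		} else if (!crypto_acomp_batch_compress(acomp_ctx->reqs, pages,
							acomp_ctx->buffers, dlens,
							errors, nr_pages)) {
			goto compress_error;
		}

		/* On success, write each compressed buffer into zpool. */
	}

	mutex_unlock(&acomp_ctx->mutex);
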
Signed-off-by: Kanchana P Sridhar --- mm/zswap.c | 187 +++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 132 insertions(+), 55 deletions(-) diff --git a/mm/zswap.c b/mm/zswap.c index 1d6795704350..561096f29c58 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -35,6 +35,7 @@ #include #include #include +#include #include "swap.h" #include "internal.h" @@ -973,71 +974,147 @@ static void zswap_entry_free(struct zswap_entry *entry) /********************************* * compressed storage functions **********************************/ -static bool zswap_compress(struct page *page, struct zswap_entry *entry, - struct zswap_pool *pool) +/* + * Unified code path for compressors that do and do not support batching. This + * procedure will compress multiple @nr_pages passed in as @pages. + * + * @nr_pages can be ZSWAP_MAX_BATCH_SIZE even if the compressor does not support + * batching. + * + * If @pool->nr_reqs is 1, each page is processed sequentially. + * + * If @pool->nr_reqs is > 1, compression batching is invoked, except if + * @nr_pages is 1: if so, we call the fully synchronous non-batching + * crypto_acomp API. + * + * It is assumed that @nr_pages <= @pool->nr_reqs. We could + * check this, but don't, for performance reasons. zswap_store() makes + * sure of this by design. + * + * In both cases, if all compressions are successful, the compressed buffers + * are stored in zpool. + * + * A few important changes made to not regress and in fact improve + * compression performance with non-batching software compressors, using this + * new/batching code: + * + * 1) acomp_ctx mutex locking: + * Earlier, the mutex was only held during compression. With the new code, + * [un]locking the mutex per page caused regressions for software + * compressors. We now lock the mutex once per batch, which resolved the + * regression. + * + * 2) The prefetchw() and likely()/unlikely() annotations prevent + * regressions with software compressors like zstd, and generally improve + * non-batching compressors' performance with the batching code by ~7.3%. + */ +static bool zswap_compress(struct page *pages[], struct zswap_entry *entries[], + unsigned int nr_pages, struct zswap_pool *pool) { struct crypto_acomp_ctx *acomp_ctx; struct scatterlist input, output; - int comp_ret = 0, alloc_ret = 0; - unsigned int dlen = PAGE_SIZE; - unsigned long handle; - struct zpool *zpool; + unsigned int dlens[ZSWAP_MAX_BATCH_SIZE]; + int errors[ZSWAP_MAX_BATCH_SIZE]; + struct zpool *zpool = pool->zpool; + unsigned int i, j, nr_comps = min(nr_pages, pool->nr_reqs); + int err; gfp_t gfp; - u8 *dst; + + gfp = GFP_NOWAIT | __GFP_NORETRY | __GFP_HIGHMEM | __GFP_MOVABLE; acomp_ctx = raw_cpu_ptr(pool->acomp_ctx); mutex_lock(&acomp_ctx->mutex); - dst = acomp_ctx->buffers[0]; - sg_init_table(&input, 1); - sg_set_page(&input, page, PAGE_SIZE, 0); - /* - * We need PAGE_SIZE * 2 here since there maybe over-compression case, - * and hardware-accelerators may won't check the dst buffer size, so - * giving the dst buffer with enough length to avoid buffer overflow. + * Note: + * [i] refers to the incoming batch space and is used to + * index into @pages, @entries and @errors. */ - sg_init_one(&output, dst, PAGE_SIZE * 2); - acomp_request_set_params(acomp_ctx->reqs[0], &input, &output, PAGE_SIZE, dlen); + for (i = 0; i < nr_pages; i += nr_comps) { - /* - * it maybe looks a little bit silly that we send an asynchronous request, - * then wait for its completion synchronously. This makes the process look - * synchronous in fact. 
- * Theoretically, acomp supports users send multiple acomp requests in one - * acomp instance, then get those requests done simultaneously. but in this - * case, zswap actually does store and load page by page, there is no - * existing method to send the second page before the first page is done - * in one thread doing zwap. - * but in different threads running on different cpu, we have different - * acomp instance, so multiple threads can do (de)compression in parallel. - */ - comp_ret = crypto_wait_req(crypto_acomp_compress(acomp_ctx->reqs[0]), &acomp_ctx->wait); - dlen = acomp_ctx->reqs[0]->dlen; - if (comp_ret) - goto unlock; + if (likely(nr_comps == 1)) { + sg_init_table(&input, 1); + sg_set_page(&input, pages[i], PAGE_SIZE, 0); - zpool = pool->zpool; - gfp = GFP_NOWAIT | __GFP_NORETRY | __GFP_HIGHMEM | __GFP_MOVABLE; - alloc_ret = zpool_malloc(zpool, dlen, gfp, &handle, page_to_nid(page)); - if (alloc_ret) - goto unlock; - - zpool_obj_write(zpool, handle, dst, dlen); - entry->handle = handle; - entry->length = dlen; - -unlock: - if (comp_ret == -ENOSPC || alloc_ret == -ENOSPC) - zswap_reject_compress_poor++; - else if (comp_ret) - zswap_reject_compress_fail++; - else if (alloc_ret) - zswap_reject_alloc_fail++; + /* + * We need PAGE_SIZE * 2 here since there maybe over-compression case, + * and hardware-accelerators may won't check the dst buffer size, so + * giving the dst buffer with enough length to avoid buffer overflow. + */ + sg_init_one(&output, acomp_ctx->buffers[0], PAGE_SIZE * 2); + acomp_request_set_params(acomp_ctx->reqs[0], &input, + &output, PAGE_SIZE, PAGE_SIZE); + + errors[i] = crypto_wait_req(crypto_acomp_compress(acomp_ctx->reqs[0]), + &acomp_ctx->wait); + if (unlikely(errors[i])) + goto compress_error; + } else if (!crypto_acomp_batch_compress(acomp_ctx->reqs, + pages, + acomp_ctx->buffers, + dlens, + errors, + nr_pages)) { + goto compress_error; + } + + /* + * All @nr_comps pages were successfully compressed. + * Store the pages in zpool. + * + * Note: + * [j] refers to the incoming batch space and is used to + * index into @pages, @entries and @errors. + * [k] refers to the @acomp_ctx space, as determined by + * @pool->nr_reqs, and is used to index into + * @acomp_ctx->reqs and @acomp_ctx->buffers. + */ + for (j = i; j < i + nr_comps; ++j) { + unsigned int k = j - i; + unsigned long handle; + + /* + * prefetchw() minimizes cache-miss latency by + * moving the zswap entry to the cache before it + * is written to; reducing sys time by ~1.5% for + * non-batching software compressors. + */ + prefetchw(entries[j]); + err = zpool_malloc(zpool, acomp_ctx->reqs[k]->dlen, gfp, &handle, + page_to_nid(pages[j])); + + if (unlikely(err)) { + if (err == -ENOSPC) + zswap_reject_compress_poor++; + else + zswap_reject_alloc_fail++; + + goto err_unlock; + } + + zpool_obj_write(zpool, handle, acomp_ctx->buffers[k], acomp_ctx->reqs[k]->dlen); + entries[j]->handle = handle; + entries[j]->length = acomp_ctx->reqs[k]->dlen; + } + } /* finished compress and store nr_pages. 
*/ mutex_unlock(&acomp_ctx->mutex); - return comp_ret == 0 && alloc_ret == 0; + return true; + +compress_error: + for (j = i; j < i + nr_comps; ++j) { + if (errors[j]) { + if (errors[j] == -ENOSPC) + zswap_reject_compress_poor++; + else + zswap_reject_compress_fail++; + } + } + +err_unlock: + mutex_unlock(&acomp_ctx->mutex); + return false; } static bool zswap_decompress(struct zswap_entry *entry, struct folio *folio) @@ -1529,6 +1606,7 @@ static bool zswap_store_pages(struct folio *folio, struct zswap_pool *pool) { struct zswap_entry *entries[ZSWAP_MAX_BATCH_SIZE]; + struct page *pages[ZSWAP_MAX_BATCH_SIZE]; int node_id = folio_nid(folio); u8 i, store_fail_idx = 0, nr_pages = end - start; @@ -1555,12 +1633,11 @@ static bool zswap_store_pages(struct folio *folio, entries[i]->handle = (unsigned long)ERR_PTR(-EINVAL); } - for (i = 0; i < nr_pages; ++i) { - struct page *page = folio_page(folio, start + i); + for (i = 0; i < nr_pages; ++i) + pages[i] = folio_page(folio, start + i); - if (!zswap_compress(page, entries[i], pool)) - goto store_pages_failed; - } + if (!zswap_compress(pages, entries, nr_pages, pool)) + goto store_pages_failed; for (i = 0; i < nr_pages; ++i) { swp_entry_t page_swpentry = page_swap_entry(folio_page(folio, start + i));