diff mbox series

scsi: libsas: Add SMP request allocation handler callback

Message ID 20240325131751.1840329-1-liyihang9@huawei.com
State New
Headers show
Series scsi: libsas: Add SMP request allocation handler callback | expand

Commit Message

Yihang Li March 25, 2024, 1:17 p.m. UTC
This series [1] reducing the kmalloc() minimum alignment on arm64 to 8
(from 128).

The hisi_sas has special requirements on the memory address alignment
(must be 16-byte-aligned) of the command request frame, so add a SMP
request allocation callback and fill it in for the hisi_sas driver.

Link: https://lkml.kernel.org/r/20230612153201.554742-1-catalin.marinas@arm.com [1]
Signed-off-by: Yihang Li <liyihang9@huawei.com>
---
 drivers/scsi/hisi_sas/hisi_sas_main.c | 14 ++++++++++++
 drivers/scsi/libsas/sas_expander.c    | 31 ++++++++++++++++++---------
 include/scsi/libsas.h                 |  3 +++
 3 files changed, 38 insertions(+), 10 deletions(-)

Comments

Jason Yan March 25, 2024, 2:21 p.m. UTC | #1
On 2024/3/25 22:03, Damien Le Moal wrote:
> On 3/25/24 22:17, Yihang Li wrote:
>> This series [1] reducing the kmalloc() minimum alignment on arm64 to 8
>> (from 128).
> 
> And ? What is the point you are trying to convey here ?
> 
>> The hisi_sas has special requirements on the memory address alignment
>> (must be 16-byte-aligned) of the command request frame, so add a SMP
>> request allocation callback and fill it in for the hisi_sas driver.
> 
> 128 is aligned to 16. So what is the problem you are trying to solve here ?
> Can you clarify ? I suspect this is all about memory allocation optimization ?

After series [1] been merged, kmalloc is 8-byte-aligned, however 
hisi_sas hardware needs 16-byte-aligned. That's the problem.

> 
>>
>> Link: https://lkml.kernel.org/r/20230612153201.554742-1-catalin.marinas@arm.com [1]
>> Signed-off-by: Yihang Li <liyihang9@huawei.com>
>> ---
>>   drivers/scsi/hisi_sas/hisi_sas_main.c | 14 ++++++++++++
>>   drivers/scsi/libsas/sas_expander.c    | 31 ++++++++++++++++++---------
>>   include/scsi/libsas.h                 |  3 +++
>>   3 files changed, 38 insertions(+), 10 deletions(-)
>>
>> diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c
>> index 097dfe4b620d..40329558d435 100644
>> --- a/drivers/scsi/hisi_sas/hisi_sas_main.c
>> +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c
>> @@ -2031,6 +2031,19 @@ static int hisi_sas_write_gpio(struct sas_ha_struct *sha, u8 reg_type,
>>   				reg_index, reg_count, write_data);
>>   }
>>   
>> +static void *hisi_sas_alloc_smp_req(int size)
>> +{
>> +	u8 *p;
>> +
>> +	/* The address must be 16-byte-aligned. */
> 
> ARCH_DMA_MINALIGN is not always 16, right ?
> 
>> +	size = ALIGN(size, ARCH_DMA_MINALIGN);
>> +	p = kzalloc(size, GFP_KERNEL);
>> +	if (p)
>> +		p[0] = SMP_REQUEST;
>> +
>> +	return p;
>> +}
>> +
>>   static void hisi_sas_phy_disconnected(struct hisi_sas_phy *phy)
>>   {
>>   	struct asd_sas_phy *sas_phy = &phy->sas_phy;
>> @@ -2130,6 +2143,7 @@ static struct sas_domain_function_template hisi_sas_transport_ops = {
>>   	.lldd_write_gpio	= hisi_sas_write_gpio,
>>   	.lldd_tmf_aborted	= hisi_sas_tmf_aborted,
>>   	.lldd_abort_timeout	= hisi_sas_internal_abort_timeout,
>> +	.lldd_alloc_smp_req	= hisi_sas_alloc_smp_req,
> 
> Why this complexity ? Why not simply modify alloc_smp_req() to have the required
> alignment ? This will avoid a costly indirect function call.

Yeah, I think it's simpler to modify alloc_smp_req() directly too. 
Yihang, Can you please cook a new one?

Thansk,
Jason
John Garry March 25, 2024, 3:10 p.m. UTC | #2
On 25/03/2024 14:21, Jason Yan wrote:
> On 2024/3/25 22:03, Damien Le Moal wrote:
>> On 3/25/24 22:17, Yihang Li wrote:
>>> This series [1] reducing the kmalloc() minimum alignment on arm64 to 8
>>> (from 128).
>>
>> And ? What is the point you are trying to convey here ?
>>
>>> The hisi_sas has special requirements on the memory address alignment
>>> (must be 16-byte-aligned) of the command request frame, so add a SMP
>>> request allocation callback and fill it in for the hisi_sas driver.
>>
>> 128 is aligned to 16. So what is the problem you are trying to solve 
>> here ?
>> Can you clarify ? I suspect this is all about memory allocation 
>> optimization ?
> 
> After series [1] been merged, kmalloc is 8-byte-aligned, however 
> hisi_sas hardware needs 16-byte-aligned. That's the problem.
> 
>>
>>>
>>> Link: 
>>> https://urldefense.com/v3/__https://lkml.kernel.org/r/20230612153201.554742-1-catalin.marinas@arm.com__;!!ACWV5N9M2RV99hQ!L85qzmByNbkBbZByXKuNMAhvt8wxKPCsogKt3Pgn93DTkzfc54jA3of5XL8oEDDDDTMU1OtghdKiLZdKe5ub$  [1]
>>> Signed-off-by: Yihang Li <liyihang9@huawei.com>
>>> ---
>>>   drivers/scsi/hisi_sas/hisi_sas_main.c | 14 ++++++++++++
>>>   drivers/scsi/libsas/sas_expander.c    | 31 ++++++++++++++++++---------
>>>   include/scsi/libsas.h                 |  3 +++
>>>   3 files changed, 38 insertions(+), 10 deletions(-)
>>>
>>> diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c 
>>> b/drivers/scsi/hisi_sas/hisi_sas_main.c
>>> index 097dfe4b620d..40329558d435 100644
>>> --- a/drivers/scsi/hisi_sas/hisi_sas_main.c
>>> +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c
>>> @@ -2031,6 +2031,19 @@ static int hisi_sas_write_gpio(struct 
>>> sas_ha_struct *sha, u8 reg_type,
>>>                   reg_index, reg_count, write_data);
>>>   }
>>> +static void *hisi_sas_alloc_smp_req(int size)
>>> +{
>>> +    u8 *p;
>>> +
>>> +    /* The address must be 16-byte-aligned. */
>>
>> ARCH_DMA_MINALIGN is not always 16, right ?
>>
>>> +    size = ALIGN(size, ARCH_DMA_MINALIGN);
>>> +    p = kzalloc(size, GFP_KERNEL);

Please make it clear that kmalloc() will return a naturally-aligned 
memory for power-of-2 sizes, and so ensuring that size is roundup to 16B 
will give a data which is aligned to 16B

>>> +    if (p)
>>> +        p[0] = SMP_REQUEST;
>>> +
>>> +    return p;
>>> +}
>>> +
>>>   static void hisi_sas_phy_disconnected(struct hisi_sas_phy *phy)
>>>   {
>>>       struct asd_sas_phy *sas_phy = &phy->sas_phy;
>>> @@ -2130,6 +2143,7 @@ static struct sas_domain_function_template 
>>> hisi_sas_transport_ops = {
>>>       .lldd_write_gpio    = hisi_sas_write_gpio,
>>>       .lldd_tmf_aborted    = hisi_sas_tmf_aborted,
>>>       .lldd_abort_timeout    = hisi_sas_internal_abort_timeout,
>>> +    .lldd_alloc_smp_req    = hisi_sas_alloc_smp_req,
>>
>> Why this complexity ? Why not simply modify alloc_smp_req() to have 
>> the required
>> alignment ? This will avoid a costly indirect function call.
> 
> Yeah, I think it's simpler to modify alloc_smp_req() directly too. 
> Yihang, Can you please cook a new one?
> 
> Thansk,
> Jason
Yihang Li March 26, 2024, 6:34 a.m. UTC | #3
On 2024/3/25 22:21, Jason Yan wrote:
> On 2024/3/25 22:03, Damien Le Moal wrote:
>> On 3/25/24 22:17, Yihang Li wrote:
>>> This series [1] reducing the kmalloc() minimum alignment on arm64 to 8
>>> (from 128).
>>
>> And ? What is the point you are trying to convey here ?
>>
>>> The hisi_sas has special requirements on the memory address alignment
>>> (must be 16-byte-aligned) of the command request frame, so add a SMP
>>> request allocation callback and fill it in for the hisi_sas driver.
>>
>> 128 is aligned to 16. So what is the problem you are trying to solve here ?
>> Can you clarify ? I suspect this is all about memory allocation optimization ?
> 
> After series [1] been merged, kmalloc is 8-byte-aligned, however hisi_sas hardware needs 16-byte-aligned. That's the problem.

Yes, that's the problem. I will explain it in detail in the next version.

> 
>>
>>>
>>> Link: https://lkml.kernel.org/r/20230612153201.554742-1-catalin.marinas@arm.com [1]
>>> Signed-off-by: Yihang Li <liyihang9@huawei.com>
>>> ---
>>>   drivers/scsi/hisi_sas/hisi_sas_main.c | 14 ++++++++++++
>>>   drivers/scsi/libsas/sas_expander.c    | 31 ++++++++++++++++++---------
>>>   include/scsi/libsas.h                 |  3 +++
>>>   3 files changed, 38 insertions(+), 10 deletions(-)
>>>
>>> diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c
>>> index 097dfe4b620d..40329558d435 100644
>>> --- a/drivers/scsi/hisi_sas/hisi_sas_main.c
>>> +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c
>>> @@ -2031,6 +2031,19 @@ static int hisi_sas_write_gpio(struct sas_ha_struct *sha, u8 reg_type,
>>>                   reg_index, reg_count, write_data);
>>>   }
>>>   +static void *hisi_sas_alloc_smp_req(int size)
>>> +{
>>> +    u8 *p;
>>> +
>>> +    /* The address must be 16-byte-aligned. */
>>
>> ARCH_DMA_MINALIGN is not always 16, right ?
>>
>>> +    size = ALIGN(size, ARCH_DMA_MINALIGN);
>>> +    p = kzalloc(size, GFP_KERNEL);
>>> +    if (p)
>>> +        p[0] = SMP_REQUEST;
>>> +
>>> +    return p;
>>> +}
>>> +
>>>   static void hisi_sas_phy_disconnected(struct hisi_sas_phy *phy)
>>>   {
>>>       struct asd_sas_phy *sas_phy = &phy->sas_phy;
>>> @@ -2130,6 +2143,7 @@ static struct sas_domain_function_template hisi_sas_transport_ops = {
>>>       .lldd_write_gpio    = hisi_sas_write_gpio,
>>>       .lldd_tmf_aborted    = hisi_sas_tmf_aborted,
>>>       .lldd_abort_timeout    = hisi_sas_internal_abort_timeout,
>>> +    .lldd_alloc_smp_req    = hisi_sas_alloc_smp_req,
>>
>> Why this complexity ? Why not simply modify alloc_smp_req() to have the required
>> alignment ? This will avoid a costly indirect function call.
> 
> Yeah, I think it's simpler to modify alloc_smp_req() directly too. Yihang, Can you please cook a new one?

Sure, I will post a new version later.
The reason why I did not directly modify alloc_smp_req() is that I think the requirement that the command frame must be 16-byte aligned is hisi_sas private and not a general requirement.
To avoid the impact on other drivers, I only add aligned to 16B in hisi_sas. If this measure is considered pointless, I will directly modify alloc_smp_req().

Thanks,
Yihang

> 
> Thansk,
> Jason
> .
>
diff mbox series

Patch

diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c
index 097dfe4b620d..40329558d435 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_main.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_main.c
@@ -2031,6 +2031,19 @@  static int hisi_sas_write_gpio(struct sas_ha_struct *sha, u8 reg_type,
 				reg_index, reg_count, write_data);
 }
 
+static void *hisi_sas_alloc_smp_req(int size)
+{
+	u8 *p;
+
+	/* The address must be 16-byte-aligned. */
+	size = ALIGN(size, ARCH_DMA_MINALIGN);
+	p = kzalloc(size, GFP_KERNEL);
+	if (p)
+		p[0] = SMP_REQUEST;
+
+	return p;
+}
+
 static void hisi_sas_phy_disconnected(struct hisi_sas_phy *phy)
 {
 	struct asd_sas_phy *sas_phy = &phy->sas_phy;
@@ -2130,6 +2143,7 @@  static struct sas_domain_function_template hisi_sas_transport_ops = {
 	.lldd_write_gpio	= hisi_sas_write_gpio,
 	.lldd_tmf_aborted	= hisi_sas_tmf_aborted,
 	.lldd_abort_timeout	= hisi_sas_internal_abort_timeout,
+	.lldd_alloc_smp_req	= hisi_sas_alloc_smp_req,
 };
 
 void hisi_sas_init_mem(struct hisi_hba *hisi_hba)
diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c
index a2204674b680..0fd0507f7fc6 100644
--- a/drivers/scsi/libsas/sas_expander.c
+++ b/drivers/scsi/libsas/sas_expander.c
@@ -141,6 +141,17 @@  static inline void *alloc_smp_req(int size)
 	return p;
 }
 
+static void *sas_alloc_smp_req(struct domain_device *dev, int size)
+{
+	struct sas_internal *i =
+		to_sas_internal(dev->port->ha->shost->transportt);
+
+	if (i->dft->lldd_alloc_smp_req)
+		return i->dft->lldd_alloc_smp_req(size);
+
+	return alloc_smp_req(size);
+}
+
 static inline void *alloc_smp_resp(int size)
 {
 	return kzalloc(size, GFP_KERNEL);
@@ -377,7 +388,7 @@  int sas_ex_phy_discover(struct domain_device *dev, int single)
 	u8   *disc_req;
 	struct smp_disc_resp *disc_resp;
 
-	disc_req = alloc_smp_req(DISCOVER_REQ_SIZE);
+	disc_req = sas_alloc_smp_req(dev, DISCOVER_REQ_SIZE);
 	if (!disc_req)
 		return -ENOMEM;
 
@@ -440,7 +451,7 @@  static int sas_ex_general(struct domain_device *dev)
 	int res;
 	int i;
 
-	rg_req = alloc_smp_req(RG_REQ_SIZE);
+	rg_req = sas_alloc_smp_req(dev, RG_REQ_SIZE);
 	if (!rg_req)
 		return -ENOMEM;
 
@@ -519,7 +530,7 @@  static int sas_ex_manuf_info(struct domain_device *dev)
 	u8 *mi_resp;
 	int res;
 
-	mi_req = alloc_smp_req(MI_REQ_SIZE);
+	mi_req = sas_alloc_smp_req(dev, MI_REQ_SIZE);
 	if (!mi_req)
 		return -ENOMEM;
 
@@ -560,7 +571,7 @@  int sas_smp_phy_control(struct domain_device *dev, int phy_id,
 	u8 *pc_resp;
 	int res;
 
-	pc_req = alloc_smp_req(PC_REQ_SIZE);
+	pc_req = sas_alloc_smp_req(dev, PC_REQ_SIZE);
 	if (!pc_req)
 		return -ENOMEM;
 
@@ -642,7 +653,7 @@  int sas_smp_get_phy_events(struct sas_phy *phy)
 	struct sas_rphy *rphy = dev_to_rphy(phy->dev.parent);
 	struct domain_device *dev = sas_find_dev_by_rphy(rphy);
 
-	req = alloc_smp_req(RPEL_REQ_SIZE);
+	req = sas_alloc_smp_req(dev, RPEL_REQ_SIZE);
 	if (!req)
 		return -ENOMEM;
 
@@ -682,7 +693,7 @@  int sas_get_report_phy_sata(struct domain_device *dev, int phy_id,
 			    struct smp_rps_resp *rps_resp)
 {
 	int res;
-	u8 *rps_req = alloc_smp_req(RPS_REQ_SIZE);
+	u8 *rps_req = sas_alloc_smp_req(dev, RPS_REQ_SIZE);
 	u8 *resp = (u8 *)rps_resp;
 
 	if (!rps_req)
@@ -1344,7 +1355,7 @@  static int sas_configure_present(struct domain_device *dev, int phy_id,
 	*present = 0;
 	*index = 0;
 
-	rri_req = alloc_smp_req(RRI_REQ_SIZE);
+	rri_req = sas_alloc_smp_req(dev, RRI_REQ_SIZE);
 	if (!rri_req)
 		return -ENOMEM;
 
@@ -1412,7 +1423,7 @@  static int sas_configure_set(struct domain_device *dev, int phy_id,
 	u8 *cri_req;
 	u8 *cri_resp;
 
-	cri_req = alloc_smp_req(CRI_REQ_SIZE);
+	cri_req = sas_alloc_smp_req(dev, CRI_REQ_SIZE);
 	if (!cri_req)
 		return -ENOMEM;
 
@@ -1627,7 +1638,7 @@  static int sas_get_phy_discover(struct domain_device *dev,
 	int res;
 	u8 *disc_req;
 
-	disc_req = alloc_smp_req(DISCOVER_REQ_SIZE);
+	disc_req = sas_alloc_smp_req(dev, DISCOVER_REQ_SIZE);
 	if (!disc_req)
 		return -ENOMEM;
 
@@ -1723,7 +1734,7 @@  static int sas_get_ex_change_count(struct domain_device *dev, int *ecc)
 	u8  *rg_req;
 	struct smp_rg_resp  *rg_resp;
 
-	rg_req = alloc_smp_req(RG_REQ_SIZE);
+	rg_req = sas_alloc_smp_req(dev, RG_REQ_SIZE);
 	if (!rg_req)
 		return -ENOMEM;
 
diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h
index f5257103fdb6..2e6e5f6e50db 100644
--- a/include/scsi/libsas.h
+++ b/include/scsi/libsas.h
@@ -670,6 +670,9 @@  struct sas_domain_function_template {
 	/* GPIO support */
 	int (*lldd_write_gpio)(struct sas_ha_struct *, u8 reg_type,
 			       u8 reg_index, u8 reg_count, u8 *write_data);
+
+	/* Allocation for SMP request */
+	void *(*lldd_alloc_smp_req)(int size);
 };
 
 extern int sas_register_ha(struct sas_ha_struct *);