diff mbox series

drivers/perf: Change WARN_ON() to dev_err() on irq_set_affinity() failure

Message ID 20220815092815.11597-1-yangyicong@huawei.com
State New
Headers show
Series drivers/perf: Change WARN_ON() to dev_err() on irq_set_affinity() failure | expand

Commit Message

Yicong Yang Aug. 15, 2022, 9:28 a.m. UTC
From: Yicong Yang <yangyicong@hisilicon.com>

The WARN_ON() on irq_set_affinity() failure is misused according to the [1]
and may crash people's box unintentionally. This may also be redundant since
in the failure case we may also trigger the WARN and dump the stack in the
perf core[2] for a second time.

So change the WARN_ON() to dev_err() to just print the failure message.

[1] https://github.com/torvalds/linux/blob/master/include/asm-generic/bug.h#L74
[2] https://github.com/torvalds/linux/blob/master/kernel/events/core.c#L313

Suggested-by: Greg KH <gregkh@linuxfoundation.org>
[https://lore.kernel.org/lkml/YuOi3i0XHV++z1YI@kroah.com/]
Signed-off-by: Yicong Yang <yangyicong@hisilicon.com>
---
 drivers/perf/arm-ccn.c                   | 5 +++--
 drivers/perf/arm_dmc620_pmu.c            | 3 ++-
 drivers/perf/arm_smmuv3_pmu.c            | 6 ++++--
 drivers/perf/fsl_imx8_ddr_perf.c         | 3 ++-
 drivers/perf/hisilicon/hisi_pcie_pmu.c   | 6 ++++--
 drivers/perf/hisilicon/hisi_uncore_pmu.c | 6 ++++--
 drivers/perf/qcom_l2_pmu.c               | 8 ++++++--
 drivers/perf/xgene_pmu.c                 | 6 ++++--
 8 files changed, 29 insertions(+), 14 deletions(-)

Comments

Jonathan Cameron Aug. 15, 2022, 9:39 a.m. UTC | #1
On Mon, 15 Aug 2022 17:28:15 +0800
Yicong Yang <yangyicong@huawei.com> wrote:

> From: Yicong Yang <yangyicong@hisilicon.com>
> 
> The WARN_ON() on irq_set_affinity() failure is misused according to the [1]
> and may crash people's box unintentionally. This may also be redundant since
> in the failure case we may also trigger the WARN and dump the stack in the
> perf core[2] for a second time.
> 
> So change the WARN_ON() to dev_err() to just print the failure message.
> 
> [1] https://github.com/torvalds/linux/blob/master/include/asm-generic/bug.h#L74
> [2] https://github.com/torvalds/linux/blob/master/kernel/events/core.c#L313
> 
> Suggested-by: Greg KH <gregkh@linuxfoundation.org>
> [https://lore.kernel.org/lkml/YuOi3i0XHV++z1YI@kroah.com/]
> Signed-off-by: Yicong Yang <yangyicong@hisilicon.com>

Looks like progress in a sensible direction to me.

Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>

Kind of unrelated question inline.

>


> diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c
> index 00d4c45a8017..05e1b3e274d7 100644
> --- a/drivers/perf/arm_smmuv3_pmu.c
> +++ b/drivers/perf/arm_smmuv3_pmu.c
> @@ -646,7 +646,8 @@ static int smmu_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
>  
>  	perf_pmu_migrate_context(&smmu_pmu->pmu, cpu, target);
>  	smmu_pmu->on_cpu = target;
> -	WARN_ON(irq_set_affinity(smmu_pmu->irq, cpumask_of(target)));
> +	if (irq_set_affinity(smmu_pmu->irq, cpumask_of(target)))
> +		dev_err(smmu_pmu->dev, "Failed to set interrupt affinity\n");
>  
>  	return 0;
>  }
> @@ -892,7 +893,8 @@ static int smmu_pmu_probe(struct platform_device *pdev)
>  
>  	/* Pick one CPU to be the preferred one to use */
>  	smmu_pmu->on_cpu = raw_smp_processor_id();
> -	WARN_ON(irq_set_affinity(smmu_pmu->irq, cpumask_of(smmu_pmu->on_cpu)));
> +	if (irq_set_affinity(smmu_pmu->irq, cpumask_of(smmu_pmu->on_cpu)))
> +		dev_err(dev, "Failed to set interrupt affinity\n");

In this case we have the option to fail probe.  Failing to set affinity means
we are broken anyway, so perhaps that is cleaner than carrying on.

As a side note, I wonder if other drivers could benefit from what I think
is a micro optimization to short cut calling the hp handlers when the
decision of which CPU is easy...

>  
>  	err = cpuhp_state_add_instance_nocalls(cpuhp_state_num,
>  					       &smmu_pmu->node);
Greg Kroah-Hartman Aug. 15, 2022, 10:47 a.m. UTC | #2
On Mon, Aug 15, 2022 at 05:28:15PM +0800, Yicong Yang wrote:
> From: Yicong Yang <yangyicong@hisilicon.com>
> 
> The WARN_ON() on irq_set_affinity() failure is misused according to the [1]
> and may crash people's box unintentionally. This may also be redundant since
> in the failure case we may also trigger the WARN and dump the stack in the
> perf core[2] for a second time.
> 
> So change the WARN_ON() to dev_err() to just print the failure message.
> 
> [1] https://github.com/torvalds/linux/blob/master/include/asm-generic/bug.h#L74
> [2] https://github.com/torvalds/linux/blob/master/kernel/events/core.c#L313

Please point to git.kernel.org links, we do not control github.com and
it's random mirrors.

> 
> Suggested-by: Greg KH <gregkh@linuxfoundation.org>
> [https://lore.kernel.org/lkml/YuOi3i0XHV++z1YI@kroah.com/]
> Signed-off-by: Yicong Yang <yangyicong@hisilicon.com>
> ---
>  drivers/perf/arm-ccn.c                   | 5 +++--
>  drivers/perf/arm_dmc620_pmu.c            | 3 ++-
>  drivers/perf/arm_smmuv3_pmu.c            | 6 ++++--
>  drivers/perf/fsl_imx8_ddr_perf.c         | 3 ++-
>  drivers/perf/hisilicon/hisi_pcie_pmu.c   | 6 ++++--
>  drivers/perf/hisilicon/hisi_uncore_pmu.c | 6 ++++--
>  drivers/perf/qcom_l2_pmu.c               | 8 ++++++--
>  drivers/perf/xgene_pmu.c                 | 6 ++++--
>  8 files changed, 29 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/perf/arm-ccn.c b/drivers/perf/arm-ccn.c
> index 728d13d8e98a..83abd909ba49 100644
> --- a/drivers/perf/arm-ccn.c
> +++ b/drivers/perf/arm-ccn.c
> @@ -1210,8 +1210,9 @@ static int arm_ccn_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
>  		return 0;
>  	perf_pmu_migrate_context(&dt->pmu, cpu, target);
>  	dt->cpu = target;
> -	if (ccn->irq)
> -		WARN_ON(irq_set_affinity(ccn->irq, cpumask_of(dt->cpu)));
> +	if (ccn->irq && irq_set_affinity(ccn->irq, cpumask_of(dt->cpu)))
> +		dev_err(ccn->dev, "Failed to set interrupt affinity\n");
> +
>  	return 0;

Why are you returning with no error, if an error happened?

Same everywhere else, you need to explain this in your changelog text.

thanks,

greg k-h
Barry Song Aug. 15, 2022, 12:25 p.m. UTC | #3
On Tue, Aug 16, 2022 at 12:06 AM Mark Rutland <mark.rutland@arm.com> wrote:
>
> On Mon, Aug 15, 2022 at 05:28:15PM +0800, Yicong Yang wrote:
> > From: Yicong Yang <yangyicong@hisilicon.com>
> >
> > The WARN_ON() on irq_set_affinity() failure is misused according to the [1]
> > and may crash people's box unintentionally. This may also be redundant since
> > in the failure case we may also trigger the WARN and dump the stack in the
> > perf core[2] for a second time.
>
> In what way do you think are these misused? I can't immediately see what you
> think applies from [1].
>
> In perf we rely upon interrupt affinity to enforce serialization in a few
> places, so if we fail to set the interrupt affinity there are a number of
> things which could go wrong (e.g. memory corruption, and all the fun that could
> result from that). We use WARN_ON() to catch that early.

Hi Mark,

If this is the case, is it better for us to return an ERROR after
printing a dev_err then
let the driver fail?

I really don't understand how a WARN_ON can help fix or even alert something is
wrong if we always need a successful irq_sey_affinity to make the driver work.

>
> I can't immediately see how [2] is relevant, since that's in the context of an
> IPI handler, and this patch affects the affinity of the PMU HW IRQ handler.
>
> Thanks,
> Mark.
>
> >
> > So change the WARN_ON() to dev_err() to just print the failure message.
> >
> > [1] https://github.com/torvalds/linux/blob/master/include/asm-generic/bug.h#L74
> > [2] https://github.com/torvalds/linux/blob/master/kernel/events/core.c#L313
> >
> > Suggested-by: Greg KH <gregkh@linuxfoundation.org>
> > [https://lore.kernel.org/lkml/YuOi3i0XHV++z1YI@kroah.com/]
> > Signed-off-by: Yicong Yang <yangyicong@hisilicon.com>
> > ---
> >  drivers/perf/arm-ccn.c                   | 5 +++--
> >  drivers/perf/arm_dmc620_pmu.c            | 3 ++-
> >  drivers/perf/arm_smmuv3_pmu.c            | 6 ++++--
> >  drivers/perf/fsl_imx8_ddr_perf.c         | 3 ++-
> >  drivers/perf/hisilicon/hisi_pcie_pmu.c   | 6 ++++--
> >  drivers/perf/hisilicon/hisi_uncore_pmu.c | 6 ++++--
> >  drivers/perf/qcom_l2_pmu.c               | 8 ++++++--
> >  drivers/perf/xgene_pmu.c                 | 6 ++++--
> >  8 files changed, 29 insertions(+), 14 deletions(-)
> >
> > diff --git a/drivers/perf/arm-ccn.c b/drivers/perf/arm-ccn.c
> > index 728d13d8e98a..83abd909ba49 100644
> > --- a/drivers/perf/arm-ccn.c
> > +++ b/drivers/perf/arm-ccn.c
> > @@ -1210,8 +1210,9 @@ static int arm_ccn_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
> >               return 0;
> >       perf_pmu_migrate_context(&dt->pmu, cpu, target);
> >       dt->cpu = target;
> > -     if (ccn->irq)
> > -             WARN_ON(irq_set_affinity(ccn->irq, cpumask_of(dt->cpu)));
> > +     if (ccn->irq && irq_set_affinity(ccn->irq, cpumask_of(dt->cpu)))
> > +             dev_err(ccn->dev, "Failed to set interrupt affinity\n");
> > +
> >       return 0;
> >  }
> >
> > diff --git a/drivers/perf/arm_dmc620_pmu.c b/drivers/perf/arm_dmc620_pmu.c
> > index 280a6ae3e27c..b59d3d9eb779 100644
> > --- a/drivers/perf/arm_dmc620_pmu.c
> > +++ b/drivers/perf/arm_dmc620_pmu.c
> > @@ -621,7 +621,8 @@ static int dmc620_pmu_cpu_teardown(unsigned int cpu,
> >               perf_pmu_migrate_context(&dmc620_pmu->pmu, irq->cpu, target);
> >       mutex_unlock(&dmc620_pmu_irqs_lock);
> >
> > -     WARN_ON(irq_set_affinity(irq->irq_num, cpumask_of(target)));
> > +     if (irq_set_affinity(irq->irq_num, cpumask_of(target)))
> > +             dev_err(dmc620_pmu->pmu.dev, "Failed to set interrupt affinity\n");
> >       irq->cpu = target;
> >
> >       return 0;
> > diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c
> > index 00d4c45a8017..05e1b3e274d7 100644
> > --- a/drivers/perf/arm_smmuv3_pmu.c
> > +++ b/drivers/perf/arm_smmuv3_pmu.c
> > @@ -646,7 +646,8 @@ static int smmu_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
> >
> >       perf_pmu_migrate_context(&smmu_pmu->pmu, cpu, target);
> >       smmu_pmu->on_cpu = target;
> > -     WARN_ON(irq_set_affinity(smmu_pmu->irq, cpumask_of(target)));
> > +     if (irq_set_affinity(smmu_pmu->irq, cpumask_of(target)))
> > +             dev_err(smmu_pmu->dev, "Failed to set interrupt affinity\n");
> >
> >       return 0;
> >  }
> > @@ -892,7 +893,8 @@ static int smmu_pmu_probe(struct platform_device *pdev)
> >
> >       /* Pick one CPU to be the preferred one to use */
> >       smmu_pmu->on_cpu = raw_smp_processor_id();
> > -     WARN_ON(irq_set_affinity(smmu_pmu->irq, cpumask_of(smmu_pmu->on_cpu)));
> > +     if (irq_set_affinity(smmu_pmu->irq, cpumask_of(smmu_pmu->on_cpu)))
> > +             dev_err(dev, "Failed to set interrupt affinity\n");
> >
> >       err = cpuhp_state_add_instance_nocalls(cpuhp_state_num,
> >                                              &smmu_pmu->node);
> > diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c
> > index 8e058e08fe81..c44192e2d9db 100644
> > --- a/drivers/perf/fsl_imx8_ddr_perf.c
> > +++ b/drivers/perf/fsl_imx8_ddr_perf.c
> > @@ -671,7 +671,8 @@ static int ddr_perf_offline_cpu(unsigned int cpu, struct hlist_node *node)
> >       perf_pmu_migrate_context(&pmu->pmu, cpu, target);
> >       pmu->cpu = target;
> >
> > -     WARN_ON(irq_set_affinity(pmu->irq, cpumask_of(pmu->cpu)));
> > +     if (irq_set_affinity(pmu->irq, cpumask_of(pmu->cpu)))
> > +             dev_err(pmu->dev, "Failed to set interrupt affinity\n");
> >
> >       return 0;
> >  }
> > diff --git a/drivers/perf/hisilicon/hisi_pcie_pmu.c b/drivers/perf/hisilicon/hisi_pcie_pmu.c
> > index 21771708597d..90aed9e51396 100644
> > --- a/drivers/perf/hisilicon/hisi_pcie_pmu.c
> > +++ b/drivers/perf/hisilicon/hisi_pcie_pmu.c
> > @@ -655,7 +655,8 @@ static int hisi_pcie_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
> >
> >       if (pcie_pmu->on_cpu == -1) {
> >               pcie_pmu->on_cpu = cpu;
> > -             WARN_ON(irq_set_affinity(pcie_pmu->irq, cpumask_of(cpu)));
> > +             if (irq_set_affinity(pcie_pmu->irq, cpumask_of(cpu)))
> > +                     pci_err(pcie_pmu->pdev, "Failed to set interrupt affinity\n");
> >       }
> >
> >       return 0;
> > @@ -681,7 +682,8 @@ static int hisi_pcie_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
> >       perf_pmu_migrate_context(&pcie_pmu->pmu, cpu, target);
> >       /* Use this CPU for event counting */
> >       pcie_pmu->on_cpu = target;
> > -     WARN_ON(irq_set_affinity(pcie_pmu->irq, cpumask_of(target)));
> > +     if (irq_set_affinity(pcie_pmu->irq, cpumask_of(target)))
> > +             pci_err(pcie_pmu->pdev, "Failed to set interrupt affinity\n");
> >
> >       return 0;
> >  }
> > diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c
> > index fbc8a93d5eac..74397b5ec889 100644
> > --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c
> > +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c
> > @@ -492,7 +492,8 @@ int hisi_uncore_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
> >       hisi_pmu->on_cpu = cpu;
> >
> >       /* Overflow interrupt also should use the same CPU */
> > -     WARN_ON(irq_set_affinity(hisi_pmu->irq, cpumask_of(cpu)));
> > +     if (irq_set_affinity(hisi_pmu->irq, cpumask_of(cpu)))
> > +             dev_err(hisi_pmu->dev, "Failed to set interrupt affinity\n");
> >
> >       return 0;
> >  }
> > @@ -525,7 +526,8 @@ int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
> >       perf_pmu_migrate_context(&hisi_pmu->pmu, cpu, target);
> >       /* Use this CPU for event counting */
> >       hisi_pmu->on_cpu = target;
> > -     WARN_ON(irq_set_affinity(hisi_pmu->irq, cpumask_of(target)));
> > +     if (irq_set_affinity(hisi_pmu->irq, cpumask_of(target)))
> > +             dev_err(hisi_pmu->dev, "Failed to set interrupt affinity\n");
> >
> >       return 0;
> >  }
> > diff --git a/drivers/perf/qcom_l2_pmu.c b/drivers/perf/qcom_l2_pmu.c
> > index 30234c261b05..c6fe01c7e637 100644
> > --- a/drivers/perf/qcom_l2_pmu.c
> > +++ b/drivers/perf/qcom_l2_pmu.c
> > @@ -793,7 +793,9 @@ static int l2cache_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
> >       cpumask_set_cpu(cpu, &l2cache_pmu->cpumask);
> >       cluster_pmu_reset();
> >
> > -     WARN_ON(irq_set_affinity(cluster->irq, cpumask_of(cpu)));
> > +     if (irq_set_affinity(cluster->irq, cpumask_of(cpu)))
> > +             dev_err(&l2cache_pmu->pdev->dev,
> > +                     "Failed to set interrupt affinity\n");
> >       enable_irq(cluster->irq);
> >
> >       return 0;
> > @@ -831,7 +833,9 @@ static int l2cache_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
> >       perf_pmu_migrate_context(&l2cache_pmu->pmu, cpu, target);
> >       cluster->on_cpu = target;
> >       cpumask_set_cpu(target, &l2cache_pmu->cpumask);
> > -     WARN_ON(irq_set_affinity(cluster->irq, cpumask_of(target)));
> > +     if (irq_set_affinity(cluster->irq, cpumask_of(target)))
> > +             dev_err(&l2cache_pmu->pdev->dev,
> > +                     "Failed to set interrupt affinity\n");
> >
> >       return 0;
> >  }
> > diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c
> > index 0c32dffc7ede..f31e678fdb69 100644
> > --- a/drivers/perf/xgene_pmu.c
> > +++ b/drivers/perf/xgene_pmu.c
> > @@ -1790,7 +1790,8 @@ static int xgene_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
> >               cpumask_set_cpu(cpu, &xgene_pmu->cpu);
> >
> >       /* Overflow interrupt also should use the same CPU */
> > -     WARN_ON(irq_set_affinity(xgene_pmu->irq, &xgene_pmu->cpu));
> > +     if (irq_set_affinity(xgene_pmu->irq, &xgene_pmu->cpu))
> > +             dev_err(xgene_pmu->dev, "Failed to set interrupt affinity\n");
> >
> >       return 0;
> >  }
> > @@ -1823,7 +1824,8 @@ static int xgene_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
> >
> >       cpumask_set_cpu(target, &xgene_pmu->cpu);
> >       /* Overflow interrupt also should use the same CPU */
> > -     WARN_ON(irq_set_affinity(xgene_pmu->irq, &xgene_pmu->cpu));
> > +     if (irq_set_affinity(xgene_pmu->irq, &xgene_pmu->cpu))
> > +             dev_err(xgene_pmu->dev, "Failed to set interrupt affinity\n");
> >
> >       return 0;
> >  }
> > --
> > 2.24.0
> >

Thanks
Barry
Yicong Yang Aug. 16, 2022, 6:22 a.m. UTC | #4
On 2022/8/15 17:39, Jonathan Cameron wrote:
> On Mon, 15 Aug 2022 17:28:15 +0800
> Yicong Yang <yangyicong@huawei.com> wrote:
> 
>> From: Yicong Yang <yangyicong@hisilicon.com>
>>
>> The WARN_ON() on irq_set_affinity() failure is misused according to the [1]
>> and may crash people's box unintentionally. This may also be redundant since
>> in the failure case we may also trigger the WARN and dump the stack in the
>> perf core[2] for a second time.
>>
>> So change the WARN_ON() to dev_err() to just print the failure message.
>>
>> [1] https://github.com/torvalds/linux/blob/master/include/asm-generic/bug.h#L74
>> [2] https://github.com/torvalds/linux/blob/master/kernel/events/core.c#L313
>>
>> Suggested-by: Greg KH <gregkh@linuxfoundation.org>
>> [https://lore.kernel.org/lkml/YuOi3i0XHV++z1YI@kroah.com/]
>> Signed-off-by: Yicong Yang <yangyicong@hisilicon.com>
> 
> Looks like progress in a sensible direction to me.
> 
> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> 
> Kind of unrelated question inline.
> 

Thanks for the quick review! replies below.

>>
> 
> 
>> diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c
>> index 00d4c45a8017..05e1b3e274d7 100644
>> --- a/drivers/perf/arm_smmuv3_pmu.c
>> +++ b/drivers/perf/arm_smmuv3_pmu.c
>> @@ -646,7 +646,8 @@ static int smmu_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
>>  
>>  	perf_pmu_migrate_context(&smmu_pmu->pmu, cpu, target);
>>  	smmu_pmu->on_cpu = target;
>> -	WARN_ON(irq_set_affinity(smmu_pmu->irq, cpumask_of(target)));
>> +	if (irq_set_affinity(smmu_pmu->irq, cpumask_of(target)))
>> +		dev_err(smmu_pmu->dev, "Failed to set interrupt affinity\n");
>>  
>>  	return 0;
>>  }
>> @@ -892,7 +893,8 @@ static int smmu_pmu_probe(struct platform_device *pdev)
>>  
>>  	/* Pick one CPU to be the preferred one to use */
>>  	smmu_pmu->on_cpu = raw_smp_processor_id();
>> -	WARN_ON(irq_set_affinity(smmu_pmu->irq, cpumask_of(smmu_pmu->on_cpu)));
>> +	if (irq_set_affinity(smmu_pmu->irq, cpumask_of(smmu_pmu->on_cpu)))
>> +		dev_err(dev, "Failed to set interrupt affinity\n");
> 
> In this case we have the option to fail probe.  Failing to set affinity means
> we are broken anyway, so perhaps that is cleaner than carrying on.
> 

This patch only switch the way on error notification with no functional change intended.
So if we need to change the behaviour here it should be in a separate patch. Indeed I'm
not sure it's necessary to fail probe here since we can use the pmu if it fails here.

> As a side note, I wonder if other drivers could benefit from what I think
> is a micro optimization to short cut calling the hp handlers when the
> decision of which CPU is easy...
> 

It seems sensible to me but may differ in differenct pmu drivers since they may need the
hp handlers called. Needs more check.

Thanks.

>>  
>>  	err = cpuhp_state_add_instance_nocalls(cpuhp_state_num,
>>  					       &smmu_pmu->node);
> .
>
Yicong Yang Aug. 16, 2022, 6:26 a.m. UTC | #5
On 2022/8/15 18:47, Greg KH wrote:
> On Mon, Aug 15, 2022 at 05:28:15PM +0800, Yicong Yang wrote:
>> From: Yicong Yang <yangyicong@hisilicon.com>
>>
>> The WARN_ON() on irq_set_affinity() failure is misused according to the [1]
>> and may crash people's box unintentionally. This may also be redundant since
>> in the failure case we may also trigger the WARN and dump the stack in the
>> perf core[2] for a second time.
>>
>> So change the WARN_ON() to dev_err() to just print the failure message.
>>
>> [1] https://github.com/torvalds/linux/blob/master/include/asm-generic/bug.h#L74
>> [2] https://github.com/torvalds/linux/blob/master/kernel/events/core.c#L313
> 
> Please point to git.kernel.org links, we do not control github.com and
> it's random mirrors.
> 

Got it. Will update with a git.kernel.org link. Thanks for point it out!

>>
>> Suggested-by: Greg KH <gregkh@linuxfoundation.org>
>> [https://lore.kernel.org/lkml/YuOi3i0XHV++z1YI@kroah.com/]
>> Signed-off-by: Yicong Yang <yangyicong@hisilicon.com>
>> ---
>>  drivers/perf/arm-ccn.c                   | 5 +++--
>>  drivers/perf/arm_dmc620_pmu.c            | 3 ++-
>>  drivers/perf/arm_smmuv3_pmu.c            | 6 ++++--
>>  drivers/perf/fsl_imx8_ddr_perf.c         | 3 ++-
>>  drivers/perf/hisilicon/hisi_pcie_pmu.c   | 6 ++++--
>>  drivers/perf/hisilicon/hisi_uncore_pmu.c | 6 ++++--
>>  drivers/perf/qcom_l2_pmu.c               | 8 ++++++--
>>  drivers/perf/xgene_pmu.c                 | 6 ++++--
>>  8 files changed, 29 insertions(+), 14 deletions(-)
>>
>> diff --git a/drivers/perf/arm-ccn.c b/drivers/perf/arm-ccn.c
>> index 728d13d8e98a..83abd909ba49 100644
>> --- a/drivers/perf/arm-ccn.c
>> +++ b/drivers/perf/arm-ccn.c
>> @@ -1210,8 +1210,9 @@ static int arm_ccn_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
>>  		return 0;
>>  	perf_pmu_migrate_context(&dt->pmu, cpu, target);
>>  	dt->cpu = target;
>> -	if (ccn->irq)
>> -		WARN_ON(irq_set_affinity(ccn->irq, cpumask_of(dt->cpu)));
>> +	if (ccn->irq && irq_set_affinity(ccn->irq, cpumask_of(dt->cpu)))
>> +		dev_err(ccn->dev, "Failed to set interrupt affinity\n");
>> +
>>  	return 0;
> 
> Why are you returning with no error, if an error happened?
> 
> Same everywhere else, you need to explain this in your changelog text.
> 

This patch intends no functional change but to switch the way on the error notification to avoid
crash the box. So just keep the current handling behaviour. Will mention this in the commit in v2.
I think whether we should actually reponse to the error should be according to the driver.

Thanks.
Yicong Yang Aug. 16, 2022, 7:37 a.m. UTC | #6
On 2022/8/15 19:25, Mark Rutland wrote:
> On Mon, Aug 15, 2022 at 05:28:15PM +0800, Yicong Yang wrote:
>> From: Yicong Yang <yangyicong@hisilicon.com>
>>
>> The WARN_ON() on irq_set_affinity() failure is misused according to the [1]
>> and may crash people's box unintentionally. This may also be redundant since
>> in the failure case we may also trigger the WARN and dump the stack in the
>> perf core[2] for a second time.
> 
> In what way do you think are these misused? I can't immediately see what you
> think applies from [1].

As commented by irq_set_affinity() it "Fails if cpumask does not contain an online
 CPU" which means we passed an invalid input, I think which violiates the "Do not
use these macros when checking for invalid external inputs".

> 
> In perf we rely upon interrupt affinity to enforce serialization in a few
> places, so if we fail to set the interrupt affinity there are a number of
> things which could go wrong (e.g. memory corruption, and all the fun that could
> result from that). We use WARN_ON() to catch that early.
> 

If we'd like to catch this failure information early maybe a dev_err() should be
enough to indicate this.

> I can't immediately see how [2] is relevant, since that's in the context of an
> IPI handler, and this patch affects the affinity of the PMU HW IRQ handler.
> 

I think it's relevant (please correct me) as when I debug another pmu driver using
MSI interrupt[*], I found I'll trigger the WARN() in [2] if the interrupt is not
bind to the CPU which start trace. So I think it's required to handle the interrupt
on the same CPU start the trace otherwise the "context" is mismatched.

[*] https://lore.kernel.org/lkml/20220721130116.43366-3-yangyicong@huawei.com/

Thanks.

> Thanks,
> Mark.
> 
>>
>> So change the WARN_ON() to dev_err() to just print the failure message.
>>
>> [1] https://github.com/torvalds/linux/blob/master/include/asm-generic/bug.h#L74
>> [2] https://github.com/torvalds/linux/blob/master/kernel/events/core.c#L313
>>
>> Suggested-by: Greg KH <gregkh@linuxfoundation.org>
>> [https://lore.kernel.org/lkml/YuOi3i0XHV++z1YI@kroah.com/]
>> Signed-off-by: Yicong Yang <yangyicong@hisilicon.com>
>> ---
>>  drivers/perf/arm-ccn.c                   | 5 +++--
>>  drivers/perf/arm_dmc620_pmu.c            | 3 ++-
>>  drivers/perf/arm_smmuv3_pmu.c            | 6 ++++--
>>  drivers/perf/fsl_imx8_ddr_perf.c         | 3 ++-
>>  drivers/perf/hisilicon/hisi_pcie_pmu.c   | 6 ++++--
>>  drivers/perf/hisilicon/hisi_uncore_pmu.c | 6 ++++--
>>  drivers/perf/qcom_l2_pmu.c               | 8 ++++++--
>>  drivers/perf/xgene_pmu.c                 | 6 ++++--
>>  8 files changed, 29 insertions(+), 14 deletions(-)
>>
>> diff --git a/drivers/perf/arm-ccn.c b/drivers/perf/arm-ccn.c
>> index 728d13d8e98a..83abd909ba49 100644
>> --- a/drivers/perf/arm-ccn.c
>> +++ b/drivers/perf/arm-ccn.c
>> @@ -1210,8 +1210,9 @@ static int arm_ccn_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
>>  		return 0;
>>  	perf_pmu_migrate_context(&dt->pmu, cpu, target);
>>  	dt->cpu = target;
>> -	if (ccn->irq)
>> -		WARN_ON(irq_set_affinity(ccn->irq, cpumask_of(dt->cpu)));
>> +	if (ccn->irq && irq_set_affinity(ccn->irq, cpumask_of(dt->cpu)))
>> +		dev_err(ccn->dev, "Failed to set interrupt affinity\n");
>> +
>>  	return 0;
>>  }
>>  
>> diff --git a/drivers/perf/arm_dmc620_pmu.c b/drivers/perf/arm_dmc620_pmu.c
>> index 280a6ae3e27c..b59d3d9eb779 100644
>> --- a/drivers/perf/arm_dmc620_pmu.c
>> +++ b/drivers/perf/arm_dmc620_pmu.c
>> @@ -621,7 +621,8 @@ static int dmc620_pmu_cpu_teardown(unsigned int cpu,
>>  		perf_pmu_migrate_context(&dmc620_pmu->pmu, irq->cpu, target);
>>  	mutex_unlock(&dmc620_pmu_irqs_lock);
>>  
>> -	WARN_ON(irq_set_affinity(irq->irq_num, cpumask_of(target)));
>> +	if (irq_set_affinity(irq->irq_num, cpumask_of(target)))
>> +		dev_err(dmc620_pmu->pmu.dev, "Failed to set interrupt affinity\n");
>>  	irq->cpu = target;
>>  
>>  	return 0;
>> diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c
>> index 00d4c45a8017..05e1b3e274d7 100644
>> --- a/drivers/perf/arm_smmuv3_pmu.c
>> +++ b/drivers/perf/arm_smmuv3_pmu.c
>> @@ -646,7 +646,8 @@ static int smmu_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
>>  
>>  	perf_pmu_migrate_context(&smmu_pmu->pmu, cpu, target);
>>  	smmu_pmu->on_cpu = target;
>> -	WARN_ON(irq_set_affinity(smmu_pmu->irq, cpumask_of(target)));
>> +	if (irq_set_affinity(smmu_pmu->irq, cpumask_of(target)))
>> +		dev_err(smmu_pmu->dev, "Failed to set interrupt affinity\n");
>>  
>>  	return 0;
>>  }
>> @@ -892,7 +893,8 @@ static int smmu_pmu_probe(struct platform_device *pdev)
>>  
>>  	/* Pick one CPU to be the preferred one to use */
>>  	smmu_pmu->on_cpu = raw_smp_processor_id();
>> -	WARN_ON(irq_set_affinity(smmu_pmu->irq, cpumask_of(smmu_pmu->on_cpu)));
>> +	if (irq_set_affinity(smmu_pmu->irq, cpumask_of(smmu_pmu->on_cpu)))
>> +		dev_err(dev, "Failed to set interrupt affinity\n");
>>  
>>  	err = cpuhp_state_add_instance_nocalls(cpuhp_state_num,
>>  					       &smmu_pmu->node);
>> diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c
>> index 8e058e08fe81..c44192e2d9db 100644
>> --- a/drivers/perf/fsl_imx8_ddr_perf.c
>> +++ b/drivers/perf/fsl_imx8_ddr_perf.c
>> @@ -671,7 +671,8 @@ static int ddr_perf_offline_cpu(unsigned int cpu, struct hlist_node *node)
>>  	perf_pmu_migrate_context(&pmu->pmu, cpu, target);
>>  	pmu->cpu = target;
>>  
>> -	WARN_ON(irq_set_affinity(pmu->irq, cpumask_of(pmu->cpu)));
>> +	if (irq_set_affinity(pmu->irq, cpumask_of(pmu->cpu)))
>> +		dev_err(pmu->dev, "Failed to set interrupt affinity\n");
>>  
>>  	return 0;
>>  }
>> diff --git a/drivers/perf/hisilicon/hisi_pcie_pmu.c b/drivers/perf/hisilicon/hisi_pcie_pmu.c
>> index 21771708597d..90aed9e51396 100644
>> --- a/drivers/perf/hisilicon/hisi_pcie_pmu.c
>> +++ b/drivers/perf/hisilicon/hisi_pcie_pmu.c
>> @@ -655,7 +655,8 @@ static int hisi_pcie_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
>>  
>>  	if (pcie_pmu->on_cpu == -1) {
>>  		pcie_pmu->on_cpu = cpu;
>> -		WARN_ON(irq_set_affinity(pcie_pmu->irq, cpumask_of(cpu)));
>> +		if (irq_set_affinity(pcie_pmu->irq, cpumask_of(cpu)))
>> +			pci_err(pcie_pmu->pdev, "Failed to set interrupt affinity\n");
>>  	}
>>  
>>  	return 0;
>> @@ -681,7 +682,8 @@ static int hisi_pcie_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
>>  	perf_pmu_migrate_context(&pcie_pmu->pmu, cpu, target);
>>  	/* Use this CPU for event counting */
>>  	pcie_pmu->on_cpu = target;
>> -	WARN_ON(irq_set_affinity(pcie_pmu->irq, cpumask_of(target)));
>> +	if (irq_set_affinity(pcie_pmu->irq, cpumask_of(target)))
>> +		pci_err(pcie_pmu->pdev, "Failed to set interrupt affinity\n");
>>  
>>  	return 0;
>>  }
>> diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c
>> index fbc8a93d5eac..74397b5ec889 100644
>> --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c
>> +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c
>> @@ -492,7 +492,8 @@ int hisi_uncore_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
>>  	hisi_pmu->on_cpu = cpu;
>>  
>>  	/* Overflow interrupt also should use the same CPU */
>> -	WARN_ON(irq_set_affinity(hisi_pmu->irq, cpumask_of(cpu)));
>> +	if (irq_set_affinity(hisi_pmu->irq, cpumask_of(cpu)))
>> +		dev_err(hisi_pmu->dev, "Failed to set interrupt affinity\n");
>>  
>>  	return 0;
>>  }
>> @@ -525,7 +526,8 @@ int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
>>  	perf_pmu_migrate_context(&hisi_pmu->pmu, cpu, target);
>>  	/* Use this CPU for event counting */
>>  	hisi_pmu->on_cpu = target;
>> -	WARN_ON(irq_set_affinity(hisi_pmu->irq, cpumask_of(target)));
>> +	if (irq_set_affinity(hisi_pmu->irq, cpumask_of(target)))
>> +		dev_err(hisi_pmu->dev, "Failed to set interrupt affinity\n");
>>  
>>  	return 0;
>>  }
>> diff --git a/drivers/perf/qcom_l2_pmu.c b/drivers/perf/qcom_l2_pmu.c
>> index 30234c261b05..c6fe01c7e637 100644
>> --- a/drivers/perf/qcom_l2_pmu.c
>> +++ b/drivers/perf/qcom_l2_pmu.c
>> @@ -793,7 +793,9 @@ static int l2cache_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
>>  	cpumask_set_cpu(cpu, &l2cache_pmu->cpumask);
>>  	cluster_pmu_reset();
>>  
>> -	WARN_ON(irq_set_affinity(cluster->irq, cpumask_of(cpu)));
>> +	if (irq_set_affinity(cluster->irq, cpumask_of(cpu)))
>> +		dev_err(&l2cache_pmu->pdev->dev,
>> +			"Failed to set interrupt affinity\n");
>>  	enable_irq(cluster->irq);
>>  
>>  	return 0;
>> @@ -831,7 +833,9 @@ static int l2cache_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
>>  	perf_pmu_migrate_context(&l2cache_pmu->pmu, cpu, target);
>>  	cluster->on_cpu = target;
>>  	cpumask_set_cpu(target, &l2cache_pmu->cpumask);
>> -	WARN_ON(irq_set_affinity(cluster->irq, cpumask_of(target)));
>> +	if (irq_set_affinity(cluster->irq, cpumask_of(target)))
>> +		dev_err(&l2cache_pmu->pdev->dev,
>> +			"Failed to set interrupt affinity\n");
>>  
>>  	return 0;
>>  }
>> diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c
>> index 0c32dffc7ede..f31e678fdb69 100644
>> --- a/drivers/perf/xgene_pmu.c
>> +++ b/drivers/perf/xgene_pmu.c
>> @@ -1790,7 +1790,8 @@ static int xgene_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
>>  		cpumask_set_cpu(cpu, &xgene_pmu->cpu);
>>  
>>  	/* Overflow interrupt also should use the same CPU */
>> -	WARN_ON(irq_set_affinity(xgene_pmu->irq, &xgene_pmu->cpu));
>> +	if (irq_set_affinity(xgene_pmu->irq, &xgene_pmu->cpu))
>> +		dev_err(xgene_pmu->dev, "Failed to set interrupt affinity\n");
>>  
>>  	return 0;
>>  }
>> @@ -1823,7 +1824,8 @@ static int xgene_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
>>  
>>  	cpumask_set_cpu(target, &xgene_pmu->cpu);
>>  	/* Overflow interrupt also should use the same CPU */
>> -	WARN_ON(irq_set_affinity(xgene_pmu->irq, &xgene_pmu->cpu));
>> +	if (irq_set_affinity(xgene_pmu->irq, &xgene_pmu->cpu))
>> +		dev_err(xgene_pmu->dev, "Failed to set interrupt affinity\n");
>>  
>>  	return 0;
>>  }
>> -- 
>> 2.24.0
>>
> .
>
Mark Rutland Aug. 16, 2022, 10:26 a.m. UTC | #7
On Tue, Aug 16, 2022 at 03:37:29PM +0800, Yicong Yang wrote:
> On 2022/8/15 19:25, Mark Rutland wrote:
> > On Mon, Aug 15, 2022 at 05:28:15PM +0800, Yicong Yang wrote:
> >> From: Yicong Yang <yangyicong@hisilicon.com>
> >>
> >> The WARN_ON() on irq_set_affinity() failure is misused according to the [1]
> >> and may crash people's box unintentionally. This may also be redundant since
> >> in the failure case we may also trigger the WARN and dump the stack in the
> >> perf core[2] for a second time.
> > 
> > In what way do you think are these misused? I can't immediately see what you
> > think applies from [1].
> 
> As commented by irq_set_affinity() it "Fails if cpumask does not contain an online
>  CPU" 

In all of the cases below we've chosen an online CPU. So the only way this can
happen is if there is a bug in the code, in which case a WARN_ON() is
appropriate. Also, there are *other* reasons irq_set_affinity() can fail.

> which means we passed an invalid input, I think which violiates the "Do not
> use these macros when checking for invalid external inputs".

There is no external input here. The chosen CPU didn't come from userspace or
an external source; we chose it ourselves based on kernel internal data (e.g.
cpu_online_mask).

So that does not apply here.

> > In perf we rely upon interrupt affinity to enforce serialization in a few
> > places, so if we fail to set the interrupt affinity there are a number of
> > things which could go wrong (e.g. memory corruption, and all the fun that could
> > result from that). We use WARN_ON() to catch that early.
> 
> If we'd like to catch this failure information early maybe a dev_err() should be
> enough to indicate this.

I don't see why it is necessary to change to dev_err().

> > I can't immediately see how [2] is relevant, since that's in the context of an
> > IPI handler, and this patch affects the affinity of the PMU HW IRQ handler.
> 
> I think it's relevant (please correct me) as when I debug another pmu driver using
> MSI interrupt[*], I found I'll trigger the WARN() in [2] if the interrupt is not
> bind to the CPU which start trace. So I think it's required to handle the interrupt
> on the same CPU start the trace otherwise the "context" is mismatched.

Sorry, I had confused event_function_local() with event_function().

I still think we want to keep the WARN_ON() here since before we get to
event_function_local() we may do other things in the IRQ handler.

Thanks,
Mark.

> 
> [*] https://lore.kernel.org/lkml/20220721130116.43366-3-yangyicong@huawei.com/
> 
> Thanks.
> 
> > Thanks,
> > Mark.
> > 
> >>
> >> So change the WARN_ON() to dev_err() to just print the failure message.
> >>
> >> [1] https://github.com/torvalds/linux/blob/master/include/asm-generic/bug.h#L74
> >> [2] https://github.com/torvalds/linux/blob/master/kernel/events/core.c#L313
> >>
> >> Suggested-by: Greg KH <gregkh@linuxfoundation.org>
> >> [https://lore.kernel.org/lkml/YuOi3i0XHV++z1YI@kroah.com/]
> >> Signed-off-by: Yicong Yang <yangyicong@hisilicon.com>
> >> ---
> >>  drivers/perf/arm-ccn.c                   | 5 +++--
> >>  drivers/perf/arm_dmc620_pmu.c            | 3 ++-
> >>  drivers/perf/arm_smmuv3_pmu.c            | 6 ++++--
> >>  drivers/perf/fsl_imx8_ddr_perf.c         | 3 ++-
> >>  drivers/perf/hisilicon/hisi_pcie_pmu.c   | 6 ++++--
> >>  drivers/perf/hisilicon/hisi_uncore_pmu.c | 6 ++++--
> >>  drivers/perf/qcom_l2_pmu.c               | 8 ++++++--
> >>  drivers/perf/xgene_pmu.c                 | 6 ++++--
> >>  8 files changed, 29 insertions(+), 14 deletions(-)
> >>
> >> diff --git a/drivers/perf/arm-ccn.c b/drivers/perf/arm-ccn.c
> >> index 728d13d8e98a..83abd909ba49 100644
> >> --- a/drivers/perf/arm-ccn.c
> >> +++ b/drivers/perf/arm-ccn.c
> >> @@ -1210,8 +1210,9 @@ static int arm_ccn_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
> >>  		return 0;
> >>  	perf_pmu_migrate_context(&dt->pmu, cpu, target);
> >>  	dt->cpu = target;
> >> -	if (ccn->irq)
> >> -		WARN_ON(irq_set_affinity(ccn->irq, cpumask_of(dt->cpu)));
> >> +	if (ccn->irq && irq_set_affinity(ccn->irq, cpumask_of(dt->cpu)))
> >> +		dev_err(ccn->dev, "Failed to set interrupt affinity\n");
> >> +
> >>  	return 0;
> >>  }
> >>  
> >> diff --git a/drivers/perf/arm_dmc620_pmu.c b/drivers/perf/arm_dmc620_pmu.c
> >> index 280a6ae3e27c..b59d3d9eb779 100644
> >> --- a/drivers/perf/arm_dmc620_pmu.c
> >> +++ b/drivers/perf/arm_dmc620_pmu.c
> >> @@ -621,7 +621,8 @@ static int dmc620_pmu_cpu_teardown(unsigned int cpu,
> >>  		perf_pmu_migrate_context(&dmc620_pmu->pmu, irq->cpu, target);
> >>  	mutex_unlock(&dmc620_pmu_irqs_lock);
> >>  
> >> -	WARN_ON(irq_set_affinity(irq->irq_num, cpumask_of(target)));
> >> +	if (irq_set_affinity(irq->irq_num, cpumask_of(target)))
> >> +		dev_err(dmc620_pmu->pmu.dev, "Failed to set interrupt affinity\n");
> >>  	irq->cpu = target;
> >>  
> >>  	return 0;
> >> diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c
> >> index 00d4c45a8017..05e1b3e274d7 100644
> >> --- a/drivers/perf/arm_smmuv3_pmu.c
> >> +++ b/drivers/perf/arm_smmuv3_pmu.c
> >> @@ -646,7 +646,8 @@ static int smmu_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
> >>  
> >>  	perf_pmu_migrate_context(&smmu_pmu->pmu, cpu, target);
> >>  	smmu_pmu->on_cpu = target;
> >> -	WARN_ON(irq_set_affinity(smmu_pmu->irq, cpumask_of(target)));
> >> +	if (irq_set_affinity(smmu_pmu->irq, cpumask_of(target)))
> >> +		dev_err(smmu_pmu->dev, "Failed to set interrupt affinity\n");
> >>  
> >>  	return 0;
> >>  }
> >> @@ -892,7 +893,8 @@ static int smmu_pmu_probe(struct platform_device *pdev)
> >>  
> >>  	/* Pick one CPU to be the preferred one to use */
> >>  	smmu_pmu->on_cpu = raw_smp_processor_id();
> >> -	WARN_ON(irq_set_affinity(smmu_pmu->irq, cpumask_of(smmu_pmu->on_cpu)));
> >> +	if (irq_set_affinity(smmu_pmu->irq, cpumask_of(smmu_pmu->on_cpu)))
> >> +		dev_err(dev, "Failed to set interrupt affinity\n");
> >>  
> >>  	err = cpuhp_state_add_instance_nocalls(cpuhp_state_num,
> >>  					       &smmu_pmu->node);
> >> diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c
> >> index 8e058e08fe81..c44192e2d9db 100644
> >> --- a/drivers/perf/fsl_imx8_ddr_perf.c
> >> +++ b/drivers/perf/fsl_imx8_ddr_perf.c
> >> @@ -671,7 +671,8 @@ static int ddr_perf_offline_cpu(unsigned int cpu, struct hlist_node *node)
> >>  	perf_pmu_migrate_context(&pmu->pmu, cpu, target);
> >>  	pmu->cpu = target;
> >>  
> >> -	WARN_ON(irq_set_affinity(pmu->irq, cpumask_of(pmu->cpu)));
> >> +	if (irq_set_affinity(pmu->irq, cpumask_of(pmu->cpu)))
> >> +		dev_err(pmu->dev, "Failed to set interrupt affinity\n");
> >>  
> >>  	return 0;
> >>  }
> >> diff --git a/drivers/perf/hisilicon/hisi_pcie_pmu.c b/drivers/perf/hisilicon/hisi_pcie_pmu.c
> >> index 21771708597d..90aed9e51396 100644
> >> --- a/drivers/perf/hisilicon/hisi_pcie_pmu.c
> >> +++ b/drivers/perf/hisilicon/hisi_pcie_pmu.c
> >> @@ -655,7 +655,8 @@ static int hisi_pcie_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
> >>  
> >>  	if (pcie_pmu->on_cpu == -1) {
> >>  		pcie_pmu->on_cpu = cpu;
> >> -		WARN_ON(irq_set_affinity(pcie_pmu->irq, cpumask_of(cpu)));
> >> +		if (irq_set_affinity(pcie_pmu->irq, cpumask_of(cpu)))
> >> +			pci_err(pcie_pmu->pdev, "Failed to set interrupt affinity\n");
> >>  	}
> >>  
> >>  	return 0;
> >> @@ -681,7 +682,8 @@ static int hisi_pcie_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
> >>  	perf_pmu_migrate_context(&pcie_pmu->pmu, cpu, target);
> >>  	/* Use this CPU for event counting */
> >>  	pcie_pmu->on_cpu = target;
> >> -	WARN_ON(irq_set_affinity(pcie_pmu->irq, cpumask_of(target)));
> >> +	if (irq_set_affinity(pcie_pmu->irq, cpumask_of(target)))
> >> +		pci_err(pcie_pmu->pdev, "Failed to set interrupt affinity\n");
> >>  
> >>  	return 0;
> >>  }
> >> diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c
> >> index fbc8a93d5eac..74397b5ec889 100644
> >> --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c
> >> +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c
> >> @@ -492,7 +492,8 @@ int hisi_uncore_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
> >>  	hisi_pmu->on_cpu = cpu;
> >>  
> >>  	/* Overflow interrupt also should use the same CPU */
> >> -	WARN_ON(irq_set_affinity(hisi_pmu->irq, cpumask_of(cpu)));
> >> +	if (irq_set_affinity(hisi_pmu->irq, cpumask_of(cpu)))
> >> +		dev_err(hisi_pmu->dev, "Failed to set interrupt affinity\n");
> >>  
> >>  	return 0;
> >>  }
> >> @@ -525,7 +526,8 @@ int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
> >>  	perf_pmu_migrate_context(&hisi_pmu->pmu, cpu, target);
> >>  	/* Use this CPU for event counting */
> >>  	hisi_pmu->on_cpu = target;
> >> -	WARN_ON(irq_set_affinity(hisi_pmu->irq, cpumask_of(target)));
> >> +	if (irq_set_affinity(hisi_pmu->irq, cpumask_of(target)))
> >> +		dev_err(hisi_pmu->dev, "Failed to set interrupt affinity\n");
> >>  
> >>  	return 0;
> >>  }
> >> diff --git a/drivers/perf/qcom_l2_pmu.c b/drivers/perf/qcom_l2_pmu.c
> >> index 30234c261b05..c6fe01c7e637 100644
> >> --- a/drivers/perf/qcom_l2_pmu.c
> >> +++ b/drivers/perf/qcom_l2_pmu.c
> >> @@ -793,7 +793,9 @@ static int l2cache_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
> >>  	cpumask_set_cpu(cpu, &l2cache_pmu->cpumask);
> >>  	cluster_pmu_reset();
> >>  
> >> -	WARN_ON(irq_set_affinity(cluster->irq, cpumask_of(cpu)));
> >> +	if (irq_set_affinity(cluster->irq, cpumask_of(cpu)))
> >> +		dev_err(&l2cache_pmu->pdev->dev,
> >> +			"Failed to set interrupt affinity\n");
> >>  	enable_irq(cluster->irq);
> >>  
> >>  	return 0;
> >> @@ -831,7 +833,9 @@ static int l2cache_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
> >>  	perf_pmu_migrate_context(&l2cache_pmu->pmu, cpu, target);
> >>  	cluster->on_cpu = target;
> >>  	cpumask_set_cpu(target, &l2cache_pmu->cpumask);
> >> -	WARN_ON(irq_set_affinity(cluster->irq, cpumask_of(target)));
> >> +	if (irq_set_affinity(cluster->irq, cpumask_of(target)))
> >> +		dev_err(&l2cache_pmu->pdev->dev,
> >> +			"Failed to set interrupt affinity\n");
> >>  
> >>  	return 0;
> >>  }
> >> diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c
> >> index 0c32dffc7ede..f31e678fdb69 100644
> >> --- a/drivers/perf/xgene_pmu.c
> >> +++ b/drivers/perf/xgene_pmu.c
> >> @@ -1790,7 +1790,8 @@ static int xgene_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
> >>  		cpumask_set_cpu(cpu, &xgene_pmu->cpu);
> >>  
> >>  	/* Overflow interrupt also should use the same CPU */
> >> -	WARN_ON(irq_set_affinity(xgene_pmu->irq, &xgene_pmu->cpu));
> >> +	if (irq_set_affinity(xgene_pmu->irq, &xgene_pmu->cpu))
> >> +		dev_err(xgene_pmu->dev, "Failed to set interrupt affinity\n");
> >>  
> >>  	return 0;
> >>  }
> >> @@ -1823,7 +1824,8 @@ static int xgene_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
> >>  
> >>  	cpumask_set_cpu(target, &xgene_pmu->cpu);
> >>  	/* Overflow interrupt also should use the same CPU */
> >> -	WARN_ON(irq_set_affinity(xgene_pmu->irq, &xgene_pmu->cpu));
> >> +	if (irq_set_affinity(xgene_pmu->irq, &xgene_pmu->cpu))
> >> +		dev_err(xgene_pmu->dev, "Failed to set interrupt affinity\n");
> >>  
> >>  	return 0;
> >>  }
> >> -- 
> >> 2.24.0
> >>
> > .
> >
Yicong Yang Aug. 16, 2022, 3:20 p.m. UTC | #8
On 8/16/2022 6:26 PM, Mark Rutland wrote:
> On Tue, Aug 16, 2022 at 03:37:29PM +0800, Yicong Yang wrote:
>> On 2022/8/15 19:25, Mark Rutland wrote:
>>> On Mon, Aug 15, 2022 at 05:28:15PM +0800, Yicong Yang wrote:
>>>> From: Yicong Yang <yangyicong@hisilicon.com>
>>>>
>>>> The WARN_ON() on irq_set_affinity() failure is misused according to the [1]
>>>> and may crash people's box unintentionally. This may also be redundant since
>>>> in the failure case we may also trigger the WARN and dump the stack in the
>>>> perf core[2] for a second time.
>>>
>>> In what way do you think are these misused? I can't immediately see what you
>>> think applies from [1].
>>
>> As commented by irq_set_affinity() it "Fails if cpumask does not contain an online
>>  CPU" 
> 
> In all of the cases below we've chosen an online CPU. So the only way this can
> happen is if there is a bug in the code, in which case a WARN_ON() is
> appropriate. Also, there are *other* reasons irq_set_affinity() can fail.
> 
>> which means we passed an invalid input, I think which violiates the "Do not
>> use these macros when checking for invalid external inputs".
> 
> There is no external input here. The chosen CPU didn't come from userspace or
> an external source; we chose it ourselves based on kernel internal data (e.g.
> cpu_online_mask).
> 
> So that does not apply here.
> 
>>> In perf we rely upon interrupt affinity to enforce serialization in a few
>>> places, so if we fail to set the interrupt affinity there are a number of
>>> things which could go wrong (e.g. memory corruption, and all the fun that could
>>> result from that). We use WARN_ON() to catch that early.
>>
>> If we'd like to catch this failure information early maybe a dev_err() should be
>> enough to indicate this.
> 
> I don't see why it is necessary to change to dev_err().
> 
>>> I can't immediately see how [2] is relevant, since that's in the context of an
>>> IPI handler, and this patch affects the affinity of the PMU HW IRQ handler.
>>
>> I think it's relevant (please correct me) as when I debug another pmu driver using
>> MSI interrupt[*], I found I'll trigger the WARN() in [2] if the interrupt is not
>> bind to the CPU which start trace. So I think it's required to handle the interrupt
>> on the same CPU start the trace otherwise the "context" is mismatched.
> 
> Sorry, I had confused event_function_local() with event_function().
> 
> I still think we want to keep the WARN_ON() here since before we get to
> event_function_local() we may do other things in the IRQ handler.
> 

ok. Another point of this patch is to avoid panic and reboot on WARN() (pointed by Greg
to not crash people's box), is it intended here? If not but we still want a callstack in
these places, does it make sense to use dev_err() + dump_stack() instead?

Thanks.

> 
>>
>> [*] https://lore.kernel.org/lkml/20220721130116.43366-3-yangyicong@huawei.com/
>>
>> Thanks.
>>
>>> Thanks,
>>> Mark.
>>>
>>>>
>>>> So change the WARN_ON() to dev_err() to just print the failure message.
>>>>
>>>> [1] https://github.com/torvalds/linux/blob/master/include/asm-generic/bug.h#L74
>>>> [2] https://github.com/torvalds/linux/blob/master/kernel/events/core.c#L313
>>>>
>>>> Suggested-by: Greg KH <gregkh@linuxfoundation.org>
>>>> [https://lore.kernel.org/lkml/YuOi3i0XHV++z1YI@kroah.com/]
>>>> Signed-off-by: Yicong Yang <yangyicong@hisilicon.com>
>>>> ---
>>>>  drivers/perf/arm-ccn.c                   | 5 +++--
>>>>  drivers/perf/arm_dmc620_pmu.c            | 3 ++-
>>>>  drivers/perf/arm_smmuv3_pmu.c            | 6 ++++--
>>>>  drivers/perf/fsl_imx8_ddr_perf.c         | 3 ++-
>>>>  drivers/perf/hisilicon/hisi_pcie_pmu.c   | 6 ++++--
>>>>  drivers/perf/hisilicon/hisi_uncore_pmu.c | 6 ++++--
>>>>  drivers/perf/qcom_l2_pmu.c               | 8 ++++++--
>>>>  drivers/perf/xgene_pmu.c                 | 6 ++++--
>>>>  8 files changed, 29 insertions(+), 14 deletions(-)
>>>>
>>>> diff --git a/drivers/perf/arm-ccn.c b/drivers/perf/arm-ccn.c
>>>> index 728d13d8e98a..83abd909ba49 100644
>>>> --- a/drivers/perf/arm-ccn.c
>>>> +++ b/drivers/perf/arm-ccn.c
>>>> @@ -1210,8 +1210,9 @@ static int arm_ccn_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
>>>>  		return 0;
>>>>  	perf_pmu_migrate_context(&dt->pmu, cpu, target);
>>>>  	dt->cpu = target;
>>>> -	if (ccn->irq)
>>>> -		WARN_ON(irq_set_affinity(ccn->irq, cpumask_of(dt->cpu)));
>>>> +	if (ccn->irq && irq_set_affinity(ccn->irq, cpumask_of(dt->cpu)))
>>>> +		dev_err(ccn->dev, "Failed to set interrupt affinity\n");
>>>> +
>>>>  	return 0;
>>>>  }
>>>>  
>>>> diff --git a/drivers/perf/arm_dmc620_pmu.c b/drivers/perf/arm_dmc620_pmu.c
>>>> index 280a6ae3e27c..b59d3d9eb779 100644
>>>> --- a/drivers/perf/arm_dmc620_pmu.c
>>>> +++ b/drivers/perf/arm_dmc620_pmu.c
>>>> @@ -621,7 +621,8 @@ static int dmc620_pmu_cpu_teardown(unsigned int cpu,
>>>>  		perf_pmu_migrate_context(&dmc620_pmu->pmu, irq->cpu, target);
>>>>  	mutex_unlock(&dmc620_pmu_irqs_lock);
>>>>  
>>>> -	WARN_ON(irq_set_affinity(irq->irq_num, cpumask_of(target)));
>>>> +	if (irq_set_affinity(irq->irq_num, cpumask_of(target)))
>>>> +		dev_err(dmc620_pmu->pmu.dev, "Failed to set interrupt affinity\n");
>>>>  	irq->cpu = target;
>>>>  
>>>>  	return 0;
>>>> diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c
>>>> index 00d4c45a8017..05e1b3e274d7 100644
>>>> --- a/drivers/perf/arm_smmuv3_pmu.c
>>>> +++ b/drivers/perf/arm_smmuv3_pmu.c
>>>> @@ -646,7 +646,8 @@ static int smmu_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
>>>>  
>>>>  	perf_pmu_migrate_context(&smmu_pmu->pmu, cpu, target);
>>>>  	smmu_pmu->on_cpu = target;
>>>> -	WARN_ON(irq_set_affinity(smmu_pmu->irq, cpumask_of(target)));
>>>> +	if (irq_set_affinity(smmu_pmu->irq, cpumask_of(target)))
>>>> +		dev_err(smmu_pmu->dev, "Failed to set interrupt affinity\n");
>>>>  
>>>>  	return 0;
>>>>  }
>>>> @@ -892,7 +893,8 @@ static int smmu_pmu_probe(struct platform_device *pdev)
>>>>  
>>>>  	/* Pick one CPU to be the preferred one to use */
>>>>  	smmu_pmu->on_cpu = raw_smp_processor_id();
>>>> -	WARN_ON(irq_set_affinity(smmu_pmu->irq, cpumask_of(smmu_pmu->on_cpu)));
>>>> +	if (irq_set_affinity(smmu_pmu->irq, cpumask_of(smmu_pmu->on_cpu)))
>>>> +		dev_err(dev, "Failed to set interrupt affinity\n");
>>>>  
>>>>  	err = cpuhp_state_add_instance_nocalls(cpuhp_state_num,
>>>>  					       &smmu_pmu->node);
>>>> diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c
>>>> index 8e058e08fe81..c44192e2d9db 100644
>>>> --- a/drivers/perf/fsl_imx8_ddr_perf.c
>>>> +++ b/drivers/perf/fsl_imx8_ddr_perf.c
>>>> @@ -671,7 +671,8 @@ static int ddr_perf_offline_cpu(unsigned int cpu, struct hlist_node *node)
>>>>  	perf_pmu_migrate_context(&pmu->pmu, cpu, target);
>>>>  	pmu->cpu = target;
>>>>  
>>>> -	WARN_ON(irq_set_affinity(pmu->irq, cpumask_of(pmu->cpu)));
>>>> +	if (irq_set_affinity(pmu->irq, cpumask_of(pmu->cpu)))
>>>> +		dev_err(pmu->dev, "Failed to set interrupt affinity\n");
>>>>  
>>>>  	return 0;
>>>>  }
>>>> diff --git a/drivers/perf/hisilicon/hisi_pcie_pmu.c b/drivers/perf/hisilicon/hisi_pcie_pmu.c
>>>> index 21771708597d..90aed9e51396 100644
>>>> --- a/drivers/perf/hisilicon/hisi_pcie_pmu.c
>>>> +++ b/drivers/perf/hisilicon/hisi_pcie_pmu.c
>>>> @@ -655,7 +655,8 @@ static int hisi_pcie_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
>>>>  
>>>>  	if (pcie_pmu->on_cpu == -1) {
>>>>  		pcie_pmu->on_cpu = cpu;
>>>> -		WARN_ON(irq_set_affinity(pcie_pmu->irq, cpumask_of(cpu)));
>>>> +		if (irq_set_affinity(pcie_pmu->irq, cpumask_of(cpu)))
>>>> +			pci_err(pcie_pmu->pdev, "Failed to set interrupt affinity\n");
>>>>  	}
>>>>  
>>>>  	return 0;
>>>> @@ -681,7 +682,8 @@ static int hisi_pcie_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
>>>>  	perf_pmu_migrate_context(&pcie_pmu->pmu, cpu, target);
>>>>  	/* Use this CPU for event counting */
>>>>  	pcie_pmu->on_cpu = target;
>>>> -	WARN_ON(irq_set_affinity(pcie_pmu->irq, cpumask_of(target)));
>>>> +	if (irq_set_affinity(pcie_pmu->irq, cpumask_of(target)))
>>>> +		pci_err(pcie_pmu->pdev, "Failed to set interrupt affinity\n");
>>>>  
>>>>  	return 0;
>>>>  }
>>>> diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c
>>>> index fbc8a93d5eac..74397b5ec889 100644
>>>> --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c
>>>> +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c
>>>> @@ -492,7 +492,8 @@ int hisi_uncore_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
>>>>  	hisi_pmu->on_cpu = cpu;
>>>>  
>>>>  	/* Overflow interrupt also should use the same CPU */
>>>> -	WARN_ON(irq_set_affinity(hisi_pmu->irq, cpumask_of(cpu)));
>>>> +	if (irq_set_affinity(hisi_pmu->irq, cpumask_of(cpu)))
>>>> +		dev_err(hisi_pmu->dev, "Failed to set interrupt affinity\n");
>>>>  
>>>>  	return 0;
>>>>  }
>>>> @@ -525,7 +526,8 @@ int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
>>>>  	perf_pmu_migrate_context(&hisi_pmu->pmu, cpu, target);
>>>>  	/* Use this CPU for event counting */
>>>>  	hisi_pmu->on_cpu = target;
>>>> -	WARN_ON(irq_set_affinity(hisi_pmu->irq, cpumask_of(target)));
>>>> +	if (irq_set_affinity(hisi_pmu->irq, cpumask_of(target)))
>>>> +		dev_err(hisi_pmu->dev, "Failed to set interrupt affinity\n");
>>>>  
>>>>  	return 0;
>>>>  }
>>>> diff --git a/drivers/perf/qcom_l2_pmu.c b/drivers/perf/qcom_l2_pmu.c
>>>> index 30234c261b05..c6fe01c7e637 100644
>>>> --- a/drivers/perf/qcom_l2_pmu.c
>>>> +++ b/drivers/perf/qcom_l2_pmu.c
>>>> @@ -793,7 +793,9 @@ static int l2cache_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
>>>>  	cpumask_set_cpu(cpu, &l2cache_pmu->cpumask);
>>>>  	cluster_pmu_reset();
>>>>  
>>>> -	WARN_ON(irq_set_affinity(cluster->irq, cpumask_of(cpu)));
>>>> +	if (irq_set_affinity(cluster->irq, cpumask_of(cpu)))
>>>> +		dev_err(&l2cache_pmu->pdev->dev,
>>>> +			"Failed to set interrupt affinity\n");
>>>>  	enable_irq(cluster->irq);
>>>>  
>>>>  	return 0;
>>>> @@ -831,7 +833,9 @@ static int l2cache_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
>>>>  	perf_pmu_migrate_context(&l2cache_pmu->pmu, cpu, target);
>>>>  	cluster->on_cpu = target;
>>>>  	cpumask_set_cpu(target, &l2cache_pmu->cpumask);
>>>> -	WARN_ON(irq_set_affinity(cluster->irq, cpumask_of(target)));
>>>> +	if (irq_set_affinity(cluster->irq, cpumask_of(target)))
>>>> +		dev_err(&l2cache_pmu->pdev->dev,
>>>> +			"Failed to set interrupt affinity\n");
>>>>  
>>>>  	return 0;
>>>>  }
>>>> diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c
>>>> index 0c32dffc7ede..f31e678fdb69 100644
>>>> --- a/drivers/perf/xgene_pmu.c
>>>> +++ b/drivers/perf/xgene_pmu.c
>>>> @@ -1790,7 +1790,8 @@ static int xgene_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
>>>>  		cpumask_set_cpu(cpu, &xgene_pmu->cpu);
>>>>  
>>>>  	/* Overflow interrupt also should use the same CPU */
>>>> -	WARN_ON(irq_set_affinity(xgene_pmu->irq, &xgene_pmu->cpu));
>>>> +	if (irq_set_affinity(xgene_pmu->irq, &xgene_pmu->cpu))
>>>> +		dev_err(xgene_pmu->dev, "Failed to set interrupt affinity\n");
>>>>  
>>>>  	return 0;
>>>>  }
>>>> @@ -1823,7 +1824,8 @@ static int xgene_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
>>>>  
>>>>  	cpumask_set_cpu(target, &xgene_pmu->cpu);
>>>>  	/* Overflow interrupt also should use the same CPU */
>>>> -	WARN_ON(irq_set_affinity(xgene_pmu->irq, &xgene_pmu->cpu));
>>>> +	if (irq_set_affinity(xgene_pmu->irq, &xgene_pmu->cpu))
>>>> +		dev_err(xgene_pmu->dev, "Failed to set interrupt affinity\n");
>>>>  
>>>>  	return 0;
>>>>  }
>>>> -- 
>>>> 2.24.0
>>>>
>>> .
>>>
diff mbox series

Patch

diff --git a/drivers/perf/arm-ccn.c b/drivers/perf/arm-ccn.c
index 728d13d8e98a..83abd909ba49 100644
--- a/drivers/perf/arm-ccn.c
+++ b/drivers/perf/arm-ccn.c
@@ -1210,8 +1210,9 @@  static int arm_ccn_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
 		return 0;
 	perf_pmu_migrate_context(&dt->pmu, cpu, target);
 	dt->cpu = target;
-	if (ccn->irq)
-		WARN_ON(irq_set_affinity(ccn->irq, cpumask_of(dt->cpu)));
+	if (ccn->irq && irq_set_affinity(ccn->irq, cpumask_of(dt->cpu)))
+		dev_err(ccn->dev, "Failed to set interrupt affinity\n");
+
 	return 0;
 }
 
diff --git a/drivers/perf/arm_dmc620_pmu.c b/drivers/perf/arm_dmc620_pmu.c
index 280a6ae3e27c..b59d3d9eb779 100644
--- a/drivers/perf/arm_dmc620_pmu.c
+++ b/drivers/perf/arm_dmc620_pmu.c
@@ -621,7 +621,8 @@  static int dmc620_pmu_cpu_teardown(unsigned int cpu,
 		perf_pmu_migrate_context(&dmc620_pmu->pmu, irq->cpu, target);
 	mutex_unlock(&dmc620_pmu_irqs_lock);
 
-	WARN_ON(irq_set_affinity(irq->irq_num, cpumask_of(target)));
+	if (irq_set_affinity(irq->irq_num, cpumask_of(target)))
+		dev_err(dmc620_pmu->pmu.dev, "Failed to set interrupt affinity\n");
 	irq->cpu = target;
 
 	return 0;
diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c
index 00d4c45a8017..05e1b3e274d7 100644
--- a/drivers/perf/arm_smmuv3_pmu.c
+++ b/drivers/perf/arm_smmuv3_pmu.c
@@ -646,7 +646,8 @@  static int smmu_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
 
 	perf_pmu_migrate_context(&smmu_pmu->pmu, cpu, target);
 	smmu_pmu->on_cpu = target;
-	WARN_ON(irq_set_affinity(smmu_pmu->irq, cpumask_of(target)));
+	if (irq_set_affinity(smmu_pmu->irq, cpumask_of(target)))
+		dev_err(smmu_pmu->dev, "Failed to set interrupt affinity\n");
 
 	return 0;
 }
@@ -892,7 +893,8 @@  static int smmu_pmu_probe(struct platform_device *pdev)
 
 	/* Pick one CPU to be the preferred one to use */
 	smmu_pmu->on_cpu = raw_smp_processor_id();
-	WARN_ON(irq_set_affinity(smmu_pmu->irq, cpumask_of(smmu_pmu->on_cpu)));
+	if (irq_set_affinity(smmu_pmu->irq, cpumask_of(smmu_pmu->on_cpu)))
+		dev_err(dev, "Failed to set interrupt affinity\n");
 
 	err = cpuhp_state_add_instance_nocalls(cpuhp_state_num,
 					       &smmu_pmu->node);
diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c
index 8e058e08fe81..c44192e2d9db 100644
--- a/drivers/perf/fsl_imx8_ddr_perf.c
+++ b/drivers/perf/fsl_imx8_ddr_perf.c
@@ -671,7 +671,8 @@  static int ddr_perf_offline_cpu(unsigned int cpu, struct hlist_node *node)
 	perf_pmu_migrate_context(&pmu->pmu, cpu, target);
 	pmu->cpu = target;
 
-	WARN_ON(irq_set_affinity(pmu->irq, cpumask_of(pmu->cpu)));
+	if (irq_set_affinity(pmu->irq, cpumask_of(pmu->cpu)))
+		dev_err(pmu->dev, "Failed to set interrupt affinity\n");
 
 	return 0;
 }
diff --git a/drivers/perf/hisilicon/hisi_pcie_pmu.c b/drivers/perf/hisilicon/hisi_pcie_pmu.c
index 21771708597d..90aed9e51396 100644
--- a/drivers/perf/hisilicon/hisi_pcie_pmu.c
+++ b/drivers/perf/hisilicon/hisi_pcie_pmu.c
@@ -655,7 +655,8 @@  static int hisi_pcie_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
 
 	if (pcie_pmu->on_cpu == -1) {
 		pcie_pmu->on_cpu = cpu;
-		WARN_ON(irq_set_affinity(pcie_pmu->irq, cpumask_of(cpu)));
+		if (irq_set_affinity(pcie_pmu->irq, cpumask_of(cpu)))
+			pci_err(pcie_pmu->pdev, "Failed to set interrupt affinity\n");
 	}
 
 	return 0;
@@ -681,7 +682,8 @@  static int hisi_pcie_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
 	perf_pmu_migrate_context(&pcie_pmu->pmu, cpu, target);
 	/* Use this CPU for event counting */
 	pcie_pmu->on_cpu = target;
-	WARN_ON(irq_set_affinity(pcie_pmu->irq, cpumask_of(target)));
+	if (irq_set_affinity(pcie_pmu->irq, cpumask_of(target)))
+		pci_err(pcie_pmu->pdev, "Failed to set interrupt affinity\n");
 
 	return 0;
 }
diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c
index fbc8a93d5eac..74397b5ec889 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c
@@ -492,7 +492,8 @@  int hisi_uncore_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
 	hisi_pmu->on_cpu = cpu;
 
 	/* Overflow interrupt also should use the same CPU */
-	WARN_ON(irq_set_affinity(hisi_pmu->irq, cpumask_of(cpu)));
+	if (irq_set_affinity(hisi_pmu->irq, cpumask_of(cpu)))
+		dev_err(hisi_pmu->dev, "Failed to set interrupt affinity\n");
 
 	return 0;
 }
@@ -525,7 +526,8 @@  int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
 	perf_pmu_migrate_context(&hisi_pmu->pmu, cpu, target);
 	/* Use this CPU for event counting */
 	hisi_pmu->on_cpu = target;
-	WARN_ON(irq_set_affinity(hisi_pmu->irq, cpumask_of(target)));
+	if (irq_set_affinity(hisi_pmu->irq, cpumask_of(target)))
+		dev_err(hisi_pmu->dev, "Failed to set interrupt affinity\n");
 
 	return 0;
 }
diff --git a/drivers/perf/qcom_l2_pmu.c b/drivers/perf/qcom_l2_pmu.c
index 30234c261b05..c6fe01c7e637 100644
--- a/drivers/perf/qcom_l2_pmu.c
+++ b/drivers/perf/qcom_l2_pmu.c
@@ -793,7 +793,9 @@  static int l2cache_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
 	cpumask_set_cpu(cpu, &l2cache_pmu->cpumask);
 	cluster_pmu_reset();
 
-	WARN_ON(irq_set_affinity(cluster->irq, cpumask_of(cpu)));
+	if (irq_set_affinity(cluster->irq, cpumask_of(cpu)))
+		dev_err(&l2cache_pmu->pdev->dev,
+			"Failed to set interrupt affinity\n");
 	enable_irq(cluster->irq);
 
 	return 0;
@@ -831,7 +833,9 @@  static int l2cache_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
 	perf_pmu_migrate_context(&l2cache_pmu->pmu, cpu, target);
 	cluster->on_cpu = target;
 	cpumask_set_cpu(target, &l2cache_pmu->cpumask);
-	WARN_ON(irq_set_affinity(cluster->irq, cpumask_of(target)));
+	if (irq_set_affinity(cluster->irq, cpumask_of(target)))
+		dev_err(&l2cache_pmu->pdev->dev,
+			"Failed to set interrupt affinity\n");
 
 	return 0;
 }
diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c
index 0c32dffc7ede..f31e678fdb69 100644
--- a/drivers/perf/xgene_pmu.c
+++ b/drivers/perf/xgene_pmu.c
@@ -1790,7 +1790,8 @@  static int xgene_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
 		cpumask_set_cpu(cpu, &xgene_pmu->cpu);
 
 	/* Overflow interrupt also should use the same CPU */
-	WARN_ON(irq_set_affinity(xgene_pmu->irq, &xgene_pmu->cpu));
+	if (irq_set_affinity(xgene_pmu->irq, &xgene_pmu->cpu))
+		dev_err(xgene_pmu->dev, "Failed to set interrupt affinity\n");
 
 	return 0;
 }
@@ -1823,7 +1824,8 @@  static int xgene_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
 
 	cpumask_set_cpu(target, &xgene_pmu->cpu);
 	/* Overflow interrupt also should use the same CPU */
-	WARN_ON(irq_set_affinity(xgene_pmu->irq, &xgene_pmu->cpu));
+	if (irq_set_affinity(xgene_pmu->irq, &xgene_pmu->cpu))
+		dev_err(xgene_pmu->dev, "Failed to set interrupt affinity\n");
 
 	return 0;
 }