diff mbox series

[v3,6/7] net: qrtr: Add MHI transport layer

Message ID 20200324061050.14845-7-manivannan.sadhasivam@linaro.org
State Superseded
Headers show
Series Improvements to MHI Bus | expand

Commit Message

Manivannan Sadhasivam March 24, 2020, 6:10 a.m. UTC
MHI is the transport layer used for communicating to the external modems.
Hence, this commit adds MHI transport layer support to QRTR for
transferring the QMI messages over IPC Router.

Cc: "David S. Miller" <davem@davemloft.net>
Cc: netdev@vger.kernel.org
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
---
 net/qrtr/Kconfig  |   7 ++
 net/qrtr/Makefile |   2 +
 net/qrtr/mhi.c    | 208 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 217 insertions(+)
 create mode 100644 net/qrtr/mhi.c

Comments

Bjorn Andersson March 24, 2020, 8:39 p.m. UTC | #1
On Mon 23 Mar 23:10 PDT 2020, Manivannan Sadhasivam wrote:

> MHI is the transport layer used for communicating to the external modems.
> Hence, this commit adds MHI transport layer support to QRTR for
> transferring the QMI messages over IPC Router.
> 
> Cc: "David S. Miller" <davem@davemloft.net>
> Cc: netdev@vger.kernel.org
> Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
> ---
>  net/qrtr/Kconfig  |   7 ++
>  net/qrtr/Makefile |   2 +
>  net/qrtr/mhi.c    | 208 ++++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 217 insertions(+)
>  create mode 100644 net/qrtr/mhi.c
> 
> diff --git a/net/qrtr/Kconfig b/net/qrtr/Kconfig
> index 63f89cc6e82c..8eb876471564 100644
> --- a/net/qrtr/Kconfig
> +++ b/net/qrtr/Kconfig
> @@ -29,4 +29,11 @@ config QRTR_TUN
>  	  implement endpoints of QRTR, for purpose of tunneling data to other
>  	  hosts or testing purposes.
>  
> +config QRTR_MHI
> +	tristate "MHI IPC Router channels"
> +	depends on MHI_BUS
> +	help
> +	  Say Y here to support MHI based ipcrouter channels. MHI is the
> +	  transport used for communicating to external modems.
> +
>  endif # QRTR
> diff --git a/net/qrtr/Makefile b/net/qrtr/Makefile
> index 1c6d6c120fb7..3dc0a7c9d455 100644
> --- a/net/qrtr/Makefile
> +++ b/net/qrtr/Makefile
> @@ -5,3 +5,5 @@ obj-$(CONFIG_QRTR_SMD) += qrtr-smd.o
>  qrtr-smd-y	:= smd.o
>  obj-$(CONFIG_QRTR_TUN) += qrtr-tun.o
>  qrtr-tun-y	:= tun.o
> +obj-$(CONFIG_QRTR_MHI) += qrtr-mhi.o
> +qrtr-mhi-y	:= mhi.o
> diff --git a/net/qrtr/mhi.c b/net/qrtr/mhi.c
> new file mode 100644
> index 000000000000..90af208f34c1
> --- /dev/null
> +++ b/net/qrtr/mhi.c
> @@ -0,0 +1,208 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (c) 2018-2020, The Linux Foundation. All rights reserved.
> + */
> +
> +#include <linux/mhi.h>
> +#include <linux/mod_devicetable.h>
> +#include <linux/module.h>
> +#include <linux/skbuff.h>
> +#include <net/sock.h>
> +
> +#include "qrtr.h"
> +
> +struct qrtr_mhi_dev {
> +	struct qrtr_endpoint ep;
> +	struct mhi_device *mhi_dev;
> +	struct device *dev;
> +	spinlock_t ul_lock;		/* lock to protect ul_pkts */
> +	struct list_head ul_pkts;
> +	atomic_t in_reset;
> +};
> +
> +struct qrtr_mhi_pkt {
> +	struct list_head node;
> +	struct sk_buff *skb;
> +	struct kref refcount;
> +	struct completion done;
> +};
> +
> +static void qrtr_mhi_pkt_release(struct kref *ref)
> +{
> +	struct qrtr_mhi_pkt *pkt = container_of(ref, struct qrtr_mhi_pkt,
> +						refcount);
> +	struct sock *sk = pkt->skb->sk;
> +
> +	consume_skb(pkt->skb);
> +	if (sk)
> +		sock_put(sk);
> +
> +	kfree(pkt);
> +}
> +
> +/* From MHI to QRTR */
> +static void qcom_mhi_qrtr_dl_callback(struct mhi_device *mhi_dev,
> +				      struct mhi_result *mhi_res)
> +{
> +	struct qrtr_mhi_dev *qdev = dev_get_drvdata(&mhi_dev->dev);
> +	int rc;
> +
> +	if (!qdev || mhi_res->transaction_status)
> +		return;
> +
> +	rc = qrtr_endpoint_post(&qdev->ep, mhi_res->buf_addr,
> +				mhi_res->bytes_xferd);
> +	if (rc == -EINVAL)
> +		dev_err(qdev->dev, "invalid ipcrouter packet\n");

Perhaps this should be a debug print, perhaps rate limited. But either
way it's relevant for any transport, so I think you should skip it here
- and potentially move it into qrtr_endpoint_post() in some form.

> +}
> +
> +/* From QRTR to MHI */
> +static void qcom_mhi_qrtr_ul_callback(struct mhi_device *mhi_dev,
> +				      struct mhi_result *mhi_res)
> +{
> +	struct qrtr_mhi_dev *qdev = dev_get_drvdata(&mhi_dev->dev);
> +	struct qrtr_mhi_pkt *pkt;
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&qdev->ul_lock, flags);
> +	pkt = list_first_entry(&qdev->ul_pkts, struct qrtr_mhi_pkt, node);
> +	list_del(&pkt->node);
> +	complete_all(&pkt->done);

You should be able to release the lock after popping the item off the
list, then complete and refcount it.

> +
> +	kref_put(&pkt->refcount, qrtr_mhi_pkt_release);
> +	spin_unlock_irqrestore(&qdev->ul_lock, flags);
> +}
> +
> +static void qcom_mhi_qrtr_status_callback(struct mhi_device *mhi_dev,
> +					  enum mhi_callback mhi_cb)
> +{
> +	struct qrtr_mhi_dev *qdev = dev_get_drvdata(&mhi_dev->dev);
> +	struct qrtr_mhi_pkt *pkt;
> +	unsigned long flags;
> +
> +	if (mhi_cb != MHI_CB_FATAL_ERROR)
> +		return;
> +
> +	atomic_inc(&qdev->in_reset);

You have ul_lock close at hand in both places where you access in_reset,
so I think it would be better to just use that, instead of an atomic.

> +	spin_lock_irqsave(&qdev->ul_lock, flags);
> +	list_for_each_entry(pkt, &qdev->ul_pkts, node)
> +		complete_all(&pkt->done);
> +	spin_unlock_irqrestore(&qdev->ul_lock, flags);
> +}
> +
> +/* Send data over MHI */
> +static int qcom_mhi_qrtr_send(struct qrtr_endpoint *ep, struct sk_buff *skb)
> +{
> +	struct qrtr_mhi_dev *qdev = container_of(ep, struct qrtr_mhi_dev, ep);
> +	struct qrtr_mhi_pkt *pkt;
> +	int rc;
> +
> +	rc = skb_linearize(skb);
> +	if (rc) {
> +		kfree_skb(skb);
> +		return rc;
> +	}
> +
> +	pkt = kzalloc(sizeof(*pkt), GFP_KERNEL);
> +	if (!pkt) {
> +		kfree_skb(skb);
> +		return -ENOMEM;
> +	}
> +
> +	init_completion(&pkt->done);
> +	kref_init(&pkt->refcount);
> +	kref_get(&pkt->refcount);
> +	pkt->skb = skb;
> +
> +	spin_lock_bh(&qdev->ul_lock);
> +	list_add_tail(&pkt->node, &qdev->ul_pkts);
> +	rc = mhi_queue_skb(qdev->mhi_dev, DMA_TO_DEVICE, skb, skb->len,
> +			   MHI_EOT);

Do you want to continue doing this when qdev->in_reset? Wouldn't it be
better to bail early if the remote end is dying?

> +	if (rc) {
> +		list_del(&pkt->node);
> +		/* Reference count needs to be dropped 2 times */
> +		kref_put(&pkt->refcount, qrtr_mhi_pkt_release);
> +		kref_put(&pkt->refcount, qrtr_mhi_pkt_release);
> +		kfree_skb(skb);
> +		spin_unlock_bh(&qdev->ul_lock);
> +		return rc;
> +	}
> +
> +	spin_unlock_bh(&qdev->ul_lock);
> +	if (skb->sk)
> +		sock_hold(skb->sk);
> +
> +	rc = wait_for_completion_interruptible_timeout(&pkt->done, HZ * 5);
> +	if (atomic_read(&qdev->in_reset))
> +		rc = -ECONNRESET;
> +	else if (rc == 0)
> +		rc = -ETIMEDOUT;

Is this recoverable? The message will remain on the list and may be
delivered at a later point(?), but qrtr and the app will learn that the
message was lost - which is presumably considered fatal.

Is it guaranteed that qcom_mhi_qrtr_ul_callback() will happen later and
find the head of the list?


The reason for my question is that without this you have one of two
scenarios;
1) the message is put on the list, decremented in
qcom_mhi_qrtr_ul_callback() then we get back here and decrement it
again.
2) the message is put on the list, then qcom_mhi_qrtr_status_callback()
happens and all messages are released - presumably then
qcom_mhi_qrtr_ul_callback() won't happen.


So if the third case (where we return here and then later
qcom_mhi_qrtr_ul_callback() must find this particular packet at the
front of the queue) can't happen, then you can just skip the entire
refcounting.

Further more, you could carry qrtr_mhi_pkt on the stack.


...or to flip this around, is there a reason to wait here at all? What
would happen if you just return immediately after calling
mhi_queue_skb()? Wouldn't that provide you better throughput?

> +	else if (rc > 0)
> +		rc = 0;
> +
> +	kref_put(&pkt->refcount, qrtr_mhi_pkt_release);
> +
> +	return rc;
> +}
> +
> +static int qcom_mhi_qrtr_probe(struct mhi_device *mhi_dev,
> +			       const struct mhi_device_id *id)
> +{
> +	struct qrtr_mhi_dev *qdev;
> +	u32 net_id;
> +	int rc;
> +
> +	qdev = devm_kzalloc(&mhi_dev->dev, sizeof(*qdev), GFP_KERNEL);
> +	if (!qdev)
> +		return -ENOMEM;
> +
> +	qdev->mhi_dev = mhi_dev;
> +	qdev->dev = &mhi_dev->dev;
> +	qdev->ep.xmit = qcom_mhi_qrtr_send;
> +	atomic_set(&qdev->in_reset, 0);
> +
> +	net_id = QRTR_EP_NID_AUTO;

Just pass QRTR_EP_NID_AUTO directly in the function call below.

Regards,
Bjorn

> +
> +	INIT_LIST_HEAD(&qdev->ul_pkts);
> +	spin_lock_init(&qdev->ul_lock);
> +
> +	dev_set_drvdata(&mhi_dev->dev, qdev);
> +	rc = qrtr_endpoint_register(&qdev->ep, net_id);
> +	if (rc)
> +		return rc;
> +
> +	dev_dbg(qdev->dev, "Qualcomm MHI QRTR driver probed\n");
> +
> +	return 0;
> +}
> +
> +static void qcom_mhi_qrtr_remove(struct mhi_device *mhi_dev)
> +{
> +	struct qrtr_mhi_dev *qdev = dev_get_drvdata(&mhi_dev->dev);
> +
> +	qrtr_endpoint_unregister(&qdev->ep);
> +	dev_set_drvdata(&mhi_dev->dev, NULL);
> +}
> +
> +static const struct mhi_device_id qcom_mhi_qrtr_id_table[] = {
> +	{ .chan = "IPCR" },
> +	{}
> +};
> +MODULE_DEVICE_TABLE(mhi, qcom_mhi_qrtr_id_table);
> +
> +static struct mhi_driver qcom_mhi_qrtr_driver = {
> +	.probe = qcom_mhi_qrtr_probe,
> +	.remove = qcom_mhi_qrtr_remove,
> +	.dl_xfer_cb = qcom_mhi_qrtr_dl_callback,
> +	.ul_xfer_cb = qcom_mhi_qrtr_ul_callback,
> +	.status_cb = qcom_mhi_qrtr_status_callback,
> +	.id_table = qcom_mhi_qrtr_id_table,
> +	.driver = {
> +		.name = "qcom_mhi_qrtr",
> +	},
> +};
> +
> +module_mhi_driver(qcom_mhi_qrtr_driver);
> +
> +MODULE_DESCRIPTION("Qualcomm IPC-Router MHI interface driver");
> +MODULE_LICENSE("GPL v2");
> -- 
> 2.17.1
>
Chris Lew March 26, 2020, 10:54 p.m. UTC | #2
On 3/25/2020 3:37 AM, Manivannan Sadhasivam wrote:
> Hi Bjorn,
> 
> + Chris Lew
> 
> On Tue, Mar 24, 2020 at 01:39:52PM -0700, Bjorn Andersson wrote:
>> On Mon 23 Mar 23:10 PDT 2020, Manivannan Sadhasivam wrote:
>>
>>> MHI is the transport layer used for communicating to the external modems.
>>> Hence, this commit adds MHI transport layer support to QRTR for
>>> transferring the QMI messages over IPC Router.
>>>
>>> Cc: "David S. Miller" <davem@davemloft.net>
>>> Cc: netdev@vger.kernel.org
>>> Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
>>> ---
>>>   net/qrtr/Kconfig  |   7 ++
>>>   net/qrtr/Makefile |   2 +
>>>   net/qrtr/mhi.c    | 208 ++++++++++++++++++++++++++++++++++++++++++++++
>>>   3 files changed, 217 insertions(+)
>>>   create mode 100644 net/qrtr/mhi.c
>>>
>>> diff --git a/net/qrtr/Kconfig b/net/qrtr/Kconfig
>>> index 63f89cc6e82c..8eb876471564 100644
>>> --- a/net/qrtr/Kconfig
>>> +++ b/net/qrtr/Kconfig
>>> @@ -29,4 +29,11 @@ config QRTR_TUN
>>>   	  implement endpoints of QRTR, for purpose of tunneling data to other
>>>   	  hosts or testing purposes.
>>>   
>>> +config QRTR_MHI
>>> +	tristate "MHI IPC Router channels"
>>> +	depends on MHI_BUS
>>> +	help
>>> +	  Say Y here to support MHI based ipcrouter channels. MHI is the
>>> +	  transport used for communicating to external modems.
>>> +
>>>   endif # QRTR
>>> diff --git a/net/qrtr/Makefile b/net/qrtr/Makefile
>>> index 1c6d6c120fb7..3dc0a7c9d455 100644
>>> --- a/net/qrtr/Makefile
>>> +++ b/net/qrtr/Makefile
>>> @@ -5,3 +5,5 @@ obj-$(CONFIG_QRTR_SMD) += qrtr-smd.o
>>>   qrtr-smd-y	:= smd.o
>>>   obj-$(CONFIG_QRTR_TUN) += qrtr-tun.o
>>>   qrtr-tun-y	:= tun.o
>>> +obj-$(CONFIG_QRTR_MHI) += qrtr-mhi.o
>>> +qrtr-mhi-y	:= mhi.o
>>> diff --git a/net/qrtr/mhi.c b/net/qrtr/mhi.c
>>> new file mode 100644
>>> index 000000000000..90af208f34c1
>>> --- /dev/null
>>> +++ b/net/qrtr/mhi.c
>>> @@ -0,0 +1,208 @@
>>> +// SPDX-License-Identifier: GPL-2.0
>>> +/*
>>> + * Copyright (c) 2018-2020, The Linux Foundation. All rights reserved.
>>> + */
>>> +
>>> +#include <linux/mhi.h>
>>> +#include <linux/mod_devicetable.h>
>>> +#include <linux/module.h>
>>> +#include <linux/skbuff.h>
>>> +#include <net/sock.h>
>>> +
>>> +#include "qrtr.h"
>>> +
>>> +struct qrtr_mhi_dev {
>>> +	struct qrtr_endpoint ep;
>>> +	struct mhi_device *mhi_dev;
>>> +	struct device *dev;
>>> +	spinlock_t ul_lock;		/* lock to protect ul_pkts */
>>> +	struct list_head ul_pkts;
>>> +	atomic_t in_reset;
>>> +};
>>> +
>>> +struct qrtr_mhi_pkt {
>>> +	struct list_head node;
>>> +	struct sk_buff *skb;
>>> +	struct kref refcount;
>>> +	struct completion done;
>>> +};
>>> +
>>> +static void qrtr_mhi_pkt_release(struct kref *ref)
>>> +{
>>> +	struct qrtr_mhi_pkt *pkt = container_of(ref, struct qrtr_mhi_pkt,
>>> +						refcount);
>>> +	struct sock *sk = pkt->skb->sk;
>>> +
>>> +	consume_skb(pkt->skb);
>>> +	if (sk)
>>> +		sock_put(sk);
>>> +
>>> +	kfree(pkt);
>>> +}
>>> +
>>> +/* From MHI to QRTR */
>>> +static void qcom_mhi_qrtr_dl_callback(struct mhi_device *mhi_dev,
>>> +				      struct mhi_result *mhi_res)
>>> +{
>>> +	struct qrtr_mhi_dev *qdev = dev_get_drvdata(&mhi_dev->dev);
>>> +	int rc;
>>> +
>>> +	if (!qdev || mhi_res->transaction_status)
>>> +		return;
>>> +
>>> +	rc = qrtr_endpoint_post(&qdev->ep, mhi_res->buf_addr,
>>> +				mhi_res->bytes_xferd);
>>> +	if (rc == -EINVAL)
>>> +		dev_err(qdev->dev, "invalid ipcrouter packet\n");
>>
>> Perhaps this should be a debug print, perhaps rate limited. But either
>> way it's relevant for any transport, so I think you should skip it here
>> - and potentially move it into qrtr_endpoint_post() in some form.
>>
> 
> I agree with moving this to qrtr_endpoint_post() but I don't think it is a
> good idea to make it as a debug print. It is an error log and should stay
> as it is. Only in this MHI transport driver, the return value is not getting
> used but in others it is.
> 

This print has been useful for catching transport errors. Only issue I 
see with moving this to qrtr_endpoint_post() is that there are multiple 
points of failure in qrtr_endpoint_post() and logging it here makes it 
easier.

>>> +}
>>> +
>>> +/* From QRTR to MHI */
>>> +static void qcom_mhi_qrtr_ul_callback(struct mhi_device *mhi_dev,
>>> +				      struct mhi_result *mhi_res)
>>> +{
>>> +	struct qrtr_mhi_dev *qdev = dev_get_drvdata(&mhi_dev->dev);
>>> +	struct qrtr_mhi_pkt *pkt;
>>> +	unsigned long flags;
>>> +
>>> +	spin_lock_irqsave(&qdev->ul_lock, flags);
>>> +	pkt = list_first_entry(&qdev->ul_pkts, struct qrtr_mhi_pkt, node);
>>> +	list_del(&pkt->node);
>>> +	complete_all(&pkt->done);
>>
>> You should be able to release the lock after popping the item off the
>> list, then complete and refcount it.
>>
> 
> Okay.
> 
>>> +
>>> +	kref_put(&pkt->refcount, qrtr_mhi_pkt_release);
>>> +	spin_unlock_irqrestore(&qdev->ul_lock, flags);
>>> +}
>>> +
>>> +static void qcom_mhi_qrtr_status_callback(struct mhi_device *mhi_dev,
>>> +					  enum mhi_callback mhi_cb)
>>> +{
>>> +	struct qrtr_mhi_dev *qdev = dev_get_drvdata(&mhi_dev->dev);
>>> +	struct qrtr_mhi_pkt *pkt;
>>> +	unsigned long flags;
>>> +
>>> +	if (mhi_cb != MHI_CB_FATAL_ERROR)
>>> +		return;
>>> +
>>> +	atomic_inc(&qdev->in_reset);
>>
>> You have ul_lock close at hand in both places where you access in_reset,
>> so I think it would be better to just use that, instead of an atomic.
>>
> 
> Okay.
> 

Does this version of MHI give the MHI_CB_FATAL_ERROR as a status 
callback? We added this as an early notifier workaround. The in_reset 
code probably isn't required with the current feature set.

>>> +	spin_lock_irqsave(&qdev->ul_lock, flags);
>>> +	list_for_each_entry(pkt, &qdev->ul_pkts, node)
>>> +		complete_all(&pkt->done);
> 
> Chris, shouldn't we require list_del(&pkt->node) here?
>

No this isn't a full cleanup, with the "early notifier" we just 
unblocked any threads waiting for the ul_callback. Those threads will 
wake, check in_reset, return an error back to the caller. Any list 
cleanup will be done in the ul_callbacks that the mhi bus will do for 
each queued packet right before device remove.

Again to simplify the code, we can probable remove the in_reset handling 
since it's not required with the current feature set.

>>> +	spin_unlock_irqrestore(&qdev->ul_lock, flags);
>>> +}
>>> +
>>> +/* Send data over MHI */
>>> +static int qcom_mhi_qrtr_send(struct qrtr_endpoint *ep, struct sk_buff *skb)
>>> +{
>>> +	struct qrtr_mhi_dev *qdev = container_of(ep, struct qrtr_mhi_dev, ep);
>>> +	struct qrtr_mhi_pkt *pkt;
>>> +	int rc;
>>> +
>>> +	rc = skb_linearize(skb);
>>> +	if (rc) {
>>> +		kfree_skb(skb);
>>> +		return rc;
>>> +	}
>>> +
>>> +	pkt = kzalloc(sizeof(*pkt), GFP_KERNEL);
>>> +	if (!pkt) {
>>> +		kfree_skb(skb);
>>> +		return -ENOMEM;
>>> +	}
>>> +
>>> +	init_completion(&pkt->done);
>>> +	kref_init(&pkt->refcount);
>>> +	kref_get(&pkt->refcount);
>>> +	pkt->skb = skb;
>>> +
>>> +	spin_lock_bh(&qdev->ul_lock);
>>> +	list_add_tail(&pkt->node, &qdev->ul_pkts);
>>> +	rc = mhi_queue_skb(qdev->mhi_dev, DMA_TO_DEVICE, skb, skb->len,
>>> +			   MHI_EOT);
>>
>> Do you want to continue doing this when qdev->in_reset? Wouldn't it be
>> better to bail early if the remote end is dying?
>>
> 
> Now I'm thinking why we are not decrementing in_reset anywhere! Incase of
> SYS_ERR, the status_cb will get processed and in_reset will be set but
> it will stay so even when after MHI gets reset.
> 
> Chris, can you please clarify?
> 

Early notify of reset, if we do get SYS_ERR, we expect remove to be 
called *soon*.

>>> +	if (rc) {
>>> +		list_del(&pkt->node);
>>> +		/* Reference count needs to be dropped 2 times */
>>> +		kref_put(&pkt->refcount, qrtr_mhi_pkt_release);
>>> +		kref_put(&pkt->refcount, qrtr_mhi_pkt_release);
>>> +		kfree_skb(skb);
>>> +		spin_unlock_bh(&qdev->ul_lock);
>>> +		return rc;
>>> +	}
>>> +
>>> +	spin_unlock_bh(&qdev->ul_lock);
>>> +	if (skb->sk)
>>> +		sock_hold(skb->sk);
>>> +
>>> +	rc = wait_for_completion_interruptible_timeout(&pkt->done, HZ * 5);
>>> +	if (atomic_read(&qdev->in_reset))
>>> +		rc = -ECONNRESET;
>>> +	else if (rc == 0)
>>> +		rc = -ETIMEDOUT;
>>
>> Is this recoverable? The message will remain on the list and may be
>> delivered at a later point(?), but qrtr and the app will learn that the
>> message was lost - which is presumably considered fatal.
>>
>> Is it guaranteed that qcom_mhi_qrtr_ul_callback() will happen later and
>> find the head of the list?
>>
> 
> There are 2 scenarios:
> 
> 1. If the completion interrupt happens after timeout, ul_callback()
> will be called. But it will only try to fetch the current head of ul_pkts.
> In most cases, we can hope that the completion interrupt will happen before
> next queue_skb().
> 
> 2. If we don't get completion interrupt, timeout will happen and at the final
> stage (during mhi_driver_remove()), MHI stack will go over the pending TREs
> for all channels in queue and call ul_callback() with -ENOTCONN. But in the
> callback, we don't have any idea of the pkt which was not successfully
> transferred to the device and currently just fetching first entry.
> 
> Now I'm seeing some issue here which I missed earlier. If the completion
> interrupt never happens then the corresponding pkt will never get freed and
> therefore we have a leak. Eventhough the ul_callback() will get called during
> mhi_driver_remove() for pending TREs, we don't exactly fetch the right pkt.
> 
> Chris, our assumption of the ul_callback() gets called irrespective of
> transfer status is wrong. I think this code needs a bit of rework.
> 
>>
>> The reason for my question is that without this you have one of two
>> scenarios;
>> 1) the message is put on the list, decremented in
>> qcom_mhi_qrtr_ul_callback() then we get back here and decrement it
>> again.
>> 2) the message is put on the list, then qcom_mhi_qrtr_status_callback()
>> happens and all messages are released - presumably then
>> qcom_mhi_qrtr_ul_callback() won't happen.
>>
>>
>> So if the third case (where we return here and then later
>> qcom_mhi_qrtr_ul_callback() must find this particular packet at the
>> front of the queue) can't happen, then you can just skip the entire
>> refcounting.
>>
>> Further more, you could carry qrtr_mhi_pkt on the stack.
>>
>>
>> ...or to flip this around, is there a reason to wait here at all? What
>> would happen if you just return immediately after calling
>> mhi_queue_skb()? Wouldn't that provide you better throughput?
>>
> 
> Chris would be best person to answer this question.
> 

Yea - after working with MHI for a bit now, I think we can definitely 
return after queueing the skb.

The IPC-Router Protocol, wasn't really designed with the ability to 
recover from dropped packets since SMD/GLINK are "Lossless". I figured 
it would be better for the client to definitely know that the packet 
reached the other side which is why it blocks until ul callback.

I thought having the client get an error on timeout and resend the 
packet would be better than silently dropping it. In practice, we've 
really only seen the timeout or ul_callback errors on unrecoverable 
errors so I think the timeout handling can definitely be redone.

Thanks,
Chris

>>> +	else if (rc > 0)
>>> +		rc = 0;
>>> +
>>> +	kref_put(&pkt->refcount, qrtr_mhi_pkt_release);
>>> +
>>> +	return rc;
>>> +}
>>> +
>>> +static int qcom_mhi_qrtr_probe(struct mhi_device *mhi_dev,
>>> +			       const struct mhi_device_id *id)
>>> +{
>>> +	struct qrtr_mhi_dev *qdev;
>>> +	u32 net_id;
>>> +	int rc;
>>> +
>>> +	qdev = devm_kzalloc(&mhi_dev->dev, sizeof(*qdev), GFP_KERNEL);
>>> +	if (!qdev)
>>> +		return -ENOMEM;
>>> +
>>> +	qdev->mhi_dev = mhi_dev;
>>> +	qdev->dev = &mhi_dev->dev;
>>> +	qdev->ep.xmit = qcom_mhi_qrtr_send;
>>> +	atomic_set(&qdev->in_reset, 0);
>>> +
>>> +	net_id = QRTR_EP_NID_AUTO;
>>
>> Just pass QRTR_EP_NID_AUTO directly in the function call below.
>>
> 
> Okay.
> 
> Thanks,
> Mani
> 
>> Regards,
>> Bjorn
>>
>>> +
>>> +	INIT_LIST_HEAD(&qdev->ul_pkts);
>>> +	spin_lock_init(&qdev->ul_lock);
>>> +
>>> +	dev_set_drvdata(&mhi_dev->dev, qdev);
>>> +	rc = qrtr_endpoint_register(&qdev->ep, net_id);
>>> +	if (rc)
>>> +		return rc;
>>> +
>>> +	dev_dbg(qdev->dev, "Qualcomm MHI QRTR driver probed\n");
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +static void qcom_mhi_qrtr_remove(struct mhi_device *mhi_dev)
>>> +{
>>> +	struct qrtr_mhi_dev *qdev = dev_get_drvdata(&mhi_dev->dev);
>>> +
>>> +	qrtr_endpoint_unregister(&qdev->ep);
>>> +	dev_set_drvdata(&mhi_dev->dev, NULL);
>>> +}
>>> +
>>> +static const struct mhi_device_id qcom_mhi_qrtr_id_table[] = {
>>> +	{ .chan = "IPCR" },
>>> +	{}
>>> +};
>>> +MODULE_DEVICE_TABLE(mhi, qcom_mhi_qrtr_id_table);
>>> +
>>> +static struct mhi_driver qcom_mhi_qrtr_driver = {
>>> +	.probe = qcom_mhi_qrtr_probe,
>>> +	.remove = qcom_mhi_qrtr_remove,
>>> +	.dl_xfer_cb = qcom_mhi_qrtr_dl_callback,
>>> +	.ul_xfer_cb = qcom_mhi_qrtr_ul_callback,
>>> +	.status_cb = qcom_mhi_qrtr_status_callback,
>>> +	.id_table = qcom_mhi_qrtr_id_table,
>>> +	.driver = {
>>> +		.name = "qcom_mhi_qrtr",
>>> +	},
>>> +};
>>> +
>>> +module_mhi_driver(qcom_mhi_qrtr_driver);
>>> +
>>> +MODULE_DESCRIPTION("Qualcomm IPC-Router MHI interface driver");
>>> +MODULE_LICENSE("GPL v2");
>>> -- 
>>> 2.17.1
>>>
Manivannan Sadhasivam March 30, 2020, 9:49 a.m. UTC | #3
Hi Chris,

On Thu, Mar 26, 2020 at 03:54:42PM -0700, Chris Lew wrote:
> 
> 
> On 3/25/2020 3:37 AM, Manivannan Sadhasivam wrote:
> > Hi Bjorn,
> > 
> > + Chris Lew
> > 
> > On Tue, Mar 24, 2020 at 01:39:52PM -0700, Bjorn Andersson wrote:
> > > On Mon 23 Mar 23:10 PDT 2020, Manivannan Sadhasivam wrote:
> > > 
> > > > MHI is the transport layer used for communicating to the external modems.
> > > > Hence, this commit adds MHI transport layer support to QRTR for
> > > > transferring the QMI messages over IPC Router.
> > > > 
> > > > Cc: "David S. Miller" <davem@davemloft.net>
> > > > Cc: netdev@vger.kernel.org
> > > > Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
> > > > ---
> > > >   net/qrtr/Kconfig  |   7 ++
> > > >   net/qrtr/Makefile |   2 +
> > > >   net/qrtr/mhi.c    | 208 ++++++++++++++++++++++++++++++++++++++++++++++
> > > >   3 files changed, 217 insertions(+)
> > > >   create mode 100644 net/qrtr/mhi.c
> > > > 
> > > > diff --git a/net/qrtr/Kconfig b/net/qrtr/Kconfig
> > > > index 63f89cc6e82c..8eb876471564 100644
> > > > --- a/net/qrtr/Kconfig
> > > > +++ b/net/qrtr/Kconfig
> > > > @@ -29,4 +29,11 @@ config QRTR_TUN
> > > >   	  implement endpoints of QRTR, for purpose of tunneling data to other
> > > >   	  hosts or testing purposes.
> > > > +config QRTR_MHI
> > > > +	tristate "MHI IPC Router channels"
> > > > +	depends on MHI_BUS
> > > > +	help
> > > > +	  Say Y here to support MHI based ipcrouter channels. MHI is the
> > > > +	  transport used for communicating to external modems.
> > > > +
> > > >   endif # QRTR
> > > > diff --git a/net/qrtr/Makefile b/net/qrtr/Makefile
> > > > index 1c6d6c120fb7..3dc0a7c9d455 100644
> > > > --- a/net/qrtr/Makefile
> > > > +++ b/net/qrtr/Makefile
> > > > @@ -5,3 +5,5 @@ obj-$(CONFIG_QRTR_SMD) += qrtr-smd.o
> > > >   qrtr-smd-y	:= smd.o
> > > >   obj-$(CONFIG_QRTR_TUN) += qrtr-tun.o
> > > >   qrtr-tun-y	:= tun.o
> > > > +obj-$(CONFIG_QRTR_MHI) += qrtr-mhi.o
> > > > +qrtr-mhi-y	:= mhi.o
> > > > diff --git a/net/qrtr/mhi.c b/net/qrtr/mhi.c
> > > > new file mode 100644
> > > > index 000000000000..90af208f34c1
> > > > --- /dev/null
> > > > +++ b/net/qrtr/mhi.c
> > > > @@ -0,0 +1,208 @@
> > > > +// SPDX-License-Identifier: GPL-2.0
> > > > +/*
> > > > + * Copyright (c) 2018-2020, The Linux Foundation. All rights reserved.
> > > > + */
> > > > +
> > > > +#include <linux/mhi.h>
> > > > +#include <linux/mod_devicetable.h>
> > > > +#include <linux/module.h>
> > > > +#include <linux/skbuff.h>
> > > > +#include <net/sock.h>
> > > > +
> > > > +#include "qrtr.h"
> > > > +
> > > > +struct qrtr_mhi_dev {
> > > > +	struct qrtr_endpoint ep;
> > > > +	struct mhi_device *mhi_dev;
> > > > +	struct device *dev;
> > > > +	spinlock_t ul_lock;		/* lock to protect ul_pkts */
> > > > +	struct list_head ul_pkts;
> > > > +	atomic_t in_reset;
> > > > +};
> > > > +
> > > > +struct qrtr_mhi_pkt {
> > > > +	struct list_head node;
> > > > +	struct sk_buff *skb;
> > > > +	struct kref refcount;
> > > > +	struct completion done;
> > > > +};
> > > > +
> > > > +static void qrtr_mhi_pkt_release(struct kref *ref)
> > > > +{
> > > > +	struct qrtr_mhi_pkt *pkt = container_of(ref, struct qrtr_mhi_pkt,
> > > > +						refcount);
> > > > +	struct sock *sk = pkt->skb->sk;
> > > > +
> > > > +	consume_skb(pkt->skb);
> > > > +	if (sk)
> > > > +		sock_put(sk);
> > > > +
> > > > +	kfree(pkt);
> > > > +}
> > > > +
> > > > +/* From MHI to QRTR */
> > > > +static void qcom_mhi_qrtr_dl_callback(struct mhi_device *mhi_dev,
> > > > +				      struct mhi_result *mhi_res)
> > > > +{
> > > > +	struct qrtr_mhi_dev *qdev = dev_get_drvdata(&mhi_dev->dev);
> > > > +	int rc;
> > > > +
> > > > +	if (!qdev || mhi_res->transaction_status)
> > > > +		return;
> > > > +
> > > > +	rc = qrtr_endpoint_post(&qdev->ep, mhi_res->buf_addr,
> > > > +				mhi_res->bytes_xferd);
> > > > +	if (rc == -EINVAL)
> > > > +		dev_err(qdev->dev, "invalid ipcrouter packet\n");
> > > 
> > > Perhaps this should be a debug print, perhaps rate limited. But either
> > > way it's relevant for any transport, so I think you should skip it here
> > > - and potentially move it into qrtr_endpoint_post() in some form.
> > > 
> > 
> > I agree with moving this to qrtr_endpoint_post() but I don't think it is a
> > good idea to make it as a debug print. It is an error log and should stay
> > as it is. Only in this MHI transport driver, the return value is not getting
> > used but in others it is.
> > 
> 
> This print has been useful for catching transport errors. Only issue I see
> with moving this to qrtr_endpoint_post() is that there are multiple points
> of failure in qrtr_endpoint_post() and logging it here makes it easier.
> 
> > > > +}
> > > > +
> > > > +/* From QRTR to MHI */
> > > > +static void qcom_mhi_qrtr_ul_callback(struct mhi_device *mhi_dev,
> > > > +				      struct mhi_result *mhi_res)
> > > > +{
> > > > +	struct qrtr_mhi_dev *qdev = dev_get_drvdata(&mhi_dev->dev);
> > > > +	struct qrtr_mhi_pkt *pkt;
> > > > +	unsigned long flags;
> > > > +
> > > > +	spin_lock_irqsave(&qdev->ul_lock, flags);
> > > > +	pkt = list_first_entry(&qdev->ul_pkts, struct qrtr_mhi_pkt, node);
> > > > +	list_del(&pkt->node);
> > > > +	complete_all(&pkt->done);
> > > 
> > > You should be able to release the lock after popping the item off the
> > > list, then complete and refcount it.
> > > 
> > 
> > Okay.
> > 
> > > > +
> > > > +	kref_put(&pkt->refcount, qrtr_mhi_pkt_release);
> > > > +	spin_unlock_irqrestore(&qdev->ul_lock, flags);
> > > > +}
> > > > +
> > > > +static void qcom_mhi_qrtr_status_callback(struct mhi_device *mhi_dev,
> > > > +					  enum mhi_callback mhi_cb)
> > > > +{
> > > > +	struct qrtr_mhi_dev *qdev = dev_get_drvdata(&mhi_dev->dev);
> > > > +	struct qrtr_mhi_pkt *pkt;
> > > > +	unsigned long flags;
> > > > +
> > > > +	if (mhi_cb != MHI_CB_FATAL_ERROR)
> > > > +		return;
> > > > +
> > > > +	atomic_inc(&qdev->in_reset);
> > > 
> > > You have ul_lock close at hand in both places where you access in_reset,
> > > so I think it would be better to just use that, instead of an atomic.
> > > 
> > 
> > Okay.
> > 
> 
> Does this version of MHI give the MHI_CB_FATAL_ERROR as a status callback?
> We added this as an early notifier workaround. The in_reset code probably
> isn't required with the current feature set.
> 

Err, NO. I got confused with mhi_cntrl->status_cb which gives MHI_CB_FATAL_ERROR
to the controller drivers. But there is no MHI_CB_FATAL_ERROR provided for the
client drivers like this one.

Even in downstream (msm-4.14), I can't find where the early notifier is getting
called.

> > > > +	spin_lock_irqsave(&qdev->ul_lock, flags);
> > > > +	list_for_each_entry(pkt, &qdev->ul_pkts, node)
> > > > +		complete_all(&pkt->done);
> > 
> > Chris, shouldn't we require list_del(&pkt->node) here?
> > 
> 
> No this isn't a full cleanup, with the "early notifier" we just unblocked
> any threads waiting for the ul_callback. Those threads will wake, check
> in_reset, return an error back to the caller. Any list cleanup will be done
> in the ul_callbacks that the mhi bus will do for each queued packet right
> before device remove.
> 
> Again to simplify the code, we can probable remove the in_reset handling
> since it's not required with the current feature set.
> 

So since we are not getting status_cb for fatal errors, I think we should just
remove status_cb, in_reset and timeout code.

> > > > +	spin_unlock_irqrestore(&qdev->ul_lock, flags);
> > > > +}
> > > > +
> > > > +/* Send data over MHI */
> > > > +static int qcom_mhi_qrtr_send(struct qrtr_endpoint *ep, struct sk_buff *skb)
> > > > +{
> > > > +	struct qrtr_mhi_dev *qdev = container_of(ep, struct qrtr_mhi_dev, ep);
> > > > +	struct qrtr_mhi_pkt *pkt;
> > > > +	int rc;
> > > > +
> > > > +	rc = skb_linearize(skb);
> > > > +	if (rc) {
> > > > +		kfree_skb(skb);
> > > > +		return rc;
> > > > +	}
> > > > +
> > > > +	pkt = kzalloc(sizeof(*pkt), GFP_KERNEL);
> > > > +	if (!pkt) {
> > > > +		kfree_skb(skb);
> > > > +		return -ENOMEM;
> > > > +	}
> > > > +
> > > > +	init_completion(&pkt->done);
> > > > +	kref_init(&pkt->refcount);
> > > > +	kref_get(&pkt->refcount);
> > > > +	pkt->skb = skb;
> > > > +
> > > > +	spin_lock_bh(&qdev->ul_lock);
> > > > +	list_add_tail(&pkt->node, &qdev->ul_pkts);
> > > > +	rc = mhi_queue_skb(qdev->mhi_dev, DMA_TO_DEVICE, skb, skb->len,
> > > > +			   MHI_EOT);
> > > 
> > > Do you want to continue doing this when qdev->in_reset? Wouldn't it be
> > > better to bail early if the remote end is dying?
> > > 
> > 
> > Now I'm thinking why we are not decrementing in_reset anywhere! Incase of
> > SYS_ERR, the status_cb will get processed and in_reset will be set but
> > it will stay so even when after MHI gets reset.
> > 
> > Chris, can you please clarify?
> > 
> 
> Early notify of reset, if we do get SYS_ERR, we expect remove to be called
> *soon*.
> 
> > > > +	if (rc) {
> > > > +		list_del(&pkt->node);
> > > > +		/* Reference count needs to be dropped 2 times */
> > > > +		kref_put(&pkt->refcount, qrtr_mhi_pkt_release);
> > > > +		kref_put(&pkt->refcount, qrtr_mhi_pkt_release);
> > > > +		kfree_skb(skb);
> > > > +		spin_unlock_bh(&qdev->ul_lock);
> > > > +		return rc;
> > > > +	}
> > > > +
> > > > +	spin_unlock_bh(&qdev->ul_lock);
> > > > +	if (skb->sk)
> > > > +		sock_hold(skb->sk);
> > > > +
> > > > +	rc = wait_for_completion_interruptible_timeout(&pkt->done, HZ * 5);
> > > > +	if (atomic_read(&qdev->in_reset))
> > > > +		rc = -ECONNRESET;
> > > > +	else if (rc == 0)
> > > > +		rc = -ETIMEDOUT;
> > > 
> > > Is this recoverable? The message will remain on the list and may be
> > > delivered at a later point(?), but qrtr and the app will learn that the
> > > message was lost - which is presumably considered fatal.
> > > 
> > > Is it guaranteed that qcom_mhi_qrtr_ul_callback() will happen later and
> > > find the head of the list?
> > > 
> > 
> > There are 2 scenarios:
> > 
> > 1. If the completion interrupt happens after timeout, ul_callback()
> > will be called. But it will only try to fetch the current head of ul_pkts.
> > In most cases, we can hope that the completion interrupt will happen before
> > next queue_skb().
> > 
> > 2. If we don't get completion interrupt, timeout will happen and at the final
> > stage (during mhi_driver_remove()), MHI stack will go over the pending TREs
> > for all channels in queue and call ul_callback() with -ENOTCONN. But in the
> > callback, we don't have any idea of the pkt which was not successfully
> > transferred to the device and currently just fetching first entry.
> > 
> > Now I'm seeing some issue here which I missed earlier. If the completion
> > interrupt never happens then the corresponding pkt will never get freed and
> > therefore we have a leak. Eventhough the ul_callback() will get called during
> > mhi_driver_remove() for pending TREs, we don't exactly fetch the right pkt.
> > 
> > Chris, our assumption of the ul_callback() gets called irrespective of
> > transfer status is wrong. I think this code needs a bit of rework.
> > 

All fine here. I overlooked the list_del() part where the sent packet
gets popped out of list and when the MHI stack calls ul_callback() during
mhi_driver_remove() the leftover packets will get handled correctly.

> > > 
> > > The reason for my question is that without this you have one of two
> > > scenarios;
> > > 1) the message is put on the list, decremented in
> > > qcom_mhi_qrtr_ul_callback() then we get back here and decrement it
> > > again.
> > > 2) the message is put on the list, then qcom_mhi_qrtr_status_callback()
> > > happens and all messages are released - presumably then
> > > qcom_mhi_qrtr_ul_callback() won't happen.
> > > 
> > > 
> > > So if the third case (where we return here and then later
> > > qcom_mhi_qrtr_ul_callback() must find this particular packet at the
> > > front of the queue) can't happen, then you can just skip the entire
> > > refcounting.
> > > 
> > > Further more, you could carry qrtr_mhi_pkt on the stack.
> > > 
> > > 
> > > ...or to flip this around, is there a reason to wait here at all? What
> > > would happen if you just return immediately after calling
> > > mhi_queue_skb()? Wouldn't that provide you better throughput?
> > > 
> > 
> > Chris would be best person to answer this question.
> > 
> 
> Yea - after working with MHI for a bit now, I think we can definitely return
> after queueing the skb.
> 
> The IPC-Router Protocol, wasn't really designed with the ability to recover
> from dropped packets since SMD/GLINK are "Lossless". I figured it would be
> better for the client to definitely know that the packet reached the other
> side which is why it blocks until ul callback.
> 

Agree.

> I thought having the client get an error on timeout and resend the packet
> would be better than silently dropping it. In practice, we've really only
> seen the timeout or ul_callback errors on unrecoverable errors so I think
> the timeout handling can definitely be redone.
> 

You mean we can just remove the timeout handling part and return after
kref_put()?

Thanks,
Mani

> Thanks,
> Chris
> 
> > > > +	else if (rc > 0)
> > > > +		rc = 0;
> > > > +
> > > > +	kref_put(&pkt->refcount, qrtr_mhi_pkt_release);
> > > > +
> > > > +	return rc;
> > > > +}
> > > > +
> > > > +static int qcom_mhi_qrtr_probe(struct mhi_device *mhi_dev,
> > > > +			       const struct mhi_device_id *id)
> > > > +{
> > > > +	struct qrtr_mhi_dev *qdev;
> > > > +	u32 net_id;
> > > > +	int rc;
> > > > +
> > > > +	qdev = devm_kzalloc(&mhi_dev->dev, sizeof(*qdev), GFP_KERNEL);
> > > > +	if (!qdev)
> > > > +		return -ENOMEM;
> > > > +
> > > > +	qdev->mhi_dev = mhi_dev;
> > > > +	qdev->dev = &mhi_dev->dev;
> > > > +	qdev->ep.xmit = qcom_mhi_qrtr_send;
> > > > +	atomic_set(&qdev->in_reset, 0);
> > > > +
> > > > +	net_id = QRTR_EP_NID_AUTO;
> > > 
> > > Just pass QRTR_EP_NID_AUTO directly in the function call below.
> > > 
> > 
> > Okay.
> > 
> > Thanks,
> > Mani
> > 
> > > Regards,
> > > Bjorn
> > > 
> > > > +
> > > > +	INIT_LIST_HEAD(&qdev->ul_pkts);
> > > > +	spin_lock_init(&qdev->ul_lock);
> > > > +
> > > > +	dev_set_drvdata(&mhi_dev->dev, qdev);
> > > > +	rc = qrtr_endpoint_register(&qdev->ep, net_id);
> > > > +	if (rc)
> > > > +		return rc;
> > > > +
> > > > +	dev_dbg(qdev->dev, "Qualcomm MHI QRTR driver probed\n");
> > > > +
> > > > +	return 0;
> > > > +}
> > > > +
> > > > +static void qcom_mhi_qrtr_remove(struct mhi_device *mhi_dev)
> > > > +{
> > > > +	struct qrtr_mhi_dev *qdev = dev_get_drvdata(&mhi_dev->dev);
> > > > +
> > > > +	qrtr_endpoint_unregister(&qdev->ep);
> > > > +	dev_set_drvdata(&mhi_dev->dev, NULL);
> > > > +}
> > > > +
> > > > +static const struct mhi_device_id qcom_mhi_qrtr_id_table[] = {
> > > > +	{ .chan = "IPCR" },
> > > > +	{}
> > > > +};
> > > > +MODULE_DEVICE_TABLE(mhi, qcom_mhi_qrtr_id_table);
> > > > +
> > > > +static struct mhi_driver qcom_mhi_qrtr_driver = {
> > > > +	.probe = qcom_mhi_qrtr_probe,
> > > > +	.remove = qcom_mhi_qrtr_remove,
> > > > +	.dl_xfer_cb = qcom_mhi_qrtr_dl_callback,
> > > > +	.ul_xfer_cb = qcom_mhi_qrtr_ul_callback,
> > > > +	.status_cb = qcom_mhi_qrtr_status_callback,
> > > > +	.id_table = qcom_mhi_qrtr_id_table,
> > > > +	.driver = {
> > > > +		.name = "qcom_mhi_qrtr",
> > > > +	},
> > > > +};
> > > > +
> > > > +module_mhi_driver(qcom_mhi_qrtr_driver);
> > > > +
> > > > +MODULE_DESCRIPTION("Qualcomm IPC-Router MHI interface driver");
> > > > +MODULE_LICENSE("GPL v2");
> > > > -- 
> > > > 2.17.1
> > > > 
> 
> -- 
> Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux
> Foundation Collaborative Project
Bjorn Andersson March 31, 2020, 5:40 p.m. UTC | #4
On Tue 31 Mar 04:23 PDT 2020, Manivannan Sadhasivam wrote:

> Hi Bjorn,
> 
> On Mon, Mar 30, 2020 at 03:19:32PM -0700, Bjorn Andersson wrote:
> > On Mon 30 Mar 02:49 PDT 2020, Manivannan Sadhasivam wrote:
> > 
> > > Hi Chris,
> > > 
> > > On Thu, Mar 26, 2020 at 03:54:42PM -0700, Chris Lew wrote:
> > > > 
> > > > 
> > > > On 3/25/2020 3:37 AM, Manivannan Sadhasivam wrote:
> > > > > Hi Bjorn,
> > > > > 
> > > > > + Chris Lew
> > > > > 
> > > > > On Tue, Mar 24, 2020 at 01:39:52PM -0700, Bjorn Andersson wrote:
> > > > > > On Mon 23 Mar 23:10 PDT 2020, Manivannan Sadhasivam wrote:
> > [..]
> > > > > > > +	spin_lock_irqsave(&qdev->ul_lock, flags);
> > > > > > > +	list_for_each_entry(pkt, &qdev->ul_pkts, node)
> > > > > > > +		complete_all(&pkt->done);
> > > > > 
> > > > > Chris, shouldn't we require list_del(&pkt->node) here?
> > > > > 
> > > > 
> > > > No this isn't a full cleanup, with the "early notifier" we just unblocked
> > > > any threads waiting for the ul_callback. Those threads will wake, check
> > > > in_reset, return an error back to the caller. Any list cleanup will be done
> > > > in the ul_callbacks that the mhi bus will do for each queued packet right
> > > > before device remove.
> > > > 
> > > > Again to simplify the code, we can probable remove the in_reset handling
> > > > since it's not required with the current feature set.
> > > > 
> > > 
> > > So since we are not getting status_cb for fatal errors, I think we should just
> > > remove status_cb, in_reset and timeout code.
> > > 
> > 
> > Looks reasonable.
> > 
> > [..]
> > > > I thought having the client get an error on timeout and resend the packet
> > > > would be better than silently dropping it. In practice, we've really only
> > > > seen the timeout or ul_callback errors on unrecoverable errors so I think
> > > > the timeout handling can definitely be redone.
> > > > 
> > > 
> > > You mean we can just remove the timeout handling part and return after
> > > kref_put()?
> > > 
> > 
> > If all messages are "generated" by qcom_mhi_qrtr_send() and "released"
> > in qcom_mhi_qrtr_ul_callback() I don't think you need the refcounting at
> > all.
> > 
> 
> Hmm, you're right. We can move the packet releasing part to ul_callback now.
> 
> > 
> > Presumably though, it would have been nice to not have to carry a
> > separate list of packets (and hope that it's in sync with the mhi core)
> > and instead have the ul callback somehow allow us to derive the skb to
> > be freed.
> > 
> 
> Yep, MHI stack holds the skb in buf_addr member of mhi_result. So, we can just
> use below to get the skb in ul_callback:
> 
> struct sk_buff *skb = (struct sk_buff *)mhi_res->buf_addr;
> 
> This will help us to avoid the use of pkt, ul_pkts list and use the skb directly
> everywhere. At the same time I think we can also remove the ul_lock which
> was added to protect the ul_pkts list.
> 
> Let me know your opinion, I'll just send a series with this modified QRTR MHI
> client driver and MHI suspend/resume patches.
> 

This looks more robust than having the separate list shadowing the
internal state of the MHI core.

+1

Thanks,
Bjorn
diff mbox series

Patch

diff --git a/net/qrtr/Kconfig b/net/qrtr/Kconfig
index 63f89cc6e82c..8eb876471564 100644
--- a/net/qrtr/Kconfig
+++ b/net/qrtr/Kconfig
@@ -29,4 +29,11 @@  config QRTR_TUN
 	  implement endpoints of QRTR, for purpose of tunneling data to other
 	  hosts or testing purposes.
 
+config QRTR_MHI
+	tristate "MHI IPC Router channels"
+	depends on MHI_BUS
+	help
+	  Say Y here to support MHI based ipcrouter channels. MHI is the
+	  transport used for communicating to external modems.
+
 endif # QRTR
diff --git a/net/qrtr/Makefile b/net/qrtr/Makefile
index 1c6d6c120fb7..3dc0a7c9d455 100644
--- a/net/qrtr/Makefile
+++ b/net/qrtr/Makefile
@@ -5,3 +5,5 @@  obj-$(CONFIG_QRTR_SMD) += qrtr-smd.o
 qrtr-smd-y	:= smd.o
 obj-$(CONFIG_QRTR_TUN) += qrtr-tun.o
 qrtr-tun-y	:= tun.o
+obj-$(CONFIG_QRTR_MHI) += qrtr-mhi.o
+qrtr-mhi-y	:= mhi.o
diff --git a/net/qrtr/mhi.c b/net/qrtr/mhi.c
new file mode 100644
index 000000000000..90af208f34c1
--- /dev/null
+++ b/net/qrtr/mhi.c
@@ -0,0 +1,208 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018-2020, The Linux Foundation. All rights reserved.
+ */
+
+#include <linux/mhi.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+
+#include "qrtr.h"
+
+struct qrtr_mhi_dev {
+	struct qrtr_endpoint ep;
+	struct mhi_device *mhi_dev;
+	struct device *dev;
+	spinlock_t ul_lock;		/* lock to protect ul_pkts */
+	struct list_head ul_pkts;
+	atomic_t in_reset;
+};
+
+struct qrtr_mhi_pkt {
+	struct list_head node;
+	struct sk_buff *skb;
+	struct kref refcount;
+	struct completion done;
+};
+
+static void qrtr_mhi_pkt_release(struct kref *ref)
+{
+	struct qrtr_mhi_pkt *pkt = container_of(ref, struct qrtr_mhi_pkt,
+						refcount);
+	struct sock *sk = pkt->skb->sk;
+
+	consume_skb(pkt->skb);
+	if (sk)
+		sock_put(sk);
+
+	kfree(pkt);
+}
+
+/* From MHI to QRTR */
+static void qcom_mhi_qrtr_dl_callback(struct mhi_device *mhi_dev,
+				      struct mhi_result *mhi_res)
+{
+	struct qrtr_mhi_dev *qdev = dev_get_drvdata(&mhi_dev->dev);
+	int rc;
+
+	if (!qdev || mhi_res->transaction_status)
+		return;
+
+	rc = qrtr_endpoint_post(&qdev->ep, mhi_res->buf_addr,
+				mhi_res->bytes_xferd);
+	if (rc == -EINVAL)
+		dev_err(qdev->dev, "invalid ipcrouter packet\n");
+}
+
+/* From QRTR to MHI */
+static void qcom_mhi_qrtr_ul_callback(struct mhi_device *mhi_dev,
+				      struct mhi_result *mhi_res)
+{
+	struct qrtr_mhi_dev *qdev = dev_get_drvdata(&mhi_dev->dev);
+	struct qrtr_mhi_pkt *pkt;
+	unsigned long flags;
+
+	spin_lock_irqsave(&qdev->ul_lock, flags);
+	pkt = list_first_entry(&qdev->ul_pkts, struct qrtr_mhi_pkt, node);
+	list_del(&pkt->node);
+	complete_all(&pkt->done);
+
+	kref_put(&pkt->refcount, qrtr_mhi_pkt_release);
+	spin_unlock_irqrestore(&qdev->ul_lock, flags);
+}
+
+static void qcom_mhi_qrtr_status_callback(struct mhi_device *mhi_dev,
+					  enum mhi_callback mhi_cb)
+{
+	struct qrtr_mhi_dev *qdev = dev_get_drvdata(&mhi_dev->dev);
+	struct qrtr_mhi_pkt *pkt;
+	unsigned long flags;
+
+	if (mhi_cb != MHI_CB_FATAL_ERROR)
+		return;
+
+	atomic_inc(&qdev->in_reset);
+	spin_lock_irqsave(&qdev->ul_lock, flags);
+	list_for_each_entry(pkt, &qdev->ul_pkts, node)
+		complete_all(&pkt->done);
+	spin_unlock_irqrestore(&qdev->ul_lock, flags);
+}
+
+/* Send data over MHI */
+static int qcom_mhi_qrtr_send(struct qrtr_endpoint *ep, struct sk_buff *skb)
+{
+	struct qrtr_mhi_dev *qdev = container_of(ep, struct qrtr_mhi_dev, ep);
+	struct qrtr_mhi_pkt *pkt;
+	int rc;
+
+	rc = skb_linearize(skb);
+	if (rc) {
+		kfree_skb(skb);
+		return rc;
+	}
+
+	pkt = kzalloc(sizeof(*pkt), GFP_KERNEL);
+	if (!pkt) {
+		kfree_skb(skb);
+		return -ENOMEM;
+	}
+
+	init_completion(&pkt->done);
+	kref_init(&pkt->refcount);
+	kref_get(&pkt->refcount);
+	pkt->skb = skb;
+
+	spin_lock_bh(&qdev->ul_lock);
+	list_add_tail(&pkt->node, &qdev->ul_pkts);
+	rc = mhi_queue_skb(qdev->mhi_dev, DMA_TO_DEVICE, skb, skb->len,
+			   MHI_EOT);
+	if (rc) {
+		list_del(&pkt->node);
+		/* Reference count needs to be dropped 2 times */
+		kref_put(&pkt->refcount, qrtr_mhi_pkt_release);
+		kref_put(&pkt->refcount, qrtr_mhi_pkt_release);
+		kfree_skb(skb);
+		spin_unlock_bh(&qdev->ul_lock);
+		return rc;
+	}
+
+	spin_unlock_bh(&qdev->ul_lock);
+	if (skb->sk)
+		sock_hold(skb->sk);
+
+	rc = wait_for_completion_interruptible_timeout(&pkt->done, HZ * 5);
+	if (atomic_read(&qdev->in_reset))
+		rc = -ECONNRESET;
+	else if (rc == 0)
+		rc = -ETIMEDOUT;
+	else if (rc > 0)
+		rc = 0;
+
+	kref_put(&pkt->refcount, qrtr_mhi_pkt_release);
+
+	return rc;
+}
+
+static int qcom_mhi_qrtr_probe(struct mhi_device *mhi_dev,
+			       const struct mhi_device_id *id)
+{
+	struct qrtr_mhi_dev *qdev;
+	u32 net_id;
+	int rc;
+
+	qdev = devm_kzalloc(&mhi_dev->dev, sizeof(*qdev), GFP_KERNEL);
+	if (!qdev)
+		return -ENOMEM;
+
+	qdev->mhi_dev = mhi_dev;
+	qdev->dev = &mhi_dev->dev;
+	qdev->ep.xmit = qcom_mhi_qrtr_send;
+	atomic_set(&qdev->in_reset, 0);
+
+	net_id = QRTR_EP_NID_AUTO;
+
+	INIT_LIST_HEAD(&qdev->ul_pkts);
+	spin_lock_init(&qdev->ul_lock);
+
+	dev_set_drvdata(&mhi_dev->dev, qdev);
+	rc = qrtr_endpoint_register(&qdev->ep, net_id);
+	if (rc)
+		return rc;
+
+	dev_dbg(qdev->dev, "Qualcomm MHI QRTR driver probed\n");
+
+	return 0;
+}
+
+static void qcom_mhi_qrtr_remove(struct mhi_device *mhi_dev)
+{
+	struct qrtr_mhi_dev *qdev = dev_get_drvdata(&mhi_dev->dev);
+
+	qrtr_endpoint_unregister(&qdev->ep);
+	dev_set_drvdata(&mhi_dev->dev, NULL);
+}
+
+static const struct mhi_device_id qcom_mhi_qrtr_id_table[] = {
+	{ .chan = "IPCR" },
+	{}
+};
+MODULE_DEVICE_TABLE(mhi, qcom_mhi_qrtr_id_table);
+
+static struct mhi_driver qcom_mhi_qrtr_driver = {
+	.probe = qcom_mhi_qrtr_probe,
+	.remove = qcom_mhi_qrtr_remove,
+	.dl_xfer_cb = qcom_mhi_qrtr_dl_callback,
+	.ul_xfer_cb = qcom_mhi_qrtr_ul_callback,
+	.status_cb = qcom_mhi_qrtr_status_callback,
+	.id_table = qcom_mhi_qrtr_id_table,
+	.driver = {
+		.name = "qcom_mhi_qrtr",
+	},
+};
+
+module_mhi_driver(qcom_mhi_qrtr_driver);
+
+MODULE_DESCRIPTION("Qualcomm IPC-Router MHI interface driver");
+MODULE_LICENSE("GPL v2");