diff mbox series

[1/3] udmabuf: Keep track current device mappings

Message ID 20240123221227.868341-1-afd@ti.com
State New
Headers show
Series [1/3] udmabuf: Keep track current device mappings | expand

Commit Message

Andrew Davis Jan. 23, 2024, 10:12 p.m. UTC
When a device attaches to and maps our buffer we need to keep track
of this mapping/device. This is needed for synchronization with these
devices when beginning and ending CPU access for instance. Add a list
that tracks device mappings as part of {map,unmap}_udmabuf().

Signed-off-by: Andrew Davis <afd@ti.com>
---
 drivers/dma-buf/udmabuf.c | 43 +++++++++++++++++++++++++++++++++++++--
 1 file changed, 41 insertions(+), 2 deletions(-)

Comments

Kasireddy, Vivek Jan. 24, 2024, 10:36 p.m. UTC | #1
Hi Andrew,

> When a device attaches to and maps our buffer we need to keep track
> of this mapping/device. This is needed for synchronization with these
> devices when beginning and ending CPU access for instance. Add a list
> that tracks device mappings as part of {map,unmap}_udmabuf().
> 
> Signed-off-by: Andrew Davis <afd@ti.com>
> ---
>  drivers/dma-buf/udmabuf.c | 43
> +++++++++++++++++++++++++++++++++++++--
>  1 file changed, 41 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c
> index c406459996489..3a23f0a7d112a 100644
> --- a/drivers/dma-buf/udmabuf.c
> +++ b/drivers/dma-buf/udmabuf.c
> @@ -28,6 +28,14 @@ struct udmabuf {
>  	struct page **pages;
>  	struct sg_table *sg;
>  	struct miscdevice *device;
> +	struct list_head attachments;
> +	struct mutex lock;
> +};
> +
> +struct udmabuf_attachment {
> +	struct device *dev;
> +	struct sg_table *table;
> +	struct list_head list;
>  };
> 
>  static vm_fault_t udmabuf_vm_fault(struct vm_fault *vmf)
> @@ -120,14 +128,42 @@ static void put_sg_table(struct device *dev, struct
> sg_table *sg,
>  static struct sg_table *map_udmabuf(struct dma_buf_attachment *at,
>  				    enum dma_data_direction direction)
>  {
> -	return get_sg_table(at->dev, at->dmabuf, direction);
> +	struct udmabuf *ubuf = at->dmabuf->priv;
> +	struct udmabuf_attachment *a;
> +
> +	a = kzalloc(sizeof(*a), GFP_KERNEL);
> +	if (!a)
> +		return ERR_PTR(-ENOMEM);
> +
> +	a->table = get_sg_table(at->dev, at->dmabuf, direction);
> +	if (IS_ERR(a->table)) {
> +		kfree(a);
> +		return a->table;
Isn't that a use-after-free bug?
Rest of the patch lgtm.

Thanks,
Vivek

> +	}
> +
> +	a->dev = at->dev;
> +
> +	mutex_lock(&ubuf->lock);
> +	list_add(&a->list, &ubuf->attachments);
> +	mutex_unlock(&ubuf->lock);
> +
> +	return a->table;
>  }
> 
>  static void unmap_udmabuf(struct dma_buf_attachment *at,
>  			  struct sg_table *sg,
>  			  enum dma_data_direction direction)
>  {
> -	return put_sg_table(at->dev, sg, direction);
> +	struct udmabuf_attachment *a = at->priv;
> +	struct udmabuf *ubuf = at->dmabuf->priv;
> +
> +	mutex_lock(&ubuf->lock);
> +	list_del(&a->list);
> +	mutex_unlock(&ubuf->lock);
> +
> +	put_sg_table(at->dev, sg, direction);
> +
> +	kfree(a);
>  }
> 
>  static void release_udmabuf(struct dma_buf *buf)
> @@ -263,6 +299,9 @@ static long udmabuf_create(struct miscdevice
> *device,
>  		memfd = NULL;
>  	}
> 
> +	INIT_LIST_HEAD(&ubuf->attachments);
> +	mutex_init(&ubuf->lock);
> +
>  	exp_info.ops  = &udmabuf_ops;
>  	exp_info.size = ubuf->pagecount << PAGE_SHIFT;
>  	exp_info.priv = ubuf;
> --
> 2.39.2
Kasireddy, Vivek Jan. 24, 2024, 11:05 p.m. UTC | #2
Hi Andrew,

> Currently this driver creates a SGT table using the CPU as the
> target device, then performs the dma_sync operations against
> that SGT. This is backwards to how DMA-BUFs are supposed to behave.
> This may have worked for the case where these buffers were given
> only back to the same CPU that produced them as in the QEMU case.
> And only then because the original author had the dma_sync
> operations also backwards, syncing for the "device" on begin_cpu.
> This was noticed and "fixed" in this patch[0].
> 
> That then meant we were sync'ing from the CPU to the CPU using
> a pseudo-device "miscdevice". Which then caused another issue
> due to the miscdevice not having a proper DMA mask (and why should
> it, the CPU is not a DMA device). The fix for that was an even
> more egregious hack[1] that declares the CPU is coherent with
> itself and can access its own memory space..
> 
> Unwind all this and perform the correct action by doing the dma_sync
> operations for each device currently attached to the backing buffer.
Makes sense.

> 
> [0] commit 1ffe09590121 ("udmabuf: fix dma-buf cpu access")
> [1] commit 9e9fa6a9198b ("udmabuf: Set the DMA mask for the udmabuf
> device (v2)")
> 
> Signed-off-by: Andrew Davis <afd@ti.com>
> ---
>  drivers/dma-buf/udmabuf.c | 41 +++++++++++++++------------------------
>  1 file changed, 16 insertions(+), 25 deletions(-)
> 
> diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c
> index 3a23f0a7d112a..ab6764322523c 100644
> --- a/drivers/dma-buf/udmabuf.c
> +++ b/drivers/dma-buf/udmabuf.c
> @@ -26,8 +26,6 @@ MODULE_PARM_DESC(size_limit_mb, "Max size of a
> dmabuf, in megabytes. Default is
>  struct udmabuf {
>  	pgoff_t pagecount;
>  	struct page **pages;
> -	struct sg_table *sg;
> -	struct miscdevice *device;
>  	struct list_head attachments;
>  	struct mutex lock;
>  };
> @@ -169,12 +167,8 @@ static void unmap_udmabuf(struct
> dma_buf_attachment *at,
>  static void release_udmabuf(struct dma_buf *buf)
>  {
>  	struct udmabuf *ubuf = buf->priv;
> -	struct device *dev = ubuf->device->this_device;
>  	pgoff_t pg;
> 
> -	if (ubuf->sg)
> -		put_sg_table(dev, ubuf->sg, DMA_BIDIRECTIONAL);
What happens if the last importer maps the dmabuf but erroneously
closes it immediately? Would unmap somehow get called in this case?

Thanks,
Vivek

> -
>  	for (pg = 0; pg < ubuf->pagecount; pg++)
>  		put_page(ubuf->pages[pg]);
>  	kfree(ubuf->pages);
> @@ -185,33 +179,31 @@ static int begin_cpu_udmabuf(struct dma_buf
> *buf,
>  			     enum dma_data_direction direction)
>  {
>  	struct udmabuf *ubuf = buf->priv;
> -	struct device *dev = ubuf->device->this_device;
> -	int ret = 0;
> -
> -	if (!ubuf->sg) {
> -		ubuf->sg = get_sg_table(dev, buf, direction);
> -		if (IS_ERR(ubuf->sg)) {
> -			ret = PTR_ERR(ubuf->sg);
> -			ubuf->sg = NULL;
> -		}
> -	} else {
> -		dma_sync_sg_for_cpu(dev, ubuf->sg->sgl, ubuf->sg->nents,
> -				    direction);
> -	}
> +	struct udmabuf_attachment *a;
> 
> -	return ret;
> +	mutex_lock(&ubuf->lock);
> +
> +	list_for_each_entry(a, &ubuf->attachments, list)
> +		dma_sync_sgtable_for_cpu(a->dev, a->table, direction);
> +
> +	mutex_unlock(&ubuf->lock);
> +
> +	return 0;
>  }
> 
>  static int end_cpu_udmabuf(struct dma_buf *buf,
>  			   enum dma_data_direction direction)
>  {
>  	struct udmabuf *ubuf = buf->priv;
> -	struct device *dev = ubuf->device->this_device;
> +	struct udmabuf_attachment *a;
> 
> -	if (!ubuf->sg)
> -		return -EINVAL;
> +	mutex_lock(&ubuf->lock);
> +
> +	list_for_each_entry(a, &ubuf->attachments, list)
> +		dma_sync_sgtable_for_device(a->dev, a->table, direction);
> +
> +	mutex_unlock(&ubuf->lock);
> 
> -	dma_sync_sg_for_device(dev, ubuf->sg->sgl, ubuf->sg->nents,
> direction);
>  	return 0;
>  }
> 
> @@ -307,7 +299,6 @@ static long udmabuf_create(struct miscdevice
> *device,
>  	exp_info.priv = ubuf;
>  	exp_info.flags = O_RDWR;
> 
> -	ubuf->device = device;
>  	buf = dma_buf_export(&exp_info);
>  	if (IS_ERR(buf)) {
>  		ret = PTR_ERR(buf);
> --
> 2.39.2
Andrew Davis Jan. 25, 2024, 3:22 p.m. UTC | #3
On 1/24/24 4:36 PM, Kasireddy, Vivek wrote:
> Hi Andrew,
> 
>> When a device attaches to and maps our buffer we need to keep track
>> of this mapping/device. This is needed for synchronization with these
>> devices when beginning and ending CPU access for instance. Add a list
>> that tracks device mappings as part of {map,unmap}_udmabuf().
>>
>> Signed-off-by: Andrew Davis <afd@ti.com>
>> ---
>>   drivers/dma-buf/udmabuf.c | 43
>> +++++++++++++++++++++++++++++++++++++--
>>   1 file changed, 41 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c
>> index c406459996489..3a23f0a7d112a 100644
>> --- a/drivers/dma-buf/udmabuf.c
>> +++ b/drivers/dma-buf/udmabuf.c
>> @@ -28,6 +28,14 @@ struct udmabuf {
>>   	struct page **pages;
>>   	struct sg_table *sg;
>>   	struct miscdevice *device;
>> +	struct list_head attachments;
>> +	struct mutex lock;
>> +};
>> +
>> +struct udmabuf_attachment {
>> +	struct device *dev;
>> +	struct sg_table *table;
>> +	struct list_head list;
>>   };
>>
>>   static vm_fault_t udmabuf_vm_fault(struct vm_fault *vmf)
>> @@ -120,14 +128,42 @@ static void put_sg_table(struct device *dev, struct
>> sg_table *sg,
>>   static struct sg_table *map_udmabuf(struct dma_buf_attachment *at,
>>   				    enum dma_data_direction direction)
>>   {
>> -	return get_sg_table(at->dev, at->dmabuf, direction);
>> +	struct udmabuf *ubuf = at->dmabuf->priv;
>> +	struct udmabuf_attachment *a;
>> +
>> +	a = kzalloc(sizeof(*a), GFP_KERNEL);
>> +	if (!a)
>> +		return ERR_PTR(-ENOMEM);
>> +
>> +	a->table = get_sg_table(at->dev, at->dmabuf, direction);
>> +	if (IS_ERR(a->table)) {
>> +		kfree(a);
>> +		return a->table;
> Isn't that a use-after-free bug?

Indeed it is, will fix.

Seems coccicheck also caught this but I missed it when
reviewing its output, my bad :(

Andrew

> Rest of the patch lgtm.
> 
> Thanks,
> Vivek
> 
>> +	}
>> +
>> +	a->dev = at->dev;
>> +
>> +	mutex_lock(&ubuf->lock);
>> +	list_add(&a->list, &ubuf->attachments);
>> +	mutex_unlock(&ubuf->lock);
>> +
>> +	return a->table;
>>   }
>>
>>   static void unmap_udmabuf(struct dma_buf_attachment *at,
>>   			  struct sg_table *sg,
>>   			  enum dma_data_direction direction)
>>   {
>> -	return put_sg_table(at->dev, sg, direction);
>> +	struct udmabuf_attachment *a = at->priv;
>> +	struct udmabuf *ubuf = at->dmabuf->priv;
>> +
>> +	mutex_lock(&ubuf->lock);
>> +	list_del(&a->list);
>> +	mutex_unlock(&ubuf->lock);
>> +
>> +	put_sg_table(at->dev, sg, direction);
>> +
>> +	kfree(a);
>>   }
>>
>>   static void release_udmabuf(struct dma_buf *buf)
>> @@ -263,6 +299,9 @@ static long udmabuf_create(struct miscdevice
>> *device,
>>   		memfd = NULL;
>>   	}
>>
>> +	INIT_LIST_HEAD(&ubuf->attachments);
>> +	mutex_init(&ubuf->lock);
>> +
>>   	exp_info.ops  = &udmabuf_ops;
>>   	exp_info.size = ubuf->pagecount << PAGE_SHIFT;
>>   	exp_info.priv = ubuf;
>> --
>> 2.39.2
>
Daniel Vetter Jan. 25, 2024, 8:30 p.m. UTC | #4
On Tue, Jan 23, 2024 at 04:12:26PM -0600, Andrew Davis wrote:
> Currently this driver creates a SGT table using the CPU as the
> target device, then performs the dma_sync operations against
> that SGT. This is backwards to how DMA-BUFs are supposed to behave.
> This may have worked for the case where these buffers were given
> only back to the same CPU that produced them as in the QEMU case.
> And only then because the original author had the dma_sync
> operations also backwards, syncing for the "device" on begin_cpu.
> This was noticed and "fixed" in this patch[0].
> 
> That then meant we were sync'ing from the CPU to the CPU using
> a pseudo-device "miscdevice". Which then caused another issue
> due to the miscdevice not having a proper DMA mask (and why should
> it, the CPU is not a DMA device). The fix for that was an even
> more egregious hack[1] that declares the CPU is coherent with
> itself and can access its own memory space..
> 
> Unwind all this and perform the correct action by doing the dma_sync
> operations for each device currently attached to the backing buffer.
> 
> [0] commit 1ffe09590121 ("udmabuf: fix dma-buf cpu access")
> [1] commit 9e9fa6a9198b ("udmabuf: Set the DMA mask for the udmabuf device (v2)")
> 
> Signed-off-by: Andrew Davis <afd@ti.com>

So yeah the above hacks are terrible, but I don't think this is better.
What you're doing now is that you're potentially doing the flushing
multiple times, so if you have a lot of importers with life mappings this
is a performance regression.

It's probably time to bite the bullet and teach the dma-api about flushing
for multiple devices. Or some way we can figure out which is the one
device we need to pick which gives us the right amount of flushing.

Cheers, Sima

> ---
>  drivers/dma-buf/udmabuf.c | 41 +++++++++++++++------------------------
>  1 file changed, 16 insertions(+), 25 deletions(-)
> 
> diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c
> index 3a23f0a7d112a..ab6764322523c 100644
> --- a/drivers/dma-buf/udmabuf.c
> +++ b/drivers/dma-buf/udmabuf.c
> @@ -26,8 +26,6 @@ MODULE_PARM_DESC(size_limit_mb, "Max size of a dmabuf, in megabytes. Default is
>  struct udmabuf {
>  	pgoff_t pagecount;
>  	struct page **pages;
> -	struct sg_table *sg;
> -	struct miscdevice *device;
>  	struct list_head attachments;
>  	struct mutex lock;
>  };
> @@ -169,12 +167,8 @@ static void unmap_udmabuf(struct dma_buf_attachment *at,
>  static void release_udmabuf(struct dma_buf *buf)
>  {
>  	struct udmabuf *ubuf = buf->priv;
> -	struct device *dev = ubuf->device->this_device;
>  	pgoff_t pg;
>  
> -	if (ubuf->sg)
> -		put_sg_table(dev, ubuf->sg, DMA_BIDIRECTIONAL);
> -
>  	for (pg = 0; pg < ubuf->pagecount; pg++)
>  		put_page(ubuf->pages[pg]);
>  	kfree(ubuf->pages);
> @@ -185,33 +179,31 @@ static int begin_cpu_udmabuf(struct dma_buf *buf,
>  			     enum dma_data_direction direction)
>  {
>  	struct udmabuf *ubuf = buf->priv;
> -	struct device *dev = ubuf->device->this_device;
> -	int ret = 0;
> -
> -	if (!ubuf->sg) {
> -		ubuf->sg = get_sg_table(dev, buf, direction);
> -		if (IS_ERR(ubuf->sg)) {
> -			ret = PTR_ERR(ubuf->sg);
> -			ubuf->sg = NULL;
> -		}
> -	} else {
> -		dma_sync_sg_for_cpu(dev, ubuf->sg->sgl, ubuf->sg->nents,
> -				    direction);
> -	}
> +	struct udmabuf_attachment *a;
>  
> -	return ret;
> +	mutex_lock(&ubuf->lock);
> +
> +	list_for_each_entry(a, &ubuf->attachments, list)
> +		dma_sync_sgtable_for_cpu(a->dev, a->table, direction);
> +
> +	mutex_unlock(&ubuf->lock);
> +
> +	return 0;
>  }
>  
>  static int end_cpu_udmabuf(struct dma_buf *buf,
>  			   enum dma_data_direction direction)
>  {
>  	struct udmabuf *ubuf = buf->priv;
> -	struct device *dev = ubuf->device->this_device;
> +	struct udmabuf_attachment *a;
>  
> -	if (!ubuf->sg)
> -		return -EINVAL;
> +	mutex_lock(&ubuf->lock);
> +
> +	list_for_each_entry(a, &ubuf->attachments, list)
> +		dma_sync_sgtable_for_device(a->dev, a->table, direction);
> +
> +	mutex_unlock(&ubuf->lock);
>  
> -	dma_sync_sg_for_device(dev, ubuf->sg->sgl, ubuf->sg->nents, direction);
>  	return 0;
>  }
>  
> @@ -307,7 +299,6 @@ static long udmabuf_create(struct miscdevice *device,
>  	exp_info.priv = ubuf;
>  	exp_info.flags = O_RDWR;
>  
> -	ubuf->device = device;
>  	buf = dma_buf_export(&exp_info);
>  	if (IS_ERR(buf)) {
>  		ret = PTR_ERR(buf);
> -- 
> 2.39.2
> 
> _______________________________________________
> Linaro-mm-sig mailing list -- linaro-mm-sig@lists.linaro.org
> To unsubscribe send an email to linaro-mm-sig-leave@lists.linaro.org
Kasireddy, Vivek Jan. 26, 2024, 7:25 a.m. UTC | #5
> >> Currently this driver creates a SGT table using the CPU as the
> >> target device, then performs the dma_sync operations against
> >> that SGT. This is backwards to how DMA-BUFs are supposed to behave.
> >> This may have worked for the case where these buffers were given
> >> only back to the same CPU that produced them as in the QEMU case.
> >> And only then because the original author had the dma_sync
> >> operations also backwards, syncing for the "device" on begin_cpu.
> >> This was noticed and "fixed" in this patch[0].
> >>
> >> That then meant we were sync'ing from the CPU to the CPU using
> >> a pseudo-device "miscdevice". Which then caused another issue
> >> due to the miscdevice not having a proper DMA mask (and why should
> >> it, the CPU is not a DMA device). The fix for that was an even
> >> more egregious hack[1] that declares the CPU is coherent with
> >> itself and can access its own memory space..
> >>
> >> Unwind all this and perform the correct action by doing the dma_sync
> >> operations for each device currently attached to the backing buffer.
> > Makes sense.
> >
> >>
> >> [0] commit 1ffe09590121 ("udmabuf: fix dma-buf cpu access")
> >> [1] commit 9e9fa6a9198b ("udmabuf: Set the DMA mask for the udmabuf
> >> device (v2)")
> >>
> >> Signed-off-by: Andrew Davis <afd@ti.com>
> >> ---
> >>   drivers/dma-buf/udmabuf.c | 41 +++++++++++++++------------------------
> >>   1 file changed, 16 insertions(+), 25 deletions(-)
> >>
> >> diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c
> >> index 3a23f0a7d112a..ab6764322523c 100644
> >> --- a/drivers/dma-buf/udmabuf.c
> >> +++ b/drivers/dma-buf/udmabuf.c
> >> @@ -26,8 +26,6 @@ MODULE_PARM_DESC(size_limit_mb, "Max size of a
> >> dmabuf, in megabytes. Default is
> >>   struct udmabuf {
> >>   	pgoff_t pagecount;
> >>   	struct page **pages;
> >> -	struct sg_table *sg;
> >> -	struct miscdevice *device;
> >>   	struct list_head attachments;
> >>   	struct mutex lock;
> >>   };
> >> @@ -169,12 +167,8 @@ static void unmap_udmabuf(struct
> >> dma_buf_attachment *at,
> >>   static void release_udmabuf(struct dma_buf *buf)
> >>   {
> >>   	struct udmabuf *ubuf = buf->priv;
> >> -	struct device *dev = ubuf->device->this_device;
> >>   	pgoff_t pg;
> >>
> >> -	if (ubuf->sg)
> >> -		put_sg_table(dev, ubuf->sg, DMA_BIDIRECTIONAL);
> > What happens if the last importer maps the dmabuf but erroneously
> > closes it immediately? Would unmap somehow get called in this case?
> >
> 
> Good question, had to scan the framework code a bit here. I thought
> closing a DMABUF handle would automatically unwind any current
> attachments/mappings, but it seems nothing in the framework does that.
> 
> Looks like that is up to the importing drivers[0]:
> 
> > Once a driver is done with a shared buffer it needs to call
> > dma_buf_detach() (after cleaning up any mappings) and then
> > release the reference acquired with dma_buf_get() by
> > calling dma_buf_put().
> 
> So closing a DMABUF after mapping without first unmapping it would
> be a bug in the importer, it is not the exporters problem to check
It may be a bug in the importer but wouldn't the memory associated
with the sg table and attachment get leaked if unmap doesn't get called
in this scenario?

Thanks,
Vivek

> for (although some more warnings in the framework checking for that
> might not be a bad idea..).
> 
> Andrew
> 
> [0] https://www.kernel.org/doc/html/v6.7/driver-api/dma-buf.html
> 
> > Thanks,
> > Vivek
> >
> >> -
> >>   	for (pg = 0; pg < ubuf->pagecount; pg++)
> >>   		put_page(ubuf->pages[pg]);
> >>   	kfree(ubuf->pages);
> >> @@ -185,33 +179,31 @@ static int begin_cpu_udmabuf(struct dma_buf
> >> *buf,
> >>   			     enum dma_data_direction direction)
> >>   {
> >>   	struct udmabuf *ubuf = buf->priv;
> >> -	struct device *dev = ubuf->device->this_device;
> >> -	int ret = 0;
> >> -
> >> -	if (!ubuf->sg) {
> >> -		ubuf->sg = get_sg_table(dev, buf, direction);
> >> -		if (IS_ERR(ubuf->sg)) {
> >> -			ret = PTR_ERR(ubuf->sg);
> >> -			ubuf->sg = NULL;
> >> -		}
> >> -	} else {
> >> -		dma_sync_sg_for_cpu(dev, ubuf->sg->sgl, ubuf->sg->nents,
> >> -				    direction);
> >> -	}
> >> +	struct udmabuf_attachment *a;
> >>
> >> -	return ret;
> >> +	mutex_lock(&ubuf->lock);
> >> +
> >> +	list_for_each_entry(a, &ubuf->attachments, list)
> >> +		dma_sync_sgtable_for_cpu(a->dev, a->table, direction);
> >> +
> >> +	mutex_unlock(&ubuf->lock);
> >> +
> >> +	return 0;
> >>   }
> >>
> >>   static int end_cpu_udmabuf(struct dma_buf *buf,
> >>   			   enum dma_data_direction direction)
> >>   {
> >>   	struct udmabuf *ubuf = buf->priv;
> >> -	struct device *dev = ubuf->device->this_device;
> >> +	struct udmabuf_attachment *a;
> >>
> >> -	if (!ubuf->sg)
> >> -		return -EINVAL;
> >> +	mutex_lock(&ubuf->lock);
> >> +
> >> +	list_for_each_entry(a, &ubuf->attachments, list)
> >> +		dma_sync_sgtable_for_device(a->dev, a->table, direction);
> >> +
> >> +	mutex_unlock(&ubuf->lock);
> >>
> >> -	dma_sync_sg_for_device(dev, ubuf->sg->sgl, ubuf->sg->nents,
> >> direction);
> >>   	return 0;
> >>   }
> >>
> >> @@ -307,7 +299,6 @@ static long udmabuf_create(struct miscdevice
> >> *device,
> >>   	exp_info.priv = ubuf;
> >>   	exp_info.flags = O_RDWR;
> >>
> >> -	ubuf->device = device;
> >>   	buf = dma_buf_export(&exp_info);
> >>   	if (IS_ERR(buf)) {
> >>   		ret = PTR_ERR(buf);
> >> --
> >> 2.39.2
> >
Andrew Davis Jan. 26, 2024, 5:24 p.m. UTC | #6
On 1/25/24 2:30 PM, Daniel Vetter wrote:
> On Tue, Jan 23, 2024 at 04:12:26PM -0600, Andrew Davis wrote:
>> Currently this driver creates a SGT table using the CPU as the
>> target device, then performs the dma_sync operations against
>> that SGT. This is backwards to how DMA-BUFs are supposed to behave.
>> This may have worked for the case where these buffers were given
>> only back to the same CPU that produced them as in the QEMU case.
>> And only then because the original author had the dma_sync
>> operations also backwards, syncing for the "device" on begin_cpu.
>> This was noticed and "fixed" in this patch[0].
>>
>> That then meant we were sync'ing from the CPU to the CPU using
>> a pseudo-device "miscdevice". Which then caused another issue
>> due to the miscdevice not having a proper DMA mask (and why should
>> it, the CPU is not a DMA device). The fix for that was an even
>> more egregious hack[1] that declares the CPU is coherent with
>> itself and can access its own memory space..
>>
>> Unwind all this and perform the correct action by doing the dma_sync
>> operations for each device currently attached to the backing buffer.
>>
>> [0] commit 1ffe09590121 ("udmabuf: fix dma-buf cpu access")
>> [1] commit 9e9fa6a9198b ("udmabuf: Set the DMA mask for the udmabuf device (v2)")
>>
>> Signed-off-by: Andrew Davis <afd@ti.com>
> 
> So yeah the above hacks are terrible, but I don't think this is better.
> What you're doing now is that you're potentially doing the flushing
> multiple times, so if you have a lot of importers with life mappings this
> is a performance regression.

I'd take lower performing but correct than fast and broken. :)

Syncing for CPU/device is about making sure the CPU/device can see
the data produced by the other. Some devices might be dma-coherent
and syncing for them would be a NOP, but we cant know that here
in this driver. Let's say we have two attached devices, one that
is cache coherent and one that isn't. If we only sync for first
attached device then that is converted to a NOP and we never flush
like the second device needed.

Same is true for devices behind IOMMU or with an L3 cache when
syncing in the other direction for CPU. So we have to sync for all
attached devices to ensure we get even the lowest common denominator
device sync'd. It is up to the DMA-API layer to decide which syncs
need to actually do something. If all attached devices are coherent
then all syncs will be NOPs and we have no performance penalty.

> 
> It's probably time to bite the bullet and teach the dma-api about flushing
> for multiple devices. Or some way we can figure out which is the one
> device we need to pick which gives us the right amount of flushing.
> 

Seems like a constraint solving micro-optimization. The DMA-API layer
would have to track which buffers have already been flushed from CPU
cache and also track that nothing has been written into those caches
since that point, only then could it skip the flush. But that is already
the point of the dirty bit in the caches themselves, cleaning already
clean cache lines is essentially free in hardware. And so is invalidating
lines, it is just flipping a bit.

Andrew

> Cheers, Sima
> 
>> ---
>>   drivers/dma-buf/udmabuf.c | 41 +++++++++++++++------------------------
>>   1 file changed, 16 insertions(+), 25 deletions(-)
>>
>> diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c
>> index 3a23f0a7d112a..ab6764322523c 100644
>> --- a/drivers/dma-buf/udmabuf.c
>> +++ b/drivers/dma-buf/udmabuf.c
>> @@ -26,8 +26,6 @@ MODULE_PARM_DESC(size_limit_mb, "Max size of a dmabuf, in megabytes. Default is
>>   struct udmabuf {
>>   	pgoff_t pagecount;
>>   	struct page **pages;
>> -	struct sg_table *sg;
>> -	struct miscdevice *device;
>>   	struct list_head attachments;
>>   	struct mutex lock;
>>   };
>> @@ -169,12 +167,8 @@ static void unmap_udmabuf(struct dma_buf_attachment *at,
>>   static void release_udmabuf(struct dma_buf *buf)
>>   {
>>   	struct udmabuf *ubuf = buf->priv;
>> -	struct device *dev = ubuf->device->this_device;
>>   	pgoff_t pg;
>>   
>> -	if (ubuf->sg)
>> -		put_sg_table(dev, ubuf->sg, DMA_BIDIRECTIONAL);
>> -
>>   	for (pg = 0; pg < ubuf->pagecount; pg++)
>>   		put_page(ubuf->pages[pg]);
>>   	kfree(ubuf->pages);
>> @@ -185,33 +179,31 @@ static int begin_cpu_udmabuf(struct dma_buf *buf,
>>   			     enum dma_data_direction direction)
>>   {
>>   	struct udmabuf *ubuf = buf->priv;
>> -	struct device *dev = ubuf->device->this_device;
>> -	int ret = 0;
>> -
>> -	if (!ubuf->sg) {
>> -		ubuf->sg = get_sg_table(dev, buf, direction);
>> -		if (IS_ERR(ubuf->sg)) {
>> -			ret = PTR_ERR(ubuf->sg);
>> -			ubuf->sg = NULL;
>> -		}
>> -	} else {
>> -		dma_sync_sg_for_cpu(dev, ubuf->sg->sgl, ubuf->sg->nents,
>> -				    direction);
>> -	}
>> +	struct udmabuf_attachment *a;
>>   
>> -	return ret;
>> +	mutex_lock(&ubuf->lock);
>> +
>> +	list_for_each_entry(a, &ubuf->attachments, list)
>> +		dma_sync_sgtable_for_cpu(a->dev, a->table, direction);
>> +
>> +	mutex_unlock(&ubuf->lock);
>> +
>> +	return 0;
>>   }
>>   
>>   static int end_cpu_udmabuf(struct dma_buf *buf,
>>   			   enum dma_data_direction direction)
>>   {
>>   	struct udmabuf *ubuf = buf->priv;
>> -	struct device *dev = ubuf->device->this_device;
>> +	struct udmabuf_attachment *a;
>>   
>> -	if (!ubuf->sg)
>> -		return -EINVAL;
>> +	mutex_lock(&ubuf->lock);
>> +
>> +	list_for_each_entry(a, &ubuf->attachments, list)
>> +		dma_sync_sgtable_for_device(a->dev, a->table, direction);
>> +
>> +	mutex_unlock(&ubuf->lock);
>>   
>> -	dma_sync_sg_for_device(dev, ubuf->sg->sgl, ubuf->sg->nents, direction);
>>   	return 0;
>>   }
>>   
>> @@ -307,7 +299,6 @@ static long udmabuf_create(struct miscdevice *device,
>>   	exp_info.priv = ubuf;
>>   	exp_info.flags = O_RDWR;
>>   
>> -	ubuf->device = device;
>>   	buf = dma_buf_export(&exp_info);
>>   	if (IS_ERR(buf)) {
>>   		ret = PTR_ERR(buf);
>> -- 
>> 2.39.2
>>
>> _______________________________________________
>> Linaro-mm-sig mailing list -- linaro-mm-sig@lists.linaro.org
>> To unsubscribe send an email to linaro-mm-sig-leave@lists.linaro.org
>
Andrew Davis Jan. 26, 2024, 5:40 p.m. UTC | #7
On 1/26/24 1:25 AM, Kasireddy, Vivek wrote:
>>>> Currently this driver creates a SGT table using the CPU as the
>>>> target device, then performs the dma_sync operations against
>>>> that SGT. This is backwards to how DMA-BUFs are supposed to behave.
>>>> This may have worked for the case where these buffers were given
>>>> only back to the same CPU that produced them as in the QEMU case.
>>>> And only then because the original author had the dma_sync
>>>> operations also backwards, syncing for the "device" on begin_cpu.
>>>> This was noticed and "fixed" in this patch[0].
>>>>
>>>> That then meant we were sync'ing from the CPU to the CPU using
>>>> a pseudo-device "miscdevice". Which then caused another issue
>>>> due to the miscdevice not having a proper DMA mask (and why should
>>>> it, the CPU is not a DMA device). The fix for that was an even
>>>> more egregious hack[1] that declares the CPU is coherent with
>>>> itself and can access its own memory space..
>>>>
>>>> Unwind all this and perform the correct action by doing the dma_sync
>>>> operations for each device currently attached to the backing buffer.
>>> Makes sense.
>>>
>>>>
>>>> [0] commit 1ffe09590121 ("udmabuf: fix dma-buf cpu access")
>>>> [1] commit 9e9fa6a9198b ("udmabuf: Set the DMA mask for the udmabuf
>>>> device (v2)")
>>>>
>>>> Signed-off-by: Andrew Davis <afd@ti.com>
>>>> ---
>>>>    drivers/dma-buf/udmabuf.c | 41 +++++++++++++++------------------------
>>>>    1 file changed, 16 insertions(+), 25 deletions(-)
>>>>
>>>> diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c
>>>> index 3a23f0a7d112a..ab6764322523c 100644
>>>> --- a/drivers/dma-buf/udmabuf.c
>>>> +++ b/drivers/dma-buf/udmabuf.c
>>>> @@ -26,8 +26,6 @@ MODULE_PARM_DESC(size_limit_mb, "Max size of a
>>>> dmabuf, in megabytes. Default is
>>>>    struct udmabuf {
>>>>    	pgoff_t pagecount;
>>>>    	struct page **pages;
>>>> -	struct sg_table *sg;
>>>> -	struct miscdevice *device;
>>>>    	struct list_head attachments;
>>>>    	struct mutex lock;
>>>>    };
>>>> @@ -169,12 +167,8 @@ static void unmap_udmabuf(struct
>>>> dma_buf_attachment *at,
>>>>    static void release_udmabuf(struct dma_buf *buf)
>>>>    {
>>>>    	struct udmabuf *ubuf = buf->priv;
>>>> -	struct device *dev = ubuf->device->this_device;
>>>>    	pgoff_t pg;
>>>>
>>>> -	if (ubuf->sg)
>>>> -		put_sg_table(dev, ubuf->sg, DMA_BIDIRECTIONAL);
>>> What happens if the last importer maps the dmabuf but erroneously
>>> closes it immediately? Would unmap somehow get called in this case?
>>>
>>
>> Good question, had to scan the framework code a bit here. I thought
>> closing a DMABUF handle would automatically unwind any current
>> attachments/mappings, but it seems nothing in the framework does that.
>>
>> Looks like that is up to the importing drivers[0]:
>>
>>> Once a driver is done with a shared buffer it needs to call
>>> dma_buf_detach() (after cleaning up any mappings) and then
>>> release the reference acquired with dma_buf_get() by
>>> calling dma_buf_put().
>>
>> So closing a DMABUF after mapping without first unmapping it would
>> be a bug in the importer, it is not the exporters problem to check
> It may be a bug in the importer but wouldn't the memory associated
> with the sg table and attachment get leaked if unmap doesn't get called
> in this scenario?
> 

Yes the attachment data would be leaked if unattach was not called,
but that is true for all DMABUF exporters. The .release() callback
is meant to be the mirror of the export function and it only cleans
up that. Same for attach/unattach, map/unmap, etc.. If these calls
are not balanced then yes they can leak memory.

Since balance is guaranteed by the API, checking the balance should
be done at that level, not in each and every exporter. If your
comment is that we should add those checks into the DMABUF framework
layer then I would agree.

Andrew

> Thanks,
> Vivek
> 
>> for (although some more warnings in the framework checking for that
>> might not be a bad idea..).
>>
>> Andrew
>>
>> [0] https://www.kernel.org/doc/html/v6.7/driver-api/dma-buf.html
>>
>>> Thanks,
>>> Vivek
>>>
>>>> -
>>>>    	for (pg = 0; pg < ubuf->pagecount; pg++)
>>>>    		put_page(ubuf->pages[pg]);
>>>>    	kfree(ubuf->pages);
>>>> @@ -185,33 +179,31 @@ static int begin_cpu_udmabuf(struct dma_buf
>>>> *buf,
>>>>    			     enum dma_data_direction direction)
>>>>    {
>>>>    	struct udmabuf *ubuf = buf->priv;
>>>> -	struct device *dev = ubuf->device->this_device;
>>>> -	int ret = 0;
>>>> -
>>>> -	if (!ubuf->sg) {
>>>> -		ubuf->sg = get_sg_table(dev, buf, direction);
>>>> -		if (IS_ERR(ubuf->sg)) {
>>>> -			ret = PTR_ERR(ubuf->sg);
>>>> -			ubuf->sg = NULL;
>>>> -		}
>>>> -	} else {
>>>> -		dma_sync_sg_for_cpu(dev, ubuf->sg->sgl, ubuf->sg->nents,
>>>> -				    direction);
>>>> -	}
>>>> +	struct udmabuf_attachment *a;
>>>>
>>>> -	return ret;
>>>> +	mutex_lock(&ubuf->lock);
>>>> +
>>>> +	list_for_each_entry(a, &ubuf->attachments, list)
>>>> +		dma_sync_sgtable_for_cpu(a->dev, a->table, direction);
>>>> +
>>>> +	mutex_unlock(&ubuf->lock);
>>>> +
>>>> +	return 0;
>>>>    }
>>>>
>>>>    static int end_cpu_udmabuf(struct dma_buf *buf,
>>>>    			   enum dma_data_direction direction)
>>>>    {
>>>>    	struct udmabuf *ubuf = buf->priv;
>>>> -	struct device *dev = ubuf->device->this_device;
>>>> +	struct udmabuf_attachment *a;
>>>>
>>>> -	if (!ubuf->sg)
>>>> -		return -EINVAL;
>>>> +	mutex_lock(&ubuf->lock);
>>>> +
>>>> +	list_for_each_entry(a, &ubuf->attachments, list)
>>>> +		dma_sync_sgtable_for_device(a->dev, a->table, direction);
>>>> +
>>>> +	mutex_unlock(&ubuf->lock);
>>>>
>>>> -	dma_sync_sg_for_device(dev, ubuf->sg->sgl, ubuf->sg->nents,
>>>> direction);
>>>>    	return 0;
>>>>    }
>>>>
>>>> @@ -307,7 +299,6 @@ static long udmabuf_create(struct miscdevice
>>>> *device,
>>>>    	exp_info.priv = ubuf;
>>>>    	exp_info.flags = O_RDWR;
>>>>
>>>> -	ubuf->device = device;
>>>>    	buf = dma_buf_export(&exp_info);
>>>>    	if (IS_ERR(buf)) {
>>>>    		ret = PTR_ERR(buf);
>>>> --
>>>> 2.39.2
>>>
diff mbox series

Patch

diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c
index c406459996489..3a23f0a7d112a 100644
--- a/drivers/dma-buf/udmabuf.c
+++ b/drivers/dma-buf/udmabuf.c
@@ -28,6 +28,14 @@  struct udmabuf {
 	struct page **pages;
 	struct sg_table *sg;
 	struct miscdevice *device;
+	struct list_head attachments;
+	struct mutex lock;
+};
+
+struct udmabuf_attachment {
+	struct device *dev;
+	struct sg_table *table;
+	struct list_head list;
 };
 
 static vm_fault_t udmabuf_vm_fault(struct vm_fault *vmf)
@@ -120,14 +128,42 @@  static void put_sg_table(struct device *dev, struct sg_table *sg,
 static struct sg_table *map_udmabuf(struct dma_buf_attachment *at,
 				    enum dma_data_direction direction)
 {
-	return get_sg_table(at->dev, at->dmabuf, direction);
+	struct udmabuf *ubuf = at->dmabuf->priv;
+	struct udmabuf_attachment *a;
+
+	a = kzalloc(sizeof(*a), GFP_KERNEL);
+	if (!a)
+		return ERR_PTR(-ENOMEM);
+
+	a->table = get_sg_table(at->dev, at->dmabuf, direction);
+	if (IS_ERR(a->table)) {
+		kfree(a);
+		return a->table;
+	}
+
+	a->dev = at->dev;
+
+	mutex_lock(&ubuf->lock);
+	list_add(&a->list, &ubuf->attachments);
+	mutex_unlock(&ubuf->lock);
+
+	return a->table;
 }
 
 static void unmap_udmabuf(struct dma_buf_attachment *at,
 			  struct sg_table *sg,
 			  enum dma_data_direction direction)
 {
-	return put_sg_table(at->dev, sg, direction);
+	struct udmabuf_attachment *a = at->priv;
+	struct udmabuf *ubuf = at->dmabuf->priv;
+
+	mutex_lock(&ubuf->lock);
+	list_del(&a->list);
+	mutex_unlock(&ubuf->lock);
+
+	put_sg_table(at->dev, sg, direction);
+
+	kfree(a);
 }
 
 static void release_udmabuf(struct dma_buf *buf)
@@ -263,6 +299,9 @@  static long udmabuf_create(struct miscdevice *device,
 		memfd = NULL;
 	}
 
+	INIT_LIST_HEAD(&ubuf->attachments);
+	mutex_init(&ubuf->lock);
+
 	exp_info.ops  = &udmabuf_ops;
 	exp_info.size = ubuf->pagecount << PAGE_SHIFT;
 	exp_info.priv = ubuf;