diff mbox series

[v4,2/3] usb: xhci: Use temporary buffer to consolidate SG

Message ID a4cb87733fe1e82a1a3f2b8998add6af7d363fe4.1602592488.git.joglekar@synopsys.com
State Superseded
Headers show
Series Add logic to consolidate TRBs for Synopsys xHC | expand

Commit Message

Tejas Joglekar Oct. 13, 2020, 12:43 p.m. UTC
The Synopsys xHC has an internal TRB cache of size TRB_CACHE_SIZE for
each endpoint. The default value for TRB_CACHE_SIZE is 16 for SS and 8
for HS. The controller loads and updates the TRB cache from the transfer
ring in system memory whenever the driver issues a start transfer or
update transfer command.

For chained TRBs, the Synopsys xHC requires that the total amount of
bytes for all TRBs loaded in the TRB cache be greater than or equal to 1
MPS. Or the chain ends within the TRB cache (with a last TRB).

If this requirement is not met, the controller will not be able to send
or receive a packet and it will hang causing a driver timeout and error.

This can be a problem if a class driver queues SG requests with many
small-buffer entries. The XHCI driver will create a chained TRB for each
entry which may trigger this issue.

This patch adds logic to the XHCI driver to detect and prevent this from
happening.

For every (TRB_CACHE_SIZE - 2), we check the total buffer size of
the SG list and if the last window of (TRB_CACHE_SIZE - 2) SG list length
and we don't make up at least 1 MPS, we create a temporary buffer to
consolidate full SG list into the buffer.

We check at (TRB_CACHE_SIZE - 2) window because it is possible that there
would be a link and/or event data TRB that take up to 2 of the cache
entries.

We discovered this issue with devices on other platforms but have not
yet come across any device that triggers this on Linux. But it could be
a real problem now or in the future. All it takes is N number of small
chained TRBs. And other instances of the Synopsys IP may have smaller
values for the TRB_CACHE_SIZE which would exacerbate the problem.

Signed-off-by: Tejas Joglekar <joglekar@synopsys.com>
---
 drivers/usb/host/xhci-ring.c |   2 +-
 drivers/usb/host/xhci.c      | 137 ++++++++++++++++++++++++++++++++++-
 drivers/usb/host/xhci.h      |   4 +
 3 files changed, 141 insertions(+), 2 deletions(-)

Comments

kernel test robot Oct. 13, 2020, 2:57 p.m. UTC | #1
Hi Tejas,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on usb/usb-testing]
[also build test WARNING on next-20201013]
[cannot apply to balbi-usb/testing/next peter.chen-usb/ci-for-usb-next v5.9]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Tejas-Joglekar/Add-logic-to-consolidate-TRBs-for-Synopsys-xHC/20201013-204605
base:   https://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git usb-testing
config: m68k-randconfig-m031-20201013 (attached as .config)
compiler: m68k-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/9d7453725e3adcd62b021dbf4cbd4f29476e71e4
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Tejas-Joglekar/Add-logic-to-consolidate-TRBs-for-Synopsys-xHC/20201013-204605
        git checkout 9d7453725e3adcd62b021dbf4cbd4f29476e71e4
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=m68k 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

   drivers/usb/host/xhci.c: In function 'xhci_map_temp_buffer':
>> drivers/usb/host/xhci.c:1269:19: warning: variable 'xhci' set but not used [-Wunused-but-set-variable]
    1269 |  struct xhci_hcd *xhci;
         |                   ^~~~
>> drivers/usb/host/xhci.c:1266:15: warning: variable 'len' set but not used [-Wunused-but-set-variable]
    1266 |  unsigned int len;
         |               ^~~
   drivers/usb/host/xhci.c: In function 'xhci_urb_temp_buffer_required':
>> drivers/usb/host/xhci.c:1305:15: warning: variable 'buf_len' set but not used [-Wunused-but-set-variable]
    1305 |  unsigned int buf_len;
         |               ^~~~~~~
   drivers/usb/host/xhci.c: In function 'xhci_unmap_temp_buf':
   drivers/usb/host/xhci.c:1345:15: warning: variable 'len' set but not used [-Wunused-but-set-variable]
    1345 |  unsigned int len;
         |               ^~~
>> drivers/usb/host/xhci.c:1344:22: warning: variable 'sg' set but not used [-Wunused-but-set-variable]
    1344 |  struct scatterlist *sg;
         |                      ^~

vim +/xhci +1269 drivers/usb/host/xhci.c

  1261	
  1262	static int xhci_map_temp_buffer(struct usb_hcd *hcd, struct urb *urb)
  1263	{
  1264		void *temp;
  1265		int ret = 0;
> 1266		unsigned int len;
  1267		unsigned int buf_len;
  1268		enum dma_data_direction dir;
> 1269		struct xhci_hcd *xhci;
  1270	
  1271		xhci = hcd_to_xhci(hcd);
  1272		dir = usb_urb_dir_in(urb) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
  1273		buf_len = urb->transfer_buffer_length;
  1274	
  1275		temp = kzalloc_node(buf_len, GFP_ATOMIC,
  1276				    dev_to_node(hcd->self.sysdev));
  1277	
  1278		if (usb_urb_dir_out(urb))
  1279			len = sg_pcopy_to_buffer(urb->sg, urb->num_sgs,
  1280						 temp, buf_len, 0);
  1281	
  1282		urb->transfer_buffer = temp;
  1283		urb->transfer_dma = dma_map_single(hcd->self.sysdev,
  1284						   urb->transfer_buffer,
  1285						   urb->transfer_buffer_length,
  1286						   dir);
  1287	
  1288		if (dma_mapping_error(hcd->self.sysdev,
  1289				      urb->transfer_dma)) {
  1290			ret = -EAGAIN;
  1291			kfree(temp);
  1292		} else {
  1293			urb->transfer_flags |= URB_DMA_MAP_SINGLE;
  1294		}
  1295	
  1296		return ret;
  1297	}
  1298	
  1299	static bool xhci_urb_temp_buffer_required(struct usb_hcd *hcd,
  1300						  struct urb *urb)
  1301	{
  1302		bool ret = false;
  1303		unsigned int i;
  1304		unsigned int len = 0;
> 1305		unsigned int buf_len;
  1306		unsigned int trb_size;
  1307		unsigned int max_pkt;
  1308		struct scatterlist *sg;
  1309		struct scatterlist *tail_sg;
  1310	
  1311		sg = urb->sg;
  1312		tail_sg = urb->sg;
  1313		buf_len = urb->transfer_buffer_length;
  1314		max_pkt = usb_endpoint_maxp(&urb->ep->desc);
  1315	
  1316		if (!urb->num_sgs)
  1317			return ret;
  1318	
  1319		if (urb->dev->speed >= USB_SPEED_SUPER)
  1320			trb_size = TRB_CACHE_SIZE_SS;
  1321		else
  1322			trb_size = TRB_CACHE_SIZE_HS;
  1323	
  1324		if (urb->transfer_buffer_length != 0 &&
  1325		    !(urb->transfer_flags & URB_NO_TRANSFER_DMA_MAP)) {
  1326			for_each_sg(urb->sg, sg, urb->num_sgs, i) {
  1327				len = len + sg->length;
  1328				if (i > trb_size - 2) {
  1329					len = len - tail_sg->length;
  1330					if (len < max_pkt) {
  1331						ret = true;
  1332						break;
  1333					}
  1334	
  1335					tail_sg = sg_next(tail_sg);
  1336				}
  1337			}
  1338		}
  1339		return ret;
  1340	}
  1341	
  1342	static void xhci_unmap_temp_buf(struct usb_hcd *hcd, struct urb *urb)
  1343	{
> 1344		struct scatterlist *sg;
  1345		unsigned int len;
  1346		unsigned int buf_len;
  1347		enum dma_data_direction dir;
  1348	
  1349		dir = usb_urb_dir_in(urb) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
  1350	
  1351		sg = urb->sg;
  1352		buf_len = urb->transfer_buffer_length;
  1353	
  1354		if (IS_ENABLED(CONFIG_HAS_DMA) &&
  1355		    (urb->transfer_flags & URB_DMA_MAP_SINGLE))
  1356			dma_unmap_single(hcd->self.sysdev,
  1357					 urb->transfer_dma,
  1358					 urb->transfer_buffer_length,
  1359					 dir);
  1360	
  1361		if (usb_urb_dir_in(urb))
  1362			len = sg_pcopy_from_buffer(urb->sg, urb->num_sgs,
  1363						   urb->transfer_buffer,
  1364						   buf_len,
  1365						   0);
  1366	
  1367		urb->transfer_flags &= ~URB_DMA_MAP_SINGLE;
  1368		kfree(urb->transfer_buffer);
  1369		urb->transfer_buffer = NULL;
  1370	}
  1371	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
diff mbox series

Patch

diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 167dae117f73..6d4dae5e5f21 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -3325,7 +3325,7 @@  int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
 
 	full_len = urb->transfer_buffer_length;
 	/* If we have scatter/gather list, we use it. */
-	if (urb->num_sgs) {
+	if (urb->num_sgs && !(urb->transfer_flags & URB_DMA_MAP_SINGLE)) {
 		num_sgs = urb->num_mapped_sgs;
 		sg = urb->sg;
 		addr = (u64) sg_dma_address(sg);
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 482fe8c5e3b4..8c6089a4a6a4 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -1259,6 +1259,116 @@  EXPORT_SYMBOL_GPL(xhci_resume);
 
 /*-------------------------------------------------------------------------*/
 
+static int xhci_map_temp_buffer(struct usb_hcd *hcd, struct urb *urb)
+{
+	void *temp;
+	int ret = 0;
+	unsigned int len;
+	unsigned int buf_len;
+	enum dma_data_direction dir;
+	struct xhci_hcd *xhci;
+
+	xhci = hcd_to_xhci(hcd);
+	dir = usb_urb_dir_in(urb) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
+	buf_len = urb->transfer_buffer_length;
+
+	temp = kzalloc_node(buf_len, GFP_ATOMIC,
+			    dev_to_node(hcd->self.sysdev));
+
+	if (usb_urb_dir_out(urb))
+		len = sg_pcopy_to_buffer(urb->sg, urb->num_sgs,
+					 temp, buf_len, 0);
+
+	urb->transfer_buffer = temp;
+	urb->transfer_dma = dma_map_single(hcd->self.sysdev,
+					   urb->transfer_buffer,
+					   urb->transfer_buffer_length,
+					   dir);
+
+	if (dma_mapping_error(hcd->self.sysdev,
+			      urb->transfer_dma)) {
+		ret = -EAGAIN;
+		kfree(temp);
+	} else {
+		urb->transfer_flags |= URB_DMA_MAP_SINGLE;
+	}
+
+	return ret;
+}
+
+static bool xhci_urb_temp_buffer_required(struct usb_hcd *hcd,
+					  struct urb *urb)
+{
+	bool ret = false;
+	unsigned int i;
+	unsigned int len = 0;
+	unsigned int buf_len;
+	unsigned int trb_size;
+	unsigned int max_pkt;
+	struct scatterlist *sg;
+	struct scatterlist *tail_sg;
+
+	sg = urb->sg;
+	tail_sg = urb->sg;
+	buf_len = urb->transfer_buffer_length;
+	max_pkt = usb_endpoint_maxp(&urb->ep->desc);
+
+	if (!urb->num_sgs)
+		return ret;
+
+	if (urb->dev->speed >= USB_SPEED_SUPER)
+		trb_size = TRB_CACHE_SIZE_SS;
+	else
+		trb_size = TRB_CACHE_SIZE_HS;
+
+	if (urb->transfer_buffer_length != 0 &&
+	    !(urb->transfer_flags & URB_NO_TRANSFER_DMA_MAP)) {
+		for_each_sg(urb->sg, sg, urb->num_sgs, i) {
+			len = len + sg->length;
+			if (i > trb_size - 2) {
+				len = len - tail_sg->length;
+				if (len < max_pkt) {
+					ret = true;
+					break;
+				}
+
+				tail_sg = sg_next(tail_sg);
+			}
+		}
+	}
+	return ret;
+}
+
+static void xhci_unmap_temp_buf(struct usb_hcd *hcd, struct urb *urb)
+{
+	struct scatterlist *sg;
+	unsigned int len;
+	unsigned int buf_len;
+	enum dma_data_direction dir;
+
+	dir = usb_urb_dir_in(urb) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
+
+	sg = urb->sg;
+	buf_len = urb->transfer_buffer_length;
+
+	if (IS_ENABLED(CONFIG_HAS_DMA) &&
+	    (urb->transfer_flags & URB_DMA_MAP_SINGLE))
+		dma_unmap_single(hcd->self.sysdev,
+				 urb->transfer_dma,
+				 urb->transfer_buffer_length,
+				 dir);
+
+	if (usb_urb_dir_in(urb))
+		len = sg_pcopy_from_buffer(urb->sg, urb->num_sgs,
+					   urb->transfer_buffer,
+					   buf_len,
+					   0);
+
+	urb->transfer_flags &= ~URB_DMA_MAP_SINGLE;
+	kfree(urb->transfer_buffer);
+	urb->transfer_buffer = NULL;
+}
+
 /*
  * Bypass the DMA mapping if URB is suitable for Immediate Transfer (IDT),
  * we'll copy the actual data into the TRB address register. This is limited to
@@ -1268,13 +1378,37 @@  EXPORT_SYMBOL_GPL(xhci_resume);
 static int xhci_map_urb_for_dma(struct usb_hcd *hcd, struct urb *urb,
 				gfp_t mem_flags)
 {
+	struct xhci_hcd *xhci;
+
+	xhci = hcd_to_xhci(hcd);
+
 	if (xhci_urb_suitable_for_idt(urb))
 		return 0;
 
+	if (xhci->quirks & XHCI_SG_TRB_CACHE_SIZE_QUIRK) {
+		if (xhci_urb_temp_buffer_required(hcd, urb))
+			return xhci_map_temp_buffer(hcd, urb);
+	}
 	return usb_hcd_map_urb_for_dma(hcd, urb, mem_flags);
 }
 
-/*
+static void xhci_unmap_urb_for_dma(struct usb_hcd *hcd, struct urb *urb)
+{
+	struct xhci_hcd *xhci;
+	bool unmap_temp_buf = false;
+
+	xhci = hcd_to_xhci(hcd);
+
+	if (urb->num_sgs && (urb->transfer_flags & URB_DMA_MAP_SINGLE))
+		unmap_temp_buf = true;
+
+	if ((xhci->quirks & XHCI_SG_TRB_CACHE_SIZE_QUIRK) && unmap_temp_buf)
+		xhci_unmap_temp_buf(hcd, urb);
+	else
+		usb_hcd_unmap_urb_for_dma(hcd, urb);
+}
+
+/**
  * xhci_get_endpoint_index - Used for passing endpoint bitmasks between the core and
  * HCDs.  Find the index for an endpoint given its descriptor.  Use the return
  * value to right shift 1 for the bitmask.
@@ -5326,6 +5460,7 @@  static const struct hc_driver xhci_hc_driver = {
 	 * managing i/o requests and associated device resources
 	 */
 	.map_urb_for_dma =      xhci_map_urb_for_dma,
+	.unmap_urb_for_dma =    xhci_unmap_urb_for_dma,
 	.urb_enqueue =		xhci_urb_enqueue,
 	.urb_dequeue =		xhci_urb_dequeue,
 	.alloc_dev =		xhci_alloc_dev,
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index f8e453a2674d..f69504f91b5e 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -1330,6 +1330,10 @@  enum xhci_setup_dev {
 #define TRB_SIA			(1<<31)
 #define TRB_FRAME_ID(p)		(((p) & 0x7ff) << 20)
 
+/* TRB cache size for xHC with TRB cache */
+#define TRB_CACHE_SIZE_HS	8
+#define TRB_CACHE_SIZE_SS	16
+
 struct xhci_generic_trb {
 	__le32 field[4];
 };