diff mbox series

[1/1] scsi: storvsc: Enable scatterlist entry lengths > 4Kbytes

Message ID 1613682087-102535-1-git-send-email-mikelley@microsoft.com
State New
Headers show
Series [1/1] scsi: storvsc: Enable scatterlist entry lengths > 4Kbytes | expand

Commit Message

Michael Kelley Feb. 18, 2021, 9:01 p.m. UTC
storvsc currently sets .dma_boundary to limit scatterlist entries
to 4 Kbytes, which is less efficient with huge pages that offer
large chunks of contiguous physical memory. Improve the algorithm
for creating the Hyper-V guest physical address PFN array so
that scatterlist entries with lengths > 4Kbytes are handled.
As a result, remove the .dma_boundary setting.

The improved algorithm also adds support for scatterlist
entries with offsets >= 4Kbytes, which many other SCSI
low-level drivers already support.  It also retains support
for architectures where PAGE_SIZE may differ from
HV_HYP_PAGE_SIZE (such as ARM64).

Signed-off-by: Michael Kelley <mikelley@microsoft.com>
---
 drivers/scsi/storvsc_drv.c | 63 ++++++++++++++++------------------------------
 1 file changed, 22 insertions(+), 41 deletions(-)

Comments

Vitaly Kuznetsov Feb. 23, 2021, 2:30 p.m. UTC | #1
Michael Kelley <mikelley@microsoft.com> writes:

> storvsc currently sets .dma_boundary to limit scatterlist entries

> to 4 Kbytes, which is less efficient with huge pages that offer

> large chunks of contiguous physical memory. Improve the algorithm

> for creating the Hyper-V guest physical address PFN array so

> that scatterlist entries with lengths > 4Kbytes are handled.

> As a result, remove the .dma_boundary setting.

>

> The improved algorithm also adds support for scatterlist

> entries with offsets >= 4Kbytes, which is supported by many

> other SCSI low-level drivers.  And it retains support for

> architectures where possibly PAGE_SIZE != HV_HYP_PAGE_SIZE

> (such as ARM64).

>

> Signed-off-by: Michael Kelley <mikelley@microsoft.com>

> ---

>  drivers/scsi/storvsc_drv.c | 63 ++++++++++++++++------------------------------

>  1 file changed, 22 insertions(+), 41 deletions(-)

>

> diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c

> index 2e4fa77..5d06061 100644

> --- a/drivers/scsi/storvsc_drv.c

> +++ b/drivers/scsi/storvsc_drv.c

> @@ -1678,9 +1678,8 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd)

>  	struct storvsc_cmd_request *cmd_request = scsi_cmd_priv(scmnd);

>  	int i;

>  	struct scatterlist *sgl;

> -	unsigned int sg_count = 0;

> +	unsigned int sg_count;

>  	struct vmscsi_request *vm_srb;

> -	struct scatterlist *cur_sgl;

>  	struct vmbus_packet_mpb_array  *payload;

>  	u32 payload_sz;

>  	u32 length;

> @@ -1759,7 +1758,7 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd)

>  	payload_sz = sizeof(cmd_request->mpb);

>  

>  	if (sg_count) {

> -		unsigned int hvpgoff = 0;

> +		unsigned int hvpgoff, sgl_size;

>  		unsigned long offset_in_hvpg = sgl->offset & ~HV_HYP_PAGE_MASK;

>  		unsigned int hvpg_count = HVPFN_UP(offset_in_hvpg + length);

>  		u64 hvpfn;

> @@ -1773,51 +1772,35 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd)

>  				return SCSI_MLQUEUE_DEVICE_BUSY;

>  		}

>  

> -		/*

> -		 * sgl is a list of PAGEs, and payload->range.pfn_array

> -		 * expects the page number in the unit of HV_HYP_PAGE_SIZE (the

> -		 * page size that Hyper-V uses, so here we need to divide PAGEs

> -		 * into HV_HYP_PAGE in case that PAGE_SIZE > HV_HYP_PAGE_SIZE.

> -		 * Besides, payload->range.offset should be the offset in one

> -		 * HV_HYP_PAGE.

> -		 */

>  		payload->range.len = length;

>  		payload->range.offset = offset_in_hvpg;

> -		hvpgoff = sgl->offset >> HV_HYP_PAGE_SHIFT;

>  

> -		cur_sgl = sgl;

> -		for (i = 0; i < hvpg_count; i++) {

> +

> +		for (i = 0; sgl != NULL; sgl = sg_next(sgl)) {

>  			/*

> -			 * 'i' is the index of hv pages in the payload and

> -			 * 'hvpgoff' is the offset (in hv pages) of the first

> -			 * hv page in the the first page. The relationship

> -			 * between the sum of 'i' and 'hvpgoff' and the offset

> -			 * (in hv pages) in a payload page ('hvpgoff_in_page')

> -			 * is as follow:

> -			 *

> -			 * |------------------ PAGE -------------------|

> -			 * |   NR_HV_HYP_PAGES_IN_PAGE hvpgs in total  |

> -			 * |hvpg|hvpg| ...              |hvpg|... |hvpg|

> -			 * ^         ^                                 ^                 ^

> -			 * +-hvpgoff-+                                 +-hvpgoff_in_page-+

> -			 *           ^                                                   |

> -			 *           +--------------------- i ---------------------------+

> +			 * Init values for the current sgl entry. sgl_size

> +			 * and hvpgoff are in units of Hyper-V size pages.

> +			 * Handling the PAGE_SIZE != HV_HYP_PAGE_SIZE case

> +			 * also handles values of sgl->offset that are

> +			 * larger than PAGE_SIZE. Such offsets are handled

> +			 * even on other than the first sgl entry, provided

> +			 * they are a multiple of PAGE_SIZE.

>  			 */

> -			unsigned int hvpgoff_in_page =

> -				(i + hvpgoff) % NR_HV_HYP_PAGES_IN_PAGE;

> +			sgl_size = HVPFN_UP(sgl->offset + sgl->length);

> +			hvpgoff = sgl->offset >> HV_HYP_PAGE_SHIFT;

> +			hvpfn = page_to_hvpfn(sg_page(sgl));

>  

>  			/*

> -			 * Two cases that we need to fetch a page:

> -			 * 1) i == 0, the first step or

> -			 * 2) hvpgoff_in_page == 0, when we reach the boundary

> -			 *    of a page.

> +			 * Fill the next portion of the PFN array with

> +			 * sequential Hyper-V PFNs for the continguous physical

> +			 * memory described by the sgl entry. The end of the

> +			 * last sgl should be reached at the same time that

> +			 * the PFN array is filled.

>  			 */

> -			if (hvpgoff_in_page == 0 || i == 0) {

> -				hvpfn = page_to_hvpfn(sg_page(cur_sgl));

> -				cur_sgl = sg_next(cur_sgl);

> +			while (hvpgoff != sgl_size) {

> +				payload->range.pfn_array[i++] =

> +							hvpfn + hvpgoff++;

>  			}


Minor nitpick: while this seems to be correct, I personally find it
a bit hard to read: 'hvpgoff' stands for "'sgl->offset' measured in
Hyper-V pages", but we immediately re-use it as a loop counter.

If I'm not mistaken, we can count right away how many entries we're
going to add. Also, we could've introduced HVPFN_DOWN() to complement
HVPFN_UP():
...
#define HVPFN_DOWN(x)	((x) >> HV_HYP_PAGE_SHIFT)
...

hvpgoff = HVPFN_DOWN(sgl->offset);
hvpfn = page_to_hvpfn(sg_page(sgl)) + hvpgoff;
hvpfns_to_add = HVPFN_UP(sgl->offset + sgl->length) - hvpgoff;

and the cycle can look like:

while (hvpfns_to_add) {
	payload->range.pfn_array[i++] = hvpfn++;
	hvpfns_to_add--;
}

> -

> -			payload->range.pfn_array[i] = hvpfn + hvpgoff_in_page;

>  		}


and then we can also make an explicit 

BUG_ON(i != hvpg_count) after the cycle to prove our math is correct :-)

>  	}

>  

> @@ -1851,8 +1834,6 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd)

>  	.slave_configure =	storvsc_device_configure,

>  	.cmd_per_lun =		2048,

>  	.this_id =		-1,

> -	/* Make sure we dont get a sg segment crosses a page boundary */

> -	.dma_boundary =		PAGE_SIZE-1,

>  	/* Ensure there are no gaps in presented sgls */

>  	.virt_boundary_mask =	PAGE_SIZE-1,

>  	.no_write_same =	1,


-- 
Vitaly
Michael Kelley Feb. 23, 2021, 10:01 p.m. UTC | #2
From: Vitaly Kuznetsov <vkuznets@redhat.com> Sent: Tuesday, February 23, 2021 6:30 AM

> 

> Michael Kelley <mikelley@microsoft.com> writes:

> 

> > storvsc currently sets .dma_boundary to limit scatterlist entries

> > to 4 Kbytes, which is less efficient with huge pages that offer

> > large chunks of contiguous physical memory. Improve the algorithm

> > for creating the Hyper-V guest physical address PFN array so

> > that scatterlist entries with lengths > 4Kbytes are handled.

> > As a result, remove the .dma_boundary setting.

> >

> > The improved algorithm also adds support for scatterlist

> > entries with offsets >= 4Kbytes, which is supported by many

> > other SCSI low-level drivers.  And it retains support for

> > architectures where possibly PAGE_SIZE != HV_HYP_PAGE_SIZE

> > (such as ARM64).

> >

> > Signed-off-by: Michael Kelley <mikelley@microsoft.com>

> > ---

> >  drivers/scsi/storvsc_drv.c | 63 ++++++++++++++++------------------------------

> >  1 file changed, 22 insertions(+), 41 deletions(-)

> >

> > diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c

> > index 2e4fa77..5d06061 100644

> > --- a/drivers/scsi/storvsc_drv.c

> > +++ b/drivers/scsi/storvsc_drv.c

> > @@ -1678,9 +1678,8 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct

> scsi_cmnd *scmnd)

> >  	struct storvsc_cmd_request *cmd_request = scsi_cmd_priv(scmnd);

> >  	int i;

> >  	struct scatterlist *sgl;

> > -	unsigned int sg_count = 0;

> > +	unsigned int sg_count;

> >  	struct vmscsi_request *vm_srb;

> > -	struct scatterlist *cur_sgl;

> >  	struct vmbus_packet_mpb_array  *payload;

> >  	u32 payload_sz;

> >  	u32 length;

> > @@ -1759,7 +1758,7 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct

> scsi_cmnd *scmnd)

> >  	payload_sz = sizeof(cmd_request->mpb);

> >

> >  	if (sg_count) {

> > -		unsigned int hvpgoff = 0;

> > +		unsigned int hvpgoff, sgl_size;

> >  		unsigned long offset_in_hvpg = sgl->offset & ~HV_HYP_PAGE_MASK;

> >  		unsigned int hvpg_count = HVPFN_UP(offset_in_hvpg + length);

> >  		u64 hvpfn;

> > @@ -1773,51 +1772,35 @@ static int storvsc_queuecommand(struct Scsi_Host *host,

> struct scsi_cmnd *scmnd)

> >  				return SCSI_MLQUEUE_DEVICE_BUSY;

> >  		}

> >

> > -		/*

> > -		 * sgl is a list of PAGEs, and payload->range.pfn_array

> > -		 * expects the page number in the unit of HV_HYP_PAGE_SIZE (the

> > -		 * page size that Hyper-V uses, so here we need to divide PAGEs

> > -		 * into HV_HYP_PAGE in case that PAGE_SIZE > HV_HYP_PAGE_SIZE.

> > -		 * Besides, payload->range.offset should be the offset in one

> > -		 * HV_HYP_PAGE.

> > -		 */

> >  		payload->range.len = length;

> >  		payload->range.offset = offset_in_hvpg;

> > -		hvpgoff = sgl->offset >> HV_HYP_PAGE_SHIFT;

> >

> > -		cur_sgl = sgl;

> > -		for (i = 0; i < hvpg_count; i++) {

> > +

> > +		for (i = 0; sgl != NULL; sgl = sg_next(sgl)) {

> >  			/*

> > -			 * 'i' is the index of hv pages in the payload and

> > -			 * 'hvpgoff' is the offset (in hv pages) of the first

> > -			 * hv page in the the first page. The relationship

> > -			 * between the sum of 'i' and 'hvpgoff' and the offset

> > -			 * (in hv pages) in a payload page ('hvpgoff_in_page')

> > -			 * is as follow:

> > -			 *

> > -			 * |------------------ PAGE -------------------|

> > -			 * |   NR_HV_HYP_PAGES_IN_PAGE hvpgs in total  |

> > -			 * |hvpg|hvpg| ...              |hvpg|... |hvpg|

> > -			 * ^         ^                                 ^                 ^

> > -			 * +-hvpgoff-+                                 +-hvpgoff_in_page-+

> > -			 *           ^                                                   |

> > -			 *           +--------------------- i ---------------------------+

> > +			 * Init values for the current sgl entry. sgl_size

> > +			 * and hvpgoff are in units of Hyper-V size pages.

> > +			 * Handling the PAGE_SIZE != HV_HYP_PAGE_SIZE case

> > +			 * also handles values of sgl->offset that are

> > +			 * larger than PAGE_SIZE. Such offsets are handled

> > +			 * even on other than the first sgl entry, provided

> > +			 * they are a multiple of PAGE_SIZE.

> >  			 */

> > -			unsigned int hvpgoff_in_page =

> > -				(i + hvpgoff) % NR_HV_HYP_PAGES_IN_PAGE;

> > +			sgl_size = HVPFN_UP(sgl->offset + sgl->length);

> > +			hvpgoff = sgl->offset >> HV_HYP_PAGE_SHIFT;

> > +			hvpfn = page_to_hvpfn(sg_page(sgl));

> >

> >  			/*

> > -			 * Two cases that we need to fetch a page:

> > -			 * 1) i == 0, the first step or

> > -			 * 2) hvpgoff_in_page == 0, when we reach the boundary

> > -			 *    of a page.

> > +			 * Fill the next portion of the PFN array with

> > +			 * sequential Hyper-V PFNs for the continguous physical

> > +			 * memory described by the sgl entry. The end of the

> > +			 * last sgl should be reached at the same time that

> > +			 * the PFN array is filled.

> >  			 */

> > -			if (hvpgoff_in_page == 0 || i == 0) {

> > -				hvpfn = page_to_hvpfn(sg_page(cur_sgl));

> > -				cur_sgl = sg_next(cur_sgl);

> > +			while (hvpgoff != sgl_size) {

> > +				payload->range.pfn_array[i++] =

> > +							hvpfn + hvpgoff++;

> >  			}

> 

> Minor nitpicking: while this seems to be correct I, personally, find it

> a bit hard to read: 'hvpgoff' stands for "'sgl->offset' measured in

> Hyper-V pages' but we immediately re-use it as a cycle counter.

> 

> If I'm not mistaken, we can count right away how many entries we're

> going to add. Also, we could've introduced HVPFN_DOWN() to complement

> HVPFN_UP():

> ...

> #define HVPFN_DOWN(x)	((x) >> HV_HYP_PAGE_SHIFT)

> ...

> 

> hvpgoff = HVPFN_DOWN(sgl->offset);

> hvpfn = page_to_hvpfn(sg_page(sgl)) + hvpgoff;

> hvpfns_to_add = HVPFN_UP(sgl->offset + sgl->length) - hvpgoff;

> 

> and the cycle can look like:

> 

> while (hvpfns_to_add) {

> 	payload->range.pfn_array[i++] = hvpfn++;

> 	hvpfns_to_add--;

> }

> 

> > -

> > -			payload->range.pfn_array[i] = hvpfn + hvpgoff_in_page;

> >  		}

> 

> and then we can also make an explicit

> 

> BUG_ON(i != hvpg_count) after the cycle to prove our math is correct :-)

> 


Your proposal works for me, and it doesn't actually change the number of
arithmetic operations that need to be done.

But I don't think I'll actually add the BUG_ON().  The math is either right
or it isn't, and we have it right.

I'll spin a v2.

Michael

> >  	}

> >

> > @@ -1851,8 +1834,6 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct

> scsi_cmnd *scmnd)

> >  	.slave_configure =	storvsc_device_configure,

> >  	.cmd_per_lun =		2048,

> >  	.this_id =		-1,

> > -	/* Make sure we dont get a sg segment crosses a page boundary */

> > -	.dma_boundary =		PAGE_SIZE-1,

> >  	/* Ensure there are no gaps in presented sgls */

> >  	.virt_boundary_mask =	PAGE_SIZE-1,

> >  	.no_write_same =	1,

> 

> --

> Vitaly
Christoph Hellwig Feb. 24, 2021, 3:52 p.m. UTC | #3
Shouldn't storvsc just use blk_queue_virt_boundary instead of all this
mess?
Michael Kelley Feb. 24, 2021, 5:03 p.m. UTC | #4
From: Christoph Hellwig <hch@infradead.org>  Sent: Wednesday, February 24, 2021 7:53 AM

> 

> Shouldn't storvsc just use blk_queue_virt_boundary instead of all this

> mess?


The storvsc driver does set the virt boundary to PAGE_SIZE - 1.  But
the driver still has to translate the scatterlist into a list of guest
physical frame numbers (each representing 4K bytes) that the
Hyper-V host understands so it can do the I/O.

This patch improves that translation so it can handle a single
scatterlist entry that represents more than PAGE_SIZE bytes of
data.  Then the SCSI dma_boundary (which turns into the blk level
segment_boundary) no longer needs to be set to restrict scatterlist
entries to just PAGE_SIZE bytes.

We also have to preserve the ability to run guests on ARM64 with
PAGE_SIZE of 16K or 64K, while Hyper-V still expects each PFN to
represent only 4K bytes.

Michael
diff mbox series

Patch

diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index 2e4fa77..5d06061 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -1678,9 +1678,8 @@  static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd)
 	struct storvsc_cmd_request *cmd_request = scsi_cmd_priv(scmnd);
 	int i;
 	struct scatterlist *sgl;
-	unsigned int sg_count = 0;
+	unsigned int sg_count;
 	struct vmscsi_request *vm_srb;
-	struct scatterlist *cur_sgl;
 	struct vmbus_packet_mpb_array  *payload;
 	u32 payload_sz;
 	u32 length;
@@ -1759,7 +1758,7 @@  static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd)
 	payload_sz = sizeof(cmd_request->mpb);
 
 	if (sg_count) {
-		unsigned int hvpgoff = 0;
+		unsigned int hvpgoff, sgl_size;
 		unsigned long offset_in_hvpg = sgl->offset & ~HV_HYP_PAGE_MASK;
 		unsigned int hvpg_count = HVPFN_UP(offset_in_hvpg + length);
 		u64 hvpfn;
@@ -1773,51 +1772,35 @@  static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd)
 				return SCSI_MLQUEUE_DEVICE_BUSY;
 		}
 
-		/*
-		 * sgl is a list of PAGEs, and payload->range.pfn_array
-		 * expects the page number in the unit of HV_HYP_PAGE_SIZE (the
-		 * page size that Hyper-V uses, so here we need to divide PAGEs
-		 * into HV_HYP_PAGE in case that PAGE_SIZE > HV_HYP_PAGE_SIZE.
-		 * Besides, payload->range.offset should be the offset in one
-		 * HV_HYP_PAGE.
-		 */
 		payload->range.len = length;
 		payload->range.offset = offset_in_hvpg;
-		hvpgoff = sgl->offset >> HV_HYP_PAGE_SHIFT;
 
-		cur_sgl = sgl;
-		for (i = 0; i < hvpg_count; i++) {
+
+		for (i = 0; sgl != NULL; sgl = sg_next(sgl)) {
 			/*
-			 * 'i' is the index of hv pages in the payload and
-			 * 'hvpgoff' is the offset (in hv pages) of the first
-			 * hv page in the the first page. The relationship
-			 * between the sum of 'i' and 'hvpgoff' and the offset
-			 * (in hv pages) in a payload page ('hvpgoff_in_page')
-			 * is as follow:
-			 *
-			 * |------------------ PAGE -------------------|
-			 * |   NR_HV_HYP_PAGES_IN_PAGE hvpgs in total  |
-			 * |hvpg|hvpg| ...              |hvpg|... |hvpg|
-			 * ^         ^                                 ^                 ^
-			 * +-hvpgoff-+                                 +-hvpgoff_in_page-+
-			 *           ^                                                   |
-			 *           +--------------------- i ---------------------------+
+			 * Init values for the current sgl entry. sgl_size
+			 * and hvpgoff are in units of Hyper-V size pages.
+			 * Handling the PAGE_SIZE != HV_HYP_PAGE_SIZE case
+			 * also handles values of sgl->offset that are
+			 * larger than PAGE_SIZE. Such offsets are handled
+			 * even on other than the first sgl entry, provided
+			 * they are a multiple of PAGE_SIZE.
 			 */
-			unsigned int hvpgoff_in_page =
-				(i + hvpgoff) % NR_HV_HYP_PAGES_IN_PAGE;
+			sgl_size = HVPFN_UP(sgl->offset + sgl->length);
+			hvpgoff = sgl->offset >> HV_HYP_PAGE_SHIFT;
+			hvpfn = page_to_hvpfn(sg_page(sgl));
 
 			/*
-			 * Two cases that we need to fetch a page:
-			 * 1) i == 0, the first step or
-			 * 2) hvpgoff_in_page == 0, when we reach the boundary
-			 *    of a page.
+			 * Fill the next portion of the PFN array with
+			 * sequential Hyper-V PFNs for the contiguous physical
+			 * memory described by the sgl entry. The end of the
+			 * last sgl should be reached at the same time that
+			 * the PFN array is filled.
 			 */
-			if (hvpgoff_in_page == 0 || i == 0) {
-				hvpfn = page_to_hvpfn(sg_page(cur_sgl));
-				cur_sgl = sg_next(cur_sgl);
+			while (hvpgoff != sgl_size) {
+				payload->range.pfn_array[i++] =
+							hvpfn + hvpgoff++;
 			}
-
-			payload->range.pfn_array[i] = hvpfn + hvpgoff_in_page;
 		}
 	}
 
@@ -1851,8 +1834,6 @@  static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd)
 	.slave_configure =	storvsc_device_configure,
 	.cmd_per_lun =		2048,
 	.this_id =		-1,
-	/* Make sure we dont get a sg segment crosses a page boundary */
-	.dma_boundary =		PAGE_SIZE-1,
 	/* Ensure there are no gaps in presented sgls */
 	.virt_boundary_mask =	PAGE_SIZE-1,
 	.no_write_same =	1,