diff mbox series

[v13,43/45] sg: no_dxfer: move to/from kernel buffers

Message ID 20210113224526.861000-44-dgilbert@interlog.com
State Superseded
Headers show
Series None | expand

Commit Message

Douglas Gilbert Jan. 13, 2021, 10:45 p.m. UTC
When the NO_DXFER flag is use on a command/request, the data-in
and data-out buffers (if present) should not be ignored. Add
sg_rq_map_kern() function to handle this. Uses a single bio with
multiple bvec_s usually each holding multiple pages, if necessary.
The driver default element size is 32 KiB so if PAGE_SIZE is 4096
then get_order()==3 .

Signed-off-by: Douglas Gilbert <dgilbert@interlog.com>
---
 drivers/scsi/sg.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)

Comments

Hannes Reinecke Jan. 14, 2021, 7:30 a.m. UTC | #1
On 1/13/21 11:45 PM, Douglas Gilbert wrote:
> When the NO_DXFER flag is use on a command/request, the data-in
> and data-out buffers (if present) should not be ignored. Add
> sg_rq_map_kern() function to handle this. Uses a single bio with
> multiple bvec_s usually each holding multiple pages, if necessary.
> The driver default element size is 32 KiB so if PAGE_SIZE is 4096
> then get_order()==3 .
> 
> Signed-off-by: Douglas Gilbert <dgilbert@interlog.com>
> ---
>   drivers/scsi/sg.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++
>   1 file changed, 59 insertions(+)
> 
> diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
> index a00f488ee5e2..ad97f0756a9c 100644
> --- a/drivers/scsi/sg.c
> +++ b/drivers/scsi/sg.c
> @@ -2865,6 +2865,63 @@ exit_sg(void)
>   	idr_destroy(&sg_index_idr);
>   }
>   
> +static struct bio *
> +sg_mk_kern_bio(int bvec_cnt)
> +{
> +	struct bio *biop;
> +
> +	if (bvec_cnt > BIO_MAX_PAGES)
> +		return NULL;
> +	biop = bio_alloc(GFP_ATOMIC, bvec_cnt);
> +	if (!biop)
> +		return NULL;
> +	biop->bi_end_io = bio_put;

Huh? That is the default action, is it not?
So why specify it separately?

> +	return biop;
> +}
> +
> +/*
> + * Setup to move data between kernel buffers managed by this driver and a SCSI device. Note that
> + * there is no corresponding 'unmap' call as is required by blk_rq_map_user() . Uses a single
> + * bio with an expanded appended bvec if necessary.
> + */
> +static int
> +sg_rq_map_kern(struct sg_request *srp, struct request_queue *q, struct request *rqq, int rw_ind)
> +{
> +	struct sg_scatter_hold *schp = &srp->sgat_h;
> +	struct bio *bio;
> +	int k, ln;
> +	int op_flags = 0;
> +	int num_sgat = schp->num_sgat;
> +	int dlen = schp->dlen;
> +	int pg_sz = 1 << (PAGE_SHIFT + schp->page_order);
> +	int num_segs = (1 << schp->page_order) * num_sgat;
> +	int res = 0;
> +
> +	SG_LOG(4, srp->parentfp, "%s: dlen=%d, pg_sz=%d\n", __func__, dlen, pg_sz);
> +	if (num_sgat <= 0)
> +		return 0;
> +	if (rw_ind == WRITE)
> +		op_flags = REQ_SYNC | REQ_IDLE;
> +	bio = sg_mk_kern_bio(num_sgat - k);
> +	if (!bio)
> +		return -ENOMEM;
> +	bio->bi_opf = req_op(rqq) | op_flags;
> +
> +	for (k = 0; k < num_sgat && dlen > 0; ++k, dlen -= ln) {
> +		ln = min_t(int, dlen, pg_sz);
> +		if (bio_add_pc_page(q, bio, schp->pages[k], ln, 0) < ln) {
> +			bio_put(bio);
> +			return -EINVAL;
> +		}
> +	}
> +	res = blk_rq_append_bio(rqq, &bio);
> +	if (unlikely(res))
> +		bio_put(bio);
> +	else
> +		rqq->nr_phys_segments = num_segs;
> +	return res;
> +}
> +
>   static inline void
>   sg_set_map_data(const struct sg_scatter_hold *schp, bool up_valid,
>   		struct rq_map_data *mdp)
> @@ -3028,6 +3085,8 @@ sg_start_req(struct sg_request *srp, struct sg_comm_wr_t *cwrp, int dxfer_dir)
>   		if (IS_ENABLED(CONFIG_SCSI_PROC_FS) && res)
>   			SG_LOG(1, sfp, "%s: blk_rq_map_user() res=%d\n",
>   			       __func__, res);
> +	} else {	/* transfer data to/from kernel buffers */
> +		res = sg_rq_map_kern(srp, q, rq, r0w);
>   	}
>   fini:
>   	if (unlikely(res)) {		/* failure, free up resources */
> 
Hmm. I must say I fail to see the rationale here.
Why do you need to do the additional mapping?
And doens't the 'NO_DXFER' flag indicate that _no_ data should be 
transferred?

Cheers,

Hannes
Hannes Reinecke Jan. 15, 2021, 4:17 p.m. UTC | #2
On 1/14/21 6:11 PM, Douglas Gilbert wrote:
> On 2021-01-14 2:30 a.m., Hannes Reinecke wrote:

>> On 1/13/21 11:45 PM, Douglas Gilbert wrote:

>>> When the NO_DXFER flag is use on a command/request, the data-in

>>> and data-out buffers (if present) should not be ignored. Add

>>> sg_rq_map_kern() function to handle this. Uses a single bio with

>>> multiple bvec_s usually each holding multiple pages, if necessary.

>>> The driver default element size is 32 KiB so if PAGE_SIZE is 4096

>>> then get_order()==3 .

>>>

>>> Signed-off-by: Douglas Gilbert <dgilbert@interlog.com>

>>> ---

>>>   drivers/scsi/sg.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++

>>>   1 file changed, 59 insertions(+)

>>>

>>> diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c

>>> index a00f488ee5e2..ad97f0756a9c 100644

>>> --- a/drivers/scsi/sg.c

>>> +++ b/drivers/scsi/sg.c

>>> @@ -2865,6 +2865,63 @@ exit_sg(void)

>>>       idr_destroy(&sg_index_idr);

>>>   }

>>> +static struct bio *

>>> +sg_mk_kern_bio(int bvec_cnt)

>>> +{

>>> +    struct bio *biop;

>>> +

>>> +    if (bvec_cnt > BIO_MAX_PAGES)

>>> +        return NULL;

>>> +    biop = bio_alloc(GFP_ATOMIC, bvec_cnt);

>>> +    if (!biop)

>>> +        return NULL;

>>> +    biop->bi_end_io = bio_put;

>>

>> Huh? That is the default action, is it not?

>> So why specify it separately?

> 

> I'll check. That code snippet is copied from NVMe which has equivalent

> code: moving storage data to/from _kernel_ buffers. Later in the driver

> rewrite, bios are re-used, so if any earlier path puts a different

> value in biop->bi_end_io, I'm screwed without that line. So I assumed

> the NVMe code did it for a good reason.

> 

Re-used? Uh-oh.
But okay, then it kinda makes sense.

[ .. ]
>> Why do you need to do the additional mapping?

> 

> I don't understand this question.

> 

>> And doens't the 'NO_DXFER' flag indicate that _no_ data should be 

>> transferred?

> 

> NO, it absolutely does not mean that! With indirect IO (i.e. the 

> default) there

> are two transfers, taking a READ operation is an example:

>     1) transfer user data from the device (a LU) to the internal buffer 

> allocated

>        by the sg driver. LLD arranges that transfer.

>     2) transfer that user data from the internal buffer to the user 

> space as

>        indicated by the call to ioctl(SG_IO) or its alternatives. This 

> transfer

>        is driven by the sg driver using copy_to_user().

> 

> The SG_FLAG_NO_DXFER flag skips step 2) _not_ 1) .

> 

> The SG_FLAG_NO_DXFER flag has been in the sg driver since 1998. Sometime 

> between

> 2010 and now that functionality was quietly dropped. Tony Battersby for one

> seemed quite peeved when I told him that functionality had been silently

> dropped.

> 

Ah. Now that makes sense. Data transfer with not data transfer.
You should've said :-)

I'll have another look with that in mind.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                Kernel Storage Architect
hare@suse.de                              +49 911 74053 688
SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), Geschäftsführer: Felix Imendörffer
diff mbox series

Patch

diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index a00f488ee5e2..ad97f0756a9c 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -2865,6 +2865,63 @@  exit_sg(void)
 	idr_destroy(&sg_index_idr);
 }
 
+static struct bio *
+sg_mk_kern_bio(int bvec_cnt)
+{
+	struct bio *biop;
+
+	if (bvec_cnt > BIO_MAX_PAGES)
+		return NULL;
+	biop = bio_alloc(GFP_ATOMIC, bvec_cnt);
+	if (!biop)
+		return NULL;
+	biop->bi_end_io = bio_put;
+	return biop;
+}
+
+/*
+ * Setup to move data between kernel buffers managed by this driver and a SCSI device. Note that
+ * there is no corresponding 'unmap' call as is required by blk_rq_map_user() . Uses a single
+ * bio with an expanded appended bvec if necessary.
+ */
+static int
+sg_rq_map_kern(struct sg_request *srp, struct request_queue *q, struct request *rqq, int rw_ind)
+{
+	struct sg_scatter_hold *schp = &srp->sgat_h;
+	struct bio *bio;
+	int k, ln;
+	int op_flags = 0;
+	int num_sgat = schp->num_sgat;
+	int dlen = schp->dlen;
+	int pg_sz = 1 << (PAGE_SHIFT + schp->page_order);
+	int num_segs = (1 << schp->page_order) * num_sgat;
+	int res = 0;
+
+	SG_LOG(4, srp->parentfp, "%s: dlen=%d, pg_sz=%d\n", __func__, dlen, pg_sz);
+	if (num_sgat <= 0)
+		return 0;
+	if (rw_ind == WRITE)
+		op_flags = REQ_SYNC | REQ_IDLE;
+	bio = sg_mk_kern_bio(num_sgat - k);
+	if (!bio)
+		return -ENOMEM;
+	bio->bi_opf = req_op(rqq) | op_flags;
+
+	for (k = 0; k < num_sgat && dlen > 0; ++k, dlen -= ln) {
+		ln = min_t(int, dlen, pg_sz);
+		if (bio_add_pc_page(q, bio, schp->pages[k], ln, 0) < ln) {
+			bio_put(bio);
+			return -EINVAL;
+		}
+	}
+	res = blk_rq_append_bio(rqq, &bio);
+	if (unlikely(res))
+		bio_put(bio);
+	else
+		rqq->nr_phys_segments = num_segs;
+	return res;
+}
+
 static inline void
 sg_set_map_data(const struct sg_scatter_hold *schp, bool up_valid,
 		struct rq_map_data *mdp)
@@ -3028,6 +3085,8 @@  sg_start_req(struct sg_request *srp, struct sg_comm_wr_t *cwrp, int dxfer_dir)
 		if (IS_ENABLED(CONFIG_SCSI_PROC_FS) && res)
 			SG_LOG(1, sfp, "%s: blk_rq_map_user() res=%d\n",
 			       __func__, res);
+	} else {	/* transfer data to/from kernel buffers */
+		res = sg_rq_map_kern(srp, q, rq, r0w);
 	}
 fini:
 	if (unlikely(res)) {		/* failure, free up resources */