[v18,74/83] sg: add ordered write flag

Message ID	20210427215733.417746-76-dgilbert@interlog.com
State	New
Headers	show Return-Path: <linux-scsi-owner@kernel.org> From: Douglas Gilbert <dgilbert@interlog.com> To: linux-scsi@vger.kernel.org Cc: martin.petersen@oracle.com, jejb@linux.vnet.ibm.com, hare@suse.de Subject: [PATCH v18 74/83] sg: add ordered write flag Date: Tue, 27 Apr 2021 17:57:24 -0400 Message-Id: <20210427215733.417746-76-dgilbert@interlog.com> In-Reply-To: <20210427215733.417746-1-dgilbert@interlog.com> References: <20210427215733.417746-1-dgilbert@interlog.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Precedence: bulk
Series	sg: add v4 interface, request sharing \| expand [v18,00/83] sg: add v4 interface, request sharing [v18,02/83] sg: remove typedefs, type+formatting cleanup [v18,03/83] sg: sg_log and is_enabled [v18,07/83] sg: move header to uapi section [v18,08/83] sg: speed sg_poll and sg_get_num_waiting [v18,09/83] sg: sg_allow_if_err_recovery and renames [v18,10/83] sg: improve naming [v18,14/83] sg: sg_common_write add structure for arguments [v18,16/83] sg: rework sg_mmap [v18,18/83] sg: rework scatter gather handling [v18,19/83] sg: introduce request state machine [v18,21/83] sg: sg_fill_request_element [v18,24/83] sg: xarray for reqs in fd [v18,28/83] sg: rework debug info [v18,29/83] sg: add 8 byte SCSI LUN to sg_scsi_id [v18,30/83] sg: expand sg_comm_wr_t [v18,31/83] sg: add sg_iosubmit_v3 and sg_ioreceive_v3 ioctls [v18,34/83] sg: protect multiple receivers [v18,36/83] sg: rework mmap support [v18,38/83] sg: warn v3 write system call users [v18,40/83] sg: remove rcv_done request state [v18,43/83] sg: no_dxfer: move to/from kernel buffers [v18,44/83] sg: add blk_poll support [v18,46/83] sg: add sg_ioabort ioctl [v18,47/83] sg: add sg_set_get_extended ioctl [v18,50/83] sg: add fd sharing , change, unshare [v18,51/83] sg: add shared requests [v18,52/83] sg: add multiple request support [v18,56/83] sg: reduce atomic operations [v18,58/83] sg: tweak sg_find_sfp_by_fd() [v18,60/83] sg: compress usercontext to uc [v18,62/83] sg: work on sg_mrq_sanity() [v18,63/83] sg: shared variable blocking [v18,65/83] sg: condition met is not an error [v18,66/83] sg: split sg_setup_req [v18,69/83] sg: add dlen to sg_comm_wr_t [v18,73/83] sg: table of error number explanations [v18,74/83] sg: add ordered write flag [v18,75/83] sg: expand source line length to 100 characters [v18,76/83] sg: add no_attach_msg parameter [v18,80/83] sg: expand bvec usage; re-use bio_s [v18,83/83] sg: bump version to 4.0.47

diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index c4421a426045..d6e18cb4df11 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -113,20 +113,21 @@ enum sg_shr_var { /* If sum_of(dlen) of a fd exceeds this, write() will yield E2BIG */ #define SG_TOT_FD_THRESHOLD (32 * 1024 * 1024) -#define SG_TIME_UNIT_MS 0 /* milliseconds */ -/* #define SG_TIME_UNIT_NS 1 nanoseconds */ +#define SG_TIME_UNIT_MS 0 /* command duration unit: a millisecond */ +/* #define SG_TIME_UNIT_NS 1 in nanoseconds, using high resolution timer (hrt) */ #define SG_DEF_TIME_UNIT SG_TIME_UNIT_MS #define SG_DEFAULT_TIMEOUT mult_frac(SG_DEFAULT_TIMEOUT_USER, HZ, USER_HZ) #define SG_FD_Q_AT_HEAD 0 #define SG_DEFAULT_Q_AT SG_FD_Q_AT_HEAD /* for backward compatibility */ #define SG_FL_MMAP_DIRECT (SG_FLAG_MMAP_IO | SG_FLAG_DIRECT_IO) -#define SG_MAX_RSV_REQS 8 +#define SG_MAX_RSV_REQS 8 /* number of svb requests done asynchronously; assume small-ish */ #define SG_PACK_ID_WILDCARD (-1) #define SG_TAG_WILDCARD (-1) #define SG_ADD_RQ_MAX_RETRIES 40 /* to stop infinite _trylock(s) */ +#define SG_DEF_BLK_POLL_LOOP_COUNT 1000 /* may allow user to tweak this */ /* Bit positions (flags) for sg_request::frq_bm bitmask follow */ #define SG_FRQ_IS_V4I 0 /* true (set) when is v4 interface */ @@ -333,16 +334,19 @@ struct sg_comm_wr_t { /* arguments to sg_common_write() */ struct sg_fd *sfp; const u8 __user *u_cmdp; const u8 *cmdp; + struct sg_request *possible_srp; /* possible candidate for this request */ }; -struct sg_mrq_hold { /* for passing context between mrq functions */ - bool blocking; - bool chk_abort; - bool immed; - bool stop_if; - bool co_mmap; +struct sg_mrq_hold { /* for passing context between multiple requests (mrq) functions */ + unsigned from_sg_io:1; + unsigned chk_abort:1; + unsigned immed:1; + unsigned hipri:1; + unsigned stop_if:1; + unsigned co_mmap:1; + unsigned ordered_wr:1; int id_of_mrq; - int s_res; /* secondary error: some-good-then-error */ + int s_res; /* secondary error: some-good-then-error; in co.spare_out */ u32 cdb_mxlen; /* cdb length in cdb_ap, actual be may less */ u32 tot_reqs; /* total number of requests and cdb_s */ struct sg_comm_wr_t *cwrp; /* cwrp->h4p is mrq control object */ @@ -351,6 +355,12 @@ struct sg_mrq_hold { /* for passing context between mrq functions */ struct sg_scatter_hold *co_mmap_sgatp; }; +struct sg_svb_elem { /* context of shared variable blocking (svb) per SG_MAX_RSV_REQS */ + int ws_pos; /* write-side position in user supplied sg_io_v4 array */ + struct sg_request *rs_srp; /* read-side object ptr, will be next */ + struct sg_request *prev_ws_srp; /* previous write-side object ptr, candidate for next */ +}; + /* tasklet or soft irq callback */ static void sg_rq_end_io(struct request *rqq, blk_status_t status); /* Declarations of other static functions used before they are defined */ @@ -366,8 +376,6 @@ static int sg_receive_v3(struct sg_fd *sfp, struct sg_request *srp, static int sg_submit_v3(struct sg_fd *sfp, struct sg_io_hdr *hp, bool sync, struct sg_request **o_srp); static struct sg_request *sg_common_write(struct sg_comm_wr_t *cwrp); -static int sg_wait_event_srp(struct sg_fd *sfp, void __user *p, - struct sg_io_v4 *h4p, struct sg_request *srp); static int sg_receive_v4(struct sg_fd *sfp, struct sg_request *srp, void __user *p, struct sg_io_v4 *h4p); static int sg_read_append(struct sg_request *srp, void __user *outp, @@ -378,7 +386,6 @@ static void sg_remove_sfp(struct kref *); static void sg_remove_sfp_share(struct sg_fd *sfp, bool is_rd_side); static struct sg_request *sg_find_srp_by_id(struct sg_fd *sfp, int id, bool is_tag); -static bool sg_mrq_get_ready_srp(struct sg_fd *sfp, struct sg_request **srpp); static struct sg_request *sg_setup_req(struct sg_comm_wr_t *cwrp, enum sg_shr_var sh_var); static void sg_deact_request(struct sg_fd *sfp, struct sg_request *srp); @@ -387,9 +394,15 @@ static void sg_device_destroy(struct kref *kref); static struct sg_request *sg_mk_srp_sgat(struct sg_fd *sfp, bool first, int db_len); static int sg_abort_req(struct sg_fd *sfp, struct sg_request *srp); +static int sg_rq_chg_state(struct sg_request *srp, enum sg_rq_state old_st, + enum sg_rq_state new_st); +static int sg_finish_rs_rq(struct sg_fd *sfp, struct sg_request *rs_srp, + bool even_if_in_ws); +static void sg_rq_chg_state_force(struct sg_request *srp, enum sg_rq_state new_st); static int sg_sfp_blk_poll(struct sg_fd *sfp, int loop_count); static int sg_srp_q_blk_poll(struct sg_request *srp, struct request_queue *q, int loop_count); + #if IS_ENABLED(CONFIG_SCSI_LOGGING) && IS_ENABLED(SG_DEBUG) static const char *sg_rq_st_str(enum sg_rq_state rq_st, bool long_str); static const char *sg_shr_str(enum sg_shr_var sh_var, bool long_str); @@ -492,7 +505,7 @@ static void sg_take_snap(struct sg_fd *sfp, bool clear_first); * EPROTOTYPE atomic state change failed unexpectedly * ERANGE multiple requests: usually bad flag values * ERESTARTSYS should not be seen in user space, associated with an - * interruptable wait; will restart system call or give EINTR + * interruptible wait; will restart system call or give EINTR * EWOULDBLOCK [aka EAGAIN]; additionally if the 'more async' flag is set * SG_IOSUBMIT may yield this error */ @@ -1144,6 +1157,71 @@ sg_mrq_arr_flush(struct sg_mrq_hold *mhp) return 0; } +static inline const char * +sg_side_str(struct sg_request *srp) +{ + return (srp->sh_var == SG_SHR_WS_NOT_SRQ || srp->sh_var == SG_SHR_WS_RQ) ? "write_side" : + "read-side"; +} + +static inline int +sg_num_waiting_maybe_acquire(struct sg_fd *sfp) +{ + int num = atomic_read(&sfp->waiting); + + if (num < 1) + num = atomic_read_acquire(&sfp->waiting); + return num; +} + +/* + * Returns true if a request is ready and its srp is written to *srpp . If nothing can be found + * returns false and NULL --> *srpp . If device is detaching, returns true and NULL --> *srpp . + */ +static bool +sg_mrq_get_ready_srp(struct sg_fd *sfp, struct sg_request **srpp) +{ + bool second = false; + int l_await_idx = READ_ONCE(sfp->low_await_idx); + unsigned long idx, s_idx, end_idx; + struct sg_request *srp; + struct xarray *xafp = &sfp->srp_arr; + + if (SG_IS_DETACHING(sfp->parentdp)) { + *srpp = ERR_PTR(-ENODEV); + return true; + } + if (sg_num_waiting_maybe_acquire(sfp) < 1) + goto fini; + + s_idx = (l_await_idx < 0) ? 0 : l_await_idx; + idx = s_idx; + end_idx = ULONG_MAX; + +second_time: + for (srp = xa_find(xafp, &idx, end_idx, SG_XA_RQ_AWAIT); + srp; + srp = xa_find_after(xafp, &idx, end_idx, SG_XA_RQ_AWAIT)) { + if (likely(sg_rq_chg_state(srp, SG_RQ_AWAIT_RCV, SG_RQ_BUSY) == 0)) { + *srpp = srp; + WRITE_ONCE(sfp->low_await_idx, idx + 1); + return true; + } + } + /* If not found so far, need to wrap around and search [0 ... s_idx) */ + if (!srp && !second && s_idx > 0) { + end_idx = s_idx - 1; + s_idx = 0; + idx = s_idx; + second = true; + goto second_time; + } +fini: + *srpp = NULL; + return false; +} + +/* N.B. After this function is completed what srp points to should be considered invalid. */ static int sg_mrq_1complet(struct sg_mrq_hold *mhp, struct sg_fd *sfp, struct sg_request *srp) @@ -1152,6 +1230,7 @@ sg_mrq_1complet(struct sg_mrq_hold *mhp, struct sg_fd *sfp, int tot_reqs = mhp->tot_reqs; struct sg_io_v4 *hp; struct sg_io_v4 *a_hds = mhp->a_hds; + struct sg_io_v4 *cop = mhp->cwrp->h4p; if (unlikely(!srp)) return -EPROTO; @@ -1161,26 +1240,32 @@ sg_mrq_1complet(struct sg_mrq_hold *mhp, struct sg_fd *sfp, __func__, indx); return -EPROTO; } - SG_LOG(3, sfp, "%s: mrq_ind=%d, pack_id=%d\n", __func__, indx, - srp->pack_id); + SG_LOG(3, sfp, "%s: %s, mrq_ind=%d, pack_id=%d\n", __func__, + sg_side_str(srp), indx, srp->pack_id); if (unlikely(indx < 0 || indx >= tot_reqs)) return -EPROTO; hp = a_hds + indx; s_res = sg_receive_v4(sfp, srp, NULL, hp); + if (unlikely(!sg_result_is_good(srp->rq_result))) + SG_LOG(2, sfp, "%s: %s, bad status: drv/tran/scsi=0x%x/0x%x/0x%x\n", + __func__, sg_side_str(srp), hp->driver_status, + hp->transport_status, hp->device_status); if (unlikely(s_res == -EFAULT)) return s_res; hp->info |= SG_INFO_MRQ_FINI; + ++cop->info; + if (cop->din_xfer_len > 0) + --cop->din_resid; if (mhp->co_mmap) { sg_sgat_cp_into(mhp->co_mmap_sgatp, indx * SZ_SG_IO_V4, (const u8 *)hp, SZ_SG_IO_V4); if (sfp->async_qp && (hp->flags & SGV4_FLAG_SIGNAL)) kill_fasync(&sfp->async_qp, SIGPOLL, POLL_IN); - if (sfp->efd_ctxp && (srp->rq_flags & SGV4_FLAG_EVENTFD)) { + if (sfp->efd_ctxp && (hp->flags & SGV4_FLAG_EVENTFD)) { u64 n = eventfd_signal(sfp->efd_ctxp, 1); if (n != 1) - pr_info("%s: srp=%pK eventfd_signal problem\n", - __func__, srp); + pr_info("%s: eventfd_signal problem\n", __func__); } } else if (sfp->async_qp && (hp->flags & SGV4_FLAG_SIGNAL)) { s_res = sg_mrq_arr_flush(mhp); @@ -1192,7 +1277,7 @@ sg_mrq_1complet(struct sg_mrq_hold *mhp, struct sg_fd *sfp, } static int -sg_wait_mrq_event(struct sg_fd *sfp, struct sg_request **srpp) +sg_wait_any_mrq(struct sg_fd *sfp, struct sg_request **srpp) { if (test_bit(SG_FFD_EXCL_WAITQ, sfp->ffd_bm)) return __wait_event_interruptible_exclusive @@ -1202,6 +1287,159 @@ sg_wait_mrq_event(struct sg_fd *sfp, struct sg_request **srpp) sg_mrq_get_ready_srp(sfp, srpp)); } +static bool +sg_srp_hybrid_sleep(struct sg_request *srp) +{ + struct hrtimer_sleeper hs; + enum hrtimer_mode mode; + ktime_t kt = ns_to_ktime(5000); + + if (test_and_set_bit(SG_FRQ_POLL_SLEPT, srp->frq_bm)) + return false; + if (kt == 0) + return false; + + mode = HRTIMER_MODE_REL; + hrtimer_init_sleeper_on_stack(&hs, CLOCK_MONOTONIC, mode); + hrtimer_set_expires(&hs.timer, kt); + + do { + if (atomic_read(&srp->rq_st) != SG_RQ_INFLIGHT) + break; + set_current_state(TASK_UNINTERRUPTIBLE); + hrtimer_sleeper_start_expires(&hs, mode); + if (hs.task) + io_schedule(); + hrtimer_cancel(&hs.timer); + mode = HRTIMER_MODE_ABS; + } while (hs.task && !signal_pending(current)); + + __set_current_state(TASK_RUNNING); + destroy_hrtimer_on_stack(&hs.timer); + return true; +} + +static inline bool +sg_rq_landed(struct sg_device *sdp, struct sg_request *srp) +{ + return atomic_read_acquire(&srp->rq_st) != SG_RQ_INFLIGHT || SG_IS_DETACHING(sdp); +} + +/* This is a blocking wait (or poll) for a given srp. */ +static int +sg_wait_poll_for_given_srp(struct sg_fd *sfp, struct sg_request *srp, bool do_poll) +{ + int res; + struct sg_device *sdp = sfp->parentdp; + + SG_LOG(3, sfp, "%s: do_poll=%d\n", __func__, (int)do_poll); + if (do_poll || (srp->rq_flags & SGV4_FLAG_HIPRI)) + goto poll_loop; + + if (atomic_read(&srp->rq_st) != SG_RQ_INFLIGHT) + goto skip_wait; /* and skip _acquire() */ + /* N.B. The SG_FFD_EXCL_WAITQ flag is ignored here. */ + res = __wait_event_interruptible(sfp->cmpl_wait, + sg_rq_landed(sdp, srp)); + if (unlikely(res)) { /* -ERESTARTSYS because signal hit thread */ + set_bit(SG_FRQ_IS_ORPHAN, srp->frq_bm); + /* orphans harvested when sfp->keep_orphan is false */ + sg_rq_chg_state_force(srp, SG_RQ_INFLIGHT); + SG_LOG(1, sfp, "%s: wait_event_interruptible(): %s[%d]\n", + __func__, (res == -ERESTARTSYS ? "ERESTARTSYS" : ""), + res); + return res; + } +skip_wait: + if (SG_IS_DETACHING(sdp)) + goto detaching; + return sg_rq_chg_state(srp, SG_RQ_AWAIT_RCV, SG_RQ_BUSY); +poll_loop: + if (srp->rq_flags & SGV4_FLAG_HIPRI) { + long state = current->state; + + do { + res = sg_srp_q_blk_poll(srp, sdp->device->request_queue, + SG_DEF_BLK_POLL_LOOP_COUNT); + if (res == -ENODATA || res > 0) { + __set_current_state(TASK_RUNNING); + break; + } + if (unlikely(res < 0)) { + __set_current_state(TASK_RUNNING); + return res; + } + if (signal_pending_state(state, current)) { + __set_current_state(TASK_RUNNING); + return -ERESTARTSYS; + } + if (SG_IS_DETACHING(sdp)) { + __set_current_state(TASK_RUNNING); + goto detaching; + } + cpu_relax(); + } while (true); + } else { + enum sg_rq_state sr_st; + + if (!sg_srp_hybrid_sleep(srp)) + return -EINVAL; + if (signal_pending(current)) + return -ERESTARTSYS; + if (SG_IS_DETACHING(sdp)) + goto detaching; + sr_st = atomic_read(&srp->rq_st); + if (unlikely(sr_st != SG_RQ_AWAIT_RCV)) + return -EPROTO; /* Logic error */ + return sg_rq_chg_state(srp, sr_st, SG_RQ_BUSY); + } + if (atomic_read_acquire(&srp->rq_st) != SG_RQ_AWAIT_RCV) + return (test_bit(SG_FRQ_COUNT_ACTIVE, srp->frq_bm) && + atomic_read(&sfp->submitted) < 1) ? -ENODATA : 0; + return unlikely(sg_rq_chg_state(srp, SG_RQ_AWAIT_RCV, SG_RQ_BUSY)) ? -EPROTO : 0; + +detaching: + sg_rq_chg_state_force(srp, SG_RQ_INACTIVE); + atomic_inc(&sfp->inactives); + return -ENODEV; +} + +static struct sg_request * +sg_mrq_poll_either(struct sg_fd *sfp, struct sg_fd *sec_sfp, bool *on_sfp) +{ + bool sig_pending = false; + long state = current->state; + struct sg_request *srp; + + do { /* alternating polling loop */ + if (sfp) { + if (sg_mrq_get_ready_srp(sfp, &srp)) { + if (!srp) + return ERR_PTR(-ENODEV); + *on_sfp = true; + __set_current_state(TASK_RUNNING); + return srp; + } + } + if (sec_sfp && sfp != sec_sfp) { + if (sg_mrq_get_ready_srp(sec_sfp, &srp)) { + if (!srp) + return ERR_PTR(-ENODEV); + *on_sfp = false; + __set_current_state(TASK_RUNNING); + return srp; + } + } + if (signal_pending_state(state, current)) { + sig_pending = true; + break; + } + cpu_relax(); + } while (!need_resched()); + __set_current_state(TASK_RUNNING); + return ERR_PTR(sig_pending ? -ERESTARTSYS : -EAGAIN); +} + /* * This is a fair-ish algorithm for an interruptible wait on two file * descriptors. It favours the main fd over the secondary fd (sec_sfp). @@ -1211,48 +1449,31 @@ static int sg_mrq_complets(struct sg_mrq_hold *mhp, struct sg_fd *sfp, struct sg_fd *sec_sfp, int mreqs, int sec_reqs) { - int res = 0; - int rres; + bool on_sfp; + int res; struct sg_request *srp; - struct sg_io_v4 *cop = mhp->cwrp->h4p; SG_LOG(3, sfp, "%s: mreqs=%d, sec_reqs=%d\n", __func__, mreqs, sec_reqs); while (mreqs + sec_reqs > 0) { while (mreqs > 0 && sg_mrq_get_ready_srp(sfp, &srp)) { - if (IS_ERR(srp)) { /* -ENODATA: no mrqs here */ - if (PTR_ERR(srp) == -ENODATA) - break; - res = PTR_ERR(srp); - break; - } --mreqs; res = sg_mrq_1complet(mhp, sfp, srp); if (unlikely(res)) return res; - ++cop->info; - if (cop->din_xfer_len > 0) - --cop->din_resid; } while (sec_reqs > 0 && sg_mrq_get_ready_srp(sec_sfp, &srp)) { - if (IS_ERR(srp)) { - if (PTR_ERR(srp) == -ENODATA) - break; - res = PTR_ERR(srp); - break; - } --sec_reqs; - rres = sg_mrq_1complet(mhp, sec_sfp, srp); - if (unlikely(rres)) - return rres; - ++cop->info; - if (cop->din_xfer_len > 0) - --cop->din_resid; + res = sg_mrq_1complet(mhp, sec_sfp, srp); + if (unlikely(res)) + return res; } + if (mhp->hipri) + goto start_polling; if (res) break; if (mreqs > 0) { - res = sg_wait_mrq_event(sfp, &srp); + res = sg_wait_any_mrq(sfp, &srp); if (unlikely(res)) return res; /* signal --> -ERESTARTSYS */ if (IS_ERR(srp)) { @@ -1262,13 +1483,10 @@ sg_mrq_complets(struct sg_mrq_hold *mhp, struct sg_fd *sfp, res = sg_mrq_1complet(mhp, sfp, srp); if (unlikely(res)) return res; - ++cop->info; - if (cop->din_xfer_len > 0) - --cop->din_resid; } } if (sec_reqs > 0) { - res = sg_wait_mrq_event(sec_sfp, &srp); + res = sg_wait_any_mrq(sec_sfp, &srp); if (unlikely(res)) return res; /* signal --> -ERESTARTSYS */ if (IS_ERR(srp)) { @@ -1278,20 +1496,43 @@ sg_mrq_complets(struct sg_mrq_hold *mhp, struct sg_fd *sfp, res = sg_mrq_1complet(mhp, sec_sfp, srp); if (unlikely(res)) return res; - ++cop->info; - if (cop->din_xfer_len > 0) - --cop->din_resid; } } } /* end of outer while loop (while requests still inflight) */ - return res; + return 0; +start_polling: + while (mreqs + sec_reqs > 0) { + srp = sg_mrq_poll_either(sfp, sec_sfp, &on_sfp); + if (IS_ERR(srp)) + return PTR_ERR(srp); + if (on_sfp) { + --mreqs; + res = sg_mrq_1complet(mhp, sfp, srp); + if (unlikely(res)) + return res; + } else { + --sec_reqs; + res = sg_mrq_1complet(mhp, sec_sfp, srp); + if (unlikely(res)) + return res; + } + } + return 0; } -static int -sg_mrq_sanity(struct sg_mrq_hold *mhp) +/* + * Does once pass through the request array looking mainly for bad flag settings and other + * contradictions such as setting the SGV4_FLAG_SHARE flag when no file share is set up. Has + * code toward the end of the loop for checking the share variable blocking (svb) is using + * a strict READ (like) thence WRITE (like) sequence on all data carrying commands; also + * a dangling READ is not allowed at the end of a scb request array. + */ +static bool +sg_mrq_sanity(struct sg_mrq_hold *mhp, bool is_svb) { bool last_is_keep_share = false; - bool share, have_mrq_sense; + bool expect_wr = false; + bool share, have_mrq_sense, have_file_share; int k; struct sg_io_v4 *cop = mhp->cwrp->h4p; u32 cdb_alen = cop->request_len; @@ -1304,149 +1545,116 @@ sg_mrq_sanity(struct sg_mrq_hold *mhp) __maybe_unused const char *rip = "request index"; have_mrq_sense = (cop->response && cop->max_response_len); + have_file_share = sg_fd_is_shared(sfp); + if (is_svb && unlikely(!have_file_share)) { + SG_LOG(1, sfp, "%s: share variable blocking (svb) needs file share\n", __func__); + return false; + } /* Pre-check each request for anomalies, plus some preparation */ for (k = 0, hp = a_hds; k < mhp->tot_reqs; ++k, ++hp) { flags = hp->flags; sg_v4h_partial_zero(hp); - if (unlikely(hp->guard != 'Q' || hp->protocol != 0 || - hp->subprotocol != 0)) { - SG_LOG(1, sfp, "%s: req index %u: %s or protocol\n", - __func__, k, "bad guard"); - return -ERANGE; + if (unlikely(hp->guard != 'Q' || hp->protocol != 0 || hp->subprotocol != 0)) { + SG_LOG(1, sfp, "%s: req index %u: bad guard or protocol\n", __func__, k); + return false; } - last_is_keep_share = !!(flags & SGV4_FLAG_KEEP_SHARE); if (unlikely(flags & SGV4_FLAG_MULTIPLE_REQS)) { - SG_LOG(1, sfp, "%s: %s %u: no nested multi-reqs\n", - __func__, rip, k); - return -ERANGE; + SG_LOG(1, sfp, "%s: %s %u: no nested multi-reqs\n", __func__, rip, k); + return false; } share = !!(flags & SGV4_FLAG_SHARE); - if (mhp->immed) {/* only accept async submits on current fd */ - if (unlikely(flags & SGV4_FLAG_DO_ON_OTHER)) { - SG_LOG(1, sfp, "%s: %s %u, %s\n", __func__, - rip, k, "no IMMED with ON_OTHER"); - return -ERANGE; - } else if (unlikely(share)) { - SG_LOG(1, sfp, "%s: %s %u, %s\n", __func__, - rip, k, "no IMMED with FLAG_SHARE"); - return -ERANGE; - } else if (unlikely(flags & SGV4_FLAG_COMPLETE_B4)) { - SG_LOG(1, sfp, "%s: %s %u, %s\n", __func__, - rip, k, "no IMMED with COMPLETE_B4"); - return -ERANGE; - } + last_is_keep_share = !!(flags & SGV4_FLAG_KEEP_SHARE); + if (mhp->immed && + unlikely(flags & (SGV4_FLAG_DO_ON_OTHER | SGV4_FLAG_COMPLETE_B4))) { + SG_LOG(1, sfp, "%s: %s %u, no IMMED with ON_OTHER or COMPLETE_B4\n", + __func__, rip, k); + return false; + } + if (mhp->immed && unlikely(share)) { + SG_LOG(1, sfp, "%s: %s %u, no IMMED with FLAG_SHARE\n", __func__, rip, k); + return false; } if (mhp->co_mmap && (flags & SGV4_FLAG_MMAP_IO)) { - SG_LOG(1, sfp, "%s: %s %u, MMAP in co AND here\n", - __func__, rip, k); - return -ERANGE; + SG_LOG(1, sfp, "%s: %s %u, MMAP in co AND here\n", __func__, rip, k); + return false; } - if (!sg_fd_is_shared(sfp)) { - if (unlikely(share)) { - SG_LOG(1, sfp, "%s: %s %u, no share\n", - __func__, rip, k); - return -ERANGE; - } else if (unlikely(flags & SGV4_FLAG_DO_ON_OTHER)) { - SG_LOG(1, sfp, "%s: %s %u, %s do on\n", - __func__, rip, k, "no other fd to"); - return -ERANGE; - } + if (unlikely(!have_file_share && share)) { + SG_LOG(1, sfp, "%s: %s %u, no file share\n", __func__, rip, k); + return false; } - if (cdb_ap) { - if (unlikely(hp->request_len > cdb_mxlen)) { - SG_LOG(1, sfp, "%s: %s %u, cdb too long\n", - __func__, rip, k); - return -ERANGE; - } + if (unlikely(!have_file_share && !!(flags & SGV4_FLAG_DO_ON_OTHER))) { + SG_LOG(1, sfp, "%s: %s %u, no other fd to do on\n", __func__, rip, k); + return false; } - if (have_mrq_sense && hp->response == 0 && - hp->max_response_len == 0) { + if (cdb_ap && unlikely(hp->request_len > cdb_mxlen)) { + SG_LOG(1, sfp, "%s: %s %u, cdb too long\n", __func__, rip, k); + return false; + } + if (have_mrq_sense && hp->response == 0 && hp->max_response_len == 0) { hp->response = cop->response; hp->max_response_len = cop->max_response_len; } - } - if (last_is_keep_share) { - SG_LOG(1, sfp, - "%s: Can't set SGV4_FLAG_KEEP_SHARE on last mrq req\n", - __func__); - return -ERANGE; - } - return 0; -} - -/* - * Read operation (din) must precede any write (dout) operations and a din - * operation can't be last (data transferring) operations. Non data - * transferring operations can appear anywhere. Data transferring operations - * must have SGV4_FLAG_SHARE set. Dout operations must additionally have - * SGV4_FLAG_NO_DXFER and SGV4_FLAG_DO_ON_OTHER set. Din operations must - * not set SGV4_FLAG_DO_ON_OTHER. - */ -static bool -sg_mrq_svb_chk(struct sg_io_v4 *a_hds, u32 tot_reqs) -{ - bool last_rd = false; - bool seen_wr = false; - int k; - u32 flags; - struct sg_io_v4 *hp; - - /* expect read-write pairs, all with SGV4_FLAG_NO_DXFER set */ - for (k = 0, hp = a_hds; k < tot_reqs; ++k, ++hp) { - flags = hp->flags; - if (flags & SGV4_FLAG_COMPLETE_B4) + if (!is_svb) + continue; + /* mrq share variable blocking (svb) additional constraints checked here */ + if (unlikely(flags & (SGV4_FLAG_COMPLETE_B4 | SGV4_FLAG_KEEP_SHARE))) { + SG_LOG(1, sfp, "%s: %s %u: no KEEP_SHARE with svb\n", __func__, rip, k); return false; - if (!seen_wr) { + } + if (!expect_wr) { if (hp->dout_xfer_len > 0) - return false; + goto bad_svb; if (hp->din_xfer_len > 0) { if (!(flags & SGV4_FLAG_SHARE)) - return false; + goto bad_svb; if (flags & SGV4_FLAG_DO_ON_OTHER) - return false; - seen_wr = true; - last_rd = true; + goto bad_svb; + expect_wr = true; } - /* allowing commands with no dxfer */ + /* allowing commands with no dxfer (in both cases) */ } else { /* checking write side */ if (hp->dout_xfer_len > 0) { - if (~flags & - (SGV4_FLAG_NO_DXFER | SGV4_FLAG_SHARE | - SGV4_FLAG_DO_ON_OTHER)) - return false; - last_rd = false; - } - if (hp->din_xfer_len > 0) { - if (!(flags & SGV4_FLAG_SHARE)) - return false; - if (flags & SGV4_FLAG_DO_ON_OTHER) - return false; - last_rd = true; + if (unlikely(~flags & (SGV4_FLAG_NO_DXFER | SGV4_FLAG_SHARE | + SGV4_FLAG_DO_ON_OTHER))) + goto bad_svb; + expect_wr = false; + } else if (unlikely(hp->din_xfer_len > 0)) { + goto bad_svb; } } + } /* end of request array iterating loop */ + if (last_is_keep_share) { + SG_LOG(1, sfp, "%s: Can't set SGV4_FLAG_KEEP_SHARE on last mrq req\n", __func__); + return false; + } + if (is_svb && expect_wr) { + SG_LOG(1, sfp, "%s: svb: unpaired READ at end of request array\n", __func__); + return false; } - return !last_rd; + return true; +bad_svb: + SG_LOG(1, sfp, "%s: %s %u: svb alternating read-then-write or flags bad\n", __func__, + rip, k); + return false; } - static struct sg_request * -sg_mrq_submit(struct sg_fd *rq_sfp, struct sg_mrq_hold *mhp, int pos_hdr, - int rsv_idx, bool keep_share) +sg_mrq_submit(struct sg_fd *rq_sfp, struct sg_mrq_hold *mhp, int pos_in_rq_arr, int rsv_idx, + struct sg_request *possible_srp) { unsigned long ul_timeout; struct sg_comm_wr_t r_cwr; struct sg_comm_wr_t *r_cwrp = &r_cwr; - struct sg_io_v4 *hp = mhp->a_hds + pos_hdr; + struct sg_io_v4 *hp = mhp->a_hds + pos_in_rq_arr; sg_comm_wr_init(r_cwrp); if (mhp->cdb_ap) /* already have array of cdbs */ - r_cwrp->cmdp = mhp->cdb_ap + (pos_hdr * mhp->cdb_mxlen); + r_cwrp->cmdp = mhp->cdb_ap + (pos_in_rq_arr * mhp->cdb_mxlen); else /* fetch each cdb from user space */ r_cwrp->u_cmdp = cuptr64(hp->request); r_cwrp->cmd_len = hp->request_len; r_cwrp->rsv_idx = rsv_idx; ul_timeout = msecs_to_jiffies(hp->timeout); - __assign_bit(SG_FRQ_SYNC_INVOC, r_cwrp->frq_bm, - (int)mhp->blocking); + __assign_bit(SG_FRQ_SYNC_INVOC, r_cwrp->frq_bm, (int)mhp->from_sg_io); __set_bit(SG_FRQ_IS_V4I, r_cwrp->frq_bm); r_cwrp->h4p = hp; r_cwrp->dlen = hp->din_xfer_len ? hp->din_xfer_len : hp->dout_xfer_len; @@ -1454,7 +1662,7 @@ sg_mrq_submit(struct sg_fd *rq_sfp, struct sg_mrq_hold *mhp, int pos_hdr, if (hp->flags & SGV4_FLAG_DOUT_OFFSET) r_cwrp->wr_offset = hp->spare_in; r_cwrp->sfp = rq_sfp; - r_cwrp->keep_share = keep_share; + r_cwrp->possible_srp = possible_srp; return sg_common_write(r_cwrp); } @@ -1490,7 +1698,7 @@ sg_process_most_mrq(struct sg_fd *fp, struct sg_fd *o_sfp, } flags = hp->flags; rq_sfp = (flags & SGV4_FLAG_DO_ON_OTHER) ? o_sfp : fp; - srp = sg_mrq_submit(rq_sfp, mhp, j, -1, false); + srp = sg_mrq_submit(rq_sfp, mhp, j, -1, NULL); if (IS_ERR(srp)) { mhp->s_res = PTR_ERR(srp); break; @@ -1499,50 +1707,24 @@ sg_process_most_mrq(struct sg_fd *fp, struct sg_fd *o_sfp, if (mhp->chk_abort) atomic_set(&srp->s_hdr4.pack_id_of_mrq, mhp->id_of_mrq); - if (mhp->immed || - (!(mhp->blocking || (flags & shr_complet_b4)))) { + if (mhp->immed || (!(mhp->from_sg_io || (flags & shr_complet_b4)))) { if (fp == rq_sfp) ++this_fp_sent; else ++other_fp_sent; continue; /* defer completion until all submitted */ } - mhp->s_res = sg_wait_event_srp(rq_sfp, NULL, hp, srp); + mhp->s_res = sg_wait_poll_for_given_srp(rq_sfp, srp, mhp->hipri); if (unlikely(mhp->s_res)) { - if (mhp->s_res == -ERESTARTSYS) + if (mhp->s_res == -ERESTARTSYS || mhp->s_res == -ENODEV) return mhp->s_res; break; } + res = sg_mrq_1complet(mhp, rq_sfp, srp); + if (unlikely(res)) + break; ++num_cmpl; - hp->info |= SG_INFO_MRQ_FINI; - if (mhp->stop_if && !sg_result_is_good(srp->rq_result)) { - SG_LOG(2, fp, "%s: %s=0x%x/0x%x/0x%x] cause exit\n", - __func__, "STOP_IF and status [drv/tran/scsi", - hp->driver_status, hp->transport_status, - hp->device_status); - break; /* cop->driver_status <-- 0 in this case */ - } - if (mhp->co_mmap) { - sg_sgat_cp_into(mhp->co_mmap_sgatp, j * SZ_SG_IO_V4, - (const u8 *)hp, SZ_SG_IO_V4); - if (rq_sfp->async_qp && (hp->flags & SGV4_FLAG_SIGNAL)) - kill_fasync(&rq_sfp->async_qp, SIGPOLL, - POLL_IN); - if (rq_sfp->efd_ctxp && - (srp->rq_flags & SGV4_FLAG_EVENTFD)) { - u64 n = eventfd_signal(rq_sfp->efd_ctxp, 1); - - if (n != 1) - pr_info("%s: eventfd_signal prob\n", - __func__); - } - } else if (rq_sfp->async_qp && - (hp->flags & SGV4_FLAG_SIGNAL)) { - res = sg_mrq_arr_flush(mhp); - if (unlikely(res)) - break; - kill_fasync(&rq_sfp->async_qp, SIGPOLL, POLL_IN); - } + } /* end of dispatch request and optionally wait response loop */ cop->dout_resid = mhp->tot_reqs - num_subm; cop->info = mhp->immed ? num_subm : num_cmpl; @@ -1565,238 +1747,342 @@ sg_process_most_mrq(struct sg_fd *fp, struct sg_fd *o_sfp, return res; } +/* For multiple requests (mrq) share variable blocking (svb) with no SGV4_FLAG_ORDERED_WR */ static int -sg_find_srp_idx(struct sg_fd *sfp, const struct sg_request *srp) +sg_svb_mrq_first_come(struct sg_fd *fp, struct sg_fd *o_sfp, struct sg_mrq_hold *mhp, int ra_ind, + int *num_submp) { - int k; - struct sg_request **rapp = sfp->rsv_arr; + bool chk_oth_first = false; + bool is_first = true; + enum sg_rq_state rq_st; + int this_fp_sent = 0; + int other_fp_sent = 0; + int res = 0; + int first_err = 0; + int k, m, idx, ws_pos, num_reads, sent, dir; + struct sg_io_v4 *hp = mhp->a_hds + ra_ind; + struct sg_request *srp; + struct sg_request *rs_srp; + struct sg_svb_elem svb_arr[SG_MAX_RSV_REQS]; + + memset(svb_arr, 0, sizeof(svb_arr)); + for (k = 0; k < SG_MAX_RSV_REQS && ra_ind < mhp->tot_reqs; + ++hp, ++ra_ind, is_first = false) { + if (hp->flags & SGV4_FLAG_DO_ON_OTHER) { + if (hp->dout_xfer_len > 0) { /* need to await read-side completion */ + svb_arr[k].ws_pos = ra_ind; + ++k; + continue; /* deferred to next loop */ + } + if (is_first) + chk_oth_first = true; + SG_LOG(6, o_sfp, "%s: subm-nodat p_id=%d on write-side\n", __func__, + (int)hp->request_extra); + srp = sg_mrq_submit(o_sfp, mhp, ra_ind, -1, NULL); + if (!IS_ERR(srp)) + ++other_fp_sent; + } else { + rs_srp = (hp->din_xfer_len > 0) ? svb_arr[k].rs_srp : NULL; + SG_LOG(6, fp, "%s: submit p_id=%d on read-side\n", __func__, + (int)hp->request_extra); + srp = sg_mrq_submit(fp, mhp, ra_ind, -1, rs_srp); + if (!IS_ERR(srp)) + ++this_fp_sent; + } + if (IS_ERR(srp)) { + mhp->s_res = PTR_ERR(srp); + if (first_err == 0) + first_err = mhp->s_res; + SG_LOG(1, fp, "%s: sg_mrq_submit() err: %d\n", __func__, mhp->s_res); + break; /* stop doing rs submits */ + } + ++*num_submp; + if (hp->din_xfer_len > 0) + svb_arr[k].rs_srp = srp; + srp->s_hdr4.mrq_ind = ra_ind; + if (mhp->chk_abort) + atomic_set(&srp->s_hdr4.pack_id_of_mrq, mhp->id_of_mrq); + } /* end of read-side submission, write-side defer loop */ - for (k = 0; k < SG_MAX_RSV_REQS; ++k, ++rapp) { - if (*rapp == srp) - return k; + num_reads = k; + sent = this_fp_sent + other_fp_sent; + + for (k = 0; k < sent; ++k) { + if (other_fp_sent > 0 && sg_mrq_get_ready_srp(o_sfp, &srp)) { +other_found: + --other_fp_sent; + res = sg_mrq_1complet(mhp, o_sfp, srp); + if (unlikely(res)) + break; + continue; /* do available submits first */ + } + if (this_fp_sent > 0 && sg_mrq_get_ready_srp(fp, &srp)) { +this_found: + --this_fp_sent; + dir = srp->s_hdr4.dir; + res = sg_mrq_1complet(mhp, fp, srp); + if (unlikely(res)) + break; + if (dir != SG_DXFER_FROM_DEV) + continue; + if (test_bit(SG_FFD_READ_SIDE_ERR, fp->ffd_bm)) + continue; + /* read-side req completed, submit its write-side(s) */ + rs_srp = srp; + for (m = 0; m < num_reads; ++m) { + if (rs_srp == svb_arr[m].rs_srp) + break; + } + if (m >= num_reads) { + SG_LOG(1, fp, "%s: rs [pack_id=%d]: missing ws\n", __func__, + srp->pack_id); + continue; + } + rq_st = atomic_read(&rs_srp->rq_st); + if (rq_st == SG_RQ_INACTIVE) + continue; /* probably an error, bypass paired write-side rq */ + else if (rq_st != SG_RQ_SHR_SWAP) { + SG_LOG(1, fp, "%s: expect rs_srp to be in shr_swap\n", __func__); + res = -EPROTO; + break; + } + ws_pos = svb_arr[m].ws_pos; + for (idx = 0; idx < SG_MAX_RSV_REQS; ++idx) { + if (fp->rsv_arr[idx] == srp) + break; + } + if (idx >= SG_MAX_RSV_REQS) { + SG_LOG(1, fp, "%s: srp not in rsv_arr\n", __func__); + res = -EPROTO; + break; + } + SG_LOG(6, o_sfp, "%s: ws_pos=%d, rs_idx=%d\n", __func__, ws_pos, idx); + srp = sg_mrq_submit(o_sfp, mhp, ws_pos, idx, svb_arr[m].prev_ws_srp); + if (IS_ERR(srp)) { + mhp->s_res = PTR_ERR(srp); + if (mhp->s_res == -EFBIG) { /* out of reserve slots */ + if (first_err) + break; + res = mhp->s_res; + break; + } + if (first_err == 0) + first_err = mhp->s_res; + svb_arr[m].prev_ws_srp = NULL; + SG_LOG(1, o_sfp, "%s: mrq_submit(oth)->%d\n", __func__, mhp->s_res); + continue; + } + svb_arr[m].prev_ws_srp = srp; + ++*num_submp; + ++other_fp_sent; + ++sent; + srp->s_hdr4.mrq_ind = ws_pos; + if (mhp->chk_abort) + atomic_set(&srp->s_hdr4.pack_id_of_mrq, mhp->id_of_mrq); + continue; /* do available submits first */ + } + /* waits maybe interrupted by signals (-ERESTARTSYS) */ + if (chk_oth_first) + goto oth_first; +this_second: + if (this_fp_sent > 0) { + res = sg_wait_any_mrq(fp, &srp); + if (unlikely(res)) + break; + goto this_found; + } + if (chk_oth_first) + continue; +oth_first: + if (other_fp_sent > 0) { + res = sg_wait_any_mrq(o_sfp, &srp); + if (unlikely(res)) + break; + goto other_found; + } + if (chk_oth_first) + goto this_second; + } /* end of loop for deferred ws submits and all responses */ + + if (res == 0 && first_err) + res = first_err; + return res; +} + +static int +sg_svb_mrq_ordered(struct sg_fd *fp, struct sg_fd *o_sfp, struct sg_mrq_hold *mhp, int ra_ind, + int *num_submp) +{ + enum sg_rq_state rq_st; + int k, m, res, idx, ws_pos, num_reads; + int this_fp_sent = 0; + int other_fp_sent = 0; + struct sg_io_v4 *hp = mhp->a_hds + ra_ind; + struct sg_request *srp; + struct sg_request *rs_srp; + struct sg_svb_elem svb_arr[SG_MAX_RSV_REQS]; + + memset(svb_arr, 0, sizeof(svb_arr)); + for (k = 0; k < SG_MAX_RSV_REQS && ra_ind < mhp->tot_reqs; ++hp, ++ra_ind) { + if (hp->flags & SGV4_FLAG_DO_ON_OTHER) { + if (hp->dout_xfer_len > 0) { + /* need to await read-side completion */ + svb_arr[k].ws_pos = ra_ind; + ++k; + continue; /* deferred to next loop */ + } + SG_LOG(6, o_sfp, "%s: subm-nodat p_id=%d on write-side\n", __func__, + (int)hp->request_extra); + srp = sg_mrq_submit(o_sfp, mhp, ra_ind, -1, NULL); + if (!IS_ERR(srp)) + ++other_fp_sent; + } else { + rs_srp = (hp->din_xfer_len > 0) ? svb_arr[k].rs_srp : NULL; + SG_LOG(6, fp, "%s: submit p_id=%d on read-side\n", __func__, + (int)hp->request_extra); + srp = sg_mrq_submit(fp, mhp, ra_ind, -1, rs_srp); + if (!IS_ERR(srp)) + ++this_fp_sent; + } + if (IS_ERR(srp)) { + mhp->s_res = PTR_ERR(srp); + res = mhp->s_res; /* don't loop again */ + SG_LOG(1, fp, "%s: sg_mrq_submit() err: %d\n", __func__, res); + break; + } + ++*num_submp; + if (hp->din_xfer_len > 0) + svb_arr[k].rs_srp = srp; + srp->s_hdr4.mrq_ind = ra_ind; + if (mhp->chk_abort) + atomic_set(&srp->s_hdr4.pack_id_of_mrq, mhp->id_of_mrq); + } /* end of first, inner for loop */ + + num_reads = k; + + if (this_fp_sent + other_fp_sent <= 0) + return 0; + for (m = 0; m < num_reads; ++m) { + rs_srp = svb_arr[m].rs_srp; + if (!rs_srp) + continue; + res = sg_wait_poll_for_given_srp(fp, rs_srp, mhp->hipri); + if (unlikely(res)) + return res; + --this_fp_sent; + res = sg_mrq_1complet(mhp, fp, rs_srp); + if (unlikely(res)) + return res; + if (test_bit(SG_FFD_READ_SIDE_ERR, fp->ffd_bm)) + continue; + rq_st = atomic_read(&rs_srp->rq_st); + if (rq_st == SG_RQ_INACTIVE) + continue; /* probably an error, bypass paired write-side rq */ + else if (rq_st != SG_RQ_SHR_SWAP) { + SG_LOG(1, fp, "%s: expect rs_srp to be in shr_swap\n", __func__); + res = -EPROTO; + break; + } + ws_pos = svb_arr[m].ws_pos; + for (idx = 0; idx < SG_MAX_RSV_REQS; ++idx) { + if (fp->rsv_arr[idx] == rs_srp) + break; + } + if (idx >= SG_MAX_RSV_REQS) { + SG_LOG(1, rs_srp->parentfp, "%s: srp not in rsv_arr\n", __func__); + res = -EPROTO; + return res; + } + SG_LOG(6, o_sfp, "%s: ws_pos=%d, rs_idx=%d\n", __func__, ws_pos, idx); + srp = sg_mrq_submit(o_sfp, mhp, ws_pos, idx, svb_arr[m].prev_ws_srp); + if (IS_ERR(srp)) { + mhp->s_res = PTR_ERR(srp); + res = mhp->s_res; + SG_LOG(1, o_sfp, + "%s: mrq_submit(oth)->%d\n", + __func__, res); + return res; + } + svb_arr[m].prev_ws_srp = srp; + ++*num_submp; + ++other_fp_sent; + srp->s_hdr4.mrq_ind = ws_pos; + if (mhp->chk_abort) + atomic_set(&srp->s_hdr4.pack_id_of_mrq, + mhp->id_of_mrq); } - return -1; + while (this_fp_sent > 0) { /* non-data requests */ + res = sg_wait_any_mrq(fp, &srp); + if (unlikely(res)) + return res; + --this_fp_sent; + res = sg_mrq_1complet(mhp, fp, srp); + if (unlikely(res)) + return res; + } + while (other_fp_sent > 0) { + res = sg_wait_any_mrq(o_sfp, &srp); + if (unlikely(res)) + return res; + --other_fp_sent; + res = sg_mrq_1complet(mhp, o_sfp, srp); + if (unlikely(res)) + return res; + } + return 0; } /* - * Processes shared variable blocking. First inner loop submits a chunk of - * requests (some read-side, some non-data) but defers any write-side requests. The - * second inner loop processes the completions from the first inner loop, plus - * for any completed read-side request it submits the paired write-side request. The - * second inner loop also waits for the completions of those write-side requests. - * The outer loop then moves onto the next chunk, working its way through - * the multiple requests. The user sees a blocking command, but the chunks - * are run in parallel apart from read-write ordering requirement. - * N.B. Only one svb mrq permitted per file descriptor at a time. + * Processes shared variable blocking (svb) method for multiple requests (mrq). There are two + * variants: unordered write-side requests; and ordered write-side requests. The read-side requests + * are always issued in the order specified in the request array. The unordered write-side requests + * are processed on a "first come, first serve" basis, with the majority of the work done by + * sg_svb_mrq_first_come(). Likewise sg_svb_mrq_ordered() handles the majoity of the ordered + * write-side requests variant. Those two functions process a "chunk" of mrq_s at a time. This + * function loops until request array is exhausted and does some clean-up. N.B. the "only one mrq + * per fd" rule is enforced by the SG_FFD_SVB_ACTIVE file descriptor flag. */ static int sg_process_svb_mrq(struct sg_fd *fp, struct sg_fd *o_sfp, struct sg_mrq_hold *mhp) { bool aborted = false; - bool chk_oth_first, keep_share; - int k, j, i, m, rcv_before, idx, ws_pos, sent; - int this_fp_sent, other_fp_sent; + int j, delta_subm, subm_before, cmpl_before; int num_subm = 0; int num_cmpl = 0; int res = 0; - struct sg_fd *rq_sfp; struct sg_io_v4 *cop = mhp->cwrp->h4p; - struct sg_io_v4 *hp; /* ptr to request object in a_hds */ - struct sg_request *srp; - struct sg_request *rs_srp; - struct sg_io_v4 *a_hds = mhp->a_hds; - int ws_pos_a[SG_MAX_RSV_REQS]; /* write-side hdr pos within a_hds */ - struct sg_request *rs_srp_a[SG_MAX_RSV_REQS]; SG_LOG(3, fp, "%s: id_of_mrq=%d, tot_reqs=%d, enter\n", __func__, mhp->id_of_mrq, mhp->tot_reqs); - /* work through mrq array, SG_MAX_RSV_REQS read-side requests at a time */ - for (hp = a_hds, j = 0; j < mhp->tot_reqs; ) { - this_fp_sent = 0; - other_fp_sent = 0; - chk_oth_first = false; - for (k = 0; k < SG_MAX_RSV_REQS && j < mhp->tot_reqs; - ++hp, ++j) { - if (mhp->chk_abort && - test_and_clear_bit(SG_FFD_MRQ_ABORT, fp->ffd_bm)) { - SG_LOG(1, fp, - "%s: id_of_mrq=%d aborting at pos=%d\n", - __func__, mhp->id_of_mrq, num_subm); - aborted = true; - /* - * after mrq abort detected, complete those - * already submitted, but don't submit any more - */ - } - if (aborted) - break; - if (hp->flags & SGV4_FLAG_DO_ON_OTHER) { - if (hp->dout_xfer_len > 0) { - /* need to await read-side completion */ - ws_pos_a[k] = j; - ++k; - continue; /* deferred to next loop */ - } - chk_oth_first = true; - SG_LOG(6, o_sfp, - "%s: subm-nodat p_id=%d on write-side\n", - __func__, (int)hp->request_extra); - rq_sfp = o_sfp; - } else { - SG_LOG(6, fp, "%s: submit p_id=%d on read-side\n", - __func__, (int)hp->request_extra); - rq_sfp = fp; - } - srp = sg_mrq_submit(rq_sfp, mhp, j, -1, false); - if (IS_ERR(srp)) { - mhp->s_res = PTR_ERR(srp); - res = mhp->s_res; /* don't loop again */ - SG_LOG(1, rq_sfp, "%s: mrq_submit()->%d\n", - __func__, res); - break; - } - num_subm++; - if (hp->din_xfer_len > 0) - rs_srp_a[k] = srp; - srp->s_hdr4.mrq_ind = j; - if (mhp->chk_abort) - atomic_set(&srp->s_hdr4.pack_id_of_mrq, - mhp->id_of_mrq); - if (fp == rq_sfp) - ++this_fp_sent; - else - ++other_fp_sent; + /* outer loop: SG_MAX_RSV_REQS read-side requests (chunks) at a time */ + for (j = 0; j < mhp->tot_reqs; j += delta_subm) { + if (mhp->chk_abort && + test_and_clear_bit(SG_FFD_MRQ_ABORT, fp->ffd_bm)) { + SG_LOG(1, fp, "%s: id_of_mrq=%d aborting at pos=%d\n", __func__, + mhp->id_of_mrq, num_subm); + aborted = true; } - sent = this_fp_sent + other_fp_sent; - if (sent <= 0) + if (aborted) + break; + + subm_before = num_subm; + cmpl_before = cop->info; + if (mhp->ordered_wr) + res = sg_svb_mrq_ordered(fp, o_sfp, mhp, j, &num_subm); + else /* write-side request done on first come, first served basis */ + res = sg_svb_mrq_first_come(fp, o_sfp, mhp, j, &num_subm); + delta_subm = num_subm - subm_before; + num_cmpl += (cop->info - cmpl_before); + if (res || delta_subm == 0) /* error or didn't make progress */ break; - /* - * We have just submitted a fixed number read-side reqs and any - * others (that don't move data). Now we pick up their - * responses. Any responses that were read-side requests have - * their paired write-side submitted. Finally we wait for those - * paired write-side to complete. - */ - rcv_before = cop->info; - for (i = 0; i < sent; ++i) { /* now process responses */ - if (other_fp_sent > 0 && - sg_mrq_get_ready_srp(o_sfp, &srp)) { -other_found: - if (IS_ERR(srp)) { - res = PTR_ERR(srp); - break; - } - --other_fp_sent; - res = sg_mrq_1complet(mhp, o_sfp, srp); - if (unlikely(res)) - return res; - ++cop->info; - if (cop->din_xfer_len > 0) - --cop->din_resid; - continue; /* do available submits first */ - } - if (this_fp_sent > 0 && - sg_mrq_get_ready_srp(fp, &srp)) { -this_found: - if (IS_ERR(srp)) { - res = PTR_ERR(srp); - break; - } - --this_fp_sent; - res = sg_mrq_1complet(mhp, fp, srp); - if (unlikely(res)) - return res; - ++cop->info; - if (cop->din_xfer_len > 0) - --cop->din_resid; - if (srp->s_hdr4.dir != SG_DXFER_FROM_DEV) - continue; - if (test_bit(SG_FFD_READ_SIDE_ERR, fp->ffd_bm)) - continue; - /* read-side req completed, submit its write-side */ - rs_srp = srp; - for (m = 0; m < k; ++m) { - if (rs_srp == rs_srp_a[m]) - break; - } - if (m >= k) { - SG_LOG(1, rs_srp->parentfp, - "%s: m >= %d, pack_id=%d\n", - __func__, k, rs_srp->pack_id); - res = -EPROTO; - break; - } - ws_pos = ws_pos_a[m]; - idx = sg_find_srp_idx(fp, rs_srp); - if (idx < 0) { - SG_LOG(1, rs_srp->parentfp, - "%s: idx < 0\n", __func__); - res = -EPROTO; - break; - } - keep_share = false; -another_dout: - SG_LOG(6, o_sfp, - "%s: submit ws_pos=%d, rs_idx=%d\n", - __func__, ws_pos, idx); - srp = sg_mrq_submit(o_sfp, mhp, ws_pos, idx, - keep_share); - if (IS_ERR(srp)) { - mhp->s_res = PTR_ERR(srp); - res = mhp->s_res; - SG_LOG(1, o_sfp, - "%s: mrq_submit(oth)->%d\n", - __func__, res); - break; - } - ++num_subm; - ++other_fp_sent; - ++sent; - srp->s_hdr4.mrq_ind = ws_pos; - if (srp->rq_flags & SGV4_FLAG_KEEP_SHARE) { - ++ws_pos; /* next for same read-side */ - keep_share = true; - goto another_dout; - } - if (mhp->chk_abort) - atomic_set(&srp->s_hdr4.pack_id_of_mrq, - mhp->id_of_mrq); - continue; /* do available submits first */ - } - /* waits maybe interrupted by signals (-ERESTARTSYS) */ - if (chk_oth_first) - goto oth_first; -this_second: - if (this_fp_sent > 0) { - res = sg_wait_mrq_event(fp, &srp); - if (unlikely(res)) - return res; - goto this_found; - } - if (chk_oth_first) - continue; -oth_first: - if (other_fp_sent > 0) { - res = sg_wait_mrq_event(o_sfp, &srp); - if (unlikely(res)) - return res; - goto other_found; - } - if (chk_oth_first) - goto this_second; - } /* end of response/write_side_submit/write_side_response loop */ if (unlikely(mhp->s_res == -EFAULT || mhp->s_res == -ERESTARTSYS)) res = mhp->s_res; /* this may leave orphans */ - num_cmpl += (cop->info - rcv_before); if (res) break; - if (aborted) - break; - } /* end of outer for loop */ - + } cop->dout_resid = mhp->tot_reqs - num_subm; if (cop->din_xfer_len > 0) { cop->din_resid = mhp->tot_reqs - num_cmpl; @@ -1809,11 +2095,11 @@ sg_process_svb_mrq(struct sg_fd *fp, struct sg_fd *o_sfp, #if IS_ENABLED(SG_LOG_ACTIVE) static const char * -sg_mrq_name(bool blocking, u32 flags) +sg_mrq_name(bool from_sg_io, u32 flags) { if (!(flags & SGV4_FLAG_MULTIPLE_REQS)) return "_not_ multiple requests control object"; - if (blocking) + if (from_sg_io) return "ordered blocking"; if (flags & SGV4_FLAG_IMMED) return "submit or full non-blocking"; @@ -1824,16 +2110,16 @@ sg_mrq_name(bool blocking, u32 flags) #endif /* - * Implements the multiple request functionality. When 'blocking' is true + * Implements the multiple request functionality. When 'from_sg_io' is true * invocation was via ioctl(SG_IO), otherwise it was via ioctl(SG_IOSUBMIT). * Submit non-blocking if IMMED flag given or when ioctl(SG_IOSUBMIT) * is used with O_NONBLOCK set on its file descriptor. Hipri non-blocking * is when the HIPRI flag is given. */ static int -sg_do_multi_req(struct sg_comm_wr_t *cwrp, bool blocking) +sg_do_multi_req(struct sg_comm_wr_t *cwrp, bool from_sg_io) { - bool f_non_block, co_share; + bool f_non_block, is_svb; int res = 0; int existing_id; u32 cdb_mxlen; @@ -1854,14 +2140,16 @@ sg_do_multi_req(struct sg_comm_wr_t *cwrp, bool blocking) #endif mhp->cwrp = cwrp; - mhp->blocking = blocking; + mhp->from_sg_io = from_sg_io; /* false if from SG_IOSUBMIT */ #if IS_ENABLED(SG_LOG_ACTIVE) - mrq_name = sg_mrq_name(blocking, cop->flags); + mrq_name = sg_mrq_name(from_sg_io, cop->flags); #endif f_non_block = !!(fp->filp->f_flags & O_NONBLOCK); - co_share = !!(cop->flags & SGV4_FLAG_SHARE); + is_svb = !!(cop->flags & SGV4_FLAG_SHARE); /* via ioctl(SG_IOSUBMIT) only */ mhp->immed = !!(cop->flags & SGV4_FLAG_IMMED); + mhp->hipri = !!(cop->flags & SGV4_FLAG_HIPRI); mhp->stop_if = !!(cop->flags & SGV4_FLAG_STOP_IF); + mhp->ordered_wr = !!(cop->flags & SGV4_FLAG_ORDERED_WR); mhp->co_mmap = !!(cop->flags & SGV4_FLAG_MMAP_IO); if (mhp->co_mmap) mhp->co_mmap_sgatp = fp->rsv_arr[0]->sgatp; @@ -1881,13 +2169,13 @@ sg_do_multi_req(struct sg_comm_wr_t *cwrp, bool blocking) } else { mhp->chk_abort = false; } - if (blocking) { /* came from ioctl(SG_IO) */ + if (from_sg_io) { if (unlikely(mhp->immed)) { SG_LOG(1, fp, "%s: ioctl(SG_IO) %s contradicts\n", __func__, "with SGV4_FLAG_IMMED"); return -ERANGE; } - if (unlikely(co_share)) { + if (unlikely(is_svb)) { SG_LOG(1, fp, "%s: ioctl(SG_IO) %s contradicts\n", __func__, "with SGV4_FLAG_SHARE"); return -ERANGE; @@ -1899,7 +2187,7 @@ sg_do_multi_req(struct sg_comm_wr_t *cwrp, bool blocking) } } if (!mhp->immed && f_non_block) - mhp->immed = true; + mhp->immed = true; /* hmm, think about this */ SG_LOG(3, fp, "%s: %s, tot_reqs=%u, id_of_mrq=%d\n", __func__, mrq_name, tot_reqs, mhp->id_of_mrq); sg_v4h_partial_zero(cop); @@ -1943,10 +2231,16 @@ sg_do_multi_req(struct sg_comm_wr_t *cwrp, bool blocking) if (SG_IS_DETACHING(sdp) || (o_sfp && SG_IS_DETACHING(o_sfp->parentdp))) return -ENODEV; - + if (is_svb && unlikely(test_and_set_bit(SG_FFD_SVB_ACTIVE, fp->ffd_bm))) { + SG_LOG(1, fp, "%s: %s already active\n", __func__, mrq_name); + return -EBUSY; + } a_hds = kcalloc(tot_reqs, SZ_SG_IO_V4, GFP_KERNEL | __GFP_NOWARN); - if (unlikely(!a_hds)) + if (unlikely(!a_hds)) { + if (is_svb) + clear_bit(SG_FFD_SVB_ACTIVE, fp->ffd_bm); return -ENOMEM; + } if (copy_from_user(a_hds, cuptr64(cop->dout_xferp), tot_reqs * SZ_SG_IO_V4)) { res = -EFAULT; @@ -1967,40 +2261,29 @@ sg_do_multi_req(struct sg_comm_wr_t *cwrp, bool blocking) mhp->a_hds = a_hds; mhp->cdb_mxlen = cdb_mxlen; /* do sanity checks on all requests before starting */ - res = sg_mrq_sanity(mhp); - if (unlikely(res)) + if (unlikely(!sg_mrq_sanity(mhp, is_svb))) { + res = -ERANGE; goto fini; + } /* override cmd queuing setting to allow */ clear_bit(SG_FFD_NO_CMD_Q, fp->ffd_bm); if (o_sfp) clear_bit(SG_FFD_NO_CMD_Q, o_sfp->ffd_bm); - if (co_share) { - bool ok; - - /* check for 'shared' variable blocking (svb) */ - ok = sg_mrq_svb_chk(a_hds, tot_reqs); - if (!ok) { - SG_LOG(1, fp, "%s: %s failed on req(s)\n", __func__, - mrq_name); - res = -ERANGE; - goto fini; - } - if (test_and_set_bit(SG_FFD_SVB_ACTIVE, fp->ffd_bm)) { - SG_LOG(1, fp, "%s: %s already active\n", __func__, - mrq_name); - res = -EBUSY; - goto fini; - } + if (is_svb) res = sg_process_svb_mrq(fp, o_sfp, mhp); - clear_bit(SG_FFD_SVB_ACTIVE, fp->ffd_bm); - } else { + else res = sg_process_most_mrq(fp, o_sfp, mhp); - } fini: - if (likely(res == 0) && !mhp->immed) - res = sg_mrq_arr_flush(mhp); + if (!mhp->immed) { /* for the blocking mrq invocations */ + int rres = sg_mrq_arr_flush(mhp); + + if (unlikely(rres > 0 && res == 0)) + res = rres; + } + if (is_svb) + clear_bit(SG_FFD_SVB_ACTIVE, fp->ffd_bm); kfree(cdb_ap); kfree(a_hds); return res; @@ -2008,7 +2291,7 @@ sg_do_multi_req(struct sg_comm_wr_t *cwrp, bool blocking) static int sg_submit_v4(struct sg_fd *sfp, void __user *p, struct sg_io_v4 *h4p, - bool sync, struct sg_request **o_srp) + bool from_sg_io, struct sg_request **o_srp) { int res = 0; int dlen; @@ -2029,7 +2312,7 @@ sg_submit_v4(struct sg_fd *sfp, void __user *p, struct sg_io_v4 *h4p, *o_srp = NULL; cwr.sfp = sfp; cwr.h4p = h4p; - res = sg_do_multi_req(&cwr, sync); + res = sg_do_multi_req(&cwr, from_sg_io); if (unlikely(res)) return res; if (likely(p)) { @@ -2049,7 +2332,7 @@ sg_submit_v4(struct sg_fd *sfp, void __user *p, struct sg_io_v4 *h4p, clear_bit(SG_FFD_NO_CMD_Q, sfp->ffd_bm); ul_timeout = msecs_to_jiffies(h4p->timeout); cwr.sfp = sfp; - __assign_bit(SG_FRQ_SYNC_INVOC, cwr.frq_bm, (int)sync); + __assign_bit(SG_FRQ_SYNC_INVOC, cwr.frq_bm, (int)from_sg_io); __set_bit(SG_FRQ_IS_V4I, cwr.frq_bm); cwr.h4p = h4p; cwr.timeout = min_t(unsigned long, ul_timeout, INT_MAX); @@ -2062,7 +2345,7 @@ sg_submit_v4(struct sg_fd *sfp, void __user *p, struct sg_io_v4 *h4p, return PTR_ERR(srp); if (o_srp) *o_srp = srp; - if (p && !sync && (srp->rq_flags & SGV4_FLAG_YIELD_TAG)) { + if (p && !from_sg_io && (srp->rq_flags & SGV4_FLAG_YIELD_TAG)) { u64 gen_tag = srp->tag; struct sg_io_v4 __user *h4_up = (struct sg_io_v4 __user *)p; @@ -2239,12 +2522,11 @@ static const int sg_rq_state_arr[] = {1, 0, 4, 0, 0, 0}; static const int sg_rq_state_mul2arr[] = {2, 0, 8, 0, 0, 0}; /* - * This function keeps the srp->rq_st state and associated marks on the - * owning xarray's element in sync. An attempt si made to change state with - * a call to atomic_cmpxchg(). If the actual srp->rq_st is not old_st, then - * -EPROTOTYPE is returned. If the actual srp->rq_st is old_st then it is - * replaced by new_st and the xarray marks are setup accordingly and 0 is - * returned. This assumes srp_arr xarray spinlock is held. + * This function keeps the srp->rq_st state and associated marks on the owning xarray's element in + * sync. An attempt si made to change state with a call to atomic_cmpxchg(). If the actual + * srp->rq_st is not old_st, then -EPROTOTYPE is returned. If the actual srp->rq_st is old_st then + * it is replaced by new_st and the xarray marks are setup accordingly and 0 is returned. This + * function (and others ending in '_ulck') assumes srp_arr xarray spinlock is already held. */ static int sg_rq_chg_state_ulck(struct sg_request *srp, enum sg_rq_state old_st, @@ -2376,37 +2658,29 @@ sg_get_idx_available(struct sg_fd *sfp) static struct sg_request * sg_get_probable_read_side(struct sg_fd *sfp) { - struct sg_request **rapp = sfp->rsv_arr; - struct sg_request **end_rapp = rapp + SG_MAX_RSV_REQS; + struct sg_request **rapp; struct sg_request *rs_srp; + struct sg_request *rs_inactive_srp = NULL; - for ( ; rapp < end_rapp; ++rapp) { + for (rapp = sfp->rsv_arr; rapp < rapp + SG_MAX_RSV_REQS; ++rapp) { rs_srp = *rapp; if (IS_ERR_OR_NULL(rs_srp) || rs_srp->sh_srp) continue; - switch (atomic_read(&rs_srp->rq_st)) { + switch (atomic_read_acquire(&rs_srp->rq_st)) { case SG_RQ_INFLIGHT: case SG_RQ_AWAIT_RCV: case SG_RQ_BUSY: case SG_RQ_SHR_SWAP: return rs_srp; - default: - break; - } - } - /* Subsequent dout data transfers (e.g. WRITE) on a request share */ - for (rapp = sfp->rsv_arr; rapp < end_rapp; ++rapp) { - rs_srp = *rapp; - if (IS_ERR_OR_NULL(rs_srp) || rs_srp->sh_srp) - continue; - switch (atomic_read(&rs_srp->rq_st)) { case SG_RQ_INACTIVE: - return rs_srp; + if (!rs_inactive_srp) + rs_inactive_srp = rs_srp; + break; default: break; } } - return NULL; + return rs_inactive_srp; } /* @@ -2468,11 +2742,10 @@ sg_get_rsv_str_lck(struct sg_request *srp, const char *leadin, static void sg_execute_cmd(struct sg_fd *sfp, struct sg_request *srp) { - bool at_head, sync; + bool at_head; struct sg_device *sdp = sfp->parentdp; struct request *rqq = READ_ONCE(srp->rqq); - sync = test_bit(SG_FRQ_SYNC_INVOC, srp->frq_bm); SG_LOG(3, sfp, "%s: pack_id=%d\n", __func__, srp->pack_id); if (test_bit(SG_FFD_NO_DURATION, sfp->ffd_bm)) srp->start_ns = 0; @@ -2491,7 +2764,7 @@ sg_execute_cmd(struct sg_fd *sfp, struct sg_request *srp) kref_get(&sfp->f_ref); /* put usually in: sg_rq_end_io() */ sg_rq_chg_state_force(srp, SG_RQ_INFLIGHT); /* >>>>>>> send cmd/req off to other levels <<<<<<<< */ - if (!sync) { + if (!test_bit(SG_FRQ_SYNC_INVOC, srp->frq_bm)) { atomic_inc(&sfp->submitted); set_bit(SG_FRQ_COUNT_ACTIVE, srp->frq_bm); } @@ -2550,6 +2823,7 @@ sg_common_write(struct sg_comm_wr_t *cwrp) res = sg_share_chk_flags(fp, rq_flags, dlen, dir, &sh_var); if (unlikely(res < 0)) return ERR_PTR(res); + cwrp->keep_share = !!(rq_flags & SGV4_FLAG_KEEP_SHARE); } else { sh_var = SG_SHR_NONE; if (unlikely(rq_flags & SGV4_FLAG_SHARE)) @@ -2673,14 +2947,15 @@ sg_rec_state_v3v4(struct sg_fd *sfp, struct sg_request *srp, bool v4_active) enum sg_rq_state rs_st = SG_RQ_INACTIVE; struct sg_request *rs_srp; - if (unlikely(!scsi_status_is_good(rq_res))) { - int sb_len_wr = sg_copy_sense(srp, v4_active); + if (unlikely(!sg_result_is_good(rq_res))) { + srp->rq_info |= SG_INFO_CHECK; + if (!scsi_status_is_good(rq_res)) { + int sb_len_wr = sg_copy_sense(srp, v4_active); - if (unlikely(sb_len_wr < 0)) - return sb_len_wr; + if (unlikely(sb_len_wr < 0)) + return sb_len_wr; + } } - if (!sg_result_is_good(rq_res)) - srp->rq_info |= SG_INFO_CHECK; if (unlikely(test_bit(SG_FRQ_ABORTING, srp->frq_bm))) srp->rq_info |= SG_INFO_ABORTED; @@ -2881,7 +3156,7 @@ sg_mrq_iorec_complets(struct sg_fd *sfp, bool non_block, int max_mrqs, return k; for ( ; k < max_mrqs; ++k) { - res = sg_wait_mrq_event(sfp, &srp); + res = sg_wait_any_mrq(sfp, &srp); if (unlikely(res)) return res; /* signal --> -ERESTARTSYS */ if (IS_ERR(srp)) @@ -2945,10 +3220,14 @@ sg_mrq_ioreceive(struct sg_fd *sfp, struct sg_io_v4 *cop, void __user *p, return res; } +// sg_wait_id_event static int -sg_wait_id_event(struct sg_fd *sfp, struct sg_request **srpp, int id, - bool is_tag) +sg_wait_poll_by_id(struct sg_fd *sfp, struct sg_request **srpp, int id, + bool is_tag, int do_poll) { + if (do_poll) + goto poll_loop; + if (test_bit(SG_FFD_EXCL_WAITQ, sfp->ffd_bm)) return __wait_event_interruptible_exclusive (sfp->cmpl_wait, @@ -2956,6 +3235,28 @@ sg_wait_id_event(struct sg_fd *sfp, struct sg_request **srpp, int id, return __wait_event_interruptible (sfp->cmpl_wait, sg_get_ready_srp(sfp, srpp, id, is_tag)); +poll_loop: + { + bool sig_pending = false; + long state = current->state; + struct sg_request *srp; + + do { + srp = sg_find_srp_by_id(sfp, id, is_tag); + if (srp) { + __set_current_state(TASK_RUNNING); + *srpp = srp; + return 0; + } + if (signal_pending_state(state, current)) { + sig_pending = true; + break; + } + cpu_relax(); + } while (!need_resched()); + __set_current_state(TASK_RUNNING); + return sig_pending ? -ERESTARTSYS : -EAGAIN; + } } /* @@ -2988,9 +3289,10 @@ sg_ctl_ioreceive(struct sg_fd *sfp, void __user *p) if (unlikely(h4p->guard != 'Q' || h4p->protocol != 0 || h4p->subprotocol != 0)) return -EPERM; + SG_LOG(3, sfp, "%s: non_block=%d, immed=%d, hipri=%d\n", __func__, non_block, + !!(h4p->flags & SGV4_FLAG_IMMED), !!(h4p->flags & SGV4_FLAG_HIPRI)); if (h4p->flags & SGV4_FLAG_IMMED) non_block = true; /* set by either this or O_NONBLOCK */ - SG_LOG(3, sfp, "%s: non_block(+IMMED)=%d\n", __func__, non_block); if (h4p->flags & SGV4_FLAG_MULTIPLE_REQS) return sg_mrq_ioreceive(sfp, h4p, p, non_block); /* read in part of v3 or v4 header for pack_id or tag based find */ @@ -3001,20 +3303,20 @@ sg_ctl_ioreceive(struct sg_fd *sfp, void __user *p) else pack_id = h4p->request_extra; } - id = use_tag ? tag : pack_id; -try_again: - srp = sg_find_srp_by_id(sfp, id, use_tag); - if (!srp) { /* nothing available so wait on packet or */ - if (SG_IS_DETACHING(sdp)) - return -ENODEV; - if (non_block) - return -EAGAIN; - res = sg_wait_id_event(sfp, &srp, id, use_tag); - if (unlikely(res)) - return res; /* signal --> -ERESTARTSYS */ + id = use_tag ? tag : pack_id; +try_again: + if (non_block) { + srp = sg_find_srp_by_id(sfp, id, use_tag); + if (!srp) + return SG_IS_DETACHING(sdp) ? -ENODEV : -EAGAIN; + } else { + res = sg_wait_poll_by_id(sfp, &srp, pack_id, use_tag, + !!(h4p->flags & SGV4_FLAG_HIPRI)); if (IS_ERR(srp)) return PTR_ERR(srp); - } /* now srp should be valid */ + if (unlikely(res)) + return res; /* signal --> -ERESTARTSYS */ + } if (test_and_set_bit(SG_FRQ_RECEIVING, srp->frq_bm)) { cpu_relax(); goto try_again; @@ -3058,18 +3360,18 @@ sg_ctl_ioreceive_v3(struct sg_fd *sfp, void __user *p) if (test_bit(SG_FFD_FORCE_PACKID, sfp->ffd_bm)) pack_id = h3p->pack_id; try_again: - srp = sg_find_srp_by_id(sfp, pack_id, false); - if (!srp) { /* nothing available so wait on packet or */ - if (SG_IS_DETACHING(sdp)) - return -ENODEV; - if (non_block) - return -EAGAIN; - res = sg_wait_id_event(sfp, &srp, pack_id, false); + if (non_block) { + srp = sg_find_srp_by_id(sfp, pack_id, false); + if (!srp) + return SG_IS_DETACHING(sdp) ? -ENODEV : -EAGAIN; + } else { + res = sg_wait_poll_by_id(sfp, &srp, pack_id, false, + !!(h3p->flags & SGV4_FLAG_HIPRI)); if (unlikely(res)) return res; /* signal --> -ERESTARTSYS */ if (IS_ERR(srp)) return PTR_ERR(srp); - } /* now srp should be valid */ + } if (test_and_set_bit(SG_FRQ_RECEIVING, srp->frq_bm)) { cpu_relax(); goto try_again; @@ -3239,18 +3541,16 @@ sg_read(struct file *filp, char __user *p, size_t count, loff_t *ppos) } } try_again: - srp = sg_find_srp_by_id(sfp, want_id, false); - if (!srp) { /* nothing available so wait on packet to arrive or */ - if (SG_IS_DETACHING(sdp)) - return -ENODEV; - if (non_block) /* O_NONBLOCK or v3::flags & SGV4_FLAG_IMMED */ - return -EAGAIN; - ret = sg_wait_id_event(sfp, &srp, want_id, false); - if (unlikely(ret)) /* -ERESTARTSYS as signal hit process */ - return ret; + if (non_block) { + srp = sg_find_srp_by_id(sfp, want_id, false); + if (!srp) + return SG_IS_DETACHING(sdp) ? -ENODEV : -EAGAIN; + } else { + ret = sg_wait_poll_by_id(sfp, &srp, want_id, false, false); + if (unlikely(ret)) + return ret; /* signal --> -ERESTARTSYS */ if (IS_ERR(srp)) return PTR_ERR(srp); - /* otherwise srp should be valid */ } if (test_and_set_bit(SG_FRQ_RECEIVING, srp->frq_bm)) { cpu_relax(); @@ -3354,16 +3654,18 @@ sg_calc_sgat_param(struct sg_device *sdp) } /* - * Only valid for shared file descriptors. Designed to be called after a - * read-side request has successfully completed leaving valid data in a - * reserve request buffer. The read-side is moved from SG_RQ_SHR_SWAP - * to SG_RQ_INACTIVE state and returns 0. Acts on first reserve requests. - * Otherwise -EINVAL is returned, unless write-side is in progress in + * Only valid for shared file descriptors. Designed to be called after a read-side request has + * successfully completed leaving valid data in a reserve request buffer. May also be called after + * a write-side request that has the SGV4_FLAG_KEEP_SHARE flag set. If rs_srp is NULL, acts + * on first reserve request in SG_RQ_SHR_SWAP state, making it inactive and returning 0. If rs_srp + * is non-NULL and is a reserve request and is in SG_RQ_SHR_SWAP state, makes it busy then + * inactive and returns 0. Otherwise -EINVAL is returned, unless write-side is in progress in * which case -EBUSY is returned. */ static int -sg_finish_rs_rq(struct sg_fd *sfp) +sg_finish_rs_rq(struct sg_fd *sfp, struct sg_request *rs_srp, bool even_if_in_ws) { + bool found_one = false; int res = -EINVAL; int k; enum sg_rq_state sr_st; @@ -3381,26 +3683,24 @@ sg_finish_rs_rq(struct sg_fd *sfp) for (k = 0; k < SG_MAX_RSV_REQS; ++k) { res = -EINVAL; rs_rsv_srp = rs_sfp->rsv_arr[k]; + if (rs_srp) { + if (rs_srp != rs_rsv_srp) + continue; + } if (IS_ERR_OR_NULL(rs_rsv_srp)) continue; xa_lock_irqsave(&rs_sfp->srp_arr, iflags); sr_st = atomic_read(&rs_rsv_srp->rq_st); switch (sr_st) { case SG_RQ_SHR_SWAP: - res = sg_rq_chg_state_ulck(rs_rsv_srp, sr_st, SG_RQ_BUSY); - if (!res) - atomic_inc(&rs_sfp->inactives); - rs_rsv_srp->tag = SG_TAG_WILDCARD; - rs_rsv_srp->sh_var = SG_SHR_NONE; - set_bit(SG_FRQ_RESERVED, rs_rsv_srp->frq_bm); - rs_rsv_srp->in_resid = 0; - rs_rsv_srp->rq_info = 0; - rs_rsv_srp->sense_len = 0; - rs_rsv_srp->sh_srp = NULL; - sg_finish_scsi_blk_rq(rs_rsv_srp); - sg_deact_request(rs_rsv_srp->parentfp, rs_rsv_srp); + found_one = true; + break; + case SG_RQ_SHR_IN_WS: + if (even_if_in_ws) + found_one = true; + else + res = -EBUSY; break; - case SG_RQ_SHR_IN_WS: /* too late, write-side rq active */ case SG_RQ_BUSY: res = -EBUSY; break; @@ -3408,14 +3708,31 @@ sg_finish_rs_rq(struct sg_fd *sfp) res = -EINVAL; break; } + if (found_one) + goto found; xa_unlock_irqrestore(&rs_sfp->srp_arr, iflags); - if (res == 0) - return res; + if (rs_srp) + return res; /* found rs_srp but was in wrong state */ } fini: if (unlikely(res)) SG_LOG(1, sfp, "%s: err=%d\n", __func__, -res); return res; +found: + res = sg_rq_chg_state_ulck(rs_rsv_srp, sr_st, SG_RQ_BUSY); + if (!res) + atomic_inc(&rs_sfp->inactives); + rs_rsv_srp->tag = SG_TAG_WILDCARD; + rs_rsv_srp->sh_var = SG_SHR_NONE; + set_bit(SG_FRQ_RESERVED, rs_rsv_srp->frq_bm); + rs_rsv_srp->in_resid = 0; + rs_rsv_srp->rq_info = 0; + rs_rsv_srp->sense_len = 0; + rs_rsv_srp->sh_srp = NULL; + xa_unlock_irqrestore(&rs_sfp->srp_arr, iflags); + sg_finish_scsi_blk_rq(rs_rsv_srp); + sg_deact_request(rs_rsv_srp->parentfp, rs_rsv_srp); + return 0; } static void @@ -3523,7 +3840,7 @@ sg_remove_sfp_share(struct sg_fd *sfp, bool is_rd_side) if (IS_ERR_OR_NULL(rsv_srp) || rsv_srp->sh_var != SG_SHR_RS_RQ) continue; - sr_st = atomic_read(&rsv_srp->rq_st); + sr_st = atomic_read_acquire(&rsv_srp->rq_st); switch (sr_st) { case SG_RQ_SHR_SWAP: set_inactive = true; @@ -3732,66 +4049,6 @@ sg_fill_request_element(struct sg_fd *sfp, struct sg_request *srp, xa_unlock_irqrestore(&sfp->srp_arr, iflags); } -static inline bool -sg_rq_landed(struct sg_device *sdp, struct sg_request *srp) -{ - return atomic_read_acquire(&srp->rq_st) != SG_RQ_INFLIGHT || SG_IS_DETACHING(sdp); -} - -/* This is a blocking wait then complete for a specific srp. */ -static int -sg_wait_event_srp(struct sg_fd *sfp, void __user *p, struct sg_io_v4 *h4p, - struct sg_request *srp) -{ - int res; - struct sg_device *sdp = sfp->parentdp; - enum sg_rq_state sr_st; - - if (atomic_read(&srp->rq_st) != SG_RQ_INFLIGHT) - goto skip_wait; /* and skip _acquire() */ - if (srp->rq_flags & SGV4_FLAG_HIPRI) { - /* call blk_poll(), spinning till found */ - res = sg_srp_q_blk_poll(srp, sdp->device->request_queue, -1); - if (res != -ENODATA && unlikely(res < 0)) - return res; - goto skip_wait; - } - SG_LOG(3, sfp, "%s: about to wait_event...()\n", __func__); - /* N.B. The SG_FFD_EXCL_WAITQ flag is ignored here. */ - res = __wait_event_interruptible(sfp->cmpl_wait, - sg_rq_landed(sdp, srp)); - if (unlikely(res)) { /* -ERESTARTSYS because signal hit thread */ - set_bit(SG_FRQ_IS_ORPHAN, srp->frq_bm); - /* orphans harvested when sfp->keep_orphan is false */ - sg_rq_chg_state_force(srp, SG_RQ_INFLIGHT); - SG_LOG(1, sfp, "%s: wait_event_interruptible(): %s[%d]\n", - __func__, (res == -ERESTARTSYS ? "ERESTARTSYS" : ""), - res); - return res; - } -skip_wait: - if (SG_IS_DETACHING(sdp)) { - sg_rq_chg_state_force(srp, SG_RQ_INACTIVE); - atomic_inc(&sfp->inactives); - return -ENODEV; - } - sr_st = atomic_read(&srp->rq_st); - if (unlikely(sr_st != SG_RQ_AWAIT_RCV)) - return -EPROTO; /* Logic error */ - res = sg_rq_chg_state(srp, sr_st, SG_RQ_BUSY); - if (unlikely(res)) { -#if IS_ENABLED(SG_LOG_ACTIVE) - sg_rq_state_fail_msg(sfp, sr_st, SG_RQ_BUSY, __func__); -#endif - return res; - } - if (SG_IS_V4I(srp)) - res = sg_receive_v4(sfp, srp, p, h4p); - else - res = sg_receive_v3(sfp, srp, p); - return (res < 0) ? res : 0; -} - /* * Handles ioctl(SG_IO) for blocking (sync) usage of v3 or v4 interface. * Returns 0 on success else a negated errno. @@ -3799,6 +4056,7 @@ sg_wait_event_srp(struct sg_fd *sfp, void __user *p, struct sg_io_v4 *h4p, static int sg_ctl_sg_io(struct sg_device *sdp, struct sg_fd *sfp, void __user *p) { + bool is_v4, hipri; int res; struct sg_request *srp = NULL; u8 hu8arr[SZ_SG_IO_V4]; @@ -3828,8 +4086,12 @@ sg_ctl_sg_io(struct sg_device *sdp, struct sg_fd *sfp, void __user *p) ((u8 __user *)p) + v3_len, SZ_SG_IO_V4 - v3_len)) return -EFAULT; + is_v4 = true; + hipri = !!(h4p->flags & SGV4_FLAG_HIPRI); res = sg_submit_v4(sfp, p, h4p, true, &srp); } else if (h3p->interface_id == 'S') { + is_v4 = false; + hipri = !!(h3p->flags & SGV4_FLAG_HIPRI); res = sg_submit_v3(sfp, h3p, true, &srp); } else { pr_info_once("sg: %s: v3 or v4 interface only here\n", @@ -3840,7 +4102,7 @@ sg_ctl_sg_io(struct sg_device *sdp, struct sg_fd *sfp, void __user *p) return res; if (!srp) /* mrq case: already processed all responses */ return res; - res = sg_wait_event_srp(sfp, p, h4p, srp); + res = sg_wait_poll_for_given_srp(sfp, srp, hipri); #if IS_ENABLED(SG_LOG_ACTIVE) if (unlikely(res)) SG_LOG(1, sfp, "%s: %s=0x%pK state: %s, share: %s\n", @@ -3848,19 +4110,15 @@ sg_ctl_sg_io(struct sg_device *sdp, struct sg_fd *sfp, void __user *p) sg_rq_st_str(atomic_read(&srp->rq_st), false), sg_shr_str(srp->sh_var, false)); #endif + if (likely(res == 0)) { + if (is_v4) + res = sg_receive_v4(sfp, srp, p, h4p); + else + res = sg_receive_v3(sfp, srp, p); + } return res; } -static inline int -sg_num_waiting_maybe_acquire(struct sg_fd *sfp) -{ - int num = atomic_read(&sfp->waiting); - - if (num < 1) - num = atomic_read_acquire(&sfp->waiting); - return num; -} - /* * When use_tag is true then id is a tag, else it is a pack_id. Returns * valid srp if match, else returns NULL. @@ -3943,7 +4201,7 @@ sg_abort_req(struct sg_fd *sfp, struct sg_request *srp) __func__, srp->pack_id, srp->tag); goto fini; /* skip quietly if already aborted */ } - rq_st = atomic_read(&srp->rq_st); + rq_st = atomic_read_acquire(&srp->rq_st); SG_LOG(3, sfp, "%s: req pack_id/tag=%d/%d, status=%s\n", __func__, srp->pack_id, srp->tag, sg_rq_st_str(rq_st, false)); switch (rq_st) { @@ -4252,8 +4510,9 @@ sg_fd_share(struct sg_fd *ws_sfp, int m_fd) /* * After checking the proposed file share relationship is unique and - * valid, sets up pointers between read-side and write-side sg_fd objects. - * Allows previous write-side to be the same as the new new_ws_fd . + * valid, sets up pointers between read-side and write-side sg_fd objects. Allows + * previous write-side to be the same as the new write-side (fd). Return 0 on success + * or negated errno value. */ static int sg_fd_reshare(struct sg_fd *rs_sfp, int new_ws_fd) @@ -4447,6 +4706,7 @@ static int put_compat_request_table(struct compat_sg_req_info __user *o, struct sg_req_info *rinfo) { int i; + for (i = 0; i < SG_MAX_QUEUE; i++) { if (copy_to_user(o + i, rinfo + i, offsetof(sg_req_info_t, usr_ptr)) || put_user((uintptr_t)rinfo[i].usr_ptr, &o[i].usr_ptr) || @@ -4638,7 +4898,7 @@ sg_extended_bool_flags(struct sg_fd *sfp, struct sg_extended_info *seip) if (rs_sfp && !IS_ERR_OR_NULL(rs_sfp->rsv_arr[0])) { struct sg_request *res_srp = rs_sfp->rsv_arr[0]; - if (atomic_read(&res_srp->rq_st) == SG_RQ_SHR_SWAP) + if (atomic_read_acquire(&res_srp->rq_st) == SG_RQ_SHR_SWAP) c_flgs_val_out |= SG_CTL_FLAGM_READ_SIDE_FINI; else c_flgs_val_out &= ~SG_CTL_FLAGM_READ_SIDE_FINI; @@ -4647,8 +4907,8 @@ sg_extended_bool_flags(struct sg_fd *sfp, struct sg_extended_info *seip) } } if ((c_flgs_wm & SG_CTL_FLAGM_READ_SIDE_FINI) && - (c_flgs_val_in & SG_CTL_FLAGM_READ_SIDE_FINI)) - res = sg_finish_rs_rq(sfp); + (c_flgs_val_out & SG_CTL_FLAGM_READ_SIDE_FINI)) + res = sg_finish_rs_rq(sfp, NULL, false); /* READ_SIDE_ERR boolean, [ro] share: read-side finished with error */ if (c_flgs_rm & SG_CTL_FLAGM_READ_SIDE_ERR) { struct sg_fd *rs_sfp = sg_fd_share_ptr(sfp); @@ -4835,10 +5095,8 @@ sg_ctl_extended(struct sg_fd *sfp, void __user *p) } /* yields minor_index (type: u32) [ro] */ if (or_masks & SG_SEIM_MINOR_INDEX) { - if (s_wr_mask & SG_SEIM_MINOR_INDEX) { - SG_LOG(2, sfp, "%s: writing to minor_index ignored\n", - __func__); - } + if (s_wr_mask & SG_SEIM_MINOR_INDEX) + SG_LOG(2, sfp, "%s: writing to minor_index ignored\n", __func__); if (s_rd_mask & SG_SEIM_MINOR_INDEX) seip->minor_index = sdp->index; } @@ -4892,7 +5150,7 @@ sg_ctl_extended(struct sg_fd *sfp, void __user *p) n = 0; if (s_wr_mask & SG_SEIM_BLK_POLL) { result = sg_sfp_blk_poll(sfp, seip->num); - if (result < 0) { + if (unlikely(result < 0)) { if (ret == 0) ret = result; } else { @@ -5035,8 +5293,11 @@ sg_ioctl_common(struct file *filp, struct sg_device *sdp, struct sg_fd *sfp, switch (cmd_in) { case SG_GET_NUM_WAITING: /* Want as fast as possible, with a useful result */ - if (test_bit(SG_FFD_HIPRI_SEEN, sfp->ffd_bm)) - sg_sfp_blk_poll(sfp, 0); /* LLD may have some ready */ + if (test_bit(SG_FFD_HIPRI_SEEN, sfp->ffd_bm)) { + res = sg_sfp_blk_poll(sfp, 0); /* LLD may have some ready */ + if (unlikely(res < 0)) + return res; + } val = atomic_read(&sfp->waiting); if (val) return put_user(val, ip); @@ -5360,7 +5621,7 @@ sg_sfp_blk_poll(struct sg_fd *sfp, int loop_count) struct request_queue *q = sdev ? sdev->request_queue : NULL; struct xarray *xafp = &sfp->srp_arr; - if (!q) + if (unlikely(!q)) return -EINVAL; xa_lock_irqsave(xafp, iflags); xa_for_each(xafp, idx, srp) { @@ -5863,8 +6124,9 @@ sg_add_device(struct device *cl_dev, struct class_interface *cl_intf) if (unlikely(error)) pr_err("%s: unable to make symlink 'generic' back " "to sg%d\n", __func__, sdp->index); - } else + } else { pr_warn("%s: sg_sys Invalid\n", __func__); + } sdp->create_ns = ktime_get_boottime_ns(); sg_calc_sgat_param(sdp); @@ -6494,16 +6756,15 @@ sg_read_append(struct sg_request *srp, void __user *outp, int num_xfer) if (copy_to_user(outp, page_address(pgp), num_xfer)) res = -EFAULT; break; - } else { - if (copy_to_user(outp, page_address(pgp), num)) { - res = -EFAULT; - break; - } - num_xfer -= num; - if (num_xfer <= 0) - break; - outp += num; } + if (copy_to_user(outp, page_address(pgp), num)) { + res = -EFAULT; + break; + } + num_xfer -= num; + if (num_xfer <= 0) + break; + outp += num; } return res; } @@ -6520,10 +6781,8 @@ static struct sg_request * sg_find_srp_by_id(struct sg_fd *sfp, int id, bool is_tag) { __maybe_unused bool is_bad_st = false; - __maybe_unused enum sg_rq_state bad_sr_st = SG_RQ_INACTIVE; bool search_for_1 = (id != SG_TAG_WILDCARD); bool second = false; - enum sg_rq_state sr_st; int res; int l_await_idx = READ_ONCE(sfp->low_await_idx); unsigned long idx, s_idx; @@ -6531,8 +6790,11 @@ sg_find_srp_by_id(struct sg_fd *sfp, int id, bool is_tag) struct sg_request *srp = NULL; struct xarray *xafp = &sfp->srp_arr; - if (test_bit(SG_FFD_HIPRI_SEEN, sfp->ffd_bm)) - sg_sfp_blk_poll(sfp, 0); /* LLD may have some ready to push up */ + if (test_bit(SG_FFD_HIPRI_SEEN, sfp->ffd_bm)) { + res = sg_sfp_blk_poll(sfp, 0); /* LLD may have some ready to push up */ + if (unlikely(res < 0)) + return ERR_PTR(res); + } if (sg_num_waiting_maybe_acquire(sfp) < 1) return NULL; @@ -6552,30 +6814,9 @@ sg_find_srp_by_id(struct sg_fd *sfp, int id, bool is_tag) if (srp->pack_id != id) continue; } - sr_st = atomic_read(&srp->rq_st); - switch (sr_st) { - case SG_RQ_AWAIT_RCV: - res = sg_rq_chg_state(srp, sr_st, SG_RQ_BUSY); - if (likely(res == 0)) - goto good; - /* else another caller got it, move on */ - if (IS_ENABLED(CONFIG_SCSI_PROC_FS)) { - is_bad_st = true; - bad_sr_st = atomic_read(&srp->rq_st); - } - break; - case SG_RQ_SHR_IN_WS: + res = sg_rq_chg_state(srp, SG_RQ_AWAIT_RCV, SG_RQ_BUSY); + if (likely(res == 0)) goto good; - case SG_RQ_INFLIGHT: - break; - default: - if (IS_ENABLED(CONFIG_SCSI_PROC_FS)) { - is_bad_st = true; - bad_sr_st = sr_st; - } - break; - } - break; } /* If not found so far, need to wrap around and search [0 ... s_idx) */ if (!srp && !second && s_idx > 0) { @@ -6616,21 +6857,6 @@ sg_find_srp_by_id(struct sg_fd *sfp, int id, bool is_tag) goto second_time2; } } - /* here if one of above loops does _not_ find a match */ - if (IS_ENABLED(CONFIG_SCSI_PROC_FS)) { - if (search_for_1) { - __maybe_unused const char *cptp = is_tag ? "tag=" : - "pack_id="; - - if (unlikely(is_bad_st)) - SG_LOG(1, sfp, "%s: %s%d wrong state: %s\n", - __func__, cptp, id, - sg_rq_st_str(bad_sr_st, true)); - else - SG_LOG(6, sfp, "%s: %s%d not awaiting read\n", - __func__, cptp, id); - } - } return NULL; good: SG_LOG(5, sfp, "%s: %s%d found [srp=0x%pK]\n", __func__, @@ -6638,64 +6864,6 @@ sg_find_srp_by_id(struct sg_fd *sfp, int id, bool is_tag) return srp; } -/* - * Returns true if a request is ready and its srp is written to *srpp . If - * nothing can be found (because nothing is currently submitted) then true - * is returned and ERR_PTR(-ENODATA) --> *srpp . If nothing is found but - * sfp has requests submitted, returns false and NULL --> *srpp . - */ -static bool -sg_mrq_get_ready_srp(struct sg_fd *sfp, struct sg_request **srpp) -{ - bool second = false; - int res; - int l_await_idx = READ_ONCE(sfp->low_await_idx); - unsigned long idx, s_idx, end_idx; - struct sg_request *srp; - struct xarray *xafp = &sfp->srp_arr; - - if (SG_IS_DETACHING(sfp->parentdp)) { - *srpp = ERR_PTR(-ENODEV); - return true; - } - if (atomic_read(&sfp->submitted) < 1) { - *srpp = ERR_PTR(-ENODATA); - return true; - } - if (sg_num_waiting_maybe_acquire(sfp) < 1) - goto fini; - - s_idx = (l_await_idx < 0) ? 0 : l_await_idx; - idx = s_idx; - end_idx = ULONG_MAX; - -second_time: - for (srp = xa_find(xafp, &idx, end_idx, SG_XA_RQ_AWAIT); - srp; - srp = xa_find_after(xafp, &idx, end_idx, SG_XA_RQ_AWAIT)) { - res = sg_rq_chg_state(srp, SG_RQ_AWAIT_RCV, SG_RQ_BUSY); - if (likely(res == 0)) { - *srpp = srp; - WRITE_ONCE(sfp->low_await_idx, idx + 1); - return true; - } -#if IS_ENABLED(SG_LOG_ACTIVE) - sg_rq_state_fail_msg(sfp, SG_RQ_AWAIT_RCV, SG_RQ_BUSY, __func__); -#endif - } - /* If not found so far, need to wrap around and search [0 ... end_idx) */ - if (!srp && !second && s_idx > 0) { - end_idx = s_idx - 1; - s_idx = 0; - idx = s_idx; - second = true; - goto second_time; - } -fini: - *srpp = NULL; - return false; -} - /* * Makes a new sg_request object. If 'first' is set then use GFP_KERNEL which * may take time but has improved chance of success, otherwise use GFP_ATOMIC. @@ -6797,7 +6965,7 @@ static struct sg_request * sg_setup_req_ws_helper(struct sg_comm_wr_t *cwrp) { int res; - struct sg_request *r_srp; + struct sg_request *rs_srp; enum sg_rq_state rs_sr_st; struct sg_fd *fp = cwrp->sfp; struct sg_fd *rs_sfp = sg_fd_share_ptr(fp); @@ -6810,32 +6978,94 @@ sg_setup_req_ws_helper(struct sg_comm_wr_t *cwrp) * rq_state: SG_RQ_SHR_SWAP --> SG_RQ_SHR_IN_WS */ if (cwrp->rsv_idx >= 0) - r_srp = rs_sfp->rsv_arr[cwrp->rsv_idx]; + rs_srp = rs_sfp->rsv_arr[cwrp->rsv_idx]; else - r_srp = sg_get_probable_read_side(rs_sfp); - if (unlikely(!r_srp)) + rs_srp = sg_get_probable_read_side(rs_sfp); + if (unlikely(!rs_srp)) return ERR_PTR(-ENOSTR); - rs_sr_st = atomic_read(&r_srp->rq_st); + rs_sr_st = atomic_read(&rs_srp->rq_st); switch (rs_sr_st) { case SG_RQ_SHR_SWAP: break; case SG_RQ_AWAIT_RCV: case SG_RQ_INFLIGHT: - case SG_RQ_BUSY: - return ERR_PTR(-EBUSY); /* too early for write-side req */ - case SG_RQ_INACTIVE: - SG_LOG(1, fp, "%s: write-side finds read-side inactive\n", - __func__); + case SG_RQ_BUSY: /* too early for write-side req */ + return ERR_PTR(-EBUSY); + case SG_RQ_INACTIVE: /* read-side may have ended with an error */ + SG_LOG(1, fp, "%s: write-side finds read-side inactive\n", __func__); return ERR_PTR(-EADDRNOTAVAIL); - case SG_RQ_SHR_IN_WS: - SG_LOG(1, fp, "%s: write-side find read-side shr_in_ws\n", - __func__); + case SG_RQ_SHR_IN_WS: /* write-side already being processed, why another? */ + SG_LOG(1, fp, "%s: write-side find read-side shr_in_ws\n", __func__); return ERR_PTR(-EADDRINUSE); } - res = sg_rq_chg_state(r_srp, rs_sr_st, SG_RQ_SHR_IN_WS); + res = sg_rq_chg_state(rs_srp, rs_sr_st, SG_RQ_SHR_IN_WS); if (unlikely(res)) return ERR_PTR(-EADDRINUSE); + return rs_srp; +} + +static struct sg_request * +sg_setup_req_new_srp(struct sg_comm_wr_t *cwrp, bool new_rsv_srp, bool no_reqs, + bool *try_harderp) +{ + struct sg_fd *fp = cwrp->sfp; + int dlen = cwrp->dlen; + int res; + int ra_idx = 0; + u32 n_idx, sum_dlen; + unsigned long iflags; + struct sg_request *r_srp = NULL; + struct xarray *xafp = &fp->srp_arr; + + if (test_bit(SG_FFD_NO_CMD_Q, fp->ffd_bm) && atomic_read(&fp->submitted) > 0) { + SG_LOG(6, fp, "%s: trying 2nd req but cmd_q=false\n", __func__); + return ERR_PTR(-EDOM); + } else if (fp->tot_fd_thresh > 0) { + sum_dlen = atomic_read(&fp->sum_fd_dlens) + dlen; + if (unlikely(sum_dlen > (u32)fp->tot_fd_thresh)) { + SG_LOG(2, fp, "%s: sum_of_dlen(%u) > tot_fd_thresh\n", __func__, + sum_dlen); + return ERR_PTR(-E2BIG); + } + } + if (new_rsv_srp) { + ra_idx = sg_get_idx_new(fp); + if (ra_idx < 0) { + ra_idx = sg_get_idx_available(fp); + if (ra_idx < 0) { + SG_LOG(1, fp, "%s: run out of read-side reqs\n", __func__); + return ERR_PTR(-EFBIG); + } + } + } + r_srp = sg_mk_srp_sgat(fp, no_reqs, dlen); + if (IS_ERR(r_srp)) { + if (!*try_harderp && dlen < SG_DEF_SECTOR_SZ) { + *try_harderp = true; + return NULL; + } + return r_srp; + } + SG_LOG(4, fp, "%s: %smk_new_srp=0x%pK ++\n", __func__, (new_rsv_srp ? "rsv " : ""), + r_srp); + if (new_rsv_srp) { + fp->rsv_arr[ra_idx] = r_srp; + set_bit(SG_FRQ_RESERVED, r_srp->frq_bm); + r_srp->sh_srp = NULL; + } + xa_lock_irqsave(xafp, iflags); + res = __xa_alloc(xafp, &n_idx, r_srp, xa_limit_32b, GFP_ATOMIC); + if (unlikely(res < 0)) { + xa_unlock_irqrestore(xafp, iflags); + sg_remove_srp(r_srp); + kfree(r_srp); + SG_LOG(1, fp, "%s: xa_alloc() failed, errno=%d\n", __func__, -res); + return ERR_PTR(-EPROTOTYPE); + } + r_srp->rq_idx = n_idx; + r_srp->parentfp = fp; + xa_unlock_irqrestore(xafp, iflags); return r_srp; } @@ -6855,15 +7085,12 @@ sg_setup_req(struct sg_comm_wr_t *cwrp, enum sg_shr_var sh_var) bool new_rsv_srp = false; bool no_reqs = false; bool ws_rq = false; - bool some_inactive = false; bool try_harder = false; + bool keep_frq_bm = false; bool second = false; - bool is_rsv; - int ra_idx = 0; - int l_used_idx; + int res, ra_idx, l_used_idx; int dlen = cwrp->dlen; - u32 sum_dlen; - unsigned long idx, s_idx, end_idx, iflags; + unsigned long idx, s_idx, end_idx; enum sg_rq_state sr_st; struct sg_fd *fp = cwrp->sfp; struct sg_request *r_srp; /* returned value won't be NULL */ @@ -6875,16 +7102,27 @@ sg_setup_req(struct sg_comm_wr_t *cwrp, enum sg_shr_var sh_var) switch (sh_var) { case SG_SHR_RS_RQ: cp = "rs_rq"; + if (cwrp->possible_srp) { + r_srp = cwrp->possible_srp; + res = sg_rq_chg_state(r_srp, SG_RQ_INACTIVE, SG_RQ_BUSY); + if (unlikely(res)) { + r_srp = NULL; + } else { + atomic_dec(&fp->inactives); + keep_frq_bm = true; + r_srp->sh_srp = NULL; + goto final_setup; + } + } ra_idx = (test_bit(SG_FFD_RESHARE, fp->ffd_bm)) ? 0 : sg_get_idx_available(fp); if (ra_idx < 0) { new_rsv_srp = true; - goto good_fini; + goto maybe_new; } r_srp = fp->rsv_arr[ra_idx]; - sr_st = atomic_read(&r_srp->rq_st); + sr_st = atomic_read_acquire(&r_srp->rq_st); if (sr_st == SG_RQ_INACTIVE) { - int res = sg_rq_chg_state(r_srp, sr_st, SG_RQ_BUSY); - + res = sg_rq_chg_state(r_srp, sr_st, SG_RQ_BUSY); if (unlikely(res)) { r_srp = NULL; } else { @@ -6897,9 +7135,12 @@ sg_setup_req(struct sg_comm_wr_t *cwrp, enum sg_shr_var sh_var) } if (IS_ERR(r_srp)) goto err_out; - if (mk_new_srp) + if (mk_new_srp) { new_rsv_srp = true; - goto good_fini; + goto maybe_new; + } else { + goto final_setup; + } case SG_SHR_WS_RQ: cp = "rs_rq"; rs_rsv_srp = sg_setup_req_ws_helper(cwrp); @@ -6916,6 +7157,20 @@ sg_setup_req(struct sg_comm_wr_t *cwrp, enum sg_shr_var sh_var) goto err_out; } ws_rq = true; + r_srp = cwrp->possible_srp; + if (r_srp) { + sr_st = atomic_read_acquire(&r_srp->rq_st); + if (sr_st == SG_RQ_INACTIVE && dlen <= r_srp->sgat_h.buflen) { + res = sg_rq_chg_state(r_srp, sr_st, SG_RQ_BUSY); + if (likely(res == 0)) { + /* possible_srp bypasses loop to find candidate */ + mk_new_srp = false; + keep_frq_bm = true; + goto final_setup; + } + } + r_srp = NULL; + } dlen = 0; /* any srp for write-side will do, pick smallest */ break; case SG_SHR_RS_NOT_SRQ: @@ -6931,9 +7186,10 @@ sg_setup_req(struct sg_comm_wr_t *cwrp, enum sg_shr_var sh_var) mk_new_srp = true; } else if (atomic_read(&fp->inactives) <= 0) { mk_new_srp = true; - } else if (likely(!try_harder) && dlen < SG_DEF_SECTOR_SZ) { + } else if (dlen < SG_DEF_SECTOR_SZ && likely(!try_harder)) { struct sg_request *low_srp = NULL; + cp = "small dlen from inactives"; l_used_idx = READ_ONCE(fp->low_used_idx); s_idx = (l_used_idx < 0) ? 0 : l_used_idx; if (l_used_idx >= 0 && xa_get_mark(xafp, s_idx, SG_XA_RQ_INACTIVE)) { @@ -6965,13 +7221,13 @@ sg_setup_req(struct sg_comm_wr_t *cwrp, enum sg_shr_var sh_var) if (mk_new_srp && low_srp) { /* no candidate yet */ /* take non-NULL low_srp, irrespective of r_srp->sgat_h.buflen size */ r_srp = low_srp; - if (sg_rq_chg_state(r_srp, SG_RQ_INACTIVE, SG_RQ_BUSY) == 0) { + if (likely(sg_rq_chg_state(r_srp, SG_RQ_INACTIVE, SG_RQ_BUSY) == 0)) { mk_new_srp = false; atomic_dec(&fp->inactives); } } } else { - cp = "larger from srp_arr"; + cp = "larger dlen from inactives"; l_used_idx = READ_ONCE(fp->low_used_idx); s_idx = (l_used_idx < 0) ? 0 : l_used_idx; idx = s_idx; @@ -6982,7 +7238,7 @@ sg_setup_req(struct sg_comm_wr_t *cwrp, enum sg_shr_var sh_var) for (r_srp = xa_find(xafp, &idx, end_idx, SG_XA_RQ_INACTIVE); r_srp; r_srp = xa_find_after(xafp, &idx, end_idx, SG_XA_RQ_INACTIVE)) { - if (r_srp->sgat_h.buflen >= dlen) { + if (dlen <= r_srp->sgat_h.buflen) { if (sg_rq_chg_state(r_srp, SG_RQ_INACTIVE, SG_RQ_BUSY)) continue; atomic_dec(&fp->inactives); @@ -7003,7 +7259,7 @@ sg_setup_req(struct sg_comm_wr_t *cwrp, enum sg_shr_var sh_var) for (r_srp = xa_find(xafp, &idx, end_idx, SG_XA_RQ_INACTIVE); r_srp; r_srp = xa_find_after(xafp, &idx, end_idx, SG_XA_RQ_INACTIVE)) { - if (r_srp->sgat_h.buflen >= dlen && + if (dlen <= r_srp->sgat_h.buflen && !test_bit(SG_FRQ_RESERVED, r_srp->frq_bm)) { if (sg_rq_chg_state(r_srp, SG_RQ_INACTIVE, SG_RQ_BUSY)) continue; @@ -7023,89 +7279,34 @@ sg_setup_req(struct sg_comm_wr_t *cwrp, enum sg_shr_var sh_var) } } have_existing: - if (!mk_new_srp) { + if (!mk_new_srp) { /* re-using an existing sg_request object */ r_srp->in_resid = 0; r_srp->rq_info = 0; r_srp->sense_len = 0; } - -good_fini: +maybe_new: if (mk_new_srp) { /* Need new sg_request object */ - bool disallow_cmd_q = test_bit(SG_FFD_NO_CMD_Q, fp->ffd_bm); - int res; - u32 n_idx; - cp = "new"; - r_srp = NULL; - if (disallow_cmd_q && atomic_read(&fp->submitted) > 0) { - r_srp = ERR_PTR(-EDOM); - SG_LOG(6, fp, "%s: trying 2nd req but cmd_q=false\n", - __func__); - goto err_out; - } else if (fp->tot_fd_thresh > 0) { - sum_dlen = atomic_read(&fp->sum_fd_dlens) + dlen; - if (unlikely(sum_dlen > (u32)fp->tot_fd_thresh)) { - r_srp = ERR_PTR(-E2BIG); - SG_LOG(2, fp, "%s: sum_of_dlen(%u) > %s\n", - __func__, sum_dlen, "tot_fd_thresh"); - } - } - if (!IS_ERR(r_srp) && new_rsv_srp) { - ra_idx = sg_get_idx_new(fp); - if (ra_idx < 0) { - ra_idx = sg_get_idx_available(fp); - if (ra_idx < 0) { - SG_LOG(1, fp, - "%s: no read-side reqs available\n", - __func__); - r_srp = ERR_PTR(-EFBIG); - } - } - } - if (IS_ERR(r_srp)) /* NULL is _not_ an ERR here */ - goto err_out; - r_srp = sg_mk_srp_sgat(fp, no_reqs, dlen); - if (IS_ERR(r_srp)) { - if (!try_harder && dlen < SG_DEF_SECTOR_SZ && - some_inactive) { - try_harder = true; - goto start_again; - } + r_srp = sg_setup_req_new_srp(cwrp, new_rsv_srp, no_reqs, &try_harder); + if (IS_ERR(r_srp)) goto err_out; - } - SG_LOG(4, fp, "%s: %smk_new_srp=0x%pK ++\n", __func__, - (new_rsv_srp ? "rsv " : ""), r_srp); - if (new_rsv_srp) { - fp->rsv_arr[ra_idx] = r_srp; + if (!r_srp && try_harder) + goto start_again; + } +final_setup: + if (!keep_frq_bm) { + /* keep SG_FRQ_RESERVED setting from prior/new r_srp; clear rest */ + bool is_rsv = test_bit(SG_FRQ_RESERVED, r_srp->frq_bm); + + r_srp->frq_bm[0] = 0; + if (is_rsv) set_bit(SG_FRQ_RESERVED, r_srp->frq_bm); - r_srp->sh_srp = NULL; - } - xa_lock_irqsave(xafp, iflags); - res = __xa_alloc(xafp, &n_idx, r_srp, xa_limit_32b, GFP_ATOMIC); - xa_unlock_irqrestore(xafp, iflags); - if (unlikely(res < 0)) { - xa_unlock_irqrestore(xafp, iflags); - sg_remove_srp(r_srp); - kfree(r_srp); - r_srp = ERR_PTR(-EPROTOTYPE); - SG_LOG(1, fp, "%s: xa_alloc() failed, errno=%d\n", - __func__, -res); - goto err_out; - } - r_srp->rq_idx = n_idx; - r_srp->parentfp = fp; - xa_unlock_irqrestore(xafp, iflags); + /* r_srp inherits these flags from cwrp->frq_bm */ + if (test_bit(SG_FRQ_IS_V4I, cwrp->frq_bm)) + set_bit(SG_FRQ_IS_V4I, r_srp->frq_bm); + if (test_bit(SG_FRQ_SYNC_INVOC, cwrp->frq_bm)) + set_bit(SG_FRQ_SYNC_INVOC, r_srp->frq_bm); } - /* keep SG_FRQ_RESERVED setting from prior/new r_srp; clear rest */ - is_rsv = test_bit(SG_FRQ_RESERVED, r_srp->frq_bm); - WRITE_ONCE(r_srp->frq_bm[0], 0); - if (is_rsv) - set_bit(SG_FRQ_RESERVED, r_srp->frq_bm); - /* r_srp inherits these 3 flags from cwrp->frq_bm */ - if (test_bit(SG_FRQ_IS_V4I, cwrp->frq_bm)) - set_bit(SG_FRQ_IS_V4I, r_srp->frq_bm); - if (test_bit(SG_FRQ_SYNC_INVOC, cwrp->frq_bm)) - set_bit(SG_FRQ_SYNC_INVOC, r_srp->frq_bm); r_srp->sgatp->dlen = dlen; /* must be <= r_srp->sgat_h.buflen */ r_srp->sh_var = sh_var; r_srp->cmd_opcode = 0xff; /* set invalid opcode (VS), 0x0 is TUR */ @@ -7140,7 +7341,7 @@ sg_setup_req(struct sg_comm_wr_t *cwrp, enum sg_shr_var sh_var) /* * Sets srp to SG_RQ_INACTIVE unless it was in SG_RQ_SHR_SWAP state. Also - * change the asociated xarray entry flags to be consistent with + * change the associated xarray entry flags to be consistent with * SG_RQ_INACTIVE. Since this function can be called from many contexts, * then assume no xa locks held. * The state machine should insure that two threads should never race here. @@ -7157,7 +7358,7 @@ sg_deact_request(struct sg_fd *sfp, struct sg_request *srp) SG_LOG(3, sfp, "%s: srp=%pK\n", __func__, srp); sbp = srp->sense_bp; srp->sense_bp = NULL; - sr_st = atomic_read(&srp->rq_st); + sr_st = atomic_read_acquire(&srp->rq_st); if (sr_st != SG_RQ_SHR_SWAP) { /* * Can be called from many contexts and it is hard to know @@ -7621,17 +7822,16 @@ sg_proc_seq_show_dev(struct seq_file *s, void *v) read_lock_irqsave(&sg_index_lock, iflags); sdp = it ? sg_lookup_dev(it->index) : NULL; - if (unlikely(!sdp || !sdp->device || SG_IS_DETACHING(sdp))) + if (unlikely(!sdp || !sdp->device) || SG_IS_DETACHING(sdp)) { seq_puts(s, "-1\t-1\t-1\t-1\t-1\t-1\t-1\t-1\t-1\n"); - else { + } else { scsidp = sdp->device; seq_printf(s, "%d\t%d\t%d\t%llu\t%d\t%d\t%d\t%d\t%d\n", - scsidp->host->host_no, scsidp->channel, - scsidp->id, scsidp->lun, (int)scsidp->type, - 1, - (int) scsidp->queue_depth, - (int) scsi_device_busy(scsidp), - (int) scsi_device_online(scsidp)); + scsidp->host->host_no, scsidp->channel, + scsidp->id, scsidp->lun, (int)scsidp->type, 1, + (int)scsidp->queue_depth, + (int)scsi_device_busy(scsidp), + (int)scsi_device_online(scsidp)); } read_unlock_irqrestore(&sg_index_lock, iflags); return 0; @@ -7663,8 +7863,7 @@ sg_proc_seq_show_devstrs(struct seq_file *s, void *v) /* Writes debug info for one sg_request in obp buffer */ static int -sg_proc_debug_sreq(struct sg_request *srp, int to, bool t_in_ns, char *obp, - int len) +sg_proc_debug_sreq(struct sg_request *srp, int to, bool t_in_ns, bool inactive, char *obp, int len) { bool is_v3v4, v4, is_dur; int n = 0; @@ -7708,6 +7907,13 @@ sg_proc_debug_sreq(struct sg_request *srp, int to, bool t_in_ns, char *obp, n += scnprintf(obp + n, len - n, " sgat=%d", srp->sgatp->num_sgat); cp = (srp->rq_flags & SGV4_FLAG_HIPRI) ? "hipri " : ""; n += scnprintf(obp + n, len - n, " %sop=0x%02x\n", cp, srp->cmd_opcode); + if (inactive && rq_st != SG_RQ_INACTIVE) { + if (xa_get_mark(&srp->parentfp->srp_arr, srp->rq_idx, SG_XA_RQ_INACTIVE)) + cp = "still marked inactive, BAD"; + else + cp = "no longer marked inactive"; + n += scnprintf(obp + n, len - n, " <<< xarray %s >>>\n", cp); + } return n; } @@ -7767,8 +7973,7 @@ sg_proc_debug_fd(struct sg_fd *fp, char *obp, int len, unsigned long idx, xa_lock_irqsave(&fp->srp_arr, iflags); xa_for_each(&fp->srp_arr, idx, srp) { if (srp->rq_idx != (unsigned long)idx) - n += scnprintf(obp + n, len - n, - ">>> xa_index=%lu, rq_idx=%d, bad\n", + n += scnprintf(obp + n, len - n, ">>> BAD: xa_index!=rq_idx [%lu,%u]\n", idx, srp->rq_idx); if (xa_get_mark(&fp->srp_arr, idx, SG_XA_RQ_INACTIVE)) continue; @@ -7778,8 +7983,7 @@ sg_proc_debug_fd(struct sg_fd *fp, char *obp, int len, unsigned long idx, else if (test_bit(SG_FRQ_ABORTING, srp->frq_bm)) n += scnprintf(obp + n, len - n, " abort>> "); - n += sg_proc_debug_sreq(srp, fp->timeout, t_in_ns, obp + n, - len - n); + n += sg_proc_debug_sreq(srp, fp->timeout, t_in_ns, false, obp + n, len - n); ++k; if ((k % 8) == 0) { /* don't hold up isr_s too long */ xa_unlock_irqrestore(&fp->srp_arr, iflags); @@ -7796,8 +8000,7 @@ sg_proc_debug_fd(struct sg_fd *fp, char *obp, int len, unsigned long idx, if (set_debug) n += scnprintf(obp + n, len - n, " rq_bm=0x%lx", srp->frq_bm[0]); - n += sg_proc_debug_sreq(srp, fp->timeout, t_in_ns, - obp + n, len - n); + n += sg_proc_debug_sreq(srp, fp->timeout, t_in_ns, true, obp + n, len - n); ++k; if ((k % 8) == 0) { /* don't hold up isr_s too long */ xa_unlock_irqrestore(&fp->srp_arr, iflags); diff --git a/include/uapi/scsi/sg.h b/include/uapi/scsi/sg.h index 148a5f2786ee..236ac4678f71 100644 --- a/include/uapi/scsi/sg.h +++ b/include/uapi/scsi/sg.h @@ -127,6 +127,7 @@ typedef struct sg_io_hdr { #define SGV4_FLAG_NO_DXFER SG_FLAG_NO_DXFER /* but keep dev<-->kernel xfr */ #define SGV4_FLAG_KEEP_SHARE 0x20000 /* ... buffer for another dout command */ #define SGV4_FLAG_MULTIPLE_REQS 0x40000 /* 1 or more sg_io_v4-s in data-in */ +#define SGV4_FLAG_ORDERED_WR 0x80000 /* svb: issue in-order writes */ /* Output (potentially OR-ed together) in v3::info or v4::info field */ #define SG_INFO_OK_MASK 0x1

[v18,74/83] sg: add ordered write flag

Commit Message

Patch