Message ID | 1681996394-13099-6-git-send-email-quic_vnivarth@quicinc.com |
---|---|
State | New |
Headers | show |
Series | spi: Add DMA mode support to spi-qcom-qspi | expand |
Hi, On Thu, Apr 20, 2023 at 6:13 AM Vijaya Krishna Nivarthi <quic_vnivarth@quicinc.com> wrote: > > @@ -137,11 +155,29 @@ enum qspi_clocks { > QSPI_NUM_CLKS > }; > > +enum qspi_xfer_mode { > + QSPI_FIFO, > + QSPI_DMA > +}; > + > +/* > + * Number of entries in sgt returned from spi framework that- > + * will be supported. Can be modified as required. > + * In practice, given max_dma_len is 64KB, the number of > + * entries is not expected to exceed 1. > + */ > +#define QSPI_MAX_SG 5 I actually wonder if this would be more nicely done just using a linked list, which naturally mirrors how SGs work anyway. You'd add "struct list_head" to the end of "struct qspi_cmd_desc" and just store a pointer to the head in "struct qcom_qspi". For freeing, you can always get back the "virtual" address because it's just the address of each node. You can always get back the physical address because it's stored in "data_address". > @@ -223,6 +261,16 @@ static void qcom_qspi_handle_err(struct spi_master *master, > spin_lock_irqsave(&ctrl->lock, flags); > writel(0, ctrl->base + MSTR_INT_EN); Can you also clear all interrupts here? That will make sure that if the interrupt somehow fires after you run that it will detect that there's nothing to do. > ctrl->xfer.rem_bytes = 0; > + > + if (ctrl->xfer_mode == QSPI_DMA) { > + int i; > + > + /* free cmd descriptors */ > + for (i = 0; i < ctrl->n_cmd_desc; i++) > + dma_pool_free(ctrl->dma_cmd_pool, ctrl->virt_cmd_desc[i], > + ctrl->dma_cmd_desc[i]); > + ctrl->n_cmd_desc = 0; > + } Instead of checking for ctrl->xfer_mode, why not just check for ctrl->n_cmd_desc? Then you can get rid of "ctrl->xfer_mode". > @@ -258,6 +306,120 @@ static int qcom_qspi_set_speed(struct qcom_qspi *ctrl, unsigned long speed_hz) > return 0; > } > > +#define QSPI_ALIGN_REQ 32 nit: put this at the top of the file with other #defines. 
> +static int qcom_qspi_alloc_desc(struct qcom_qspi *ctrl, dma_addr_t dma_ptr, > + uint32_t n_bytes) > +{ > + struct qspi_cmd_desc *virt_cmd_desc, *prev; > + dma_addr_t dma_cmd_desc; > + > + /* allocate for dma cmd descriptor */ > + virt_cmd_desc = (struct qspi_cmd_desc *)dma_pool_alloc(ctrl->dma_cmd_pool, > + GFP_KERNEL, &dma_cmd_desc); Remove unnecessary cast; "void *" assigns fine w/out a cast. Add "| GFP_ZERO" and then get rid of the need to clear the "reserved" and "next_descriptor" stuff below. > + if (!virt_cmd_desc) { > + dev_err(ctrl->dev, > + "Could not allocate for cmd_desc\n"); > + return -ENOMEM; > + } You never need to add an extra message for allocation failures (they already splat). Remove it. > + ctrl->virt_cmd_desc[ctrl->n_cmd_desc] = virt_cmd_desc; > + ctrl->dma_cmd_desc[ctrl->n_cmd_desc] = dma_cmd_desc; > + ctrl->n_cmd_desc++; > + > + /* setup cmd descriptor */ > + virt_cmd_desc->data_address = dma_ptr; > + virt_cmd_desc->next_descriptor = 0; > + virt_cmd_desc->direction = ctrl->xfer.dir; > + virt_cmd_desc->multi_io_mode = qspi_buswidth_to_iomode(ctrl, ctrl->xfer.buswidth); > + virt_cmd_desc->reserved1 = 0; > + virt_cmd_desc->fragment = ctrl->xfer.is_last ? 0 : 1; virt_cmd_desc->fragment = !ctrl->xfer.is_last; > + virt_cmd_desc->reserved2 = 0; > + virt_cmd_desc->length = n_bytes; > + > + /* update previous descriptor */ > + if (ctrl->n_cmd_desc >= 2) { > + prev = (ctrl->virt_cmd_desc)[ctrl->n_cmd_desc - 2]; > + prev->next_descriptor = dma_cmd_desc; > + prev->fragment = 1; > + } > + > + return 0; > +} > + > +static int qcom_qspi_setup_dma_desc(struct qcom_qspi *ctrl, > + struct spi_transfer *xfer) > +{ > + int ret; > + struct sg_table *sgt; > + unsigned int sg_total_len = 0; > + dma_addr_t dma_ptr_sg; > + unsigned int dma_len_sg; > + int i; > + > + if (ctrl->n_cmd_desc) { > + dev_err(ctrl->dev, "Remnant dma buffers n_cmd_desc-%d\n", ctrl->n_cmd_desc); > + return -EIO; > + } > + > + sgt = (ctrl->xfer.dir == QSPI_READ) ? 
&xfer->rx_sg : &xfer->tx_sg; > + if (!sgt->nents || sgt->nents > QSPI_MAX_SG) { > + dev_err(ctrl->dev, "Cannot handle %d entries in scatter list\n", sgt->nents); > + return -EAGAIN; If you're retrying, don't use "dev_err" but instead "dev_warn". Similar in other places. > + } > + > + for (i = 0; i < sgt->nents; i++) { > + dma_ptr_sg = sg_dma_address(sgt->sgl + i); > + if (!IS_ALIGNED(dma_ptr_sg, QSPI_ALIGN_REQ)) { > + dev_err(ctrl->dev, "dma address-%pad not aligned to %d\n", > + &dma_ptr_sg, QSPI_ALIGN_REQ); In general it's not good practice to put pointer values into the error log as it can be an attack vector. Probably the %p will be replaced with something bogus anyway, but it'll look weird. > + return -EAGAIN; > + } > + sg_total_len += sg_dma_len(sgt->sgl + i); > + } > + > + if (sg_total_len != xfer->len) { > + dev_err(ctrl->dev, "Data lengths mismatch\n"); > + return -EAGAIN; > + } This feels like overly defensive programming. The SPI framework is what's in charge of setting up the scatter gather lists and it can be trusted to give you something where the total transfer length matches. IMO, drop that validation. I'm OK w/ keeping the double-check of the alignment since that's handled by the client drivers and they are less trustworthy. > + > + for (i = 0; i < sgt->nents; i++) { > + dma_ptr_sg = sg_dma_address(sgt->sgl + i); > + dma_len_sg = sg_dma_len(sgt->sgl + i); > + > + ret = qcom_qspi_alloc_desc(ctrl, dma_ptr_sg, dma_len_sg); > + if (ret) > + goto cleanup; > + } > + return 0; > + > +cleanup: > + dev_err(ctrl->dev, "ERROR cleanup in setup_dma_desc\n"); Drop above print--we should have already printed any relevant errors. 
> @@ -290,8 +454,37 @@ static int qcom_qspi_transfer_one(struct spi_master *master, > ctrl->xfer.is_last = list_is_last(&xfer->transfer_list, > &master->cur_msg->transfers); > ctrl->xfer.rem_bytes = xfer->len; > + > + if (xfer->rx_sg.nents || xfer->tx_sg.nents) { > + /* do DMA transfer */ > + ctrl->xfer_mode = QSPI_DMA; > + if (!(mstr_cfg & DMA_ENABLE)) { > + mstr_cfg |= DMA_ENABLE; > + writel(mstr_cfg, ctrl->base + MSTR_CONFIG); > + } > + > + ret = qcom_qspi_setup_dma_desc(ctrl, xfer); > + if (ret) { > + if (ret == -EAGAIN) { > + dev_err_once(ctrl->dev, "DMA failure, falling back to PIO"); > + goto do_pio; > + } > + spin_unlock_irqrestore(&ctrl->lock, flags); > + return ret; > + } > + qcom_qspi_dma_xfer(ctrl); > + goto end; > + } > + > +do_pio: A bit nitty, but the "do_pio" label feels like a slight stretch from what I consider the acceptable uses of "goto". Maybe it's better to avoid it? The "end" label is OK w/ me, though usually I see it called "exit". AKA: ... ret = qcom_qspi_setup_dma_desc(ctrl, xfer); if (ret != -EAGAIN) { if (!ret) qcom_qspi_dma_xfer(ctrl); goto exit; } dev_warn_once(...); ret = 0; /* We'll retry w/ PIO */ } ... ... exit: spin_unlock_irqrestore(&ctrl->lock, flags); if (ret) return ret; /* We'll call spi_finalize_current_transfer() when done */ return 1; > @@ -328,6 +521,17 @@ static int qcom_qspi_prepare_message(struct spi_master *master, > return 0; > } > > +static int qcom_qspi_alloc_dma(struct qcom_qspi *ctrl) > +{ > + /* allocate for cmd descriptors pool */ The above comment doesn't add much. Drop? 
> @@ -426,27 +630,48 @@ static irqreturn_t qcom_qspi_irq(int irq, void *dev_id) > int_status = readl(ctrl->base + MSTR_INT_STATUS); > writel(int_status, ctrl->base + MSTR_INT_STATUS); > > - if (ctrl->xfer.dir == QSPI_WRITE) { > - if (int_status & WR_FIFO_EMPTY) > - ret = pio_write(ctrl); > - } else { > - if (int_status & RESP_FIFO_RDY) > - ret = pio_read(ctrl); > - } > - > - if (int_status & QSPI_ERR_IRQS) { > - if (int_status & RESP_FIFO_UNDERRUN) > - dev_err(ctrl->dev, "IRQ error: FIFO underrun\n"); > - if (int_status & WR_FIFO_OVERRUN) > - dev_err(ctrl->dev, "IRQ error: FIFO overrun\n"); > - if (int_status & HRESP_FROM_NOC_ERR) > - dev_err(ctrl->dev, "IRQ error: NOC response error\n"); > - ret = IRQ_HANDLED; > - } > - > - if (!ctrl->xfer.rem_bytes) { > - writel(0, ctrl->base + MSTR_INT_EN); > - spi_finalize_current_transfer(dev_get_drvdata(ctrl->dev)); > + switch (ctrl->xfer_mode) { > + case QSPI_FIFO: > + if (ctrl->xfer.dir == QSPI_WRITE) { > + if (int_status & WR_FIFO_EMPTY) > + ret = pio_write(ctrl); > + } else { > + if (int_status & RESP_FIFO_RDY) > + ret = pio_read(ctrl); > + } > + > + if (int_status & QSPI_ERR_IRQS) { > + if (int_status & RESP_FIFO_UNDERRUN) > + dev_err(ctrl->dev, "IRQ error: FIFO underrun\n"); > + if (int_status & WR_FIFO_OVERRUN) > + dev_err(ctrl->dev, "IRQ error: FIFO overrun\n"); > + if (int_status & HRESP_FROM_NOC_ERR) > + dev_err(ctrl->dev, "IRQ error: NOC response error\n"); > + ret = IRQ_HANDLED; > + } > + > + if (!ctrl->xfer.rem_bytes) { > + writel(0, ctrl->base + MSTR_INT_EN); > + spi_finalize_current_transfer(dev_get_drvdata(ctrl->dev)); > + } > + break; > + case QSPI_DMA: > + if (int_status & DMA_CHAIN_DONE) { > + int i; > + > + writel(0, ctrl->base + MSTR_INT_EN); > + > + for (i = 0; i < ctrl->n_cmd_desc; i++) > + dma_pool_free(ctrl->dma_cmd_pool, ctrl->virt_cmd_desc[i], > + ctrl->dma_cmd_desc[i]); > + ctrl->n_cmd_desc = 0; > + > + ret = IRQ_HANDLED; > + spi_finalize_current_transfer(dev_get_drvdata(ctrl->dev)); > + } > + 
break; > + default: > + dev_err(ctrl->dev, "Unknown xfer mode:%d", ctrl->xfer_mode); I'm still of the opinion that you should drop xfer_mode, which means deleting it from above. > @@ -517,7 +742,14 @@ static int qcom_qspi_probe(struct platform_device *pdev) > return ret; > } > > + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32)); > + if (ret) > + return dev_err_probe(dev, ret, "could not set DMA mask\n"); > + > master->max_speed_hz = 300000000; > + master->max_dma_len = 65536; /* as per HPG */ > + /* intimate protocal drivers about alignment requirement */ Comment above doesn't add much and is already in the comment in the definition of the structure. Drop it. > + master->dma_alignment = QSPI_ALIGN_REQ; > master->num_chipselect = QSPI_NUM_CS; > master->bus_num = -1; > master->dev.of_node = pdev->dev.of_node; > @@ -528,6 +760,7 @@ static int qcom_qspi_probe(struct platform_device *pdev) > master->prepare_message = qcom_qspi_prepare_message; > master->transfer_one = qcom_qspi_transfer_one; > master->handle_err = qcom_qspi_handle_err; > + master->can_dma = qcom_qspi_can_dma; > master->auto_runtime_pm = true; > > ret = devm_pm_opp_set_clkname(&pdev->dev, "core"); > @@ -540,6 +773,11 @@ static int qcom_qspi_probe(struct platform_device *pdev) > return ret; > } > > + /* allocate for DMA descriptor pools */ Above comment is obvious from the name of the function you're calling.
Hi, On Thu, Apr 20, 2023 at 6:13 AM Vijaya Krishna Nivarthi <quic_vnivarth@quicinc.com> wrote: > > @@ -528,6 +760,7 @@ static int qcom_qspi_probe(struct platform_device *pdev) > master->prepare_message = qcom_qspi_prepare_message; > master->transfer_one = qcom_qspi_transfer_one; > master->handle_err = qcom_qspi_handle_err; > + master->can_dma = qcom_qspi_can_dma; One extra comment: it might be worth adding something like this (untested): if (of_property_read_bool(np, "iommus")) master->can_dma = qcom_qspi_can_dma; That will have the dual benefit of making old device trees work (which we're supposed to do if possible) and also making it easier to land these changes. Without something like that then I think transfers will fail for anyone who pulls in the SPI tree but not the Qualcomm DT tree.
Hi, Thanks a lot for the review and inputs... On 4/20/2023 10:49 PM, Doug Anderson wrote: > Hi, > > On Thu, Apr 20, 2023 at 6:13 AM Vijaya Krishna Nivarthi > <quic_vnivarth@quicinc.com> wrote: >> @@ -137,11 +155,29 @@ enum qspi_clocks { >> QSPI_NUM_CLKS >> }; >> >> +enum qspi_xfer_mode { >> + QSPI_FIFO, >> + QSPI_DMA >> +}; >> + >> +/* >> + * Number of entries in sgt returned from spi framework that- >> + * will be supported. Can be modified as required. >> + * In practice, given max_dma_len is 64KB, the number of >> + * entries is not expected to exceed 1. >> + */ >> +#define QSPI_MAX_SG 5 > I actually wonder if this would be more nicely done just using a > linked list, which naturally mirrors how SGs work anyway. You'd add > "struct list_head" to the end of "struct qspi_cmd_desc" and just store > a pointer to the head in "struct qcom_qspi". > > For freeing, you can always get back the "virtual" address because > it's just the address of each node. You can always get back the > physical address because it's stored in "data_address". > Please note that in "struct qspi_cmd_desc" data_address - dma_address of data buffer returned by spi framework next_descriptor - dma_address of the next descriptor in chain If we were to have a linked list of descriptors that we can parse and free, it would require 2 more fields this_descriptor_dma - dma address of the current descriptor next_descriptor_virt - virtual address of the next descriptor in chain I tried adding same and it seemed like it was getting a little confusing. Given the SGL is also an array in SGT, it seemed ok to retain an array, though it would have been good to have a chain of cmd_descriptors in SW like in HW. Agreed the fixed size array comes at a cost of artificial limitation of 5 entries, which anyway seems like something we are not going to encounter in practice. So for now, I retained the array. All the other comments are addressed as recommended, except 1 change below Please let me know what you think. 
Thank you, Vijay/ > >> +static int qcom_qspi_alloc_desc(struct qcom_qspi *ctrl, dma_addr_t dma_ptr, >> + uint32_t n_bytes) >> +{ >> + struct qspi_cmd_desc *virt_cmd_desc, *prev; >> + dma_addr_t dma_cmd_desc; >> + >> + /* allocate for dma cmd descriptor */ >> + virt_cmd_desc = (struct qspi_cmd_desc *)dma_pool_alloc(ctrl->dma_cmd_pool, >> + GFP_KERNEL, &dma_cmd_desc); > Remove unnecessary cast; "void *" assigns fine w/out a cast. > > Add "| GFP_ZERO" and then get rid of the need to clear the "reserved" > and "next_descriptor" stuff below. > I needed __GFP_ZERO to prevent a compile error, used same.
Hi, On Fri, Apr 21, 2023 at 9:58 AM Vijaya Krishna Nivarthi <quic_vnivarth@quicinc.com> wrote: > > Hi, > > Thanks a lot for the review and inputs... > > > On 4/20/2023 10:49 PM, Doug Anderson wrote: > > Hi, > > > > On Thu, Apr 20, 2023 at 6:13 AM Vijaya Krishna Nivarthi > > <quic_vnivarth@quicinc.com> wrote: > >> @@ -137,11 +155,29 @@ enum qspi_clocks { > >> QSPI_NUM_CLKS > >> }; > >> > >> +enum qspi_xfer_mode { > >> + QSPI_FIFO, > >> + QSPI_DMA > >> +}; > >> + > >> +/* > >> + * Number of entries in sgt returned from spi framework that- > >> + * will be supported. Can be modified as required. > >> + * In practice, given max_dma_len is 64KB, the number of > >> + * entries is not expected to exceed 1. > >> + */ > >> +#define QSPI_MAX_SG 5 > > I actually wonder if this would be more nicely done just using a > > linked list, which naturally mirrors how SGs work anyway. You'd add > > "struct list_head" to the end of "struct qspi_cmd_desc" and just store > > a pointer to the head in "struct qcom_qspi". > > > > For freeing, you can always get back the "virtual" address because > > it's just the address of each node. You can always get back the > > physical address because it's stored in "data_address". > > > Please note that in "struct qspi_cmd_desc" > > data_address - dma_address of data buffer returned by spi framework > > next_descriptor - dma_address of the next descriptor in chain > > > If we were to have a linked list of descriptors that we can parse and > free, it would require 2 more fields > > this_descriptor_dma - dma address of the current descriptor Isn't that exactly the same value as "data_address"? Sure, "data_address" is a u32 and the DMA address is 64-bits, but elsewhere in the code you already rely on the fact that the upper bits of the DMA address are 0 when you do: virt_cmd_desc->data_address = dma_ptr > next_descriptor_virt - virtual address of the next descriptor in chain Right, this would be the value of the next node in the linked list, right? 
So basically by adding a list_node_t you can find it easily. > >> +static int qcom_qspi_alloc_desc(struct qcom_qspi *ctrl, dma_addr_t dma_ptr, > >> + uint32_t n_bytes) > >> +{ > >> + struct qspi_cmd_desc *virt_cmd_desc, *prev; > >> + dma_addr_t dma_cmd_desc; > >> + > >> + /* allocate for dma cmd descriptor */ > >> + virt_cmd_desc = (struct qspi_cmd_desc *)dma_pool_alloc(ctrl->dma_cmd_pool, > >> + GFP_KERNEL, &dma_cmd_desc); > > Remove unnecessary cast; "void *" assigns fine w/out a cast. > > > > Add "| GFP_ZERO" and then get rid of the need to clear the "reserved" > > and "next_descriptor" stuff below. > > > I needed __GFP_ZERO to prevent a compile error, used same. Ah, sorry. Sounds good. -Doug
Hi, On 4/21/2023 11:05 PM, Doug Anderson wrote: > Hi, > > On Fri, Apr 21, 2023 at 9:58 AM Vijaya Krishna Nivarthi > <quic_vnivarth@quicinc.com> wrote: >> Hi, >> >> Thanks a lot for the review and inputs... >> >> >> On 4/20/2023 10:49 PM, Doug Anderson wrote: >>> Hi, >>> >>> On Thu, Apr 20, 2023 at 6:13 AM Vijaya Krishna Nivarthi >>> <quic_vnivarth@quicinc.com> wrote: >>>> @@ -137,11 +155,29 @@ enum qspi_clocks { >>>> QSPI_NUM_CLKS >>>> }; >>>> >>>> +enum qspi_xfer_mode { >>>> + QSPI_FIFO, >>>> + QSPI_DMA >>>> +}; >>>> + >>>> +/* >>>> + * Number of entries in sgt returned from spi framework that- >>>> + * will be supported. Can be modified as required. >>>> + * In practice, given max_dma_len is 64KB, the number of >>>> + * entries is not expected to exceed 1. >>>> + */ >>>> +#define QSPI_MAX_SG 5 >>> I actually wonder if this would be more nicely done just using a >>> linked list, which naturally mirrors how SGs work anyway. You'd add >>> "struct list_head" to the end of "struct qspi_cmd_desc" and just store >>> a pointer to the head in "struct qcom_qspi". >>> >>> For freeing, you can always get back the "virtual" address because >>> it's just the address of each node. You can always get back the >>> physical address because it's stored in "data_address". >>> >> Please note that in "struct qspi_cmd_desc" >> >> data_address - dma_address of data buffer returned by spi framework >> >> next_descriptor - dma_address of the next descriptor in chain >> >> >> If we were to have a linked list of descriptors that we can parse and >> free, it would require 2 more fields >> >> this_descriptor_dma - dma address of the current descriptor > Isn't that exactly the same value as "data_address"? Sure, > "data_address" is a u32 and the DMA address is 64-bits, but elsewhere > in the code you already rely on the fact that the upper bits of the > DMA address are 0 when you do: No; data_address is the dma_address mapped to the xfer buffer. 
This is provided by spi framework and retrieved from sgl. "this_descriptor" would be the dma_address of the current cmd_descriptor. this is returned from dma_pool_alloc() this would be required for freeing. > virt_cmd_desc->data_address = dma_ptr > > >> next_descriptor_virt - virtual address of the next descriptor in chain > Right, this would be the value of the next node in the linked list, > right? So basically by adding a list_node_t you can find it easily. > > >>>> +static int qcom_qspi_alloc_desc(struct qcom_qspi *ctrl, dma_addr_t dma_ptr, >>>> + uint32_t n_bytes) >>>> +{ >>>> + struct qspi_cmd_desc *virt_cmd_desc, *prev; >>>> + dma_addr_t dma_cmd_desc; >>>> + >>>> + /* allocate for dma cmd descriptor */ >>>> + virt_cmd_desc = (struct qspi_cmd_desc *)dma_pool_alloc(ctrl->dma_cmd_pool, >>>> + GFP_KERNEL, &dma_cmd_desc); >>> Remove unnecessary cast; "void *" assigns fine w/out a cast. >>> >>> Add "| GFP_ZERO" and then get rid of the need to clear the "reserved" >>> and "next_descriptor" stuff below. >>> >> I needed __GFP_ZERO to prevent a compile error, used same. > Ah, sorry. Sounds good. > > -Doug
On Fri, Apr 21, 2023 at 11:21 AM Vijaya Krishna Nivarthi <quic_vnivarth@quicinc.com> wrote: > > >> If we were to have a linked list of descriptors that we can parse and > >> free, it would require 2 more fields > >> > >> this_descriptor_dma - dma address of the current descriptor > > Isn't that exactly the same value as "data_address"? Sure, > > "data_address" is a u32 and the DMA address is 64-bits, but elsewhere > > in the code you already rely on the fact that the upper bits of the > > DMA address are 0 when you do: > > > No; data_address is the dma_address mapped to the xfer buffer. > > This is provided by spi framework and retrieved from sgl. > > "this_descriptor" would be the dma_address of the current cmd_descriptor. > > this is returned from dma_pool_alloc() > > this would be required for freeing. Oh! Of course, that's right. So you are correct, you'd need to add "this_descriptor_dma", but not the virtual address since that would be the same as the address of the structure via the list_node_t. I guess I won't insist on using a linked list even though it seems more elegant to me. In the very least it should fall back to PIO if the array isn't enough and if we need to change it later we always can. -Doug
Hi, Thank you very much for the review and all the inputs... On 4/22/2023 1:28 AM, Doug Anderson wrote: > On Fri, Apr 21, 2023 at 11:21 AM Vijaya Krishna Nivarthi > <quic_vnivarth@quicinc.com> wrote: >>>> If we were to have a linked list of descriptors that we can parse and >>>> free, it would require 2 more fields >>>> >>>> this_descriptor_dma - dma address of the current descriptor >>> Isn't that exactly the same value as "data_address"? Sure, >>> "data_address" is a u32 and the DMA address is 64-bits, but elsewhere >>> in the code you already rely on the fact that the upper bits of the >>> DMA address are 0 when you do: >> >> No; data_address is the dma_address mapped to the xfer buffer. >> >> This is provided by spi framework and retrieved from sgl. >> >> "this_descriptor" would be the dma_address of the current cmd_descriptor. >> >> this is returned from dma_pool_alloc() >> >> this would be required for freeing. > Oh! Of course, that's right. So you are correct, you'd need to add > "this_descriptor_dma", but not the virtual address since that would be > the same as the address of the structure via the list_node_t. I guess > I won't insist on using a linked list even though it seems more > elegant to me. In the very least it should fall back to PIO if the > array isn't enough and if we need to change it later we always can. > > -Doug Retained the array, addressed all other comments and uploaded v5. Conditional can_dma() and clearing interrupts in handle_err(), I thought were particularly helpful as they were potential problems later. test script was very useful too. Thank you.
diff --git a/drivers/spi/spi-qcom-qspi.c b/drivers/spi/spi-qcom-qspi.c index fab1553..879c293 100644 --- a/drivers/spi/spi-qcom-qspi.c +++ b/drivers/spi/spi-qcom-qspi.c @@ -2,6 +2,8 @@ // Copyright (c) 2017-2018, The Linux foundation. All rights reserved. #include <linux/clk.h> +#include <linux/dmapool.h> +#include <linux/dma-mapping.h> #include <linux/interconnect.h> #include <linux/interrupt.h> #include <linux/io.h> @@ -62,6 +64,7 @@ #define WR_FIFO_FULL BIT(10) #define WR_FIFO_OVERRUN BIT(11) #define TRANSACTION_DONE BIT(16) +#define DMA_CHAIN_DONE BIT(31) #define QSPI_ERR_IRQS (RESP_FIFO_UNDERRUN | HRESP_FROM_NOC_ERR | \ WR_FIFO_OVERRUN) #define QSPI_ALL_IRQS (QSPI_ERR_IRQS | RESP_FIFO_RDY | \ @@ -108,6 +111,10 @@ #define RD_FIFO_RESET 0x0030 #define RESET_FIFO BIT(0) +#define NEXT_DMA_DESC_ADDR 0x0040 +#define CURRENT_DMA_DESC_ADDR 0x0044 +#define CURRENT_MEM_ADDR 0x0048 + #define CUR_MEM_ADDR 0x0048 #define HW_VERSION 0x004c #define RD_FIFO 0x0050 @@ -120,6 +127,17 @@ enum qspi_dir { QSPI_WRITE, }; +struct qspi_cmd_desc { + u32 data_address; + u32 next_descriptor; + u32 direction:1; + u32 multi_io_mode:3; + u32 reserved1:4; + u32 fragment:1; + u32 reserved2:7; + u32 length:16; +}; + struct qspi_xfer { union { const void *tx_buf; @@ -137,11 +155,29 @@ enum qspi_clocks { QSPI_NUM_CLKS }; +enum qspi_xfer_mode { + QSPI_FIFO, + QSPI_DMA +}; + +/* + * Number of entries in sgt returned from spi framework that- + * will be supported. Can be modified as required. + * In practice, given max_dma_len is 64KB, the number of + * entries is not expected to exceed 1. 
+ */ +#define QSPI_MAX_SG 5 + struct qcom_qspi { void __iomem *base; struct device *dev; struct clk_bulk_data *clks; struct qspi_xfer xfer; + struct dma_pool *dma_cmd_pool; + dma_addr_t dma_cmd_desc[QSPI_MAX_SG]; + void *virt_cmd_desc[QSPI_MAX_SG]; + unsigned int n_cmd_desc; + enum qspi_xfer_mode xfer_mode; struct icc_path *icc_path_cpu_to_qspi; unsigned long last_speed; /* Lock to protect data accessed by IRQs */ @@ -153,21 +189,22 @@ static u32 qspi_buswidth_to_iomode(struct qcom_qspi *ctrl, { switch (buswidth) { case 1: - return SDR_1BIT << MULTI_IO_MODE_SHFT; + return SDR_1BIT; case 2: - return SDR_2BIT << MULTI_IO_MODE_SHFT; + return SDR_2BIT; case 4: - return SDR_4BIT << MULTI_IO_MODE_SHFT; + return SDR_4BIT; default: dev_warn_once(ctrl->dev, "Unexpected bus width: %u\n", buswidth); - return SDR_1BIT << MULTI_IO_MODE_SHFT; + return SDR_1BIT; } } static void qcom_qspi_pio_xfer_cfg(struct qcom_qspi *ctrl) { u32 pio_xfer_cfg; + u32 iomode; const struct qspi_xfer *xfer; xfer = &ctrl->xfer; @@ -179,7 +216,8 @@ static void qcom_qspi_pio_xfer_cfg(struct qcom_qspi *ctrl) else pio_xfer_cfg |= TRANSFER_FRAGMENT; pio_xfer_cfg &= ~MULTI_IO_MODE_MSK; - pio_xfer_cfg |= qspi_buswidth_to_iomode(ctrl, xfer->buswidth); + iomode = qspi_buswidth_to_iomode(ctrl, xfer->buswidth); + pio_xfer_cfg |= iomode << MULTI_IO_MODE_SHFT; writel(pio_xfer_cfg, ctrl->base + PIO_XFER_CFG); } @@ -223,6 +261,16 @@ static void qcom_qspi_handle_err(struct spi_master *master, spin_lock_irqsave(&ctrl->lock, flags); writel(0, ctrl->base + MSTR_INT_EN); ctrl->xfer.rem_bytes = 0; + + if (ctrl->xfer_mode == QSPI_DMA) { + int i; + + /* free cmd descriptors */ + for (i = 0; i < ctrl->n_cmd_desc; i++) + dma_pool_free(ctrl->dma_cmd_pool, ctrl->virt_cmd_desc[i], + ctrl->dma_cmd_desc[i]); + ctrl->n_cmd_desc = 0; + } spin_unlock_irqrestore(&ctrl->lock, flags); } @@ -242,7 +290,7 @@ static int qcom_qspi_set_speed(struct qcom_qspi *ctrl, unsigned long speed_hz) } /* - * Set BW quota for CPU as driver supports FIFO 
mode only. + * Set BW quota for CPU. * We don't have explicit peak requirement so keep it equal to avg_bw. */ avg_bw_cpu = Bps_to_icc(speed_hz); @@ -258,6 +306,120 @@ static int qcom_qspi_set_speed(struct qcom_qspi *ctrl, unsigned long speed_hz) return 0; } +#define QSPI_ALIGN_REQ 32 + +static int qcom_qspi_alloc_desc(struct qcom_qspi *ctrl, dma_addr_t dma_ptr, + uint32_t n_bytes) +{ + struct qspi_cmd_desc *virt_cmd_desc, *prev; + dma_addr_t dma_cmd_desc; + + /* allocate for dma cmd descriptor */ + virt_cmd_desc = (struct qspi_cmd_desc *)dma_pool_alloc(ctrl->dma_cmd_pool, + GFP_KERNEL, &dma_cmd_desc); + if (!virt_cmd_desc) { + dev_err(ctrl->dev, + "Could not allocate for cmd_desc\n"); + return -ENOMEM; + } + ctrl->virt_cmd_desc[ctrl->n_cmd_desc] = virt_cmd_desc; + ctrl->dma_cmd_desc[ctrl->n_cmd_desc] = dma_cmd_desc; + ctrl->n_cmd_desc++; + + /* setup cmd descriptor */ + virt_cmd_desc->data_address = dma_ptr; + virt_cmd_desc->next_descriptor = 0; + virt_cmd_desc->direction = ctrl->xfer.dir; + virt_cmd_desc->multi_io_mode = qspi_buswidth_to_iomode(ctrl, ctrl->xfer.buswidth); + virt_cmd_desc->reserved1 = 0; + virt_cmd_desc->fragment = ctrl->xfer.is_last ? 0 : 1; + virt_cmd_desc->reserved2 = 0; + virt_cmd_desc->length = n_bytes; + + /* update previous descriptor */ + if (ctrl->n_cmd_desc >= 2) { + prev = (ctrl->virt_cmd_desc)[ctrl->n_cmd_desc - 2]; + prev->next_descriptor = dma_cmd_desc; + prev->fragment = 1; + } + + return 0; +} + +static int qcom_qspi_setup_dma_desc(struct qcom_qspi *ctrl, + struct spi_transfer *xfer) +{ + int ret; + struct sg_table *sgt; + unsigned int sg_total_len = 0; + dma_addr_t dma_ptr_sg; + unsigned int dma_len_sg; + int i; + + if (ctrl->n_cmd_desc) { + dev_err(ctrl->dev, "Remnant dma buffers n_cmd_desc-%d\n", ctrl->n_cmd_desc); + return -EIO; + } + + sgt = (ctrl->xfer.dir == QSPI_READ) ? 
&xfer->rx_sg : &xfer->tx_sg; + if (!sgt->nents || sgt->nents > QSPI_MAX_SG) { + dev_err(ctrl->dev, "Cannot handle %d entries in scatter list\n", sgt->nents); + return -EAGAIN; + } + + for (i = 0; i < sgt->nents; i++) { + dma_ptr_sg = sg_dma_address(sgt->sgl + i); + if (!IS_ALIGNED(dma_ptr_sg, QSPI_ALIGN_REQ)) { + dev_err(ctrl->dev, "dma address-%pad not aligned to %d\n", + &dma_ptr_sg, QSPI_ALIGN_REQ); + return -EAGAIN; + } + sg_total_len += sg_dma_len(sgt->sgl + i); + } + + if (sg_total_len != xfer->len) { + dev_err(ctrl->dev, "Data lengths mismatch\n"); + return -EAGAIN; + } + + for (i = 0; i < sgt->nents; i++) { + dma_ptr_sg = sg_dma_address(sgt->sgl + i); + dma_len_sg = sg_dma_len(sgt->sgl + i); + + ret = qcom_qspi_alloc_desc(ctrl, dma_ptr_sg, dma_len_sg); + if (ret) + goto cleanup; + } + return 0; + +cleanup: + dev_err(ctrl->dev, "ERROR cleanup in setup_dma_desc\n"); + + for (i = 0; i < ctrl->n_cmd_desc; i++) + dma_pool_free(ctrl->dma_cmd_pool, ctrl->virt_cmd_desc[i], + ctrl->dma_cmd_desc[i]); + ctrl->n_cmd_desc = 0; + return ret; +} + +static void qcom_qspi_dma_xfer(struct qcom_qspi *ctrl) +{ + /* Setup new interrupts */ + writel(DMA_CHAIN_DONE, ctrl->base + MSTR_INT_EN); + + /* kick off transfer */ + writel((u32)((ctrl->dma_cmd_desc)[0]), ctrl->base + NEXT_DMA_DESC_ADDR); +} + +/* Switch to DMA if transfer length exceeds this */ +#define QSPI_MAX_BYTES_FIFO 64 + +static bool qcom_qspi_can_dma(struct spi_controller *ctlr, + struct spi_device *slv, struct spi_transfer *xfer) +{ + return xfer->len > QSPI_MAX_BYTES_FIFO; +} + static int qcom_qspi_transfer_one(struct spi_master *master, struct spi_device *slv, struct spi_transfer *xfer) @@ -266,6 +428,7 @@ static int qcom_qspi_transfer_one(struct spi_master *master, int ret; unsigned long speed_hz; unsigned long flags; + u32 mstr_cfg; speed_hz = slv->max_speed_hz; if (xfer->speed_hz) @@ -276,6 +439,7 @@ static int qcom_qspi_transfer_one(struct spi_master *master, return ret; spin_lock_irqsave(&ctrl->lock, flags); 
+ mstr_cfg = readl(ctrl->base + MSTR_CONFIG); /* We are half duplex, so either rx or tx will be set */ if (xfer->rx_buf) { @@ -290,8 +454,37 @@ static int qcom_qspi_transfer_one(struct spi_master *master, ctrl->xfer.is_last = list_is_last(&xfer->transfer_list, &master->cur_msg->transfers); ctrl->xfer.rem_bytes = xfer->len; + + if (xfer->rx_sg.nents || xfer->tx_sg.nents) { + /* do DMA transfer */ + ctrl->xfer_mode = QSPI_DMA; + if (!(mstr_cfg & DMA_ENABLE)) { + mstr_cfg |= DMA_ENABLE; + writel(mstr_cfg, ctrl->base + MSTR_CONFIG); + } + + ret = qcom_qspi_setup_dma_desc(ctrl, xfer); + if (ret) { + if (ret == -EAGAIN) { + dev_err_once(ctrl->dev, "DMA failure, falling back to PIO"); + goto do_pio; + } + spin_unlock_irqrestore(&ctrl->lock, flags); + return ret; + } + qcom_qspi_dma_xfer(ctrl); + goto end; + } + +do_pio: + ctrl->xfer_mode = QSPI_FIFO; + if (mstr_cfg & DMA_ENABLE) { + mstr_cfg &= ~DMA_ENABLE; + writel(mstr_cfg, ctrl->base + MSTR_CONFIG); + } qcom_qspi_pio_xfer(ctrl); +end: spin_unlock_irqrestore(&ctrl->lock, flags); /* We'll call spi_finalize_current_transfer() when done */ @@ -328,6 +521,17 @@ static int qcom_qspi_prepare_message(struct spi_master *master, return 0; } +static int qcom_qspi_alloc_dma(struct qcom_qspi *ctrl) +{ + /* allocate for cmd descriptors pool */ + ctrl->dma_cmd_pool = dmam_pool_create("qspi cmd desc pool", + ctrl->dev, sizeof(struct qspi_cmd_desc), 0, 0); + if (!ctrl->dma_cmd_pool) + return -ENOMEM; + + return 0; +} + static irqreturn_t pio_read(struct qcom_qspi *ctrl) { u32 rd_fifo_status; @@ -426,27 +630,48 @@ static irqreturn_t qcom_qspi_irq(int irq, void *dev_id) int_status = readl(ctrl->base + MSTR_INT_STATUS); writel(int_status, ctrl->base + MSTR_INT_STATUS); - if (ctrl->xfer.dir == QSPI_WRITE) { - if (int_status & WR_FIFO_EMPTY) - ret = pio_write(ctrl); - } else { - if (int_status & RESP_FIFO_RDY) - ret = pio_read(ctrl); - } - - if (int_status & QSPI_ERR_IRQS) { - if (int_status & RESP_FIFO_UNDERRUN) - dev_err(ctrl->dev, "IRQ 
error: FIFO underrun\n"); - if (int_status & WR_FIFO_OVERRUN) - dev_err(ctrl->dev, "IRQ error: FIFO overrun\n"); - if (int_status & HRESP_FROM_NOC_ERR) - dev_err(ctrl->dev, "IRQ error: NOC response error\n"); - ret = IRQ_HANDLED; - } - - if (!ctrl->xfer.rem_bytes) { - writel(0, ctrl->base + MSTR_INT_EN); - spi_finalize_current_transfer(dev_get_drvdata(ctrl->dev)); + switch (ctrl->xfer_mode) { + case QSPI_FIFO: + if (ctrl->xfer.dir == QSPI_WRITE) { + if (int_status & WR_FIFO_EMPTY) + ret = pio_write(ctrl); + } else { + if (int_status & RESP_FIFO_RDY) + ret = pio_read(ctrl); + } + + if (int_status & QSPI_ERR_IRQS) { + if (int_status & RESP_FIFO_UNDERRUN) + dev_err(ctrl->dev, "IRQ error: FIFO underrun\n"); + if (int_status & WR_FIFO_OVERRUN) + dev_err(ctrl->dev, "IRQ error: FIFO overrun\n"); + if (int_status & HRESP_FROM_NOC_ERR) + dev_err(ctrl->dev, "IRQ error: NOC response error\n"); + ret = IRQ_HANDLED; + } + + if (!ctrl->xfer.rem_bytes) { + writel(0, ctrl->base + MSTR_INT_EN); + spi_finalize_current_transfer(dev_get_drvdata(ctrl->dev)); + } + break; + case QSPI_DMA: + if (int_status & DMA_CHAIN_DONE) { + int i; + + writel(0, ctrl->base + MSTR_INT_EN); + + for (i = 0; i < ctrl->n_cmd_desc; i++) + dma_pool_free(ctrl->dma_cmd_pool, ctrl->virt_cmd_desc[i], + ctrl->dma_cmd_desc[i]); + ctrl->n_cmd_desc = 0; + + ret = IRQ_HANDLED; + spi_finalize_current_transfer(dev_get_drvdata(ctrl->dev)); + } + break; + default: + dev_err(ctrl->dev, "Unknown xfer mode:%d", ctrl->xfer_mode); } spin_unlock(&ctrl->lock); @@ -517,7 +742,14 @@ static int qcom_qspi_probe(struct platform_device *pdev) return ret; } + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32)); + if (ret) + return dev_err_probe(dev, ret, "could not set DMA mask\n"); + master->max_speed_hz = 300000000; + master->max_dma_len = 65536; /* as per HPG */ + /* intimate protocal drivers about alignment requirement */ + master->dma_alignment = QSPI_ALIGN_REQ; master->num_chipselect = QSPI_NUM_CS; master->bus_num = -1; 
master->dev.of_node = pdev->dev.of_node; @@ -528,6 +760,7 @@ static int qcom_qspi_probe(struct platform_device *pdev) master->prepare_message = qcom_qspi_prepare_message; master->transfer_one = qcom_qspi_transfer_one; master->handle_err = qcom_qspi_handle_err; + master->can_dma = qcom_qspi_can_dma; master->auto_runtime_pm = true; ret = devm_pm_opp_set_clkname(&pdev->dev, "core"); @@ -540,6 +773,11 @@ static int qcom_qspi_probe(struct platform_device *pdev) return ret; } + /* allocate for DMA descriptor pools */ + ret = qcom_qspi_alloc_dma(ctrl); + if (ret) + return ret; + pm_runtime_use_autosuspend(dev); pm_runtime_set_autosuspend_delay(dev, 250); pm_runtime_enable(dev);
Current driver supports only PIO mode. HW supports DMA, so add DMA mode support to the driver for better performance for larger xfers. Signed-off-by: Vijaya Krishna Nivarthi <quic_vnivarth@quicinc.com> --- v3 -> v4: - corrected tabs spacing - dropped dma data descriptors - dropped INVALID from xfer_mode enum - qspi_buswidth_to_iomode() to return iomode without shifting - dropped non-functional change in qcom_qspi_set_speed() - drop redundant call to wmb() - fallback to pio if dma fails to setup - use dmam_pool_create() the devm version - thus drop dma_pool_destroy() - set dma_alignment field in probe() - other minor changes v2 -> v3: - dropped Reported-by tag v1 -> v2: - modified commit message - addressed kernel test robot error - correct placement of header file includes - added more comments - coding style related changes - renamed variables - used u32/u8 instead of uint32_t/8_t - removed unnecessary casting - retain same MSTR_CONFIG as PIO for DMA - unset fragment bit only for last cmd_desc of last xfer --- drivers/spi/spi-qcom-qspi.c | 292 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 265 insertions(+), 27 deletions(-)