diff mbox series

[1/2] mmc: Support kmsg dumper based on pstore/blk

Message ID 20201207115753.21728-2-bbudiredla@marvell.com
State New
Headers show
Series [1/2] mmc: Support kmsg dumper based on pstore/blk | expand

Commit Message

Bhaskara Budiredla Dec. 7, 2020, 11:57 a.m. UTC
This patch introduces to mmcpstore. The functioning of mmcpstore
is similar to mtdpstore. mmcpstore works on FTL based flash devices
whereas mtdpstore works on raw flash devices. When the system crashes,
mmcpstore stores the kmsg panic and oops logs to a user specified
MMC device.

It collects the details about the host MMC device through pstore/blk
"blkdev" parameter. The user can specify the MMC device in many ways
by checking in Documentation/admin-guide/pstore-blk.rst.

The individual mmc host drivers have to define suitable polling and
cleanup subroutines to write kmsg panic/oops logs through mmcpstore.
These new host operations are needed as pstore panic write runs with
interrupts disabled.

Signed-off-by: Bhaskara Budiredla <bbudiredla@marvell.com>
---
 drivers/mmc/core/Kconfig     |  14 +-
 drivers/mmc/core/Makefile    |   1 +
 drivers/mmc/core/block.c     |  19 +++
 drivers/mmc/core/block.h     |   9 ++
 drivers/mmc/core/core.c      |  24 +++
 drivers/mmc/core/mmcpstore.c | 302 +++++++++++++++++++++++++++++++++++
 include/linux/mmc/core.h     |   4 +
 include/linux/mmc/host.h     |  12 ++
 8 files changed, 384 insertions(+), 1 deletion(-)
 create mode 100644 drivers/mmc/core/mmcpstore.c

Comments

Ulf Hansson Dec. 11, 2020, 11:31 a.m. UTC | #1
+ Christoph

On Mon, 7 Dec 2020 at 12:58, Bhaskara Budiredla <bbudiredla@marvell.com> wrote:
>

> This patch introduces to mmcpstore. The functioning of mmcpstore

> is similar to mtdpstore. mmcpstore works on FTL based flash devices

> whereas mtdpstore works on raw flash devices. When the system crashes,

> mmcpstore stores the kmsg panic and oops logs to a user specified

> MMC device.

>

> It collects the details about the host MMC device through pstore/blk

> "blkdev" parameter. The user can specify the MMC device in many ways

> by checking in Documentation/admin-guide/pstore-blk.rst.

>

> The individual mmc host drivers have to define suitable polling and

> cleanup subroutines to write kmsg panic/oops logs through mmcpstore.

> These new host operations are needed as pstore panic write runs with

> interrupts disabled.


Apologies for the delay. I have tried to give this some more thinking,
more comments below.

[...]

> diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c

> index d42037f0f10d..7682b267f1d5 100644

> --- a/drivers/mmc/core/core.c

> +++ b/drivers/mmc/core/core.c

> @@ -569,6 +569,30 @@ int mmc_cqe_recovery(struct mmc_host *host)

>  }

>  EXPORT_SYMBOL(mmc_cqe_recovery);

>

> +#if IS_ENABLED(CONFIG_MMC_PSTORE)

> +/**

> + *     mmc_wait_for_pstore_req - initiate a blocking mmc request

> + *     @host: MMC host to start command

> + *     @mrq: MMC request to start

> + *

> + *     Start a blocking MMC request for a host and wait for the request

> + *     to complete that is based on polling and timeout.

> + */

> +void mmc_wait_for_pstore_req(struct mmc_host *host, struct mmc_request *mrq)

> +{

> +       unsigned int timeout;

> +

> +       host->ops->req_cleanup_pending(host);


So, the host driver should through this callback, be able to terminate
any ongoing requests/commands - and also try to make sure that the
(e)MMC/SD card remains in the data transfer state. Moreover, no locks
and no IRQs must be used to manage this, right?

Have you really tried if this works for real?

> +       mmc_start_request(host, mrq);


This looks like the wrong approach to me, as it will try to re-use the
regular request based path, where the host driver is allowed to use
locks, IRQs, DMAs, etc, etc.

I would suggest inventing a new separate request path and a new host
ops, to deal with these kinds of requests.

In this way, the below part with host->ops->req_completion_poll, can
probably be removed. Instead we may just wait for the request to
return from the new request path.

> +

> +       if (mrq->data) {

> +               timeout = mrq->data->timeout_ns / NSEC_PER_MSEC;

> +               host->ops->req_completion_poll(host, timeout);

> +       }

> +}

> +EXPORT_SYMBOL(mmc_wait_for_pstore_req);

> +#endif

> +

>  /**

>   *     mmc_is_req_done - Determine if a 'cap_cmd_during_tfr' request is done

>   *     @host: MMC host

> diff --git a/drivers/mmc/core/mmcpstore.c b/drivers/mmc/core/mmcpstore.c

> new file mode 100644

> index 000000000000..1113eae0756c

> --- /dev/null

> +++ b/drivers/mmc/core/mmcpstore.c

> @@ -0,0 +1,302 @@

> +// SPDX-License-Identifier: GPL-2.0

> +/*

> + * MMC pstore support based on pstore/blk

> + *

> + * Copyright (c) 2020 Marvell.

> + * Author: Bhaskara Budiredla <bbudiredla@marvell.com>

> + */

> +

> +#define pr_fmt(fmt) "mmcpstore: " fmt

> +

> +#include <linux/kernel.h>

> +#include <linux/module.h>

> +#include <linux/pstore_blk.h>

> +#include <linux/blkdev.h>

> +#include <linux/mount.h>

> +#include <linux/slab.h>

> +#include <linux/mmc/mmc.h>

> +#include <linux/mmc/host.h>

> +#include <linux/mmc/card.h>

> +#include <linux/scatterlist.h>

> +#include "block.h"

> +#include "card.h"

> +#include "core.h"

> +

> +static struct mmcpstore_context {

> +       char dev_name[BDEVNAME_SIZE];

> +       int partno;

> +       sector_t start_sect;

> +       sector_t size;

> +       struct pstore_device_info dev;

> +       struct pstore_blk_config conf;

> +       struct pstore_blk_info info;

> +

> +       char *sub;

> +       struct mmc_card *card;

> +       struct mmc_request *mrq;

> +} oops_cxt;

> +

> +static void mmc_prep_req(struct mmc_request *mrq,

> +               unsigned int sect_offset, unsigned int nsects,

> +               struct scatterlist *sg, u32 opcode, unsigned int flags)

> +{

> +       mrq->cmd->opcode = opcode;

> +       mrq->cmd->arg = sect_offset;

> +       mrq->cmd->flags = MMC_RSP_R1 | MMC_CMD_ADTC;

> +

> +       if (nsects == 1) {

> +               mrq->stop = NULL;

> +       } else {

> +               mrq->stop->opcode = MMC_STOP_TRANSMISSION;

> +               mrq->stop->arg = 0;

> +               mrq->stop->flags = MMC_RSP_R1B | MMC_CMD_AC;

> +       }

> +

> +       mrq->data->blksz = SECTOR_SIZE;

> +       mrq->data->blocks = nsects;

> +       mrq->data->flags = flags;

> +       mrq->data->sg = sg;

> +       mrq->data->sg_len = 1;

> +}

> +

> +static int mmcpstore_rdwr_req(const char *buf, unsigned int nsects,

> +                       unsigned int sect_offset, unsigned int flags)

> +{

> +       struct mmcpstore_context *cxt = &oops_cxt;

> +       struct mmc_request *mrq = cxt->mrq;

> +       struct mmc_card *card = cxt->card;

> +       struct mmc_host *host = card->host;

> +       struct scatterlist sg;

> +       u32 opcode;

> +

> +       if (flags == MMC_DATA_READ)

> +               opcode  = (nsects > 1) ?

> +                       MMC_READ_MULTIPLE_BLOCK : MMC_READ_SINGLE_BLOCK;

> +       else

> +               opcode = (nsects > 1) ?

> +                       MMC_WRITE_MULTIPLE_BLOCK : MMC_WRITE_BLOCK;

> +

> +       mmc_prep_req(mrq, sect_offset, nsects, &sg, opcode, flags);

> +       sg_init_one(&sg, buf, (nsects << SECTOR_SHIFT));

> +       mmc_set_data_timeout(mrq->data, cxt->card);

> +

> +       mmc_claim_host(host);

> +       mmc_wait_for_req(host, mrq);

> +       mdelay(mrq->data->timeout_ns / NSEC_PER_MSEC);

> +       mmc_release_host(host);

> +

> +       if (mrq->cmd->error) {

> +               pr_err("Cmd error: %d\n", mrq->cmd->error);

> +               return mrq->cmd->error;

> +       }

> +       if (mrq->data->error) {

> +               pr_err("Data error: %d\n", mrq->data->error);

> +               return mrq->data->error;

> +       }

> +

> +       return 0;

> +}

> +

> +static ssize_t mmcpstore_write(const char *buf, size_t size, loff_t off)

> +{

> +       struct mmcpstore_context *cxt = &oops_cxt;

> +       int ret;

> +

> +       ret = mmcpstore_rdwr_req(buf, (size >> SECTOR_SHIFT),

> +               cxt->start_sect + (off >> SECTOR_SHIFT), MMC_DATA_WRITE);

> +       if (ret)

> +               return ret;

> +

> +       return size;

> +}

> +

> +static ssize_t mmcpstore_read(char *buf, size_t size, loff_t off)

> +{

> +       struct mmcpstore_context *cxt = &oops_cxt;

> +       unsigned int sect_off = cxt->start_sect  + (off >> SECTOR_SHIFT);

> +       unsigned long sects = (cxt->conf.kmsg_size >> SECTOR_SHIFT);

> +       int ret;

> +

> +       if (unlikely(!buf || !size))

> +               return -EINVAL;

> +

> +       ret = mmcpstore_rdwr_req(cxt->sub, sects, sect_off, MMC_DATA_READ);

> +       if (ret)

> +               return ret;

> +       memcpy(buf, cxt->sub, size);

> +

> +       return size;

> +}


It looks like the above I/O read/write interface for pstore is
intended to be used when the platform is up and running and not during
a panic, correct?

If so, I don't get why it can't use the regular block interface, as
any other file system does, for example?

> +

> +static void mmcpstore_panic_write_req(const char *buf,

> +               unsigned int nsects, unsigned int sect_offset)

> +{

> +       struct mmcpstore_context *cxt = &oops_cxt;

> +       struct mmc_request *mrq = cxt->mrq;

> +       struct mmc_card *card = cxt->card;

> +       struct mmc_host *host = card->host;

> +       struct scatterlist sg;

> +       u32 opcode;

> +

> +       opcode = (nsects > 1) ? MMC_WRITE_MULTIPLE_BLOCK : MMC_WRITE_BLOCK;

> +       mmc_prep_req(mrq, sect_offset, nsects, &sg, opcode, MMC_DATA_WRITE);

> +       sg_init_one(&sg, buf, (nsects << SECTOR_SHIFT));

> +       mmc_set_data_timeout(mrq->data, cxt->card);

> +

> +       mmc_claim_host(host);


So, this will use several locks, which may be a problem, right?

Moreover, if there is an ongoing I/O request (or any other active
command/request for that matter), then the host is already claimed by
the mmc core. Normally, we would then wait for that request to be
completed, to trigger the release of the host and then allow us to
claim it here.

However, because of the kernel panic, I assume it's quite likely that
any ongoing request will not be completed at all, as IRQs may not
work, for example.

In other words, we may be hanging here forever waiting to claim the
host. Unless we are lucky, because of no ongoing request, although we
would still have to succeed walking through all the locking, etc, in
mmc_claim_host().

Do note, as part of the mmc_claim_host() we may also runtime resume
the host, if it was runtime suspended (which is quite likely). To
runtime resume a host via runtime PM, we may end up ungating device
clocks, power on corresponding PM domains, run a so called re-tuning
sequence to restore communication with the card, etc, etc. The point
is, all these things must also be possible to do, without locks and by
using a polling based "mode"...

> +       mmc_wait_for_pstore_req(host, mrq);


Okay, so let's assume that we are lucky and succeed to claim and
runtime resume the host above.

Then we also need to runtime resume the card, as it may be runtime
suspended. In the "worst case", runtime resuming the card means a
complete re-initialization of it, covering a series of commands and
turning on regulators, for example.

> +       mmc_release_host(card->host);

> +}

> +

> +static ssize_t mmcpstore_panic_write(const char *buf, size_t size, loff_t off)

> +{

> +       struct mmcpstore_context *cxt = &oops_cxt;

> +

> +       mmcpstore_panic_write_req(buf, (size >> SECTOR_SHIFT),

> +                       cxt->start_sect + (off >> SECTOR_SHIFT));

> +       return size;

> +}


[...]

Having said the above, I am not entirely convinced that it makes sense
to support this, at all.

Not only, will the support be highly fragile from the mmc core point
of view, but there is also a significant complexity for an mmc host
driver to support this (at least in general).

Kind regards
Uffe
Bhaskara Budiredla Dec. 15, 2020, 6:52 a.m. UTC | #2
>-----Original Message-----

>From: Ulf Hansson <ulf.hansson@linaro.org>

>Sent: Friday, December 11, 2020 5:02 PM

>To: Bhaskara Budiredla <bbudiredla@marvell.com>

>Cc: Kees Cook <keescook@chromium.org>; Colin Cross

><ccross@android.com>; Tony Luck <tony.luck@intel.com>; Sunil Kovvuri

>Goutham <sgoutham@marvell.com>; linux-mmc@vger.kernel.org; Linux

>Kernel Mailing List <linux-kernel@vger.kernel.org>; Christoph Hellwig

><hch@lst.de>

>Subject: [EXT] Re: [PATCH 1/2] mmc: Support kmsg dumper based on

>pstore/blk

>

>External Email

>

>----------------------------------------------------------------------

>+ Christoph

>

>On Mon, 7 Dec 2020 at 12:58, Bhaskara Budiredla <bbudiredla@marvell.com>

>wrote:

>>

>> This patch introduces to mmcpstore. The functioning of mmcpstore is

>> similar to mtdpstore. mmcpstore works on FTL based flash devices

>> whereas mtdpstore works on raw flash devices. When the system crashes,

>> mmcpstore stores the kmsg panic and oops logs to a user specified MMC

>> device.

>>

>> It collects the details about the host MMC device through pstore/blk

>> "blkdev" parameter. The user can specify the MMC device in many ways

>> by checking in Documentation/admin-guide/pstore-blk.rst.

>>

>> The individual mmc host drivers have to define suitable polling and

>> cleanup subroutines to write kmsg panic/oops logs through mmcpstore.

>> These new host operations are needed as pstore panic write runs with

>> interrupts disabled.

>

>Apologies for the delay. I have tried to give this some more thinking, more

>comments below.

>

>[...]

>

>> diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index

>> d42037f0f10d..7682b267f1d5 100644

>> --- a/drivers/mmc/core/core.c

>> +++ b/drivers/mmc/core/core.c

>> @@ -569,6 +569,30 @@ int mmc_cqe_recovery(struct mmc_host *host)  }

>> EXPORT_SYMBOL(mmc_cqe_recovery);

>>

>> +#if IS_ENABLED(CONFIG_MMC_PSTORE)

>> +/**

>> + *     mmc_wait_for_pstore_req - initiate a blocking mmc request

>> + *     @host: MMC host to start command

>> + *     @mrq: MMC request to start

>> + *

>> + *     Start a blocking MMC request for a host and wait for the request

>> + *     to complete that is based on polling and timeout.

>> + */

>> +void mmc_wait_for_pstore_req(struct mmc_host *host, struct

>> +mmc_request *mrq) {

>> +       unsigned int timeout;

>> +

>> +       host->ops->req_cleanup_pending(host);

>

>So, the host driver should through this callback, be able to terminate any

>ongoing requests/commands - and also try to make sure that the (e)MMC/SD

>card remains in the data transfer state. Moreover, no locks and no IRQs must

>be used to manage this, right?

>


Yes, that's correct. 

>Have you really tried if this works for real?

>


Yes, it's a working solution. Patch were submitted after testing.

>> +       mmc_start_request(host, mrq);

>

>This looks like the wrong approach to me, as it will try to re-use the regular

>request based path, where the host driver is allowed to use locks, IRQs,

>DMAs, etc, etc.

>


No. The locks on host driver will be dropped in CONFIG_MMC_PSTORE path.
Similarly, the IRQs will be replaced with polling subroutines during panic
write. Please take a look at patch 2/2 which does this for cavium host driver. 

>I would suggest inventing a new separate request path and a new host ops, to

>deal with these kinds of requests.

>


The polling and cleanup host operations are the ones invented for this purpose.
Polling to replace IRQs and cleanup to terminate ongoing requests/commands.

>In this way, the below part with host->ops->req_completion_poll, can

>probably be removed. Instead we may just wait for the request to return

>from the new request path.

>


This is not possible unless CPU itself does the mmc write (through some block interface?).
If the answer is block interface, sleeping cannot be avoided as part of it. 
Do you really think DMA can be avoided for MMC panic writes?  

>> +

>> +       if (mrq->data) {

>> +               timeout = mrq->data->timeout_ns / NSEC_PER_MSEC;

>> +               host->ops->req_completion_poll(host, timeout);

>> +       }

>> +}

>> +EXPORT_SYMBOL(mmc_wait_for_pstore_req);

>> +#endif

>> +

>>  /**

>>   *     mmc_is_req_done - Determine if a 'cap_cmd_during_tfr' request is

>done

>>   *     @host: MMC host

>> diff --git a/drivers/mmc/core/mmcpstore.c

>> b/drivers/mmc/core/mmcpstore.c new file mode 100644 index

>> 000000000000..1113eae0756c

>> --- /dev/null

>> +++ b/drivers/mmc/core/mmcpstore.c

>> @@ -0,0 +1,302 @@

>> +// SPDX-License-Identifier: GPL-2.0

>> +/*

>> + * MMC pstore support based on pstore/blk

>> + *

>> + * Copyright (c) 2020 Marvell.

>> + * Author: Bhaskara Budiredla <bbudiredla@marvell.com>  */

>> +

>> +#define pr_fmt(fmt) "mmcpstore: " fmt

>> +

>> +#include <linux/kernel.h>

>> +#include <linux/module.h>

>> +#include <linux/pstore_blk.h>

>> +#include <linux/blkdev.h>

>> +#include <linux/mount.h>

>> +#include <linux/slab.h>

>> +#include <linux/mmc/mmc.h>

>> +#include <linux/mmc/host.h>

>> +#include <linux/mmc/card.h>

>> +#include <linux/scatterlist.h>

>> +#include "block.h"

>> +#include "card.h"

>> +#include "core.h"

>> +

>> +static struct mmcpstore_context {

>> +       char dev_name[BDEVNAME_SIZE];

>> +       int partno;

>> +       sector_t start_sect;

>> +       sector_t size;

>> +       struct pstore_device_info dev;

>> +       struct pstore_blk_config conf;

>> +       struct pstore_blk_info info;

>> +

>> +       char *sub;

>> +       struct mmc_card *card;

>> +       struct mmc_request *mrq;

>> +} oops_cxt;

>> +

>> +static void mmc_prep_req(struct mmc_request *mrq,

>> +               unsigned int sect_offset, unsigned int nsects,

>> +               struct scatterlist *sg, u32 opcode, unsigned int

>> +flags) {

>> +       mrq->cmd->opcode = opcode;

>> +       mrq->cmd->arg = sect_offset;

>> +       mrq->cmd->flags = MMC_RSP_R1 | MMC_CMD_ADTC;

>> +

>> +       if (nsects == 1) {

>> +               mrq->stop = NULL;

>> +       } else {

>> +               mrq->stop->opcode = MMC_STOP_TRANSMISSION;

>> +               mrq->stop->arg = 0;

>> +               mrq->stop->flags = MMC_RSP_R1B | MMC_CMD_AC;

>> +       }

>> +

>> +       mrq->data->blksz = SECTOR_SIZE;

>> +       mrq->data->blocks = nsects;

>> +       mrq->data->flags = flags;

>> +       mrq->data->sg = sg;

>> +       mrq->data->sg_len = 1;

>> +}

>> +

>> +static int mmcpstore_rdwr_req(const char *buf, unsigned int nsects,

>> +                       unsigned int sect_offset, unsigned int flags)

>> +{

>> +       struct mmcpstore_context *cxt = &oops_cxt;

>> +       struct mmc_request *mrq = cxt->mrq;

>> +       struct mmc_card *card = cxt->card;

>> +       struct mmc_host *host = card->host;

>> +       struct scatterlist sg;

>> +       u32 opcode;

>> +

>> +       if (flags == MMC_DATA_READ)

>> +               opcode  = (nsects > 1) ?

>> +                       MMC_READ_MULTIPLE_BLOCK : MMC_READ_SINGLE_BLOCK;

>> +       else

>> +               opcode = (nsects > 1) ?

>> +                       MMC_WRITE_MULTIPLE_BLOCK : MMC_WRITE_BLOCK;

>> +

>> +       mmc_prep_req(mrq, sect_offset, nsects, &sg, opcode, flags);

>> +       sg_init_one(&sg, buf, (nsects << SECTOR_SHIFT));

>> +       mmc_set_data_timeout(mrq->data, cxt->card);

>> +

>> +       mmc_claim_host(host);

>> +       mmc_wait_for_req(host, mrq);

>> +       mdelay(mrq->data->timeout_ns / NSEC_PER_MSEC);

>> +       mmc_release_host(host);

>> +

>> +       if (mrq->cmd->error) {

>> +               pr_err("Cmd error: %d\n", mrq->cmd->error);

>> +               return mrq->cmd->error;

>> +       }

>> +       if (mrq->data->error) {

>> +               pr_err("Data error: %d\n", mrq->data->error);

>> +               return mrq->data->error;

>> +       }

>> +

>> +       return 0;

>> +}

>> +

>> +static ssize_t mmcpstore_write(const char *buf, size_t size, loff_t

>> +off) {

>> +       struct mmcpstore_context *cxt = &oops_cxt;

>> +       int ret;

>> +

>> +       ret = mmcpstore_rdwr_req(buf, (size >> SECTOR_SHIFT),

>> +               cxt->start_sect + (off >> SECTOR_SHIFT), MMC_DATA_WRITE);

>> +       if (ret)

>> +               return ret;

>> +

>> +       return size;

>> +}

>> +

>> +static ssize_t mmcpstore_read(char *buf, size_t size, loff_t off) {

>> +       struct mmcpstore_context *cxt = &oops_cxt;

>> +       unsigned int sect_off = cxt->start_sect  + (off >> SECTOR_SHIFT);

>> +       unsigned long sects = (cxt->conf.kmsg_size >> SECTOR_SHIFT);

>> +       int ret;

>> +

>> +       if (unlikely(!buf || !size))

>> +               return -EINVAL;

>> +

>> +       ret = mmcpstore_rdwr_req(cxt->sub, sects, sect_off,

>MMC_DATA_READ);

>> +       if (ret)

>> +               return ret;

>> +       memcpy(buf, cxt->sub, size);

>> +

>> +       return size;

>> +}

>

>It looks like the above I/O read/write interface for pstore is intended to be

>used when the platform is up and running and not during a panic, correct?

>

>If so, I don't get why it can't use the regular block interface, as any other file

>system does, for example?

>


The pstore read and write operations are used as part of pstore file system mounting
to retrieve the stored logs from MMC platform backend and to manage pstore read/write
counters. Sleeping would be allowed during this time. Whereas, pstore PANIC write will be
called if there happens a crash in the system. Sleeping is NOT allowed at this time.

It seems you are mixing the sleeping paths of the mmcpstore with that of atomic path.


>> +

>> +static void mmcpstore_panic_write_req(const char *buf,

>> +               unsigned int nsects, unsigned int sect_offset) {

>> +       struct mmcpstore_context *cxt = &oops_cxt;

>> +       struct mmc_request *mrq = cxt->mrq;

>> +       struct mmc_card *card = cxt->card;

>> +       struct mmc_host *host = card->host;

>> +       struct scatterlist sg;

>> +       u32 opcode;

>> +

>> +       opcode = (nsects > 1) ? MMC_WRITE_MULTIPLE_BLOCK :

>MMC_WRITE_BLOCK;

>> +       mmc_prep_req(mrq, sect_offset, nsects, &sg, opcode,

>MMC_DATA_WRITE);

>> +       sg_init_one(&sg, buf, (nsects << SECTOR_SHIFT));

>> +       mmc_set_data_timeout(mrq->data, cxt->card);

>> +

>> +       mmc_claim_host(host);

>

>So, this will use several locks, which may be a problem, right?

>


No, as said above locks are present on host driver will be dropped
in CONFIG_MMC_PSTORE path. 

>Moreover, if there is an ongoing I/O request (or any other active

>command/request for that matter), then the host is already claimed by the

>mmc core. Normally, we would then wait for that request to be completed, to

>trigger the release of the host and then allow us to claim it here.

>

>However, because of the kernel panic, I assume it's quite likely that any

>ongoing request will not be completed at all, as IRQs may not work, for

>example.

>

>In other words, we may be hanging here forever waiting to claim the host.

>Unless we are lucky, because of no ongoing request, although we would still

>have to succeed walking through all the locking, etc, in mmc_claim_host().

>


host->ops->req_cleanup_pending(host) was introduced to clean up the queued
and ongoing requests/commands. Terminating ongoing requests is not a complicated
thing for the host drivers. 


>Do note, as part of the mmc_claim_host() we may also runtime resume the

>host, if it was runtime suspended (which is quite likely). To runtime resume a

>host via runtime PM, we may end up ungating device clocks, power on

>corresponding PM domains, run a so called re-tuning sequence to restore

>communication with the card, etc, etc. The point is, all these things must also

>be possible to do, without locks and by using a polling based "mode"...

>

>> +       mmc_wait_for_pstore_req(host, mrq);

>

>Okay, so let's assume that we are lucky and succeed to claim and runtime

>resume the host above.

>

>Then we also need to runtime resume the card, as it may be runtime

>suspended. In the "worst case", runtime resuming the card means a complete

>re-initialization of it, covering a series of commands and turning on regulators,

>for example.

>

>> +       mmc_release_host(card->host);

>> +}



All the above said things (runtime resuming host, clocks being updated, restoring
communication with host) are valid even for the newly asked request path. They cannot
be avoided as we have to terminate the ongoing requests to complete the panic write.


>> +

>> +static ssize_t mmcpstore_panic_write(const char *buf, size_t size,

>> +loff_t off) {

>> +       struct mmcpstore_context *cxt = &oops_cxt;

>> +

>> +       mmcpstore_panic_write_req(buf, (size >> SECTOR_SHIFT),

>> +                       cxt->start_sect + (off >> SECTOR_SHIFT));

>> +       return size;

>> +}

>

>[...]

>

>Having said the above, I am not entirely convinced that it makes sense to

>support this, at all.

>

>Not only, will the support be highly fragile from the mmc core point of view,

>but there is also a significant complexity for an mmc host driver to support this

>(at least in general).

>


I am not sure if the comments on host driver complexity is true. Terminating
ongoing requests and introducing polling functions on host drivers should be
straight forward. None those would disturb the core functionality. They are 
completely independent.
 

>Kind regards

>Uffe
Ulf Hansson Dec. 15, 2020, 11:42 a.m. UTC | #3
On Tue, 15 Dec 2020 at 07:52, Bhaskara Budiredla <bbudiredla@marvell.com> wrote:
>

>

>

> >-----Original Message-----

> >From: Ulf Hansson <ulf.hansson@linaro.org>

> >Sent: Friday, December 11, 2020 5:02 PM

> >To: Bhaskara Budiredla <bbudiredla@marvell.com>

> >Cc: Kees Cook <keescook@chromium.org>; Colin Cross

> ><ccross@android.com>; Tony Luck <tony.luck@intel.com>; Sunil Kovvuri

> >Goutham <sgoutham@marvell.com>; linux-mmc@vger.kernel.org; Linux

> >Kernel Mailing List <linux-kernel@vger.kernel.org>; Christoph Hellwig

> ><hch@lst.de>

> >Subject: [EXT] Re: [PATCH 1/2] mmc: Support kmsg dumper based on

> >pstore/blk

> >

> >External Email

> >

> >----------------------------------------------------------------------

> >+ Christoph

> >

> >On Mon, 7 Dec 2020 at 12:58, Bhaskara Budiredla <bbudiredla@marvell.com>

> >wrote:

> >>

> >> This patch introduces to mmcpstore. The functioning of mmcpstore is

> >> similar to mtdpstore. mmcpstore works on FTL based flash devices

> >> whereas mtdpstore works on raw flash devices. When the system crashes,

> >> mmcpstore stores the kmsg panic and oops logs to a user specified MMC

> >> device.

> >>

> >> It collects the details about the host MMC device through pstore/blk

> >> "blkdev" parameter. The user can specify the MMC device in many ways

> >> by checking in Documentation/admin-guide/pstore-blk.rst.

> >>

> >> The individual mmc host drivers have to define suitable polling and

> >> cleanup subroutines to write kmsg panic/oops logs through mmcpstore.

> >> These new host operations are needed as pstore panic write runs with

> >> interrupts disabled.

> >

> >Apologies for the delay. I have tried to give this some more thinking, more

> >comments below.

> >

> >[...]

> >

> >> diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index

> >> d42037f0f10d..7682b267f1d5 100644

> >> --- a/drivers/mmc/core/core.c

> >> +++ b/drivers/mmc/core/core.c

> >> @@ -569,6 +569,30 @@ int mmc_cqe_recovery(struct mmc_host *host)  }

> >> EXPORT_SYMBOL(mmc_cqe_recovery);

> >>

> >> +#if IS_ENABLED(CONFIG_MMC_PSTORE)

> >> +/**

> >> + *     mmc_wait_for_pstore_req - initiate a blocking mmc request

> >> + *     @host: MMC host to start command

> >> + *     @mrq: MMC request to start

> >> + *

> >> + *     Start a blocking MMC request for a host and wait for the request

> >> + *     to complete that is based on polling and timeout.

> >> + */

> >> +void mmc_wait_for_pstore_req(struct mmc_host *host, struct

> >> +mmc_request *mrq) {

> >> +       unsigned int timeout;

> >> +

> >> +       host->ops->req_cleanup_pending(host);

> >

> >So, the host driver should through this callback, be able to terminate any

> >ongoing requests/commands - and also try to make sure that the (e)MMC/SD

> >card remains in the data transfer state. Moreover, no locks and no IRQs must

> >be used to manage this, right?

> >

>

> Yes, that's correct.

>

> >Have you really tried if this works for real?

> >

>

> Yes, it's a working solution. Patch were submitted after testing.

>

> >> +       mmc_start_request(host, mrq);

> >

> >This looks like the wrong approach to me, as it will try to re-use the regular

> >request based path, where the host driver is allowed to use locks, IRQs,

> >DMAs, etc, etc.

> >

>

> No. The locks on host driver will be dropped in CONFIG_MMC_PSTORE path.

> Similarly, the IRQs will be replaced with polling subroutines during panic

> write. Please take a look at patch 2/2 which does this for cavium host driver.


Yes, but why is removing the locks okay for the other regular request case?

I assume the locks are there for a reason, right?

>

> >I would suggest inventing a new separate request path and a new host ops, to

> >deal with these kinds of requests.

> >

>

> The polling and cleanup host operations are the ones invented for this purpose.

> Polling to replace IRQs and cleanup to terminate ongoing requests/commands.


I understand the approach you have taken, but what I am saying is that
I don't think it is the best one to choose.

Then I think it's better to invent an entirely new path, rather than
trying to tweak the existing one. For example, in your case you could
leave the lock to be used (in patch2) for the existing regular request
path, but bypass the locks and use polling in the new one.

>

> >In this way, the below part with host->ops->req_completion_poll, can

> >probably be removed. Instead we may just wait for the request to return

> >from the new request path.

> >

>

> This is not possible unless CPU itself does the mmc write (through some block interface?).

> If the answer is block interface, sleeping cannot be avoided as part of it.


I wasn't thinking of the block interface, but rather of the internal
behavior of an mmc host driver.

For data transfers, it's common to use DMA when that is supported. If
DMA doesn't work, it's probably a FIFO being read/written to based
upon some IRQs telling when read/write should be done. Of course,
there are other cases as well.

For commands, I expect those to be managed through IRQs.

That said, all these things are preferably managed through a polling
based method instead, when doing the panic writes, correct?

> Do you really think DMA can be avoided for MMC panic writes?


That depends on the HW.

Certainly there are lots of cases where DMA isn't the only way, but
instead it's perfectly possible to fall back to polling based mode.

>

> >> +

> >> +       if (mrq->data) {

> >> +               timeout = mrq->data->timeout_ns / NSEC_PER_MSEC;

> >> +               host->ops->req_completion_poll(host, timeout);

> >> +       }

> >> +}

> >> +EXPORT_SYMBOL(mmc_wait_for_pstore_req);

> >> +#endif

> >> +

> >>  /**

> >>   *     mmc_is_req_done - Determine if a 'cap_cmd_during_tfr' request is

> >done

> >>   *     @host: MMC host

> >> diff --git a/drivers/mmc/core/mmcpstore.c

> >> b/drivers/mmc/core/mmcpstore.c new file mode 100644 index

> >> 000000000000..1113eae0756c

> >> --- /dev/null

> >> +++ b/drivers/mmc/core/mmcpstore.c

> >> @@ -0,0 +1,302 @@

> >> +// SPDX-License-Identifier: GPL-2.0

> >> +/*

> >> + * MMC pstore support based on pstore/blk

> >> + *

> >> + * Copyright (c) 2020 Marvell.

> >> + * Author: Bhaskara Budiredla <bbudiredla@marvell.com>  */

> >> +

> >> +#define pr_fmt(fmt) "mmcpstore: " fmt

> >> +

> >> +#include <linux/kernel.h>

> >> +#include <linux/module.h>

> >> +#include <linux/pstore_blk.h>

> >> +#include <linux/blkdev.h>

> >> +#include <linux/mount.h>

> >> +#include <linux/slab.h>

> >> +#include <linux/mmc/mmc.h>

> >> +#include <linux/mmc/host.h>

> >> +#include <linux/mmc/card.h>

> >> +#include <linux/scatterlist.h>

> >> +#include "block.h"

> >> +#include "card.h"

> >> +#include "core.h"

> >> +

> >> +static struct mmcpstore_context {

> >> +       char dev_name[BDEVNAME_SIZE];

> >> +       int partno;

> >> +       sector_t start_sect;

> >> +       sector_t size;

> >> +       struct pstore_device_info dev;

> >> +       struct pstore_blk_config conf;

> >> +       struct pstore_blk_info info;

> >> +

> >> +       char *sub;

> >> +       struct mmc_card *card;

> >> +       struct mmc_request *mrq;

> >> +} oops_cxt;

> >> +

> >> +static void mmc_prep_req(struct mmc_request *mrq,

> >> +               unsigned int sect_offset, unsigned int nsects,

> >> +               struct scatterlist *sg, u32 opcode, unsigned int

> >> +flags) {

> >> +       mrq->cmd->opcode = opcode;

> >> +       mrq->cmd->arg = sect_offset;

> >> +       mrq->cmd->flags = MMC_RSP_R1 | MMC_CMD_ADTC;

> >> +

> >> +       if (nsects == 1) {

> >> +               mrq->stop = NULL;

> >> +       } else {

> >> +               mrq->stop->opcode = MMC_STOP_TRANSMISSION;

> >> +               mrq->stop->arg = 0;

> >> +               mrq->stop->flags = MMC_RSP_R1B | MMC_CMD_AC;

> >> +       }

> >> +

> >> +       mrq->data->blksz = SECTOR_SIZE;

> >> +       mrq->data->blocks = nsects;

> >> +       mrq->data->flags = flags;

> >> +       mrq->data->sg = sg;

> >> +       mrq->data->sg_len = 1;

> >> +}

> >> +

> >> +static int mmcpstore_rdwr_req(const char *buf, unsigned int nsects,

> >> +                       unsigned int sect_offset, unsigned int flags)

> >> +{

> >> +       struct mmcpstore_context *cxt = &oops_cxt;

> >> +       struct mmc_request *mrq = cxt->mrq;

> >> +       struct mmc_card *card = cxt->card;

> >> +       struct mmc_host *host = card->host;

> >> +       struct scatterlist sg;

> >> +       u32 opcode;

> >> +

> >> +       if (flags == MMC_DATA_READ)

> >> +               opcode  = (nsects > 1) ?

> >> +                       MMC_READ_MULTIPLE_BLOCK : MMC_READ_SINGLE_BLOCK;

> >> +       else

> >> +               opcode = (nsects > 1) ?

> >> +                       MMC_WRITE_MULTIPLE_BLOCK : MMC_WRITE_BLOCK;

> >> +

> >> +       mmc_prep_req(mrq, sect_offset, nsects, &sg, opcode, flags);

> >> +       sg_init_one(&sg, buf, (nsects << SECTOR_SHIFT));

> >> +       mmc_set_data_timeout(mrq->data, cxt->card);

> >> +

> >> +       mmc_claim_host(host);

> >> +       mmc_wait_for_req(host, mrq);

> >> +       mdelay(mrq->data->timeout_ns / NSEC_PER_MSEC);

> >> +       mmc_release_host(host);

> >> +

> >> +       if (mrq->cmd->error) {

> >> +               pr_err("Cmd error: %d\n", mrq->cmd->error);

> >> +               return mrq->cmd->error;

> >> +       }

> >> +       if (mrq->data->error) {

> >> +               pr_err("Data error: %d\n", mrq->data->error);

> >> +               return mrq->data->error;

> >> +       }

> >> +

> >> +       return 0;

> >> +}

> >> +

> >> +static ssize_t mmcpstore_write(const char *buf, size_t size, loff_t

> >> +off) {

> >> +       struct mmcpstore_context *cxt = &oops_cxt;

> >> +       int ret;

> >> +

> >> +       ret = mmcpstore_rdwr_req(buf, (size >> SECTOR_SHIFT),

> >> +               cxt->start_sect + (off >> SECTOR_SHIFT), MMC_DATA_WRITE);

> >> +       if (ret)

> >> +               return ret;

> >> +

> >> +       return size;

> >> +}

> >> +

> >> +static ssize_t mmcpstore_read(char *buf, size_t size, loff_t off) {

> >> +       struct mmcpstore_context *cxt = &oops_cxt;

> >> +       unsigned int sect_off = cxt->start_sect  + (off >> SECTOR_SHIFT);

> >> +       unsigned long sects = (cxt->conf.kmsg_size >> SECTOR_SHIFT);

> >> +       int ret;

> >> +

> >> +       if (unlikely(!buf || !size))

> >> +               return -EINVAL;

> >> +

> >> +       ret = mmcpstore_rdwr_req(cxt->sub, sects, sect_off,

> >MMC_DATA_READ);

> >> +       if (ret)

> >> +               return ret;

> >> +       memcpy(buf, cxt->sub, size);

> >> +

> >> +       return size;

> >> +}

> >

> >It looks like the above I/O read/write interface for pstore is intended to be

> >used when the platform is up and running and not during a panic, correct?

> >

> >If so, I don't get why it can't use the regular block interface, as any other file

> >system does, for example?

> >

>

> The pstore read and write operations are used as part of pstore file system mounting

> to retrieve the stored logs from MMC platform backend and to manage pstore read/write

> counters. Sleeping would be allowed during this time. Whereas, pstore PANIC write will be

> called if there happens a crash in the system. Sleeping is NOT allowed at this time.

>

> It seems you are mixing the sleeping paths of the mmcpstore with that of atomic path.


No, I am not mixing them, but questioning them.

For the non atomic path, I don't understand why the pstore file system
mounting, etc, deserves to be managed through its own specific ops?
Are there any specific reasons for this that I am missing?

In principle, for non atomic path, I would rather see that the pstore
file system should be able to be mounted on top of any generic block
device partition - without requiring the block device driver to
implement specific pstore ops.

>

>

> >> +

> >> +static void mmcpstore_panic_write_req(const char *buf,

> >> +               unsigned int nsects, unsigned int sect_offset) {

> >> +       struct mmcpstore_context *cxt = &oops_cxt;

> >> +       struct mmc_request *mrq = cxt->mrq;

> >> +       struct mmc_card *card = cxt->card;

> >> +       struct mmc_host *host = card->host;

> >> +       struct scatterlist sg;

> >> +       u32 opcode;

> >> +

> >> +       opcode = (nsects > 1) ? MMC_WRITE_MULTIPLE_BLOCK :

> >MMC_WRITE_BLOCK;

> >> +       mmc_prep_req(mrq, sect_offset, nsects, &sg, opcode,

> >MMC_DATA_WRITE);

> >> +       sg_init_one(&sg, buf, (nsects << SECTOR_SHIFT));

> >> +       mmc_set_data_timeout(mrq->data, cxt->card);

> >> +

> >> +       mmc_claim_host(host);

> >

> >So, this will use several locks, which may be a problem, right?

> >

>

> No, as said above locks are present on host driver will be dropped

> in CONFIG_MMC_PSTORE path.


Please have a look at the code implementing mmc_claim_host(). It's not
just a simple spin_lock, but there is also a wait_queue and runtime PM
being managed from there.

>

> >Moreover, if there is an ongoing I/O request (or any other active

> >command/request for that matter), then the host is already claimed by the

> >mmc core. Normally, we would then wait for that request to be completed, to

> >trigger the release of the host and then allow us to claim it here.

> >

> >However, because of the kernel panic, I assume it's quite likely that any

> >ongoing request will not be completed at all, as IRQs may not work, for

> >example.

> >

> >In other words, we may be hanging here forever waiting to claim the host.

> >Unless we are lucky, because of no ongoing request, although we would still

> >have to succeed walking through all the locking, etc, in mmc_claim_host().

> >

>

> host->ops->req_cleanup_pending(host) was introduced to clean up the queued

> and ongoing requests/commands. Terminating ongoing requests is not a complicated

> thing for the host drivers.


Well, I don't agree. Resetting the host controller should not be a big
problem, but I am more worried about what state this will bring the
eMMC/SD card in.

It sounds to me that the only option is to try to rely on the
mmc_claim_host() to actually succeed. This makes it certain that there
is no ongoing request that needs to be terminated. Otherwise, things
will just fall apart.

The question is, can/should we rely on mmc_claim_host() to succeed in
this path? Maybe it will work, in cases when there is no ongoing
request, as it means the host should be available to be immediately
claimed. Although, then the problem ends up with runtime PM, as if the
host is available for claiming, it's likely that the host is runtime
suspended...

>

>

> >Do note, as part of the mmc_claim_host() we may also runtime resume the

> >host, if it was runtime suspended (which is quite likely). To runtime resume a

> >host via runtime PM, we may end up ungating device clocks, power on

> >corresponding PM domains, run a so called re-tuning sequence to restore

> >communication with the card, etc, etc. The point is, all these things must also

> >be possible to do, without locks and by using a polling based "mode"...

> >

> >> +       mmc_wait_for_pstore_req(host, mrq);

> >

> >Okay, so let's assume that we are lucky and succeed to claim and runtime

> >resume the host above.

> >

> >Then we also need to runtime resume the card, as it may be runtime

> >suspended. In the "worst case", runtime resuming the card means a complete

> >re-initialization of it, covering a series of commands and turning on regulators,

> >for example.

> >

> >> +       mmc_release_host(card->host);

> >> +}

>

>

> All the above said things (runtime resuming host, clocks being updated, restoring

> communication with host) are valid even for the newly asked request path. They cannot

> be avoided as we have to terminate the ongoing requests to complete the panic write.


Exactly. That's why I wonder if it's really worth it to support the
panic writes at all.

I realize that the host driver you are working on doesn't support
runtime PM, so it's not a problem for you, but for many other cases I
assume it would be.

Perhaps we could check that runtime PM is disabled for the mmc host as
a pre-condition to support this feature?

>

> >> +

> >> +static ssize_t mmcpstore_panic_write(const char *buf, size_t size,

> >> +loff_t off) {

> >> +       struct mmcpstore_context *cxt = &oops_cxt;

> >> +

> >> +       mmcpstore_panic_write_req(buf, (size >> SECTOR_SHIFT),

> >> +                       cxt->start_sect + (off >> SECTOR_SHIFT));

> >> +       return size;

> >> +}

> >

> >[...]

> >

> >Having said the above, I am not entirely convinced that it makes sense to

> >support this, at all.

> >

> >Not only, will the support be highly fragile from the mmc core point of view,

> >but there is also a significant complexity for an mmc host driver to support this

> >(at least in general).

> >

>

> I am not sure if the comments on host driver complexity is true. Terminating

> ongoing requests and introducing polling functions on host drivers should be

> straight forward. None those would disturb the core functionality. They are

> completely independent.


I think you are underestimating the part with terminating ongoing
requests. It sounds to me that you really haven't been terminating any
requests at all, but rather just doing a reset of the mmc controller
(which is what I observed in patch2).

When it comes to adding the new host ops to support the polling
functions, I have in principle no objections to that.

Kind regards
Uffe
Bhaskara Budiredla Dec. 15, 2020, 2:34 p.m. UTC | #4
>-----Original Message-----

>From: Ulf Hansson <ulf.hansson@linaro.org>

>Sent: Tuesday, December 15, 2020 5:13 PM

>To: Bhaskara Budiredla <bbudiredla@marvell.com>

>Cc: Kees Cook <keescook@chromium.org>; Colin Cross

><ccross@android.com>; Tony Luck <tony.luck@intel.com>; Sunil Kovvuri

>Goutham <sgoutham@marvell.com>; linux-mmc@vger.kernel.org; Linux

>Kernel Mailing List <linux-kernel@vger.kernel.org>; Christoph Hellwig

><hch@lst.de>

>Subject: Re: [EXT] Re: [PATCH 1/2] mmc: Support kmsg dumper based on

>pstore/blk

>

>On Tue, 15 Dec 2020 at 07:52, Bhaskara Budiredla <bbudiredla@marvell.com>

>wrote:

>>

>>

>>

>> >-----Original Message-----

>> >From: Ulf Hansson <ulf.hansson@linaro.org>

>> >Sent: Friday, December 11, 2020 5:02 PM

>> >To: Bhaskara Budiredla <bbudiredla@marvell.com>

>> >Cc: Kees Cook <keescook@chromium.org>; Colin Cross

>> ><ccross@android.com>; Tony Luck <tony.luck@intel.com>; Sunil Kovvuri

>> >Goutham <sgoutham@marvell.com>; linux-mmc@vger.kernel.org; Linux

>> >Kernel Mailing List <linux-kernel@vger.kernel.org>; Christoph Hellwig

>> ><hch@lst.de>

>> >Subject: [EXT] Re: [PATCH 1/2] mmc: Support kmsg dumper based on

>> >pstore/blk

>> >

>> >External Email

>> >

>> >---------------------------------------------------------------------

>> >-

>> >+ Christoph

>> >

>> >On Mon, 7 Dec 2020 at 12:58, Bhaskara Budiredla

>> ><bbudiredla@marvell.com>

>> >wrote:

>> >>

>> >> This patch introduces to mmcpstore. The functioning of mmcpstore is

>> >> similar to mtdpstore. mmcpstore works on FTL based flash devices

>> >> whereas mtdpstore works on raw flash devices. When the system

>> >> crashes, mmcpstore stores the kmsg panic and oops logs to a user

>> >> specified MMC device.

>> >>

>> >> It collects the details about the host MMC device through

>> >> pstore/blk "blkdev" parameter. The user can specify the MMC device

>> >> in many ways by checking in Documentation/admin-guide/pstore-blk.rst.

>> >>

>> >> The individual mmc host drivers have to define suitable polling and

>> >> cleanup subroutines to write kmsg panic/oops logs through mmcpstore.

>> >> These new host operations are needed as pstore panic write runs

>> >> with interrupts disabled.

>> >

>> >Apologies for the delay. I have tried to give this some more

>> >thinking, more comments below.

>> >

>> >[...]

>> >

>> >> diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c

>> >> index

>> >> d42037f0f10d..7682b267f1d5 100644

>> >> --- a/drivers/mmc/core/core.c

>> >> +++ b/drivers/mmc/core/core.c

>> >> @@ -569,6 +569,30 @@ int mmc_cqe_recovery(struct mmc_host *host)

>}

>> >> EXPORT_SYMBOL(mmc_cqe_recovery);

>> >>

>> >> +#if IS_ENABLED(CONFIG_MMC_PSTORE)

>> >> +/**

>> >> + *     mmc_wait_for_pstore_req - initiate a blocking mmc request

>> >> + *     @host: MMC host to start command

>> >> + *     @mrq: MMC request to start

>> >> + *

>> >> + *     Start a blocking MMC request for a host and wait for the request

>> >> + *     to complete that is based on polling and timeout.

>> >> + */

>> >> +void mmc_wait_for_pstore_req(struct mmc_host *host, struct

>> >> +mmc_request *mrq) {

>> >> +       unsigned int timeout;

>> >> +

>> >> +       host->ops->req_cleanup_pending(host);

>> >

>> >So, the host driver should through this callback, be able to

>> >terminate any ongoing requests/commands - and also try to make sure

>> >that the (e)MMC/SD card remains in the data transfer state. Moreover,

>> >no locks and no IRQs must be used to manage this, right?

>> >

>>

>> Yes, that's correct.

>>

>> >Have you really tried if this works for real?

>> >

>>

>> Yes, it's a working solution. Patch were submitted after testing.

>>

>> >> +       mmc_start_request(host, mrq);

>> >

>> >This looks like the wrong approach to me, as it will try to re-use

>> >the regular request based path, where the host driver is allowed to

>> >use locks, IRQs, DMAs, etc, etc.

>> >

>>

>> No. The locks on host driver will be dropped in CONFIG_MMC_PSTORE path.

>> Similarly, the IRQs will be replaced with polling subroutines during

>> panic write. Please take a look at patch 2/2 which does this for cavium host

>driver.

>

>Yes, but why is removing the locks okay for the other regular request case?

>

>I assume the locks are there for a reason, right?

>

Sorry, it was my mistake. The locks will not be applied to panic write case
in CONFIG_MMC_PSTORE path. They will continue to exist for regular
read/write path..

For quick reference, copying a snippet from cavium host driver (patch 2/2). host->pstore
will be true for panic write.
static void thunder_mmc_acquire_bus(struct cvm_mmc_host *host)
 {
+#if IS_ENABLED(CONFIG_MMC_PSTORE)
+	if (!host->pstore)
+		down(&host->mmc_serializer);
+#else
 	down(&host->mmc_serializer);
+#endif
 }


>>

>> >I would suggest inventing a new separate request path and a new host

>> >ops, to deal with these kinds of requests.

>> >

>>

>> The polling and cleanup host operations are the ones invented for this

>purpose.

>> Polling to replace IRQs and cleanup to terminate ongoing

>requests/commands.

>

>I understand the approach you have taken, but what I am saying is that I don't

>think it is the best one to choose.

>

>Then I think it's better to invent an entirely new path, rather than trying to

>tweak the existing one. For example, in your case you could leave the lock to

>be used (in patch2) for the existing regular request path, but bypass the locks

>and use polling in the new one.

>


The above clarification should answer this.

>>

>> >In this way, the below part with host->ops->req_completion_poll, can

>> >probably be removed. Instead we may just wait for the request to

>> >return from the new request path.

>> >

>>

>> This is not possible unless CPU itself does the mmc write (through some

>block interface?).

>> If the answer is block interface, sleeping cannot be avoided as part of it.

>

>I wasn't thinking of the block interface, but rather of the internal behavior of

>an mmc host driver.

>

>For data transfers, it's common to use DMA when that is supported. If DMA

>doesn't work, it's probably a FIFO being read/written to based upon some

>IRQs telling when read/write should be done. Of course, there are other cases

>as well.

>

>For commands, I expect those to be managed through IRQs.

>

>That said, all these things are preferably managed through a polling based

>method instead, when doing the panic writes, correct?

>

>> Do you really think DMA can be avoided for MMC panic writes?

>

>That depends on the HW.

>

>Certainly there are lots of cases where DMA isn't the only way, but instead it's

>perfectly possible to fall back to polling based mode.

>

>>

>> >> +

>> >> +       if (mrq->data) {

>> >> +               timeout = mrq->data->timeout_ns / NSEC_PER_MSEC;

>> >> +               host->ops->req_completion_poll(host, timeout);

>> >> +       }

>> >> +}

>> >> +EXPORT_SYMBOL(mmc_wait_for_pstore_req);

>> >> +#endif

>> >> +

>> >>  /**

>> >>   *     mmc_is_req_done - Determine if a 'cap_cmd_during_tfr' request is

>> >done

>> >>   *     @host: MMC host

>> >> diff --git a/drivers/mmc/core/mmcpstore.c

>> >> b/drivers/mmc/core/mmcpstore.c new file mode 100644 index

>> >> 000000000000..1113eae0756c

>> >> --- /dev/null

>> >> +++ b/drivers/mmc/core/mmcpstore.c

>> >> @@ -0,0 +1,302 @@

>> >> +// SPDX-License-Identifier: GPL-2.0

>> >> +/*

>> >> + * MMC pstore support based on pstore/blk

>> >> + *

>> >> + * Copyright (c) 2020 Marvell.

>> >> + * Author: Bhaskara Budiredla <bbudiredla@marvell.com>  */

>> >> +

>> >> +#define pr_fmt(fmt) "mmcpstore: " fmt

>> >> +

>> >> +#include <linux/kernel.h>

>> >> +#include <linux/module.h>

>> >> +#include <linux/pstore_blk.h>

>> >> +#include <linux/blkdev.h>

>> >> +#include <linux/mount.h>

>> >> +#include <linux/slab.h>

>> >> +#include <linux/mmc/mmc.h>

>> >> +#include <linux/mmc/host.h>

>> >> +#include <linux/mmc/card.h>

>> >> +#include <linux/scatterlist.h>

>> >> +#include "block.h"

>> >> +#include "card.h"

>> >> +#include "core.h"

>> >> +

>> >> +static struct mmcpstore_context {

>> >> +       char dev_name[BDEVNAME_SIZE];

>> >> +       int partno;

>> >> +       sector_t start_sect;

>> >> +       sector_t size;

>> >> +       struct pstore_device_info dev;

>> >> +       struct pstore_blk_config conf;

>> >> +       struct pstore_blk_info info;

>> >> +

>> >> +       char *sub;

>> >> +       struct mmc_card *card;

>> >> +       struct mmc_request *mrq;

>> >> +} oops_cxt;

>> >> +

>> >> +static void mmc_prep_req(struct mmc_request *mrq,

>> >> +               unsigned int sect_offset, unsigned int nsects,

>> >> +               struct scatterlist *sg, u32 opcode, unsigned int

>> >> +flags) {

>> >> +       mrq->cmd->opcode = opcode;

>> >> +       mrq->cmd->arg = sect_offset;

>> >> +       mrq->cmd->flags = MMC_RSP_R1 | MMC_CMD_ADTC;

>> >> +

>> >> +       if (nsects == 1) {

>> >> +               mrq->stop = NULL;

>> >> +       } else {

>> >> +               mrq->stop->opcode = MMC_STOP_TRANSMISSION;

>> >> +               mrq->stop->arg = 0;

>> >> +               mrq->stop->flags = MMC_RSP_R1B | MMC_CMD_AC;

>> >> +       }

>> >> +

>> >> +       mrq->data->blksz = SECTOR_SIZE;

>> >> +       mrq->data->blocks = nsects;

>> >> +       mrq->data->flags = flags;

>> >> +       mrq->data->sg = sg;

>> >> +       mrq->data->sg_len = 1;

>> >> +}

>> >> +

>> >> +static int mmcpstore_rdwr_req(const char *buf, unsigned int nsects,

>> >> +                       unsigned int sect_offset, unsigned int

>> >> +flags) {

>> >> +       struct mmcpstore_context *cxt = &oops_cxt;

>> >> +       struct mmc_request *mrq = cxt->mrq;

>> >> +       struct mmc_card *card = cxt->card;

>> >> +       struct mmc_host *host = card->host;

>> >> +       struct scatterlist sg;

>> >> +       u32 opcode;

>> >> +

>> >> +       if (flags == MMC_DATA_READ)

>> >> +               opcode  = (nsects > 1) ?

>> >> +                       MMC_READ_MULTIPLE_BLOCK :

>MMC_READ_SINGLE_BLOCK;

>> >> +       else

>> >> +               opcode = (nsects > 1) ?

>> >> +                       MMC_WRITE_MULTIPLE_BLOCK : MMC_WRITE_BLOCK;

>> >> +

>> >> +       mmc_prep_req(mrq, sect_offset, nsects, &sg, opcode, flags);

>> >> +       sg_init_one(&sg, buf, (nsects << SECTOR_SHIFT));

>> >> +       mmc_set_data_timeout(mrq->data, cxt->card);

>> >> +

>> >> +       mmc_claim_host(host);

>> >> +       mmc_wait_for_req(host, mrq);

>> >> +       mdelay(mrq->data->timeout_ns / NSEC_PER_MSEC);

>> >> +       mmc_release_host(host);

>> >> +

>> >> +       if (mrq->cmd->error) {

>> >> +               pr_err("Cmd error: %d\n", mrq->cmd->error);

>> >> +               return mrq->cmd->error;

>> >> +       }

>> >> +       if (mrq->data->error) {

>> >> +               pr_err("Data error: %d\n", mrq->data->error);

>> >> +               return mrq->data->error;

>> >> +       }

>> >> +

>> >> +       return 0;

>> >> +}

>> >> +

>> >> +static ssize_t mmcpstore_write(const char *buf, size_t size,

>> >> +loff_t

>> >> +off) {

>> >> +       struct mmcpstore_context *cxt = &oops_cxt;

>> >> +       int ret;

>> >> +

>> >> +       ret = mmcpstore_rdwr_req(buf, (size >> SECTOR_SHIFT),

>> >> +               cxt->start_sect + (off >> SECTOR_SHIFT), MMC_DATA_WRITE);

>> >> +       if (ret)

>> >> +               return ret;

>> >> +

>> >> +       return size;

>> >> +}

>> >> +

>> >> +static ssize_t mmcpstore_read(char *buf, size_t size, loff_t off) {

>> >> +       struct mmcpstore_context *cxt = &oops_cxt;

>> >> +       unsigned int sect_off = cxt->start_sect  + (off >> SECTOR_SHIFT);

>> >> +       unsigned long sects = (cxt->conf.kmsg_size >> SECTOR_SHIFT);

>> >> +       int ret;

>> >> +

>> >> +       if (unlikely(!buf || !size))

>> >> +               return -EINVAL;

>> >> +

>> >> +       ret = mmcpstore_rdwr_req(cxt->sub, sects, sect_off,

>> >MMC_DATA_READ);

>> >> +       if (ret)

>> >> +               return ret;

>> >> +       memcpy(buf, cxt->sub, size);

>> >> +

>> >> +       return size;

>> >> +}

>> >

>> >It looks like the above I/O read/write interface for pstore is

>> >intended to be used when the platform is up and running and not during a

>panic, correct?

>> >

>> >If so, I don't get why it can't use the regular block interface, as

>> >any other file system does, for example?

>> >

>>

>> The pstore read and write operations are used as part of pstore file

>> system mounting to retrieve the stored logs from MMC platform backend

>> and to manage pstore read/write counters. Sleeping would be allowed

>> during this time. Whereas, pstore PANIC write will be called if there happens

>a crash in the system. Sleeping is NOT allowed at this time.

>>

>> It seems you are mixing the sleeping paths of the mmcpstore with that of

>atomic path.

>

>No, I am not mixing them, but questioning them.

>

>For the non atomic path, I don't understand why the pstore file system

>mounting, etc, deserves to be managed through its own specific ops?

>Are there any specific reasons for this that I am missing?

>


Seems generic way is undergoing some changes. Kees already ACKed
the mmcpstore registration through block device registration. 

>In principle, for non atomic path, I would rather see that the pstore file system

>should be able to be mounted on top of any generic block device partition -

>without requiring the block device driver to implement specific pstore ops.

>


Scope to answer this is out of mmcpstore driver. Pstore/blk driver have to 
answer this.

>>

>>

>> >> +

>> >> +static void mmcpstore_panic_write_req(const char *buf,

>> >> +               unsigned int nsects, unsigned int sect_offset) {

>> >> +       struct mmcpstore_context *cxt = &oops_cxt;

>> >> +       struct mmc_request *mrq = cxt->mrq;

>> >> +       struct mmc_card *card = cxt->card;

>> >> +       struct mmc_host *host = card->host;

>> >> +       struct scatterlist sg;

>> >> +       u32 opcode;

>> >> +

>> >> +       opcode = (nsects > 1) ? MMC_WRITE_MULTIPLE_BLOCK :

>> >MMC_WRITE_BLOCK;

>> >> +       mmc_prep_req(mrq, sect_offset, nsects, &sg, opcode,

>> >MMC_DATA_WRITE);

>> >> +       sg_init_one(&sg, buf, (nsects << SECTOR_SHIFT));

>> >> +       mmc_set_data_timeout(mrq->data, cxt->card);

>> >> +

>> >> +       mmc_claim_host(host);

>> >

>> >So, this will use several locks, which may be a problem, right?

>> >

>>

>> No, as said above locks are present on host driver will be dropped in

>> CONFIG_MMC_PSTORE path.

>

>Please have a look at the code implementing mmc_claim_host(). It's not just a

>simple spin_lock, but there is also a wait_queue and runtime PM being

>managed from there.

>

>>

>> >Moreover, if there is an ongoing I/O request (or any other active

>> >command/request for that matter), then the host is already claimed by

>> >the mmc core. Normally, we would then wait for that request to be

>> >completed, to trigger the release of the host and then allow us to claim it

>here.

>> >

>> >However, because of the kernel panic, I assume it's quite likely that

>> >any ongoing request will not be completed at all, as IRQs may not

>> >work, for example.

>> >

>> >In other words, we may be hanging here forever waiting to claim the host.

>> >Unless we are lucky, because of no ongoing request, although we would

>> >still have to succeed walking through all the locking, etc, in

>mmc_claim_host().

>> >


I agree with your concerns about spin_lock, wait_queue and runtime PM in
mmc_claim_host(), but not the ongoing requests presence. They must 
have terminated cleanly through host->ops->req_cleanup_pending(host) 
before reaching mmc_claim_host().

I will come up with something to address these in the next patch.

>>

>> host->ops->req_cleanup_pending(host) was introduced to clean up the

>> host->ops->queued

>> and ongoing requests/commands. Terminating ongoing requests is not a

>> complicated thing for the host drivers.

>

>Well, I don't agree. Resetting the host controller should not be a big problem,

>but I am more worried about what state this will bring the eMMC/SD card in.

>


I am not sure why are you saying host controller reset. No ware host controller
reset was performed as part of these patches.

>It sounds to me that the only option is to try to rely on the

>mmc_claim_host() to actually succeed. This makes it certain that there is no

>ongoing request that needs to be terminated. Otherwise, things will just fall

>apart.

>


Agree. Seems I need to create an alternate path to forcefully gain access to the host
for the case of panic write. As you pointed out mmc_claim_host(), mmc_release_host()
and runtime PM can create issues.

>The question is, can/should we rely on mmc_claim_host() to succeed in this

>path? Maybe it will work, in cases when there is no ongoing request, as it

>means the host should be available to be immediately claimed. Although,

>then the problem ends up with runtime PM, as if the host is available for

>claiming, it's likely that the host is runtime suspended...

>


An extra check can be added to see if host was runtime suspended ahead
of panic write attempt. 

>>

>>

>> >Do note, as part of the mmc_claim_host() we may also runtime resume

>> >the host, if it was runtime suspended (which is quite likely). To

>> >runtime resume a host via runtime PM, we may end up ungating device

>> >clocks, power on corresponding PM domains, run a so called re-tuning

>> >sequence to restore communication with the card, etc, etc. The point

>> >is, all these things must also be possible to do, without locks and by using a

>polling based "mode"...

>> >

>> >> +       mmc_wait_for_pstore_req(host, mrq);

>> >

>> >Okay, so let's assume that we are lucky and succeed to claim and

>> >runtime resume the host above.

>> >

>> >Then we also need to runtime resume the card, as it may be runtime

>> >suspended. In the "worst case", runtime resuming the card means a

>> >complete re-initialization of it, covering a series of commands and

>> >turning on regulators, for example.

>> >

>> >> +       mmc_release_host(card->host); }

>>

>>

>> All the above said things (runtime resuming host, clocks being

>> updated, restoring communication with host) are valid even for the

>> newly asked request path. They cannot be avoided as we have to terminate

>the ongoing requests to complete the panic write.

>

>Exactly. That's why I wonder if it's really worth it to support the panic writes at

>all.

>


Definitely this feature is desirable for those who would like to have the mmc
as the platform backend.

>I realize that the host driver you are working on doesn't support runtime PM,

>so it's not a problem for you, but for many other cases I assume it would be.

>

>Perhaps we could check that runtime PM is disabled for the mmc host as a

>pre-condition to support this feature?

>


There are two things here. Runtime PM feature enable and its
suspension being in force by the time of panic write. We can ignore those panic writes whenever
host was runtime suspended. In other cases panic logs can be stored to mmc. Losing few of the
records is still fine as the other records provide the nature of common crashes happening on the platform.


>>

>> >> +

>> >> +static ssize_t mmcpstore_panic_write(const char *buf, size_t size,

>> >> +loff_t off) {

>> >> +       struct mmcpstore_context *cxt = &oops_cxt;

>> >> +

>> >> +       mmcpstore_panic_write_req(buf, (size >> SECTOR_SHIFT),

>> >> +                       cxt->start_sect + (off >> SECTOR_SHIFT));

>> >> +       return size;

>> >> +}

>> >

>> >[...]

>> >

>> >Having said the above, I am not entirely convinced that it makes

>> >sense to support this, at all.

>> >

>> >Not only, will the support be highly fragile from the mmc core point

>> >of view, but there is also a significant complexity for an mmc host

>> >driver to support this (at least in general).

>> >

>>

>> I am not sure if the comments on host driver complexity is true.

>> Terminating ongoing requests and introducing polling functions on host

>> drivers should be straight forward. None those would disturb the core

>> functionality. They are completely independent.

>

>I think you are underestimating the part with terminating ongoing requests. It

>sounds to me that you really haven't been terminating any requests at all, but

>rather just doing a reset of the mmc controller (which is what I observed in

>patch2).

>


No, it's not true. I am not doing any reset. Please point me to specific code snippet
where you have observed this.

>When it comes to adding the new host ops to support the polling functions, I

>have in principle no objections to that.

>

>Kind regards

>Uffe
Kees Cook Dec. 15, 2020, 8:37 p.m. UTC | #5
On Tue, Dec 15, 2020 at 12:42:58PM +0100, Ulf Hansson wrote:
> In principle, for non atomic path, I would rather see that the pstore

> file system should be able to be mounted on top of any generic block

> device partition - without requiring the block device driver to

> implement specific pstore ops.

> [...]

> Exactly. That's why I wonder if it's really worth it to support the

> panic writes at all.


pstore/blk already provides the generic hooking -- but it can't do
the panic write part (which that's very device/driver-specific). The
design was for individual backing devices to provide that directly
(which would needed read/write support too). And for those that don't
have panic/read/write support, they could still use the generic hooks
but they wouldn't be able to reliably (or at all?) catch panics (just
console writes, ftrace, pmsg, etc).

-- 
Kees Cook
Ulf Hansson Dec. 16, 2020, 9:57 a.m. UTC | #6
[...]

> >> >

> >> >It looks like the above I/O read/write interface for pstore is

> >> >intended to be used when the platform is up and running and not during a

> >panic, correct?

> >> >

> >> >If so, I don't get why it can't use the regular block interface, as

> >> >any other file system does, for example?

> >> >

> >>

> >> The pstore read and write operations are used as part of pstore file

> >> system mounting to retrieve the stored logs from MMC platform backend

> >> and to manage pstore read/write counters. Sleeping would be allowed

> >> during this time. Whereas, pstore PANIC write will be called if there happens

> >a crash in the system. Sleeping is NOT allowed at this time.

> >>

> >> It seems you are mixing the sleeping paths of the mmcpstore with that of

> >atomic path.

> >

> >No, I am not mixing them, but questioning them.

> >

> >For the non atomic path, I don't understand why the pstore file system

> >mounting, etc, deserves to be managed through its own specific ops?

> >Are there any specific reasons for this that I am missing?

> >

>

> Seems generic way is undergoing some changes. Kees already ACKed

> the mmcpstore registration through block device registration.

>

> >In principle, for non atomic path, I would rather see that the pstore file system

> >should be able to be mounted on top of any generic block device partition -

> >without requiring the block device driver to implement specific pstore ops.

> >

>

> Scope to answer this is out of mmcpstore driver. Pstore/blk driver have to

> answer this.


Yep, I am open to discuss this more.

>

> >>

> >>

> >> >> +

> >> >> +static void mmcpstore_panic_write_req(const char *buf,

> >> >> +               unsigned int nsects, unsigned int sect_offset) {

> >> >> +       struct mmcpstore_context *cxt = &oops_cxt;

> >> >> +       struct mmc_request *mrq = cxt->mrq;

> >> >> +       struct mmc_card *card = cxt->card;

> >> >> +       struct mmc_host *host = card->host;

> >> >> +       struct scatterlist sg;

> >> >> +       u32 opcode;

> >> >> +

> >> >> +       opcode = (nsects > 1) ? MMC_WRITE_MULTIPLE_BLOCK :

> >> >MMC_WRITE_BLOCK;

> >> >> +       mmc_prep_req(mrq, sect_offset, nsects, &sg, opcode,

> >> >MMC_DATA_WRITE);

> >> >> +       sg_init_one(&sg, buf, (nsects << SECTOR_SHIFT));

> >> >> +       mmc_set_data_timeout(mrq->data, cxt->card);

> >> >> +

> >> >> +       mmc_claim_host(host);

> >> >

> >> >So, this will use several locks, which may be a problem, right?

> >> >

> >>

> >> No, as said above locks are present on host driver will be dropped in

> >> CONFIG_MMC_PSTORE path.

> >

> >Please have a look at the code implementing mmc_claim_host(). It's not just a

> >simple spin_lock, but there is also a wait_queue and runtime PM being

> >managed from there.

> >

> >>

> >> >Moreover, if there is an ongoing I/O request (or any other active

> >> >command/request for that matter), then the host is already claimed by

> >> >the mmc core. Normally, we would then wait for that request to be

> >> >completed, to trigger the release of the host and then allow us to claim it

> >here.

> >> >

> >> >However, because of the kernel panic, I assume it's quite likely that

> >> >any ongoing request will not be completed at all, as IRQs may not

> >> >work, for example.

> >> >

> >> >In other words, we may be hanging here forever waiting to claim the host.

> >> >Unless we are lucky, because of no ongoing request, although we would

> >> >still have to succeed walking through all the locking, etc, in

> >mmc_claim_host().

> >> >

>

> I agree with your concerns about spin_lock, wait_queue and runtime PM in

> mmc_claim_host(), but not the ongoing requests presence. They must

> have terminated cleanly through host->ops->req_cleanup_pending(host)

> before reaching mmc_claim_host().

>

> I will come up with something to address these in the next patch.

>

> >>

> >> host->ops->req_cleanup_pending(host) was introduced to clean up the

> >> host->ops->queued

> >> and ongoing requests/commands. Terminating ongoing requests is not a

> >> complicated thing for the host drivers.

> >

> >Well, I don't agree. Resetting the host controller should not be a big problem,

> >but I am more worried about what state this will bring the eMMC/SD card in.

> >

>

> I am not sure why are you saying host controller reset. No ware host controller

> reset was performed as part of these patches.

>

> >It sounds to me that the only option is to try to rely on the

> >mmc_claim_host() to actually succeed. This makes it certain that there is no

> >ongoing request that needs to be terminated. Otherwise, things will just fall

> >apart.

> >

>

> Agree. Seems I need to create an alternate path to forcefully gain access to the host

> for the case of panic write. As you pointed out mmc_claim_host(), mmc_release_host()

> and runtime PM can create issues.

>

> >The question is, can/should we rely on mmc_claim_host() to succeed in this

> >path? Maybe it will work, in cases when there is no ongoing request, as it

> >means the host should be available to be immediately claimed. Although,

> >then the problem ends up with runtime PM, as if the host is available for

> >claiming, it's likely that the host is runtime suspended...

> >

>

> An extra check can be added to see if host was runtime suspended ahead

> of panic write attempt.


What if that is the case, should we just return an error?

Moreover, even the device belonging to the mmc card can be runtime
suspended too. So if that is the case, we should return an error too?

[...]

> >> >[...]

> >> >

> >> >Having said the above, I am not entirely convinced that it makes

> >> >sense to support this, at all.

> >> >

> >> >Not only, will the support be highly fragile from the mmc core point

> >> >of view, but there is also a significant complexity for an mmc host

> >> >driver to support this (at least in general).

> >> >

> >>

> >> I am not sure if the comments on host driver complexity is true.

> >> Terminating ongoing requests and introducing polling functions on host

> >> drivers should be straight forward. None those would disturb the core

> >> functionality. They are completely independent.

> >

> >I think you are underestimating the part with terminating ongoing requests. It

> >sounds to me that you really haven't been terminating any requests at all, but

> >rather just doing a reset of the mmc controller (which is what I observed in

> >patch2).

> >

>

> No, it's not true. I am not doing any reset. Please point me to specific code snippet

> where you have observed this.


I was looking at patch2 and the ->req_cleanup_pending() callback that
you have assigned to cvm_req_cleanup_pending().

In there you clear a potentially running DMA job, which is *kind* of a
reset of the controller. More importantly, it's definitely *not*
terminating an ongoing request, in a way that you can expect the
eMMC/SD card to be ready for new communications afterwards. This is my
main point.

Kind regards
Uffe
Bhaskara Budiredla Dec. 16, 2020, 10:42 a.m. UTC | #7
>-----Original Message-----

>From: Ulf Hansson <ulf.hansson@linaro.org>

>Sent: Wednesday, December 16, 2020 3:27 PM

>To: Bhaskara Budiredla <bbudiredla@marvell.com>

>Cc: Kees Cook <keescook@chromium.org>; Colin Cross

><ccross@android.com>; Tony Luck <tony.luck@intel.com>; Sunil Kovvuri

>Goutham <sgoutham@marvell.com>; linux-mmc@vger.kernel.org; Linux

>Kernel Mailing List <linux-kernel@vger.kernel.org>; Christoph Hellwig

><hch@lst.de>

>Subject: Re: [EXT] Re: [PATCH 1/2] mmc: Support kmsg dumper based on

>pstore/blk

>

>[...]

>

>> >> >

>> >> >It looks like the above I/O read/write interface for pstore is

>> >> >intended to be used when the platform is up and running and not

>> >> >during a

>> >panic, correct?

>> >> >

>> >> >If so, I don't get why it can't use the regular block interface,

>> >> >as any other file system does, for example?

>> >> >

>> >>

>> >> The pstore read and write operations are used as part of pstore

>> >> file system mounting to retrieve the stored logs from MMC platform

>> >> backend and to manage pstore read/write counters. Sleeping would be

>> >> allowed during this time. Whereas, pstore PANIC write will be

>> >> called if there happens

>> >a crash in the system. Sleeping is NOT allowed at this time.

>> >>

>> >> It seems you are mixing the sleeping paths of the mmcpstore with

>> >> that of

>> >atomic path.

>> >

>> >No, I am not mixing them, but questioning them.

>> >

>> >For the non atomic path, I don't understand why the pstore file

>> >system mounting, etc, deserves to be managed through its own specific

>ops?

>> >Are there any specific reasons for this that I am missing?

>> >

>>

>> Seems generic way is undergoing some changes. Kees already ACKed the

>> mmcpstore registration through block device registration.

>>

>> >In principle, for non atomic path, I would rather see that the pstore

>> >file system should be able to be mounted on top of any generic block

>> >device partition - without requiring the block device driver to implement

>specific pstore ops.

>> >

>>

>> Scope to answer this is out of mmcpstore driver. Pstore/blk driver

>> have to answer this.

>

>Yep, I am open to discuss this more.

>

>>

>> >>

>> >>

>> >> >> +

>> >> >> +static void mmcpstore_panic_write_req(const char *buf,

>> >> >> +               unsigned int nsects, unsigned int sect_offset) {

>> >> >> +       struct mmcpstore_context *cxt = &oops_cxt;

>> >> >> +       struct mmc_request *mrq = cxt->mrq;

>> >> >> +       struct mmc_card *card = cxt->card;

>> >> >> +       struct mmc_host *host = card->host;

>> >> >> +       struct scatterlist sg;

>> >> >> +       u32 opcode;

>> >> >> +

>> >> >> +       opcode = (nsects > 1) ? MMC_WRITE_MULTIPLE_BLOCK :

>> >> >MMC_WRITE_BLOCK;

>> >> >> +       mmc_prep_req(mrq, sect_offset, nsects, &sg, opcode,

>> >> >MMC_DATA_WRITE);

>> >> >> +       sg_init_one(&sg, buf, (nsects << SECTOR_SHIFT));

>> >> >> +       mmc_set_data_timeout(mrq->data, cxt->card);

>> >> >> +

>> >> >> +       mmc_claim_host(host);

>> >> >

>> >> >So, this will use several locks, which may be a problem, right?

>> >> >

>> >>

>> >> No, as said above locks are present on host driver will be dropped

>> >> in CONFIG_MMC_PSTORE path.

>> >

>> >Please have a look at the code implementing mmc_claim_host(). It's

>> >not just a simple spin_lock, but there is also a wait_queue and

>> >runtime PM being managed from there.

>> >

>> >>

>> >> >Moreover, if there is an ongoing I/O request (or any other active

>> >> >command/request for that matter), then the host is already claimed

>> >> >by the mmc core. Normally, we would then wait for that request to

>> >> >be completed, to trigger the release of the host and then allow us

>> >> >to claim it

>> >here.

>> >> >

>> >> >However, because of the kernel panic, I assume it's quite likely

>> >> >that any ongoing request will not be completed at all, as IRQs may

>> >> >not work, for example.

>> >> >

>> >> >In other words, we may be hanging here forever waiting to claim the

>host.

>> >> >Unless we are lucky, because of no ongoing request, although we

>> >> >would still have to succeed walking through all the locking, etc,

>> >> >in

>> >mmc_claim_host().

>> >> >

>>

>> I agree with your concerns about spin_lock, wait_queue and runtime PM

>> in mmc_claim_host(), but not the ongoing requests presence. They must

>> have terminated cleanly through host->ops->req_cleanup_pending(host)

>> before reaching mmc_claim_host().

>>

>> I will come up with something to address these in the next patch.

>>

>> >>

>> >> host->ops->req_cleanup_pending(host) was introduced to clean up the

>> >> host->ops->queued

>> >> and ongoing requests/commands. Terminating ongoing requests is not

>> >> a complicated thing for the host drivers.

>> >

>> >Well, I don't agree. Resetting the host controller should not be a

>> >big problem, but I am more worried about what state this will bring the

>eMMC/SD card in.

>> >

>>

>> I am not sure why are you saying host controller reset. No ware host

>> controller reset was performed as part of these patches.

>>

>> >It sounds to me that the only option is to try to rely on the

>> >mmc_claim_host() to actually succeed. This makes it certain that

>> >there is no ongoing request that needs to be terminated. Otherwise,

>> >things will just fall apart.

>> >

>>

>> Agree. Seems I need to create an alternate path to forcefully gain

>> access to the host for the case of panic write. As you pointed out

>> mmc_claim_host(), mmc_release_host() and runtime PM can create issues.

>>

>> >The question is, can/should we rely on mmc_claim_host() to succeed in

>> >this path? Maybe it will work, in cases when there is no ongoing

>> >request, as it means the host should be available to be immediately

>> >claimed. Although, then the problem ends up with runtime PM, as if

>> >the host is available for claiming, it's likely that the host is runtime

>suspended...

>> >

>>

>> An extra check can be added to see if host was runtime suspended ahead

>> of panic write attempt.

>

>What if that is the case, should we just return an error?

>

Yes.

>Moreover, even the device belonging to the mmc card can be runtime

>suspended too. So if that is the case, we should return an error too?

>


Yes, same here.

Assuming ->req_cleanup_pending() properly terminates the ongoing DMA transfers,
mmc_claim_host() and mmc_release_host() can be dropped in panic write case
as it has then exclusive access from then on.


>[...]

>

>> >> >[...]

>> >> >

>> >> >Having said the above, I am not entirely convinced that it makes

>> >> >sense to support this, at all.

>> >> >

>> >> >Not only, will the support be highly fragile from the mmc core

>> >> >point of view, but there is also a significant complexity for an

>> >> >mmc host driver to support this (at least in general).

>> >> >

>> >>

>> >> I am not sure if the comments on host driver complexity is true.

>> >> Terminating ongoing requests and introducing polling functions on

>> >> host drivers should be straight forward. None those would disturb

>> >> the core functionality. They are completely independent.

>> >

>> >I think you are underestimating the part with terminating ongoing

>> >requests. It sounds to me that you really haven't been terminating

>> >any requests at all, but rather just doing a reset of the mmc

>> >controller (which is what I observed in patch2).

>> >

>>

>> No, it's not true. I am not doing any reset. Please point me to

>> specific code snippet where you have observed this.

>

>I was looking at patch2 and the ->req_cleanup_pending() callback that you

>have assigned to cvm_req_cleanup_pending().

>

>In there you clear a potentially running DMA job, which is *kind* of a reset of

>the controller. More importantly, it's definitely *not* terminating an ongoing

>request, in a way that you can expect the eMMC/SD card to be ready for new

>communications afterwards. This is my main point.

>


I am not sure that clearing an ongoing DMA will reset the controller. These are host
controller specific. The idea is: To drop ongoing transfers, whatever a host software
has to do it will does through this cleanup function. We may not generalize this,
providing a hook and letting each host controller handling it seems better.

>Kind regards

>Uffe


Thanks,
Bhaskara
Ulf Hansson Dec. 16, 2020, 10:44 a.m. UTC | #8
On Tue, 15 Dec 2020 at 21:37, Kees Cook <keescook@chromium.org> wrote:
>

> On Tue, Dec 15, 2020 at 12:42:58PM +0100, Ulf Hansson wrote:

> > In principle, for non atomic path, I would rather see that the pstore

> > file system should be able to be mounted on top of any generic block

> > device partition - without requiring the block device driver to

> > implement specific pstore ops.

> > [...]

> > Exactly. That's why I wonder if it's really worth it to support the

> > panic writes at all.

>

> pstore/blk already provides the generic hooking -- but it can't do

> the panic write part (which that's very device/driver-specific). The

> design was for individual backing devices to provide that directly

> (which would needed read/write support too). And for those that don't

> have panic/read/write support, they could still use the generic hooks

> but they wouldn't be able to reliably (or at all?) catch panics (just

> console writes, ftrace, pmsg, etc).


I understand the motivation behind pstore's hook for panic-writes.
It's a special thing and perhaps it's easier to support this via a
specific hook, rather than adopting the regular block device request
path to cope with some special I/O request. On the other hand, in the
discussion I have had with Bhaskara, I have pointed out several severe
implications for mmc to support these panic writes (and believe me,
there are even more than those I have brought up). So I am starting to
think that, perhaps there is a better option.

In any case, I didn't catch *why* pstore needs to force block device
drivers to implement specific pstore hooks to support the pstore file
system. I don't think this is the way it should work, for many
reasons. The pstore file system should be able to be extended, to
support the regular block device request path, no?

Kind regards
Uffe
Ulf Hansson Dec. 16, 2020, 10:52 a.m. UTC | #9
[...]

> >>

> >> Agree. Seems I need to create an alternate path to forcefully gain

> >> access to the host for the case of panic write. As you pointed out

> >> mmc_claim_host(), mmc_release_host() and runtime PM can create issues.

> >>

> >> >The question is, can/should we rely on mmc_claim_host() to succeed in

> >> >this path? Maybe it will work, in cases when there is no ongoing

> >> >request, as it means the host should be available to be immediately

> >> >claimed. Although, then the problem ends up with runtime PM, as if

> >> >the host is available for claiming, it's likely that the host is runtime

> >suspended...

> >> >

> >>

> >> An extra check can be added to see if host was runtime suspended ahead

> >> of panic write attempt.

> >

> >What if that is the case, should we just return an error?

> >

> Yes.

>

> >Moreover, even the device belonging to the mmc card can be runtime

> >suspended too. So if that is the case, we should return an error too?

> >

>

> Yes, same here.

>

> Assuming ->req_cleanup_pending() properly terminates the ongoing DMA transfers,

> mmc_claim_host() and mmc_release_host() can be dropped in panic write case

> as it has then exclusive access from then on.


Again, I think you have misunderstood how it works from the core point of view.

->req_cleanup_pending() will never be able to terminate a request in a
proper way, without involving the mmc core's knowledge about the
eMMC/SD protocol.

Yes, it could clean up from an ongoing DMA transfer, but that doesn't
bring the eMMC/SD card back into a state where it can accept new
requests.

>

>

> >[...]

> >

> >> >> >[...]

> >> >> >

> >> >> >Having said the above, I am not entirely convinced that it makes

> >> >> >sense to support this, at all.

> >> >> >

> >> >> >Not only, will the support be highly fragile from the mmc core

> >> >> >point of view, but there is also a significant complexity for an

> >> >> >mmc host driver to support this (at least in general).

> >> >> >

> >> >>

> >> >> I am not sure if the comments on host driver complexity is true.

> >> >> Terminating ongoing requests and introducing polling functions on

> >> >> host drivers should be straight forward. None those would disturb

> >> >> the core functionality. They are completely independent.

> >> >

> >> >I think you are underestimating the part with terminating ongoing

> >> >requests. It sounds to me that you really haven't been terminating

> >> >any requests at all, but rather just doing a reset of the mmc

> >> >controller (which is what I observed in patch2).

> >> >

> >>

> >> No, it's not true. I am not doing any reset. Please point me to

> >> specific code snippet where you have observed this.

> >

> >I was looking at patch2 and the ->req_cleanup_pending() callback that you

> >have assigned to cvm_req_cleanup_pending().

> >

> >In there you clear a potentially running DMA job, which is *kind* of a reset of

> >the controller. More importantly, it's definitely *not* terminating an ongoing

> >request, in a way that you can expect the eMMC/SD card to be ready for new

> >communications afterwards. This is my main point.

> >

>

> I am not sure that clearing an ongoing DMA will reset the controller. These are host

> controller specific. The idea is: To drop ongoing transfers, whatever a host software

> has to do it will does through this cleanup function. We may not generalize this,

> providing a hook and letting each host controller handling it seems better.


I fully understand the approach, but again, it doesn't work. Sorry.

As I said, the only thing that *could* work is to call
mmc_claim_host() in the panic write hook -  and hope for the best. If
it succeeds, there is nothing to cleanup or terminate.

Kind regards
Uffe
Bhaskara Budiredla Dec. 17, 2020, 11:36 a.m. UTC | #10
[...]

>> >> An extra check can be added to see if host was runtime suspended

>> >> ahead of panic write attempt.

>> >

>> >What if that is the case, should we just return an error?

>> >

>> Yes.

>>

>> >Moreover, even the device belonging to the mmc card can be runtime

>> >suspended too. So if that is the case, we should return an error too?

>> >

>>

>> Yes, same here.

>>


Please comment if returning error is sufficient here or
can there be an attempt to wake the device through either of the atomic activation calls:
pm_runtime_get(),  pm_request_resume()? 


Thanks,
Bhaskara
Ulf Hansson Dec. 17, 2020, 5:11 p.m. UTC | #11
On Thu, 17 Dec 2020 at 12:36, Bhaskara Budiredla <bbudiredla@marvell.com> wrote:
>

>

> [...]

>

> >> >> An extra check can be added to see if host was runtime suspended

> >> >> ahead of panic write attempt.

> >> >

> >> >What if that is the case, should we just return an error?

> >> >

> >> Yes.

> >>

> >> >Moreover, even the device belonging to the mmc card can be runtime

> >> >suspended too. So if that is the case, we should return an error too?

> >> >

> >>

> >> Yes, same here.

> >>

>

> Please comment if returning error is sufficient here or

> can there be an attempt to wake the device through either of the atomic activation calls:

> pm_runtime_get(),  pm_request_resume()?


Hmm, I would start with playing with the below. mmc_claim_host
supports also nested claims.

mmc_claim_host(host)  - this will call pm_runtime_get_sync(host)
mmc_get_card(card, NULL) - this will call can
pm_runtime_get_sync(card)) and also try to claim the host

Kind regards
Uffe
Bhaskara Budiredla Dec. 18, 2020, 4:02 a.m. UTC | #12
>-----Original Message-----

>From: Ulf Hansson <ulf.hansson@linaro.org>

>Sent: Thursday, December 17, 2020 10:42 PM

>To: Bhaskara Budiredla <bbudiredla@marvell.com>

>Cc: Kees Cook <keescook@chromium.org>; Colin Cross

><ccross@android.com>; Tony Luck <tony.luck@intel.com>; Sunil Kovvuri

>Goutham <sgoutham@marvell.com>; linux-mmc@vger.kernel.org; Linux

>Kernel Mailing List <linux-kernel@vger.kernel.org>; Christoph Hellwig

><hch@lst.de>

>Subject: Re: [EXT] Re: [PATCH 1/2] mmc: Support kmsg dumper based on

>pstore/blk

>

>On Thu, 17 Dec 2020 at 12:36, Bhaskara Budiredla <bbudiredla@marvell.com>

>wrote:

>>

>>

>> [...]

>>

>> >> >> An extra check can be added to see if host was runtime suspended

>> >> >> ahead of panic write attempt.

>> >> >

>> >> >What if that is the case, should we just return an error?

>> >> >

>> >> Yes.

>> >>

>> >> >Moreover, even the device belonging to the mmc card can be runtime

>> >> >suspended too. So if that is the case, we should return an error too?

>> >> >

>> >>

>> >> Yes, same here.

>> >>

>>

>> Please comment if returning error is sufficient here or can there be

>> an attempt to wake the device through either of the atomic activation calls:

>> pm_runtime_get(),  pm_request_resume()?

>

>Hmm, I would start with playing with the below. mmc_claim_host supports

>also nested claims.

>

>mmc_claim_host(host)  - this will call pm_runtime_get_sync(host)

>mmc_get_card(card, NULL) - this will call can

>pm_runtime_get_sync(card)) and also try to claim the host

>


As you suggested I am creating a parallel path that avoids wait queue 
to claim the host. The *_sync()* routines could sleep, I can't use them
as part of panic write. 


>Kind regards

>Uffe


Thanks,
Bhaskara
Kees Cook Dec. 18, 2020, 6:50 p.m. UTC | #13
On Wed, Dec 16, 2020 at 11:44:26AM +0100, Ulf Hansson wrote:
> In any case, I didn't catch *why* pstore needs to force block device

> drivers to implement specific pstore hooks to support the pstore file

> system. I don't think this is the way it should work, for many


For panic, pstore must avoid any path that might sleep. In a perfect
world, it should also use as little code as possible, to avoid
potentially tripping over areas of the kernel that might be broken.

-- 
Kees Cook
diff mbox series

Patch

diff --git a/drivers/mmc/core/Kconfig b/drivers/mmc/core/Kconfig
index c12fe13e4b14..4c651da4f2d2 100644
--- a/drivers/mmc/core/Kconfig
+++ b/drivers/mmc/core/Kconfig
@@ -34,9 +34,22 @@  config PWRSEQ_SIMPLE
 	  This driver can also be built as a module. If so, the module
 	  will be called pwrseq_simple.
 
+config MMC_PSTORE_BACKEND
+	bool "Log panic/oops to a MMC buffer"
+	depends on MMC_BLOCK
+	help
+	  This option will let you create platform backend to store kmsg
+	  crash dumps to a user specified MMC device. This is primarily
+	  based on pstore/blk.
+
+config MMC_PSTORE
+	tristate
+	select PSTORE_BLK
+
 config MMC_BLOCK
 	tristate "MMC block device driver"
 	depends on BLOCK
+	select MMC_PSTORE if MMC_PSTORE_BACKEND=y
 	default y
 	help
 	  Say Y here to enable the MMC block device driver support.
@@ -80,4 +93,3 @@  config MMC_TEST
 
 	  This driver is only of interest to those developing or
 	  testing a host driver. Most people should say N here.
-
diff --git a/drivers/mmc/core/Makefile b/drivers/mmc/core/Makefile
index 95ffe008ebdf..7cb9a3af4827 100644
--- a/drivers/mmc/core/Makefile
+++ b/drivers/mmc/core/Makefile
@@ -16,5 +16,6 @@  obj-$(CONFIG_PWRSEQ_EMMC)	+= pwrseq_emmc.o
 mmc_core-$(CONFIG_DEBUG_FS)	+= debugfs.o
 obj-$(CONFIG_MMC_BLOCK)		+= mmc_block.o
 mmc_block-objs			:= block.o queue.o
+mmc_block-$(CONFIG_MMC_PSTORE)	+= mmcpstore.o
 obj-$(CONFIG_MMC_TEST)		+= mmc_test.o
 obj-$(CONFIG_SDIO_UART)		+= sdio_uart.o
diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c
index 8d3df0be0355..ed012a91e3a3 100644
--- a/drivers/mmc/core/block.c
+++ b/drivers/mmc/core/block.c
@@ -2870,6 +2870,21 @@  static void mmc_blk_remove_debugfs(struct mmc_card *card,
 
 #endif /* CONFIG_DEBUG_FS */
 
+#if IS_ENABLED(CONFIG_MMC_PSTORE)
+sector_t mmc_blk_get_part(struct mmc_card *card, int part_num, sector_t *size)
+{
+	struct mmc_blk_data *md = dev_get_drvdata(&card->dev);
+	struct gendisk *disk = md->disk;
+	struct disk_part_tbl *part_tbl = disk->part_tbl;
+
+	if (part_num < 0 || part_num >= part_tbl->len)
+		return 0;
+
+	*size = part_tbl->part[part_num]->nr_sects << SECTOR_SHIFT;
+	return part_tbl->part[part_num]->start_sect;
+}
+#endif
+
 static int mmc_blk_probe(struct mmc_card *card)
 {
 	struct mmc_blk_data *md, *part_md;
@@ -2913,6 +2928,9 @@  static int mmc_blk_probe(struct mmc_card *card)
 			goto out;
 	}
 
+	if (mmc_card_mmc(card) || mmc_card_sd(card))
+		mmcpstore_card_set(card, md->disk->disk_name);
+
 	/* Add two debugfs entries */
 	mmc_blk_add_debugfs(card, md);
 
@@ -3060,6 +3078,7 @@  static void __exit mmc_blk_exit(void)
 	unregister_blkdev(MMC_BLOCK_MAJOR, "mmc");
 	unregister_chrdev_region(mmc_rpmb_devt, MAX_DEVICES);
 	bus_unregister(&mmc_rpmb_bus_type);
+	unregister_mmcpstore();
 }
 
 module_init(mmc_blk_init);
diff --git a/drivers/mmc/core/block.h b/drivers/mmc/core/block.h
index 31153f656f41..2a4ee5568194 100644
--- a/drivers/mmc/core/block.h
+++ b/drivers/mmc/core/block.h
@@ -16,5 +16,14 @@  void mmc_blk_mq_recovery(struct mmc_queue *mq);
 struct work_struct;
 
 void mmc_blk_mq_complete_work(struct work_struct *work);
+#if IS_ENABLED(CONFIG_MMC_PSTORE)
+sector_t mmc_blk_get_part(struct mmc_card *card, int part_num, sector_t *size);
+void mmcpstore_card_set(struct mmc_card *card, const char *disk_name);
+void unregister_mmcpstore(void);
+#else
+static inline void mmcpstore_card_set(struct mmc_card *card,
+					const char *disk_name) {}
+static inline void unregister_mmcpstore(void) {}
+#endif
 
 #endif
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index d42037f0f10d..7682b267f1d5 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -569,6 +569,30 @@  int mmc_cqe_recovery(struct mmc_host *host)
 }
 EXPORT_SYMBOL(mmc_cqe_recovery);
 
+#if IS_ENABLED(CONFIG_MMC_PSTORE)
+/**
+ *	mmc_wait_for_pstore_req - initiate a blocking mmc request
+ *	@host: MMC host to start command
+ *	@mrq: MMC request to start
+ *
+ *	Start a blocking MMC request for a host and wait for the request
+ *	to complete that is based on polling and timeout.
+ */
+void mmc_wait_for_pstore_req(struct mmc_host *host, struct mmc_request *mrq)
+{
+	unsigned int timeout;
+
+	host->ops->req_cleanup_pending(host);
+	mmc_start_request(host, mrq);
+
+	if (mrq->data) {
+		timeout = mrq->data->timeout_ns / NSEC_PER_MSEC;
+		host->ops->req_completion_poll(host, timeout);
+	}
+}
+EXPORT_SYMBOL(mmc_wait_for_pstore_req);
+#endif
+
 /**
  *	mmc_is_req_done - Determine if a 'cap_cmd_during_tfr' request is done
  *	@host: MMC host
diff --git a/drivers/mmc/core/mmcpstore.c b/drivers/mmc/core/mmcpstore.c
new file mode 100644
index 000000000000..1113eae0756c
--- /dev/null
+++ b/drivers/mmc/core/mmcpstore.c
@@ -0,0 +1,302 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * MMC pstore support based on pstore/blk
+ *
+ * Copyright (c) 2020 Marvell.
+ * Author: Bhaskara Budiredla <bbudiredla@marvell.com>
+ */
+
+#define pr_fmt(fmt) "mmcpstore: " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pstore_blk.h>
+#include <linux/blkdev.h>
+#include <linux/mount.h>
+#include <linux/slab.h>
+#include <linux/mmc/mmc.h>
+#include <linux/mmc/host.h>
+#include <linux/mmc/card.h>
+#include <linux/scatterlist.h>
+#include "block.h"
+#include "card.h"
+#include "core.h"
+
+static struct mmcpstore_context {
+	char dev_name[BDEVNAME_SIZE];
+	int partno;
+	sector_t start_sect;
+	sector_t size;
+	struct pstore_device_info dev;
+	struct pstore_blk_config conf;
+	struct pstore_blk_info info;
+
+	char *sub;
+	struct mmc_card	*card;
+	struct mmc_request *mrq;
+} oops_cxt;
+
+static void mmc_prep_req(struct mmc_request *mrq,
+		unsigned int sect_offset, unsigned int nsects,
+		struct scatterlist *sg, u32 opcode, unsigned int flags)
+{
+	mrq->cmd->opcode = opcode;
+	mrq->cmd->arg = sect_offset;
+	mrq->cmd->flags = MMC_RSP_R1 | MMC_CMD_ADTC;
+
+	if (nsects == 1) {
+		mrq->stop = NULL;
+	} else {
+		mrq->stop->opcode = MMC_STOP_TRANSMISSION;
+		mrq->stop->arg = 0;
+		mrq->stop->flags = MMC_RSP_R1B | MMC_CMD_AC;
+	}
+
+	mrq->data->blksz = SECTOR_SIZE;
+	mrq->data->blocks = nsects;
+	mrq->data->flags = flags;
+	mrq->data->sg = sg;
+	mrq->data->sg_len = 1;
+}
+
+static int mmcpstore_rdwr_req(const char *buf, unsigned int nsects,
+			unsigned int sect_offset, unsigned int flags)
+{
+	struct mmcpstore_context *cxt = &oops_cxt;
+	struct mmc_request *mrq = cxt->mrq;
+	struct mmc_card *card = cxt->card;
+	struct mmc_host *host = card->host;
+	struct scatterlist sg;
+	u32 opcode;
+
+	if (flags == MMC_DATA_READ)
+		opcode	= (nsects > 1) ?
+			MMC_READ_MULTIPLE_BLOCK : MMC_READ_SINGLE_BLOCK;
+	else
+		opcode = (nsects > 1) ?
+			MMC_WRITE_MULTIPLE_BLOCK : MMC_WRITE_BLOCK;
+
+	mmc_prep_req(mrq, sect_offset, nsects, &sg, opcode, flags);
+	sg_init_one(&sg, buf, (nsects << SECTOR_SHIFT));
+	mmc_set_data_timeout(mrq->data, cxt->card);
+
+	mmc_claim_host(host);
+	mmc_wait_for_req(host, mrq);
+	mdelay(mrq->data->timeout_ns / NSEC_PER_MSEC);
+	mmc_release_host(host);
+
+	if (mrq->cmd->error) {
+		pr_err("Cmd error: %d\n", mrq->cmd->error);
+		return mrq->cmd->error;
+	}
+	if (mrq->data->error) {
+		pr_err("Data error: %d\n", mrq->data->error);
+		return mrq->data->error;
+	}
+
+	return 0;
+}
+
+static ssize_t mmcpstore_write(const char *buf, size_t size, loff_t off)
+{
+	struct mmcpstore_context *cxt = &oops_cxt;
+	int ret;
+
+	ret = mmcpstore_rdwr_req(buf, (size >> SECTOR_SHIFT),
+		cxt->start_sect + (off >> SECTOR_SHIFT), MMC_DATA_WRITE);
+	if (ret)
+		return ret;
+
+	return size;
+}
+
+static ssize_t mmcpstore_read(char *buf, size_t size, loff_t off)
+{
+	struct mmcpstore_context *cxt = &oops_cxt;
+	unsigned int sect_off = cxt->start_sect  + (off >> SECTOR_SHIFT);
+	unsigned long sects = (cxt->conf.kmsg_size >> SECTOR_SHIFT);
+	int ret;
+
+	if (unlikely(!buf || !size))
+		return -EINVAL;
+
+	ret = mmcpstore_rdwr_req(cxt->sub, sects, sect_off, MMC_DATA_READ);
+	if (ret)
+		return ret;
+	memcpy(buf, cxt->sub, size);
+
+	return size;
+}
+
+static void mmcpstore_panic_write_req(const char *buf,
+		unsigned int nsects, unsigned int sect_offset)
+{
+	struct mmcpstore_context *cxt = &oops_cxt;
+	struct mmc_request *mrq = cxt->mrq;
+	struct mmc_card *card = cxt->card;
+	struct mmc_host *host = card->host;
+	struct scatterlist sg;
+	u32 opcode;
+
+	opcode = (nsects > 1) ? MMC_WRITE_MULTIPLE_BLOCK : MMC_WRITE_BLOCK;
+	mmc_prep_req(mrq, sect_offset, nsects, &sg, opcode, MMC_DATA_WRITE);
+	sg_init_one(&sg, buf, (nsects << SECTOR_SHIFT));
+	mmc_set_data_timeout(mrq->data, cxt->card);
+
+	mmc_claim_host(host);
+	mmc_wait_for_pstore_req(host, mrq);
+	mmc_release_host(card->host);
+}
+
+static ssize_t mmcpstore_panic_write(const char *buf, size_t size, loff_t off)
+{
+	struct mmcpstore_context *cxt = &oops_cxt;
+
+	mmcpstore_panic_write_req(buf, (size >> SECTOR_SHIFT),
+			cxt->start_sect + (off >> SECTOR_SHIFT));
+	return size;
+}
+
+static struct block_device *mmcpstore_open_backend(const char *device)
+{
+	struct block_device *bdev;
+	dev_t devt;
+
+	bdev = blkdev_get_by_path(device, FMODE_READ, NULL);
+	if (IS_ERR(bdev)) {
+		devt = name_to_dev_t(device);
+		if (devt == 0)
+			return ERR_PTR(-ENODEV);
+
+		bdev = blkdev_get_by_dev(devt, FMODE_READ, NULL);
+		if (IS_ERR(bdev))
+			return bdev;
+	}
+
+	return bdev;
+}
+
+static void mmcpstore_close_backend(struct block_device *bdev)
+{
+	if (!bdev)
+		return;
+	blkdev_put(bdev, FMODE_READ);
+}
+
+void mmcpstore_card_set(struct mmc_card *card, const char *disk_name)
+{
+	struct mmcpstore_context *cxt = &oops_cxt;
+	struct pstore_blk_config *conf = &cxt->conf;
+	struct pstore_device_info *dev = &cxt->dev;
+	struct block_device *bdev;
+	struct mmc_command *stop;
+	struct mmc_command *cmd;
+	struct mmc_request *mrq;
+	struct mmc_data *data;
+	int ret;
+
+	ret = pstore_blk_get_config(conf);
+	if (!conf->device[0]) {
+		pr_debug("psblk backend is empty\n");
+		return;
+	}
+
+	/* Multiple backend devices not allowed */
+	if (cxt->dev_name[0])
+		return;
+
+	bdev =  mmcpstore_open_backend(conf->device);
+	if (IS_ERR(bdev)) {
+		pr_err("%s failed to open with %ld\n",
+				conf->device, PTR_ERR(bdev));
+		return;
+	}
+
+	bdevname(bdev, cxt->dev_name);
+	cxt->partno = bdev->bd_part->partno;
+	mmcpstore_close_backend(bdev);
+
+	if (strncmp(cxt->dev_name, disk_name, strlen(disk_name)))
+		return;
+
+	cxt->start_sect = mmc_blk_get_part(card, cxt->partno, &cxt->size);
+	if (!cxt->start_sect) {
+		pr_err("Non-existent partition %d selected\n", cxt->partno);
+		return;
+	}
+
+	/* Check for host mmc panic write polling function definitions */
+	if (!card->host->ops->req_cleanup_pending ||
+			!card->host->ops->req_completion_poll)
+		return;
+
+	cxt->card = card;
+
+	cxt->sub = kmalloc(conf->kmsg_size, GFP_KERNEL);
+	if (!cxt->sub)
+		goto out;
+
+	mrq = kzalloc(sizeof(struct mmc_request), GFP_KERNEL);
+	if (!mrq)
+		goto free_sub;
+
+	cmd = kzalloc(sizeof(struct mmc_command), GFP_KERNEL);
+	if (!cmd)
+		goto free_mrq;
+
+	stop = kzalloc(sizeof(struct mmc_command), GFP_KERNEL);
+	if (!stop)
+		goto free_cmd;
+
+	data = kzalloc(sizeof(struct mmc_data), GFP_KERNEL);
+	if (!data)
+		goto free_stop;
+
+	mrq->cmd = cmd;
+	mrq->data = data;
+	mrq->stop = stop;
+	cxt->mrq = mrq;
+
+	dev->total_size = cxt->size;
+	dev->flags = PSTORE_FLAGS_DMESG;
+	dev->read = mmcpstore_read;
+	dev->write = mmcpstore_write;
+	dev->erase = NULL;
+	dev->panic_write = mmcpstore_panic_write;
+
+	ret = register_pstore_device(&cxt->dev);
+	if (ret) {
+		pr_err("%s registering with psblk failed (%d)\n",
+				cxt->dev_name, ret);
+		goto free_data;
+	}
+
+	pr_info("%s registered as psblk backend\n", cxt->dev_name);
+	return;
+
+free_data:
+	kfree(data);
+free_stop:
+	kfree(stop);
+free_cmd:
+	kfree(cmd);
+free_mrq:
+	kfree(mrq);
+free_sub:
+	kfree(cxt->sub);
+out:
+	return;
+}
+
+void unregister_mmcpstore(void)
+{
+	struct mmcpstore_context *cxt = &oops_cxt;
+
+	unregister_pstore_device(&cxt->dev);
+	kfree(cxt->mrq->data);
+	kfree(cxt->mrq->stop);
+	kfree(cxt->mrq->cmd);
+	kfree(cxt->mrq);
+	kfree(cxt->sub);
+	cxt->card = NULL;
+}
diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h
index 29aa50711626..3889c2a90faa 100644
--- a/include/linux/mmc/core.h
+++ b/include/linux/mmc/core.h
@@ -166,6 +166,10 @@  struct mmc_request {
 
 struct mmc_card;
 
+#if IS_ENABLED(CONFIG_MMC_PSTORE)
+void mmc_wait_for_pstore_req(struct mmc_host *, struct mmc_request *);
+#endif
+
 void mmc_wait_for_req(struct mmc_host *host, struct mmc_request *mrq);
 int mmc_wait_for_cmd(struct mmc_host *host, struct mmc_command *cmd,
 		int retries);
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index c079b932330f..7d6751005ac6 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -173,6 +173,18 @@  struct mmc_host_ops {
 	 */
 	int	(*multi_io_quirk)(struct mmc_card *card,
 				  unsigned int direction, int blk_size);
+
+#if IS_ENABLED(CONFIG_MMC_PSTORE)
+	/*
+	 * The following two APIs are introduced to support mmcpstore
+	 * functionality. Cleanup API to terminate the ongoing and
+	 * pending requests before a panic write post, and polling API
+	 * to ensure that write succeeds before the Kernel dies.
+	 */
+	void	(*req_cleanup_pending)(struct mmc_host *host);
+	int	(*req_completion_poll)(struct mmc_host *host,
+					unsigned long timeout);
+#endif
 };
 
 struct mmc_cqe_ops {