
[15/17] xen: pvblock: Implement front-back protocol and do IO

Message ID 20200701162959.9814-16-vicooodin@gmail.com
State New
Series Add new board: Xen guest for ARM64

Commit Message

Nastya Vicodin July 1, 2020, 4:29 p.m. UTC
From: Anastasiia Lukianenko <anastasiia_lukianenko at epam.com>

Implement the Xen para-virtual block frontend-to-backend
communication protocol and actually read and write disk data.

This is based on the Mini-OS implementation of the para-virtual block
frontend driver.

Signed-off-by: Oleksandr Andrushchenko <oleksandr_andrushchenko at epam.com>
Signed-off-by: Anastasiia Lukianenko <anastasiia_lukianenko at epam.com>
---
 drivers/xen/events.c  |   2 +-
 drivers/xen/pvblock.c | 311 ++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 301 insertions(+), 12 deletions(-)

Comments

Simon Glass July 3, 2020, 3:50 a.m. UTC | #1
On Wed, 1 Jul 2020 at 10:30, Anastasiia Lukianenko <vicooodin at gmail.com> wrote:
> [...]

Please can you comment structs and non-trivial functions?
Anastasiia Lukianenko July 6, 2020, 9:10 a.m. UTC | #2
On Thu, 2020-07-02 at 21:50 -0600, Simon Glass wrote:
> On Wed, 1 Jul 2020 at 10:30, Anastasiia Lukianenko <vicooodin at gmail.com> wrote:
> > [...]
>
> Please can you comment structs and non-trivial functions?

Sure.

Regards,
Anastasiia

Patch

diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index a1b36a2196..192b290c02 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -104,7 +104,7 @@  void unbind_evtchn(evtchn_port_t port)
 	int rc;
 
 	if (ev_actions[port].handler == default_handler)
-		printf("WARN: No handler for port %d when unbinding\n", port);
+		debug("Default handler for port %d when unbinding\n", port);
 	mask_evtchn(port);
 	clear_evtchn(port);
 
diff --git a/drivers/xen/pvblock.c b/drivers/xen/pvblock.c
index 9ed18be633..a23afc2cb2 100644
--- a/drivers/xen/pvblock.c
+++ b/drivers/xen/pvblock.c
@@ -15,6 +15,7 @@ 
 #include <asm/io.h>
 #include <asm/xen/system.h>
 
+#include <linux/bug.h>
 #include <linux/compat.h>
 
 #include <xen/events.h>
@@ -31,6 +32,7 @@ 
 
 #define O_RDONLY	00
 #define O_RDWR		02
+#define WAIT_RING_TO_MS	10	/* Timeout in ms while waiting for a free ring slot */
 
 struct blkfront_info {
 	u64 sectors;
@@ -53,12 +55,30 @@  struct blkfront_dev {
 	char *backend;
 	struct blkfront_info info;
 	unsigned int devid;
+	u8 *bounce_buffer;	/* Sector-aligned buffer for unaligned requests */
 };
 
 struct blkfront_platdata {
 	unsigned int devid;
 };
 
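+/**
+ * struct blkfront_aiocb - per-request context for asynchronous block I/O
+ * @aio_dev:	device the request is issued to
+ * @aio_buf:	data buffer, must be sector aligned
+ * @aio_nbytes:	number of bytes to transfer
+ * @aio_offset:	byte offset on the device, must be sector aligned
+ * @total_bytes: total size of the transfer this request belongs to
+ * @is_write:	non-zero for a write request
+ * @data:	private data for the completion callback
+ * @gref:	grant references, one per page of the request buffer
+ * @n:		number of pages (segments) in the request
+ * @aio_cb:	completion callback, invoked from blkfront_aio_poll()
+ */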
+struct blkfront_aiocb {
+	struct blkfront_dev *aio_dev;
+	u8 *aio_buf;
+	size_t aio_nbytes;
+	off_t aio_offset;
+	size_t total_bytes;
+	u8 is_write;
+	void *data;
+
+	grant_ref_t gref[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	int n;
+
+	void (*aio_cb)(struct blkfront_aiocb *aiocb, int ret);
+};
+
+static void blkfront_sync(struct blkfront_dev *dev);
+
 static void free_blkfront(struct blkfront_dev *dev)
 {
 	mask_evtchn(dev->evtchn);
@@ -69,16 +89,11 @@  static void free_blkfront(struct blkfront_dev *dev)
 
 	unbind_evtchn(dev->evtchn);
 
+	free(dev->bounce_buffer);
 	free(dev->nodename);
 	free(dev);
 }
 
-static void blkfront_handler(evtchn_port_t port, struct pt_regs *regs,
-			     void *data)
-{
-	printf("%s [Port %d] - event received\n", __func__, port);
-}
-
 static int init_blkfront(unsigned int devid, struct blkfront_dev *dev)
 {
 	xenbus_transaction_t xbt;
@@ -99,7 +114,7 @@  static int init_blkfront(unsigned int devid, struct blkfront_dev *dev)
 
 	snprintf(path, sizeof(path), "%s/backend-id", nodename);
 	dev->dom = xenbus_read_integer(path);
-	evtchn_alloc_unbound(dev->dom, blkfront_handler, dev, &dev->evtchn);
+	evtchn_alloc_unbound(dev->dom, NULL, dev, &dev->evtchn);
 
 	s = (struct blkif_sring *)memalign(PAGE_SIZE, PAGE_SIZE);
 	if (!s) {
@@ -220,8 +235,16 @@  done:
 	}
 	unmask_evtchn(dev->evtchn);
 
-	debug("%llu sectors of %u bytes\n",
-	      dev->info.sectors, dev->info.sector_size);
+	dev->bounce_buffer = memalign(dev->info.sector_size,
+				      dev->info.sector_size);
+	if (!dev->bounce_buffer) {
+		printf("Failed to allocate bounce buffer\n");
+		goto error;
+	}
+
+	debug("%llu sectors of %u bytes, bounce buffer at %p\n",
+	      dev->info.sectors, dev->info.sector_size,
+	      dev->bounce_buffer);
 
 	return 0;
 
@@ -242,6 +265,8 @@  static void shutdown_blkfront(struct blkfront_dev *dev)
 
 	debug("Close " DRV_NAME ", device ID %d\n", dev->devid);
 
+	blkfront_sync(dev);
+
 	snprintf(path, sizeof(path), "%s/state", dev->backend);
 	snprintf(nodename, sizeof(nodename), "%s/state", dev->nodename);
 
@@ -296,16 +321,280 @@  close:
 		free_blkfront(dev);
 }
 
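+/*
+ * Poll the shared ring for completed requests: revoke grant access for
+ * finished segments, report backend errors and invoke the per-request
+ * completion callback. Returns the number of responses consumed.
+ */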
+static int blkfront_aio_poll(struct blkfront_dev *dev)
+{
+	RING_IDX rp, cons;
+	struct blkif_response *rsp;
+	int more;
+	int nr_consumed;
+
+moretodo:
+	rp = dev->ring.sring->rsp_prod;
+	rmb(); /* Ensure we see queued responses up to 'rp'. */
+	cons = dev->ring.rsp_cons;
+
+	nr_consumed = 0;
+	while (cons != rp) {
+		struct blkfront_aiocb *aiocbp;
+		int status;
+
+		rsp = RING_GET_RESPONSE(&dev->ring, cons);
+		nr_consumed++;
+
+		aiocbp = (void *)(uintptr_t)rsp->id;
+		status = rsp->status;
+
+		switch (rsp->operation) {
+		case BLKIF_OP_READ:
+		case BLKIF_OP_WRITE:
+		{
+			int j;
+
+			if (status != BLKIF_RSP_OKAY)
+				printf("%s error %d on %s at offset %llu, num bytes %llu\n",
+				       rsp->operation == BLKIF_OP_READ ?
+				       "read" : "write",
+				       status, aiocbp->aio_dev->nodename,
+				       (unsigned long long)aiocbp->aio_offset,
+				       (unsigned long long)aiocbp->aio_nbytes);
+
+			for (j = 0; j < aiocbp->n; j++)
+				gnttab_end_access(aiocbp->gref[j]);
+
+			break;
+		}
+
+		case BLKIF_OP_WRITE_BARRIER:
+			if (status != BLKIF_RSP_OKAY)
+				printf("write barrier error %d\n", status);
+			break;
+		case BLKIF_OP_FLUSH_DISKCACHE:
+			if (status != BLKIF_RSP_OKAY)
+				printf("flush error %d\n", status);
+			break;
+
+		default:
+			printf("unrecognized block operation %d response (status %d)\n",
+			       rsp->operation, status);
+			break;
+		}
+
+		dev->ring.rsp_cons = ++cons;
+		/* Note: the callback is responsible for freeing aiocbp */
+		if (aiocbp && aiocbp->aio_cb)
+			aiocbp->aio_cb(aiocbp, status ? -EIO : 0);
+		if (dev->ring.rsp_cons != cons)
+			/* We reentered, we must not continue here */
+			break;
+	}
+
+	RING_FINAL_CHECK_FOR_RESPONSES(&dev->ring, more);
+	if (more)
+		goto moretodo;
+
+	return nr_consumed;
+}
+
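+/* Poll for completed requests until a free slot appears on the ring */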
+static void blkfront_wait_slot(struct blkfront_dev *dev)
+{
+	/* Wait for a slot */
+	if (RING_FULL(&dev->ring)) {
+		while (true) {
+			blkfront_aio_poll(dev);
+			if (!RING_FULL(&dev->ring))
+				break;
+			wait_event_timeout(NULL, !RING_FULL(&dev->ring),
+					   WAIT_RING_TO_MS);
+		}
+	}
+}
+
+/* Issue an asynchronous read or write request to the backend */
+static void blkfront_aio(struct blkfront_aiocb *aiocbp, int write)
+{
+	struct blkfront_dev *dev = aiocbp->aio_dev;
+	struct blkif_request *req;
+	RING_IDX i;
+	int notify;
+	int n, j;
+	uintptr_t start, end;
+
+	/* Can't do I/O at a non-sector-aligned location */
+	BUG_ON(aiocbp->aio_offset & (dev->info.sector_size - 1));
+	/* Can't do I/O in non-sector-sized amounts */
+	BUG_ON(aiocbp->aio_nbytes & (dev->info.sector_size - 1));
+	/* Can't do I/O from a non-sector-aligned buffer */
+	BUG_ON(((uintptr_t)aiocbp->aio_buf & (dev->info.sector_size - 1)));
+
+	start = (uintptr_t)aiocbp->aio_buf & PAGE_MASK;
+	end = ((uintptr_t)aiocbp->aio_buf + aiocbp->aio_nbytes +
+	       PAGE_SIZE - 1) & PAGE_MASK;
+	n = (end - start) / PAGE_SIZE;
+	aiocbp->n = n;
+
+	BUG_ON(n > BLKIF_MAX_SEGMENTS_PER_REQUEST);
+
+	blkfront_wait_slot(dev);
+	i = dev->ring.req_prod_pvt;
+	req = RING_GET_REQUEST(&dev->ring, i);
+
+	req->operation = write ? BLKIF_OP_WRITE : BLKIF_OP_READ;
+	req->nr_segments = n;
+	req->handle = dev->handle;
+	req->id = (uintptr_t)aiocbp;
+	req->sector_number = aiocbp->aio_offset / dev->info.sector_size;
+
+	for (j = 0; j < n; j++) {
+		req->seg[j].first_sect = 0;
+		req->seg[j].last_sect = PAGE_SIZE / dev->info.sector_size - 1;
+	}
+	req->seg[0].first_sect = ((uintptr_t)aiocbp->aio_buf & ~PAGE_MASK) /
+		dev->info.sector_size;
+	req->seg[n - 1].last_sect = (((uintptr_t)aiocbp->aio_buf +
+		aiocbp->aio_nbytes - 1) & ~PAGE_MASK) / dev->info.sector_size;
+	for (j = 0; j < n; j++) {
+		uintptr_t data = start + j * PAGE_SIZE;
+
+		if (!write) {
+			/* Trigger CoW if needed */
+			*(char *)(data + (req->seg[j].first_sect *
+					  dev->info.sector_size)) = 0;
+			barrier();
+		}
+		req->seg[j].gref = gnttab_grant_access(dev->dom,
+						       virt_to_pfn((void *)data),
+						       write);
+		aiocbp->gref[j] = req->seg[j].gref;
+	}
+
+	dev->ring.req_prod_pvt = i + 1;
+
+	wmb();
+	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&dev->ring, notify);
+
+	if (notify)
+		notify_remote_via_evtchn(dev->evtchn);
+}
+
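+/* Completion callback for synchronous I/O: mark the request as done */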
+static void blkfront_aio_cb(struct blkfront_aiocb *aiocbp, int ret)
+{
+	aiocbp->data = (void *)1;
+	aiocbp->aio_cb = NULL;
+}
+
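+/*
+ * Synchronous I/O: issue an asynchronous request and poll the ring
+ * until its completion callback has fired.
+ */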
+static void blkfront_io(struct blkfront_aiocb *aiocbp, int write)
+{
+	aiocbp->aio_cb = blkfront_aio_cb;
+	blkfront_aio(aiocbp, write);
+	aiocbp->data = NULL;
+
+	while (true) {
+		blkfront_aio_poll(aiocbp->aio_dev);
+		if (aiocbp->data)
+			break;
+		cpu_relax();
+	}
+}
+
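+/* Push a request without data segments, e.g. a barrier or cache flush */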
+static void blkfront_push_operation(struct blkfront_dev *dev, u8 op,
+				    uint64_t id)
+{
+	struct blkif_request *req;
+	int notify, i;
+
+	blkfront_wait_slot(dev);
+	i = dev->ring.req_prod_pvt;
+	req = RING_GET_REQUEST(&dev->ring, i);
+	req->operation = op;
+	req->nr_segments = 0;
+	req->handle = dev->handle;
+	req->id = id;
+	req->sector_number = 0;
+	dev->ring.req_prod_pvt = i + 1;
+	wmb();
+	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&dev->ring, notify);
+	if (notify)
+		notify_remote_via_evtchn(dev->evtchn);
+}
+
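+/*
+ * Make sure all outstanding requests have completed: issue a write
+ * barrier and/or cache flush if the backend supports them, then poll
+ * until the ring is empty.
+ */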
+static void blkfront_sync(struct blkfront_dev *dev)
+{
+	if (dev->info.mode == O_RDWR) {
+		if (dev->info.barrier == 1)
+			blkfront_push_operation(dev,
+						BLKIF_OP_WRITE_BARRIER, 0);
+
+		if (dev->info.flush == 1)
+			blkfront_push_operation(dev,
+						BLKIF_OP_FLUSH_DISKCACHE, 0);
+	}
+
+	while (true) {
+		blkfront_aio_poll(dev);
+		if (RING_FREE_REQUESTS(&dev->ring) == RING_SIZE(&dev->ring))
+			break;
+		cpu_relax();
+	}
+}
+
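+/*
+ * Read or write blocks, splitting the transfer into chunks of at most
+ * BLKIF_MAX_SEGMENTS_PER_REQUEST pages. If the caller's buffer is not
+ * sector aligned, go through the bounce buffer one block at a time.
+ */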
+static ulong pvblock_iop(struct udevice *udev, lbaint_t blknr,
+			 lbaint_t blkcnt, void *buffer, int write)
+{
+	struct blkfront_dev *blk_dev = dev_get_priv(udev);
+	struct blk_desc *desc = dev_get_uclass_platdata(udev);
+	struct blkfront_aiocb aiocb;
+	lbaint_t blocks_todo;
+	bool unaligned;
+
+	if (blkcnt == 0)
+		return 0;
+
+	if ((blknr + blkcnt) > desc->lba) {
+		printf(DRV_NAME ": block number 0x" LBAF " exceeds max (0x" LBAF ")\n",
+		       blknr + blkcnt, desc->lba);
+		return 0;
+	}
+
+	unaligned = (uintptr_t)buffer & (blk_dev->info.sector_size - 1);
+
+	aiocb.aio_dev = blk_dev;
+	aiocb.aio_offset = blknr * desc->blksz;
+	aiocb.aio_cb = NULL;
+	aiocb.data = NULL;
+	blocks_todo = blkcnt;
+	do {
+		aiocb.aio_buf = unaligned ? blk_dev->bounce_buffer : buffer;
+
+		if (write && unaligned)
+			memcpy(blk_dev->bounce_buffer, buffer, desc->blksz);
+
+		aiocb.aio_nbytes = unaligned ? desc->blksz :
+			min((size_t)(BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE),
+			    (size_t)(blocks_todo * desc->blksz));
+
+		blkfront_io(&aiocb, write);
+
+		if (!write && unaligned)
+			memcpy(buffer, blk_dev->bounce_buffer, desc->blksz);
+
+		aiocb.aio_offset += aiocb.aio_nbytes;
+		buffer += aiocb.aio_nbytes;
+		blocks_todo -= aiocb.aio_nbytes / desc->blksz;
+	} while (blocks_todo > 0);
+
+	return blkcnt;
+}
+
 ulong pvblock_blk_read(struct udevice *udev, lbaint_t blknr, lbaint_t blkcnt,
 		       void *buffer)
 {
-	return 0;
+	return pvblock_iop(udev, blknr, blkcnt, buffer, 0);
 }
 
 ulong pvblock_blk_write(struct udevice *udev, lbaint_t blknr, lbaint_t blkcnt,
 			const void *buffer)
 {
-	return 0;
+	return pvblock_iop(udev, blknr, blkcnt, (void *)buffer, 1);
 }
 
 static int pvblock_blk_bind(struct udevice *udev)