@@ -11,7 +11,8 @@
struct msm_fence_context *
-msm_fence_context_alloc(struct drm_device *dev, const char *name)
+msm_fence_context_alloc(struct drm_device *dev, volatile uint32_t *fenceptr,
+ const char *name)
{
struct msm_fence_context *fctx;
@@ -22,6 +23,7 @@ msm_fence_context_alloc(struct drm_device *dev, const char *name)
fctx->dev = dev;
strncpy(fctx->name, name, sizeof(fctx->name));
fctx->context = dma_fence_context_alloc(1);
+ fctx->fenceptr = fenceptr;
init_waitqueue_head(&fctx->event);
spin_lock_init(&fctx->spinlock);
@@ -35,7 +37,12 @@ void msm_fence_context_free(struct msm_fence_context *fctx)
static inline bool fence_completed(struct msm_fence_context *fctx, uint32_t fence)
{
- return (int32_t)(fctx->completed_fence - fence) >= 0;
+ /*
+ * Note: Check completed_fence first, as fenceptr is in a write-combine
+ * mapping, so it will be more expensive to read.
+ */
+ return (int32_t)(fctx->completed_fence - fence) >= 0 ||
+ (int32_t)(*fctx->fenceptr - fence) >= 0;
}
/* legacy path for WAIT_FENCE ioctl: */
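
[Aside, not part of the patch: the (int32_t)(a - b) >= 0 idiom above is the
standard wraparound-safe seqno comparison. Both counters are free-running
32-bit values, so the signed difference stays meaningful as long as the two
never drift more than 2^31 apart. A minimal standalone sketch, with a
hypothetical helper name:]

#include <stdint.h>
#include <stdbool.h>

/* True if seqno 'a' is at or after seqno 'b', tolerating 32-bit
 * wraparound (valid while the two differ by less than 2^31). */
static inline bool seqno_passed(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b) >= 0;
}

/* e.g. seqno_passed(0x00000002, 0xfffffffe) is true: seqno 2 comes
 * four steps after 0xfffffffe once the counter wraps. */
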
@@ -9,19 +9,52 @@
#include "msm_drv.h"
+/**
+ * struct msm_fence_context - fence context for gpu
+ *
+ * Each ringbuffer has a single fence context, with the GPU writing an
+ * incrementing fence seqno at the end of each submit.
+ */
struct msm_fence_context {
struct drm_device *dev;
+	/** @name: human-readable name for fence timeline */
char name[32];
+	/** @context: see dma_fence_context_alloc() */
unsigned context;
- /* last_fence == completed_fence --> no pending work */
- uint32_t last_fence; /* last assigned fence */
- uint32_t completed_fence; /* last completed fence */
+
+	/**
+	 * @last_fence:
+	 *
+	 * Last assigned fence, incremented each time a fence is created
+	 * on this fence context. If last_fence == completed_fence,
+	 * there is no remaining pending work.
+	 */
+ uint32_t last_fence;
+
+	/**
+	 * @completed_fence:
+	 *
+	 * The last completed fence, updated from the CPU after an
+	 * interrupt from the GPU.
+	 */
+ uint32_t completed_fence;
+
+	/**
+	 * @fenceptr:
+	 *
+	 * The address that the GPU directly writes with completed fence
+	 * seqno. This can be ahead of completed_fence. We can peek at
+	 * this to see if a fence has already signaled but the CPU hasn't
+	 * gotten around to handling the irq and updating completed_fence.
+	 */
+ volatile uint32_t *fenceptr;
+
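+	/** @event: waitqueue for fence waiters, woken as fences complete */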
wait_queue_head_t event;
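+	/** @spinlock: protects updates to completed_fence */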
spinlock_t spinlock;
};
struct msm_fence_context * msm_fence_context_alloc(struct drm_device *dev,
- const char *name);
+ volatile uint32_t *fenceptr, const char *name);
void msm_fence_context_free(struct msm_fence_context *fctx);
int msm_wait_fence(struct msm_fence_context *fctx, uint32_t fence,
@@ -51,7 +51,7 @@ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id,
snprintf(name, sizeof(name), "gpu-ring-%d", ring->id);
- ring->fctx = msm_fence_context_alloc(gpu->dev, name);
+ ring->fctx = msm_fence_context_alloc(gpu->dev, &ring->memptrs->fence, name);
return ring;
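
[Aside, not part of the patch: how the pieces connect. The ringbuffer passes
the address of its GPU-shared fence slot (ring->memptrs->fence) into the
fence context, and the GPU writes an incrementing seqno there at the end of
each submit. fence_completed() can then peek at *fenceptr to observe a
completion before the irq handler has caught up. A condensed, illustrative
wiring sketch; 'ring' and 'dev' stand in for driver context and are not
defined here:]

/* The GPU-visible seqno slot; in the driver this lives in the
 * ringbuffer's memptrs buffer, mapped write-combine. */
volatile uint32_t *slot = &ring->memptrs->fence;

struct msm_fence_context *fctx =
	msm_fence_context_alloc(dev, slot, "example-ring");

/* Waiters now have two views of progress: fctx->completed_fence,
 * updated from the CPU irq path, and *fctx->fenceptr, written
 * directly by the GPU and possibly ahead of completed_fence. */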