@@ -304,8 +304,7 @@ struct intel_engine_cs {
u32 context_size;
u32 mmio_base;
- unsigned int context_tag;
-#define NUM_CONTEXT_TAG roundup_pow_of_two(2 * EXECLIST_MAX_PORTS)
+ unsigned long context_tag;
struct rb_node uabi_node;
@@ -1404,13 +1404,17 @@ __execlists_schedule_in(struct i915_request *rq)
ce->lrc_desc &= ~GENMASK_ULL(47, 37);
if (ce->tag) {
/* Use a fixed tag for OA and friends */
+ GEM_BUG_ON(ce->tag <= BITS_PER_TYPE(engine->context_tag));
ce->lrc_desc |= (u64)ce->tag << 32;
} else {
/* We don't need a strict matching tag, just different values */
- ce->lrc_desc |=
- (u64)(++engine->context_tag % NUM_CONTEXT_TAG) <<
- GEN11_SW_CTX_ID_SHIFT;
- BUILD_BUG_ON(NUM_CONTEXT_TAG > GEN12_MAX_CONTEXT_HW_ID);
+ unsigned int tag = ffs(engine->context_tag);
+
+ GEM_BUG_ON(tag == 0 || tag >= BITS_PER_LONG);
+ clear_bit(tag - 1, &engine->context_tag);
+ ce->lrc_desc |= (u64)tag << GEN11_SW_CTX_ID_SHIFT;
+
+ BUILD_BUG_ON(BITS_PER_TYPE(engine->context_tag) > GEN12_MAX_CONTEXT_HW_ID);
}
__intel_gt_pm_get(engine->gt);
@@ -1452,7 +1456,8 @@ static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
static inline void
__execlists_schedule_out(struct i915_request *rq,
- struct intel_engine_cs * const engine)
+ struct intel_engine_cs * const engine,
+ unsigned int ccid)
{
struct intel_context * const ce = rq->context;
@@ -1470,6 +1475,10 @@ __execlists_schedule_out(struct i915_request *rq,
i915_request_completed(rq))
intel_engine_add_retire(engine, ce->timeline);
+ ccid >>= GEN11_SW_CTX_ID_SHIFT - 32;
+ if (ccid < BITS_PER_TYPE(engine->context_tag))
+ set_bit(ccid - 1, &engine->context_tag);
+
intel_context_update_runtime(ce);
intel_engine_context_out(engine);
execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
@@ -1495,15 +1504,17 @@ execlists_schedule_out(struct i915_request *rq)
{
struct intel_context * const ce = rq->context;
struct intel_engine_cs *cur, *old;
+ unsigned int ccid;
trace_i915_request_out(rq);
+ ccid = upper_32_bits(rq->context->lrc_desc);
old = READ_ONCE(ce->inflight);
do
cur = ptr_unmask_bits(old, 2) ? ptr_dec(old) : NULL;
while (!try_cmpxchg(&ce->inflight, &old, cur));
if (!cur)
- __execlists_schedule_out(rq, old);
+ __execlists_schedule_out(rq, old, ccid);
i915_request_put(rq);
}
@@ -1571,8 +1582,9 @@ dump_port(char *buf, int buflen, const char *prefix, struct i915_request *rq)
if (!rq)
return "";
- snprintf(buf, buflen, "%s%llx:%lld%s prio %d",
+ snprintf(buf, buflen, "%sccid:%x %llx:%lld%s prio %d",
prefix,
+ upper_32_bits(rq->context->lrc_desc),
rq->fence.context, rq->fence.seqno,
i915_request_completed(rq) ? "!" :
i915_request_started(rq) ? "*" :
@@ -3444,7 +3456,8 @@ __execlists_context_pin(struct intel_context *ce,
if (IS_ERR(vaddr))
return PTR_ERR(vaddr);
- ce->lrc_desc = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE;
+ ce->lrc_desc &= ~GENMASK_ULL(31, 0);
+ ce->lrc_desc |= lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE;
ce->lrc_reg_state = vaddr + LRC_STATE_OFFSET;
__execlists_update_reg_state(ce, engine, ce->ring->tail);
@@ -4002,7 +4015,7 @@ static void enable_execlists(struct intel_engine_cs *engine)
enable_error_interrupt(engine);
- engine->context_tag = 0;
+ engine->context_tag = GENMASK(BITS_PER_LONG - 2, 0);
}
static bool unexpected_starting_state(struct intel_engine_cs *engine)
@@ -1281,11 +1281,10 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32);
/*
* Pick an unused context id
- * 0 - (NUM_CONTEXT_TAG - 1) are used by other contexts
+ * 0 - BITS_PER_LONG are used by other contexts
* GEN12_MAX_CONTEXT_HW_ID (0x7ff) is used by idle context
*/
stream->specific_ctx_id = (GEN12_MAX_CONTEXT_HW_ID - 1) << (GEN11_SW_CTX_ID_SHIFT - 32);
- BUILD_BUG_ON((GEN12_MAX_CONTEXT_HW_ID - 1) < NUM_CONTEXT_TAG);
break;
}
@@ -173,7 +173,7 @@ static int igt_vma_create(void *arg)
}
nc = 0;
- for_each_prime_number(num_ctx, 2 * NUM_CONTEXT_TAG) {
+ for_each_prime_number(num_ctx, 2 * BITS_PER_LONG) {
for (; nc < num_ctx; nc++) {
ctx = mock_context(i915, "mock");
if (!ctx)
The bspec is confusing on the nature of the upper 32bits of the LRC descriptor. Once upon a time, it said that it uses the upper 32b to decide if it should perform a lite-restore, and so we must ensure that each unique context submitted to HW is given a unique CCID [for the duration of it being on the HW]. Currently, this was thought to be achieved by using a small circular tag, and assigning every context submitted to HW a new id. However, due to preemption replacing an inflight context, it would be possible to assign an id to a new context that matched the currently in flight context -- a situation we sought to avoid. Rather than use a simple circular counter, switch over to a small bitmap of inflight ids so we can avoid reusing one that is still potentially active. Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/1796 Fixes: 2935ed5339c4 ("drm/i915: Remove logical HW ID") Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com> Cc: <stable@vger.kernel.org> # v5.5+ --- drivers/gpu/drm/i915/gt/intel_engine_types.h | 3 +- drivers/gpu/drm/i915/gt/intel_lrc.c | 31 ++++++++++++++------ drivers/gpu/drm/i915/i915_perf.c | 3 +- drivers/gpu/drm/i915/selftests/i915_vma.c | 2 +- 4 files changed, 25 insertions(+), 14 deletions(-)