Skip to content

Commit b51c2c6

Browse files
committed
Merge tag 'drm-intel-fixes-2020-11-25' of git://anongit.freedesktop.org/drm/drm-intel into drm-fixes
- Fix Perf/OA workaround register corruption (Lionel) - Correct a comment statement in GVT (Yan) - Fix GT enable/disable interrupts, including a race condition that prevented the GPU from going idle (Chris) - Free stale request on destroying the virtual engine (Chris) Signed-off-by: Dave Airlie <airlied@redhat.com> From: Rodrigo Vivi <rodrigo.vivi@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20201126010623.GA827684@intel.com
2 parents 5ead67b + 280ffdb commit b51c2c6

7 files changed

Lines changed: 161 additions & 63 deletions

File tree

drivers/gpu/drm/i915/gt/intel_breadcrumbs.c

Lines changed: 92 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -30,18 +30,21 @@
3030
#include "i915_trace.h"
3131
#include "intel_breadcrumbs.h"
3232
#include "intel_context.h"
33+
#include "intel_engine_pm.h"
3334
#include "intel_gt_pm.h"
3435
#include "intel_gt_requests.h"
3536

36-
static void irq_enable(struct intel_engine_cs *engine)
37+
static bool irq_enable(struct intel_engine_cs *engine)
3738
{
3839
if (!engine->irq_enable)
39-
return;
40+
return false;
4041

4142
/* Caller disables interrupts */
4243
spin_lock(&engine->gt->irq_lock);
4344
engine->irq_enable(engine);
4445
spin_unlock(&engine->gt->irq_lock);
46+
47+
return true;
4548
}
4649

4750
static void irq_disable(struct intel_engine_cs *engine)
@@ -57,12 +60,11 @@ static void irq_disable(struct intel_engine_cs *engine)
5760

5861
static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
5962
{
60-
lockdep_assert_held(&b->irq_lock);
61-
62-
if (!b->irq_engine || b->irq_armed)
63-
return;
64-
65-
if (!intel_gt_pm_get_if_awake(b->irq_engine->gt))
63+
/*
64+
* Since we are waiting on a request, the GPU should be busy
65+
* and should have its own rpm reference.
66+
*/
67+
if (GEM_WARN_ON(!intel_gt_pm_get_if_awake(b->irq_engine->gt)))
6668
return;
6769

6870
/*
@@ -73,25 +75,24 @@ static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
7375
*/
7476
WRITE_ONCE(b->irq_armed, true);
7577

76-
/*
77-
* Since we are waiting on a request, the GPU should be busy
78-
* and should have its own rpm reference. This is tracked
79-
* by i915->gt.awake, we can forgo holding our own wakref
80-
* for the interrupt as before i915->gt.awake is released (when
81-
* the driver is idle) we disarm the breadcrumbs.
82-
*/
83-
84-
if (!b->irq_enabled++)
85-
irq_enable(b->irq_engine);
78+
/* Requests may have completed before we could enable the interrupt. */
79+
if (!b->irq_enabled++ && irq_enable(b->irq_engine))
80+
irq_work_queue(&b->irq_work);
8681
}
8782

88-
static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
83+
static void intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
8984
{
90-
lockdep_assert_held(&b->irq_lock);
91-
92-
if (!b->irq_engine || !b->irq_armed)
85+
if (!b->irq_engine)
9386
return;
9487

88+
spin_lock(&b->irq_lock);
89+
if (!b->irq_armed)
90+
__intel_breadcrumbs_arm_irq(b);
91+
spin_unlock(&b->irq_lock);
92+
}
93+
94+
static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
95+
{
9596
GEM_BUG_ON(!b->irq_enabled);
9697
if (!--b->irq_enabled)
9798
irq_disable(b->irq_engine);
@@ -105,8 +106,6 @@ static void add_signaling_context(struct intel_breadcrumbs *b,
105106
{
106107
intel_context_get(ce);
107108
list_add_tail(&ce->signal_link, &b->signalers);
108-
if (list_is_first(&ce->signal_link, &b->signalers))
109-
__intel_breadcrumbs_arm_irq(b);
110109
}
111110

112111
static void remove_signaling_context(struct intel_breadcrumbs *b,
@@ -174,34 +173,65 @@ static void add_retire(struct intel_breadcrumbs *b, struct intel_timeline *tl)
174173
intel_engine_add_retire(b->irq_engine, tl);
175174
}
176175

177-
static bool __signal_request(struct i915_request *rq, struct list_head *signals)
176+
static bool __signal_request(struct i915_request *rq)
178177
{
179-
clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
180-
181178
if (!__dma_fence_signal(&rq->fence)) {
182179
i915_request_put(rq);
183180
return false;
184181
}
185182

186-
list_add_tail(&rq->signal_link, signals);
187183
return true;
188184
}
189185

186+
static struct llist_node *
187+
slist_add(struct llist_node *node, struct llist_node *head)
188+
{
189+
node->next = head;
190+
return node;
191+
}
192+
190193
static void signal_irq_work(struct irq_work *work)
191194
{
192195
struct intel_breadcrumbs *b = container_of(work, typeof(*b), irq_work);
193196
const ktime_t timestamp = ktime_get();
197+
struct llist_node *signal, *sn;
194198
struct intel_context *ce, *cn;
195199
struct list_head *pos, *next;
196-
LIST_HEAD(signal);
200+
201+
signal = NULL;
202+
if (unlikely(!llist_empty(&b->signaled_requests)))
203+
signal = llist_del_all(&b->signaled_requests);
197204

198205
spin_lock(&b->irq_lock);
199206

200-
if (list_empty(&b->signalers))
207+
/*
208+
* Keep the irq armed until the interrupt after all listeners are gone.
209+
*
210+
* Enabling/disabling the interrupt is rather costly, roughly a couple
211+
* of hundred microseconds. If we are proactive and enable/disable
212+
* the interrupt around every request that wants a breadcrumb, we
213+
* quickly drown in the extra orders of magnitude of latency imposed
214+
* on request submission.
215+
*
216+
* So we try to be lazy, and keep the interrupts enabled until no
217+
* more listeners appear within a breadcrumb interrupt interval (that
218+
* is until a request completes that no one cares about). The
219+
* observation is that listeners come in batches, and will often
220+
* listen to a bunch of requests in succession. Though note on icl+,
221+
* interrupts are always enabled due to concerns with rc6 being
222+
* dysfunctional with per-engine interrupt masking.
223+
*
224+
* We also try to avoid raising too many interrupts, as they may
225+
* be generated by userspace batches and it is unfortunately rather
226+
* too easy to drown the CPU under a flood of GPU interrupts. Thus
227+
* whenever no one appears to be listening, we turn off the interrupts.
228+
* Fewer interrupts should conserve power -- at the very least, fewer
229+
* interrupt draw less ire from other users of the system and tools
230+
* like powertop.
231+
*/
232+
if (!signal && b->irq_armed && list_empty(&b->signalers))
201233
__intel_breadcrumbs_disarm_irq(b);
202234

203-
list_splice_init(&b->signaled_requests, &signal);
204-
205235
list_for_each_entry_safe(ce, cn, &b->signalers, signal_link) {
206236
GEM_BUG_ON(list_empty(&ce->signals));
207237

@@ -218,7 +248,10 @@ static void signal_irq_work(struct irq_work *work)
218248
* spinlock as the callback chain may end up adding
219249
* more signalers to the same context or engine.
220250
*/
221-
__signal_request(rq, &signal);
251+
clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
252+
if (__signal_request(rq))
253+
/* We own signal_node now, xfer to local list */
254+
signal = slist_add(&rq->signal_node, signal);
222255
}
223256

224257
/*
@@ -238,9 +271,9 @@ static void signal_irq_work(struct irq_work *work)
238271

239272
spin_unlock(&b->irq_lock);
240273

241-
list_for_each_safe(pos, next, &signal) {
274+
llist_for_each_safe(signal, sn, signal) {
242275
struct i915_request *rq =
243-
list_entry(pos, typeof(*rq), signal_link);
276+
llist_entry(signal, typeof(*rq), signal_node);
244277
struct list_head cb_list;
245278

246279
spin_lock(&rq->lock);
@@ -251,6 +284,9 @@ static void signal_irq_work(struct irq_work *work)
251284

252285
i915_request_put(rq);
253286
}
287+
288+
if (!READ_ONCE(b->irq_armed) && !list_empty(&b->signalers))
289+
intel_breadcrumbs_arm_irq(b);
254290
}
255291

256292
struct intel_breadcrumbs *
@@ -264,7 +300,7 @@ intel_breadcrumbs_create(struct intel_engine_cs *irq_engine)
264300

265301
spin_lock_init(&b->irq_lock);
266302
INIT_LIST_HEAD(&b->signalers);
267-
INIT_LIST_HEAD(&b->signaled_requests);
303+
init_llist_head(&b->signaled_requests);
268304

269305
init_irq_work(&b->irq_work, signal_irq_work);
270306

@@ -292,21 +328,22 @@ void intel_breadcrumbs_reset(struct intel_breadcrumbs *b)
292328

293329
void intel_breadcrumbs_park(struct intel_breadcrumbs *b)
294330
{
295-
unsigned long flags;
296-
297-
if (!READ_ONCE(b->irq_armed))
298-
return;
299-
300-
spin_lock_irqsave(&b->irq_lock, flags);
301-
__intel_breadcrumbs_disarm_irq(b);
302-
spin_unlock_irqrestore(&b->irq_lock, flags);
303-
304-
if (!list_empty(&b->signalers))
305-
irq_work_queue(&b->irq_work);
331+
/* Kick the work once more to drain the signalers */
332+
irq_work_sync(&b->irq_work);
333+
while (unlikely(READ_ONCE(b->irq_armed))) {
334+
local_irq_disable();
335+
signal_irq_work(&b->irq_work);
336+
local_irq_enable();
337+
cond_resched();
338+
}
339+
GEM_BUG_ON(!list_empty(&b->signalers));
306340
}
307341

308342
void intel_breadcrumbs_free(struct intel_breadcrumbs *b)
309343
{
344+
irq_work_sync(&b->irq_work);
345+
GEM_BUG_ON(!list_empty(&b->signalers));
346+
GEM_BUG_ON(b->irq_armed);
310347
kfree(b);
311348
}
312349

@@ -327,7 +364,8 @@ static void insert_breadcrumb(struct i915_request *rq,
327364
* its signal completion.
328365
*/
329366
if (__request_completed(rq)) {
330-
if (__signal_request(rq, &b->signaled_requests))
367+
if (__signal_request(rq) &&
368+
llist_add(&rq->signal_node, &b->signaled_requests))
331369
irq_work_queue(&b->irq_work);
332370
return;
333371
}
@@ -362,9 +400,12 @@ static void insert_breadcrumb(struct i915_request *rq,
362400
GEM_BUG_ON(!check_signal_order(ce, rq));
363401
set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
364402

365-
/* Check after attaching to irq, interrupt may have already fired. */
366-
if (__request_completed(rq))
367-
irq_work_queue(&b->irq_work);
403+
/*
404+
* Defer enabling the interrupt to after HW submission and recheck
405+
* the request as it may have completed and raised the interrupt as
406+
* we were attaching it into the lists.
407+
*/
408+
irq_work_queue(&b->irq_work);
368409
}
369410

370411
bool i915_request_enable_breadcrumb(struct i915_request *rq)

drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ struct intel_breadcrumbs {
3535
struct intel_engine_cs *irq_engine;
3636

3737
struct list_head signalers;
38-
struct list_head signaled_requests;
38+
struct llist_head signaled_requests;
3939

4040
struct irq_work irq_work; /* for use from inside irq_lock */
4141

drivers/gpu/drm/i915/gt/intel_lrc.c

Lines changed: 53 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,7 @@
182182
struct virtual_engine {
183183
struct intel_engine_cs base;
184184
struct intel_context context;
185+
struct rcu_work rcu;
185186

186187
/*
187188
* We allow only a single request through the virtual engine at a time
@@ -5425,33 +5426,57 @@ static struct list_head *virtual_queue(struct virtual_engine *ve)
54255426
return &ve->base.execlists.default_priolist.requests[0];
54265427
}
54275428

5428-
static void virtual_context_destroy(struct kref *kref)
5429+
static void rcu_virtual_context_destroy(struct work_struct *wrk)
54295430
{
54305431
struct virtual_engine *ve =
5431-
container_of(kref, typeof(*ve), context.ref);
5432+
container_of(wrk, typeof(*ve), rcu.work);
54325433
unsigned int n;
54335434

5434-
GEM_BUG_ON(!list_empty(virtual_queue(ve)));
5435-
GEM_BUG_ON(ve->request);
54365435
GEM_BUG_ON(ve->context.inflight);
54375436

5437+
/* Preempt-to-busy may leave a stale request behind. */
5438+
if (unlikely(ve->request)) {
5439+
struct i915_request *old;
5440+
5441+
spin_lock_irq(&ve->base.active.lock);
5442+
5443+
old = fetch_and_zero(&ve->request);
5444+
if (old) {
5445+
GEM_BUG_ON(!i915_request_completed(old));
5446+
__i915_request_submit(old);
5447+
i915_request_put(old);
5448+
}
5449+
5450+
spin_unlock_irq(&ve->base.active.lock);
5451+
}
5452+
5453+
/*
5454+
* Flush the tasklet in case it is still running on another core.
5455+
*
5456+
* This needs to be done before we remove ourselves from the siblings'
5457+
* rbtrees as in the case it is running in parallel, it may reinsert
5458+
* the rb_node into a sibling.
5459+
*/
5460+
tasklet_kill(&ve->base.execlists.tasklet);
5461+
5462+
/* Decouple ourselves from the siblings, no more access allowed. */
54385463
for (n = 0; n < ve->num_siblings; n++) {
54395464
struct intel_engine_cs *sibling = ve->siblings[n];
54405465
struct rb_node *node = &ve->nodes[sibling->id].rb;
5441-
unsigned long flags;
54425466

54435467
if (RB_EMPTY_NODE(node))
54445468
continue;
54455469

5446-
spin_lock_irqsave(&sibling->active.lock, flags);
5470+
spin_lock_irq(&sibling->active.lock);
54475471

54485472
/* Detachment is lazily performed in the execlists tasklet */
54495473
if (!RB_EMPTY_NODE(node))
54505474
rb_erase_cached(node, &sibling->execlists.virtual);
54515475

5452-
spin_unlock_irqrestore(&sibling->active.lock, flags);
5476+
spin_unlock_irq(&sibling->active.lock);
54535477
}
54545478
GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
5479+
GEM_BUG_ON(!list_empty(virtual_queue(ve)));
54555480

54565481
if (ve->context.state)
54575482
__execlists_context_fini(&ve->context);
@@ -5464,6 +5489,27 @@ static void virtual_context_destroy(struct kref *kref)
54645489
kfree(ve);
54655490
}
54665491

5492+
static void virtual_context_destroy(struct kref *kref)
5493+
{
5494+
struct virtual_engine *ve =
5495+
container_of(kref, typeof(*ve), context.ref);
5496+
5497+
GEM_BUG_ON(!list_empty(&ve->context.signals));
5498+
5499+
/*
5500+
* When destroying the virtual engine, we have to be aware that
5501+
* it may still be in use from an hardirq/softirq context causing
5502+
* the resubmission of a completed request (background completion
5503+
* due to preempt-to-busy). Before we can free the engine, we need
5504+
* to flush the submission code and tasklets that are still potentially
5505+
* accessing the engine. Flushing the tasklets requires process context,
5506+
* and since we can guard the resubmit onto the engine with an RCU read
5507+
* lock, we can delegate the free of the engine to an RCU worker.
5508+
*/
5509+
INIT_RCU_WORK(&ve->rcu, rcu_virtual_context_destroy);
5510+
queue_rcu_work(system_wq, &ve->rcu);
5511+
}
5512+
54675513
static void virtual_engine_initial_hint(struct virtual_engine *ve)
54685514
{
54695515
int swp;

drivers/gpu/drm/i915/gvt/gvt.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -255,7 +255,7 @@ struct intel_gvt_mmio {
255255
#define F_CMD_ACCESS (1 << 3)
256256
/* This reg has been accessed by a VM */
257257
#define F_ACCESSED (1 << 4)
258-
/* This reg has been accessed through GPU commands */
258+
/* This reg could be accessed by unaligned address */
259259
#define F_UNALIGN (1 << 6)
260260
/* This reg is in GVT's mmio save-restor list and in hardware
261261
* logical context image

0 commit comments

Comments
 (0)