drm/i915: Only spin whilst waiting on the current request
author Chris Wilson <chris@chris-wilson.co.uk>
Fri, 11 Dec 2015 11:32:59 +0000 (11:32 +0000)
committer Jani Nikula <jani.nikula@intel.com>
Tue, 22 Dec 2015 10:56:58 +0000 (12:56 +0200)
Limit busywaiting only to the request currently being processed by the
GPU. If the request is not currently being processed by the GPU, there
is a very low likelihood of it being completed within the 5 microsecond
spin timeout and so we will just be wasting CPU cycles.

v2: Fix a logical inversion introduced when rebasing - the check for
this request being active was inverted, so we were instead busywaiting
when the GPU was not yet processing the request of interest.

v3: Try another colour for the seqno names.
v4: Another colour for the function names.

v5: Remove the forced coherency when checking for the active request. On
reflection and plenty of recent experimentation, the issue is not a
cache coherency problem - but an irq/seqno ordering problem (timing issue).
Here, we do not need the workaround to force ordering of the read with an
interrupt.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: "Rogozhkin, Dmitry V" <dmitry.v.rogozhkin@intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: Eero Tamminen <eero.t.tamminen@intel.com>
Cc: "Rantala, Valtteri" <valtteri.rantala@intel.com>
Cc: stable@vger.kernel.org
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/1449833608-22125-4-git-send-email-chris@chris-wilson.co.uk
(cherry picked from commit 821485dc2ad665f136c57ee589bf7a8210160fe2)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_gem.c

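diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 037a650d656541e8b4254f9fbd980d7500801770..f4af19a0d5696324c307617adfb0c86a3fdfed38 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h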
@@ -2193,8 +2193,17 @@ struct drm_i915_gem_request {
        struct drm_i915_private *i915;
        struct intel_engine_cs *ring;
 
-       /** GEM sequence number associated with this request. */
-       uint32_t seqno;
+        /** GEM sequence number associated with the previous request,
+         * when the HWS breadcrumb is equal to this the GPU is processing
+         * this request.
+         */
+       u32 previous_seqno;
+
+        /** GEM sequence number associated with this request,
+         * when the HWS breadcrumb is equal or greater than this the GPU
+         * has finished processing this request.
+         */
+       u32 seqno;
 
        /** Position in the ringbuffer of the start of the request */
        u32 head;
@@ -2911,15 +2920,17 @@ i915_seqno_passed(uint32_t seq1, uint32_t seq2)
        return (int32_t)(seq1 - seq2) >= 0;
 }
 
+static inline bool i915_gem_request_started(struct drm_i915_gem_request *req,
+                                          bool lazy_coherency)
+{
+       u32 seqno = req->ring->get_seqno(req->ring, lazy_coherency);
+       return i915_seqno_passed(seqno, req->previous_seqno);
+}
+
 static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req,
                                              bool lazy_coherency)
 {
-       u32 seqno;
-
-       BUG_ON(req == NULL);
-
-       seqno = req->ring->get_seqno(req->ring, lazy_coherency);
-
+       u32 seqno = req->ring->get_seqno(req->ring, lazy_coherency);
        return i915_seqno_passed(seqno, req->seqno);
 }
 
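diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8719fa2ae7e7ecb70aead52ef5a17d722f9ce887..f56af0aaafde511c1add59ce5f89fe2fa9be5c42 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c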
@@ -1193,9 +1193,13 @@ static int __i915_spin_request(struct drm_i915_gem_request *req, int state)
         * takes to sleep on a request, on the order of a microsecond.
         */
 
-       if (i915_gem_request_get_ring(req)->irq_refcount)
+       if (req->ring->irq_refcount)
                return -EBUSY;
 
+       /* Only spin if we know the GPU is processing this request */
+       if (!i915_gem_request_started(req, true))
+               return -EAGAIN;
+
        timeout = local_clock_us(&cpu) + 5;
        while (!need_resched()) {
                if (i915_gem_request_completed(req, true))
@@ -1209,6 +1213,7 @@ static int __i915_spin_request(struct drm_i915_gem_request *req, int state)
 
                cpu_relax_lowlatency();
        }
+
        if (i915_gem_request_completed(req, false))
                return 0;
 
@@ -2600,6 +2605,7 @@ void __i915_add_request(struct drm_i915_gem_request *request,
        request->batch_obj = obj;
 
        request->emitted_jiffies = jiffies;
+       request->previous_seqno = ring->last_submitted_seqno;
        ring->last_submitted_seqno = request->seqno;
        list_add_tail(&request->list, &ring->request_list);