drm/i915: Fix hpd live status bits for g4x

[firefly-linux-kernel-4.4.55.git] / drivers / gpu / drm / i915 / i915_gem.c
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c

index 32e6aade62238be511237c9ffad1933e18e561cb..f56af0aaafde511c1add59ce5f89fe2fa9be5c42 100644 (file)
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1146,23 +1146,74 @@ static bool missed_irq(struct drm_i915_private *dev_priv,
         return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings);
  }
  
-static int __i915_spin_request(struct drm_i915_gem_request *req)
+static unsigned long local_clock_us(unsigned *cpu)
+{
+       unsigned long t;
+
+       /* Cheaply and approximately convert from nanoseconds to microseconds.
+        * The result and subsequent calculations are also defined in the same
+        * approximate microseconds units. The principal source of timing
+        * error here is from the simple truncation.
+        *
+        * Note that local_clock() is only defined wrt to the current CPU;
+        * the comparisons are no longer valid if we switch CPUs. Instead of
+        * blocking preemption for the entire busywait, we can detect the CPU
+        * switch and use that as indicator of system load and a reason to
+        * stop busywaiting, see busywait_stop().
+        */
+       *cpu = get_cpu();
+       t = local_clock() >> 10;
+       put_cpu();
+
+       return t;
+}
+
+static bool busywait_stop(unsigned long timeout, unsigned cpu)
+{
+       unsigned this_cpu;
+
+       if (time_after(local_clock_us(&this_cpu), timeout))
+               return true;
+
+       return this_cpu != cpu;
+}
+
+static int __i915_spin_request(struct drm_i915_gem_request *req, int state)
  {
         unsigned long timeout;
+       unsigned cpu;
+
+       /* When waiting for high frequency requests, e.g. during synchronous
+        * rendering split between the CPU and GPU, the finite amount of time
+        * required to set up the irq and wait upon it limits the response
+        * rate. By busywaiting on the request completion for a short while we
+        * can service the high frequency waits as quick as possible. However,
+        * if it is a slow request, we want to sleep as quickly as possible.
+        * The tradeoff between waiting and sleeping is roughly the time it
+        * takes to sleep on a request, on the order of a microsecond.
+        */
  
-       if (i915_gem_request_get_ring(req)->irq_refcount)
+       if (req->ring->irq_refcount)
                 return -EBUSY;
  
-       timeout = jiffies + 1;
+       /* Only spin if we know the GPU is processing this request */
+       if (!i915_gem_request_started(req, true))
+               return -EAGAIN;
+
+       timeout = local_clock_us(&cpu) + 5;
         while (!need_resched()) {
                 if (i915_gem_request_completed(req, true))
                         return 0;
  
-               if (time_after_eq(jiffies, timeout))
+               if (signal_pending_state(state, current))
+                       break;
+
+               if (busywait_stop(timeout, cpu))
                         break;
  
                 cpu_relax_lowlatency();
         }
+
         if (i915_gem_request_completed(req, false))
                 return 0;
  
@@ -1197,6 +1248,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
         struct drm_i915_private *dev_priv = dev->dev_private;
         const bool irq_test_in_progress =
                 ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_ring_flag(ring);
+       int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
         DEFINE_WAIT(wait);
         unsigned long timeout_expire;
         s64 before, now;
@@ -1229,7 +1281,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
         before = ktime_get_raw_ns();
  
         /* Optimistic spin for the next jiffie before touching IRQs */
-       ret = __i915_spin_request(req);
+       ret = __i915_spin_request(req, state);
         if (ret == 0)
                 goto out;
  
@@ -1241,8 +1293,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
         for (;;) {
                 struct timer_list timer;
  
-               prepare_to_wait(&ring->irq_queue, &wait,
-                               interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
+               prepare_to_wait(&ring->irq_queue, &wait, state);
  
                 /* We need to check whether any gpu reset happened in between
                  * the caller grabbing the seqno and now ... */
@@ -1260,7 +1311,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
                         break;
                 }
  
-               if (interruptible && signal_pending(current)) {
+               if (signal_pending_state(state, current)) {
                         ret = -ERESTARTSYS;
                         break;
                 }
@@ -2554,6 +2605,7 @@ void __i915_add_request(struct drm_i915_gem_request *request,
         request->batch_obj = obj;
  
         request->emitted_jiffies = jiffies;
+       request->previous_seqno = ring->last_submitted_seqno;
         ring->last_submitted_seqno = request->seqno;
         list_add_tail(&request->list, &ring->request_list);
  
@@ -4080,6 +4132,29 @@ i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags)
         return false;
  }
  
+void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
+{
+       struct drm_i915_gem_object *obj = vma->obj;
+       bool mappable, fenceable;
+       u32 fence_size, fence_alignment;
+
+       fence_size = i915_gem_get_gtt_size(obj->base.dev,
+                                          obj->base.size,
+                                          obj->tiling_mode);
+       fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev,
+                                                    obj->base.size,
+                                                    obj->tiling_mode,
+                                                    true);
+
+       fenceable = (vma->node.size == fence_size &&
+                    (vma->node.start & (fence_alignment - 1)) == 0);
+
+       mappable = (vma->node.start + fence_size <=
+                   to_i915(obj->base.dev)->gtt.mappable_end);
+
+       obj->map_and_fenceable = mappable && fenceable;
+}
+
  static int
  i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
                        struct i915_address_space *vm,
@@ -4147,25 +4222,7 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
  
         if (ggtt_view && ggtt_view->type == I915_GGTT_VIEW_NORMAL &&
             (bound ^ vma->bound) & GLOBAL_BIND) {
-               bool mappable, fenceable;
-               u32 fence_size, fence_alignment;
-
-               fence_size = i915_gem_get_gtt_size(obj->base.dev,
-                                                  obj->base.size,
-                                                  obj->tiling_mode);
-               fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev,
-                                                            obj->base.size,
-                                                            obj->tiling_mode,
-                                                            true);
-
-               fenceable = (vma->node.size == fence_size &&
-                            (vma->node.start & (fence_alignment - 1)) == 0);
-
-               mappable = (vma->node.start + fence_size <=
-                           dev_priv->gtt.mappable_end);
-
-               obj->map_and_fenceable = mappable && fenceable;
-
+               __i915_vma_set_map_and_fenceable(vma);
                 WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable);
         }