Revert "drm/i915: Avoid using PIPE_CONTROL on Ironlake"
author		Chris Wilson <chris@chris-wilson.co.uk>
		Wed, 15 Dec 2010 09:56:50 +0000 (09:56 +0000)
committer	Chris Wilson <chris@chris-wilson.co.uk>
		Wed, 15 Dec 2010 10:15:25 +0000 (10:15 +0000)
Restore PIPE_CONTROL once again, just for Ironlake, as it appears
that MI_USER_INTERRUPT does not have the same coherency guarantees
there; that is, on Ironlake the interrupt following a GPU write is
not guaranteed to arrive after that write is visible to the CPU, as
it is on the other generations.
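
To illustrate, a minimal sketch of the wait side (condensed from
i915_wait_request(); treat the exact calls as illustrative rather
than a verbatim quote of the driver):

	wait_event(ring->irq_queue,
		   i915_seqno_passed(ring->get_seqno(ring), seqno));

If the MI_USER_INTERRUPT wakeup fires before the seqno write is
visible, the condition recheck reads a stale value and the waiter
goes back to sleep with the interrupt already consumed. Writing the
breadcrumb via PIPE_CONTROL and raising the interrupt through
PIPE_CONTROL_NOTIFY orders the write ahead of the interrupt.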

Reported-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Reported-by: Shuang He <shuang.he@intel.com>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=32402
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
drivers/gpu/drm/i915/i915_irq.c
drivers/gpu/drm/i915/intel_ringbuffer.c

index 2ddb98b5c90fb94c053a5ecfbbb81c4dee26bd11..e4a2e2c3dbe341d34f30a9f03502c77119b0b109 100644
@@ -349,7 +349,7 @@ static irqreturn_t ironlake_irq_handler(struct drm_device *dev)
                                READ_BREADCRUMB(dev_priv);
        }
 
-       if (gt_iir & GT_USER_INTERRUPT)
+       if (gt_iir & (GT_USER_INTERRUPT | GT_PIPE_NOTIFY))
                notify_ring(dev, &dev_priv->ring[RCS]);
        if (gt_iir & bsd_usr_interrupt)
                notify_ring(dev, &dev_priv->ring[VCS]);
@@ -1558,6 +1558,7 @@ static int ironlake_irq_postinstall(struct drm_device *dev)
        else
                render_irqs =
                        GT_USER_INTERRUPT |
+                       GT_PIPE_NOTIFY |
                        GT_BSD_USER_INTERRUPT;
        I915_WRITE(GTIER, render_irqs);
        POSTING_READ(GTIER);
index a3fd993e0de0b6fe43d33ae4503edf124df8252e..56bc95c056dd9da2a06946e9d06a8d52f1978c85 100644
@@ -209,6 +209,78 @@ static int init_ring_common(struct intel_ring_buffer *ring)
        return 0;
 }
 
+/*
+ * 965+ support PIPE_CONTROL commands, which provide finer grained control
+ * over cache flushing.
+ */
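+/*
+ * cpu_page[0] receives the breadcrumb seqno via a PIPE_CONTROL
+ * qword write; the remainder of the page, addressed at 128-byte
+ * strides from gtt_offset, is scratch space for the workaround
+ * flushes emitted in pc_render_add_request().
+ */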
+struct pipe_control {
+       struct drm_i915_gem_object *obj;
+       volatile u32 *cpu_page;
+       u32 gtt_offset;
+};
+
+static int
+init_pipe_control(struct intel_ring_buffer *ring)
+{
+       struct pipe_control *pc;
+       struct drm_i915_gem_object *obj;
+       int ret;
+
+       if (ring->private)
+               return 0;
+
+       pc = kmalloc(sizeof(*pc), GFP_KERNEL);
+       if (!pc)
+               return -ENOMEM;
+
+       obj = i915_gem_alloc_object(ring->dev, 4096);
+       if (obj == NULL) {
+               DRM_ERROR("Failed to allocate seqno page\n");
+               ret = -ENOMEM;
+               goto err;
+       }
+       obj->agp_type = AGP_USER_CACHED_MEMORY;
+
+       ret = i915_gem_object_pin(obj, 4096, true);
+       if (ret)
+               goto err_unref;
+
+       pc->gtt_offset = obj->gtt_offset;
+       pc->cpu_page = kmap(obj->pages[0]);
+       if (pc->cpu_page == NULL) {
+               ret = -ENOMEM;
+               goto err_unpin;
+       }
+
+       pc->obj = obj;
+       ring->private = pc;
+       return 0;
+
+err_unpin:
+       i915_gem_object_unpin(obj);
+err_unref:
+       drm_gem_object_unreference(&obj->base);
+err:
+       kfree(pc);
+       return ret;
+}
+
+static void
+cleanup_pipe_control(struct intel_ring_buffer *ring)
+{
+       struct pipe_control *pc = ring->private;
+       struct drm_i915_gem_object *obj;
+
+       if (!ring->private)
+               return;
+
+       obj = pc->obj;
+       kunmap(obj->pages[0]);
+       i915_gem_object_unpin(obj);
+       drm_gem_object_unreference(&obj->base);
+
+       kfree(pc);
+       ring->private = NULL;
+}
+
 static int init_render_ring(struct intel_ring_buffer *ring)
 {
        struct drm_device *dev = ring->dev;
@@ -222,9 +294,24 @@ static int init_render_ring(struct intel_ring_buffer *ring)
                I915_WRITE(MI_MODE, mode);
        }
 
+       if (INTEL_INFO(dev)->gen >= 6) {
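+               /* nothing extra to set up on gen6+ */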
+       } else if (IS_GEN5(dev)) {
+               ret = init_pipe_control(ring);
+               if (ret)
+                       return ret;
+       }
+
        return ret;
 }
 
+static void render_ring_cleanup(struct intel_ring_buffer *ring)
+{
+       if (!ring->private)
+               return;
+
+       cleanup_pipe_control(ring);
+}
+
 static void
 update_semaphore(struct intel_ring_buffer *ring, int i, u32 seqno)
 {
@@ -299,6 +386,65 @@ intel_ring_sync(struct intel_ring_buffer *ring,
        return 0;
 }
 
+/*
+ * Emit a depth-stalled PIPE_CONTROL carrying a dummy qword write to
+ * a scratch cacheline; a run of these flushes the outstanding
+ * PIPE_CONTROL writes out to memory (see the workaround comment in
+ * pc_render_add_request() below).
+ */
+#define PIPE_CONTROL_FLUSH(ring__, addr__)                                     \
+do {                                                                           \
+       intel_ring_emit(ring__, GFX_OP_PIPE_CONTROL | PIPE_CONTROL_QW_WRITE |   \
+                       PIPE_CONTROL_DEPTH_STALL | 2);                          \
+       intel_ring_emit(ring__, (addr__) | PIPE_CONTROL_GLOBAL_GTT);            \
+       intel_ring_emit(ring__, 0);                                             \
+       intel_ring_emit(ring__, 0);                                             \
+} while (0)
+
+static int
+pc_render_add_request(struct intel_ring_buffer *ring,
+                     u32 *result)
+{
+       struct drm_device *dev = ring->dev;
+       u32 seqno = i915_gem_get_seqno(dev);
+       struct pipe_control *pc = ring->private;
+       u32 scratch_addr = pc->gtt_offset + 128;
+       int ret;
+
+       /* For Ironlake, MI_USER_INTERRUPT was deprecated and apparently
+        * incoherent with writes to memory, i.e. completely fubar,
+        * so we need to use PIPE_NOTIFY instead.
+        *
+        * However, we also need to work around the qword write
+        * incoherence by flushing the 6 PIPE_NOTIFY buffers out to
+        * memory before requesting an interrupt.
+        */
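+       /* 32 dwords: 4 for the seqno write, 6 * 4 for the scratch
+        * flushes, and 4 for the final write + notify.
+        */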
+       ret = intel_ring_begin(ring, 32);
+       if (ret)
+               return ret;
+
+       intel_ring_emit(ring, GFX_OP_PIPE_CONTROL | PIPE_CONTROL_QW_WRITE |
+                       PIPE_CONTROL_WC_FLUSH | PIPE_CONTROL_TC_FLUSH);
+       intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
+       intel_ring_emit(ring, seqno);
+       intel_ring_emit(ring, 0);
+       PIPE_CONTROL_FLUSH(ring, scratch_addr);
+       scratch_addr += 128; /* write to separate cachelines */
+       PIPE_CONTROL_FLUSH(ring, scratch_addr);
+       scratch_addr += 128;
+       PIPE_CONTROL_FLUSH(ring, scratch_addr);
+       scratch_addr += 128;
+       PIPE_CONTROL_FLUSH(ring, scratch_addr);
+       scratch_addr += 128;
+       PIPE_CONTROL_FLUSH(ring, scratch_addr);
+       scratch_addr += 128;
+       PIPE_CONTROL_FLUSH(ring, scratch_addr);
+       intel_ring_emit(ring, GFX_OP_PIPE_CONTROL | PIPE_CONTROL_QW_WRITE |
+                       PIPE_CONTROL_WC_FLUSH | PIPE_CONTROL_TC_FLUSH |
+                       PIPE_CONTROL_NOTIFY);
+       intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
+       intel_ring_emit(ring, seqno);
+       intel_ring_emit(ring, 0);
+       intel_ring_advance(ring);
+
+       *result = seqno;
+       return 0;
+}
+
 static int
 render_ring_add_request(struct intel_ring_buffer *ring,
                        u32 *result)
@@ -327,6 +473,13 @@ ring_get_seqno(struct intel_ring_buffer *ring)
        return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
 }
 
+static u32
+pc_render_get_seqno(struct intel_ring_buffer *ring)
+{
+       struct pipe_control *pc = ring->private;
+
+       /* The breadcrumb is written to the pipe-control page, not to
+        * the hardware status page, so read it back from there.
+        */
+       return pc->cpu_page[0];
+}
+
 static bool
 render_ring_get_irq(struct intel_ring_buffer *ring)
 {
@@ -342,7 +495,7 @@ render_ring_get_irq(struct intel_ring_buffer *ring)
                spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
                if (HAS_PCH_SPLIT(dev))
                        ironlake_enable_graphics_irq(dev_priv,
-                                                    GT_USER_INTERRUPT);
+                                                    GT_PIPE_NOTIFY | GT_USER_INTERRUPT);
                else
                        i915_enable_irq(dev_priv, I915_USER_INTERRUPT);
                spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
@@ -363,7 +516,8 @@ render_ring_put_irq(struct intel_ring_buffer *ring)
                spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
                if (HAS_PCH_SPLIT(dev))
                        ironlake_disable_graphics_irq(dev_priv,
-                                                     GT_USER_INTERRUPT);
+                                                     GT_USER_INTERRUPT |
+                                                     GT_PIPE_NOTIFY);
                else
                        i915_disable_irq(dev_priv, I915_USER_INTERRUPT);
                spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
@@ -776,6 +930,7 @@ static const struct intel_ring_buffer render_ring = {
        .irq_get                = render_ring_get_irq,
        .irq_put                = render_ring_put_irq,
        .dispatch_execbuffer    = render_ring_dispatch_execbuffer,
+       .cleanup                = render_ring_cleanup,
 };
 
 /* ring buffer for bit-stream decoder */
@@ -1010,6 +1165,9 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
        *ring = render_ring;
        if (INTEL_INFO(dev)->gen >= 6) {
                ring->add_request = gen6_add_request;
+       } else if (IS_GEN5(dev)) {
+               ring->add_request = pc_render_add_request;
+               ring->get_seqno = pc_render_get_seqno;
        }
 
        if (!I915_NEED_GFX_HWS(dev)) {