drm/i915: Drop i915_gem_obj_is_pinned() from set-cache-level
authorChris Wilson <chris@chris-wilson.co.uk>
Fri, 9 Oct 2015 13:11:27 +0000 (14:11 +0100)
committerDaniel Vetter <daniel.vetter@ffwll.ch>
Tue, 13 Oct 2015 13:52:38 +0000 (15:52 +0200)
Since the removal of the pin-ioctl, we only care about not changing the
cache level on buffers pinned to the hardware as indicated by
obj->pin_display. Knowing that only objects pinned to the hardware will
have an elevated vma->pin_count, we can coalesce many of the linear
walks over the obj->vma_list.
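
In outline (a rough sketch rather than the exact hunks below), the
separate pinned check folds into the walk we already perform:

    /* before: an extra pass over obj->vma_list purely to test for pins */
    if (i915_gem_obj_is_pinned(obj))
            return -EBUSY;

    /* after: reject pinned VMA from within the single walk, since only
     * objects pinned to the hardware (obj->pin_display) can hold an
     * elevated vma->pin_count at this point
     */
    list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) {
            if (!drm_mm_node_allocated(&vma->node))
                    continue;
            if (vma->pin_count)
                    return -EBUSY;
            /* ... unbind or note the VMA as bound, as in the patch below ... */
    }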

v2: Try and retrospectively add comments explaining the steps in
rebinding the active VMA.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
drivers/gpu/drm/i915/i915_gem.c

index 1e67484fd5dc5a37b7db8f4cac69d07b4b396c79..e57061ac02191dd352d71f72ed0599f58c80b45b 100644 (file)
@@ -3657,53 +3657,106 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
        return 0;
 }
 
+/**
+ * Changes the cache-level of an object across all VMA.
+ *
+ * After this function returns, the object will be in the new cache-level
+ * across all GTT and the contents of the backing storage will be coherent
+ * with respect to the new cache-level. In order to keep the backing storage
+ * coherent for all users, we only allow a single cache level to be set
+ * globally on the object and prevent it from being changed whilst the
+ * hardware is reading from the object. That is, if the object is currently
+ * on the scanout it will be set to uncached (or equivalent display
+ * cache coherency) and all non-MOCS GPU access will also be uncached so
+ * that all direct access to the scanout remains coherent.
+ */
 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
                                    enum i915_cache_level cache_level)
 {
        struct drm_device *dev = obj->base.dev;
        struct i915_vma *vma, *next;
+       bool bound = false;
        int ret = 0;
 
        if (obj->cache_level == cache_level)
                goto out;
 
-       if (i915_gem_obj_is_pinned(obj)) {
-               DRM_DEBUG("can not change the cache level of pinned objects\n");
-               return -EBUSY;
-       }
-
+       /* Inspect the list of currently bound VMA and unbind any that would
+        * be invalid given the new cache-level. This is principally to
+        * catch the issue of the CS prefetch crossing page boundaries and
+        * reading an invalid PTE on older architectures.
+        */
        list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) {
+               if (!drm_mm_node_allocated(&vma->node))
+                       continue;
+
+               if (vma->pin_count) {
+                       DRM_DEBUG("can not change the cache level of pinned objects\n");
+                       return -EBUSY;
+               }
+
                if (!i915_gem_valid_gtt_space(vma, cache_level)) {
                        ret = i915_vma_unbind(vma);
                        if (ret)
                                return ret;
-               }
+               } else
+                       bound = true;
        }
 
-       if (i915_gem_obj_bound_any(obj)) {
+       /* We can reuse the existing drm_mm nodes but need to change the
+        * cache-level on the PTE. We could simply unbind them all and
+        * rebind with the correct cache-level on next use. However since
+        * we already have a valid slot, dma mapping, pages etc, we may as well
+        * rewrite the PTE in the belief that doing so tramples upon less
+        * state and so involves less work.
+        */
+       if (bound) {
+               /* Before we change the PTE, the GPU must not be accessing it.
+                * If we wait upon the object, we know that all the bound
+                * VMA are no longer active.
+                */
                ret = i915_gem_object_wait_rendering(obj, false);
                if (ret)
                        return ret;
 
-               i915_gem_object_finish_gtt(obj);
-
-               /* Before SandyBridge, you could not use tiling or fence
-                * registers with snooped memory, so relinquish any fences
-                * currently pointing to our region in the aperture.
-                */
-               if (INTEL_INFO(dev)->gen < 6) {
+               if (!HAS_LLC(dev) && cache_level != I915_CACHE_NONE) {
+                       /* Access to snoopable pages through the GTT is
+                        * incoherent and on some machines causes a hard
+                        * lockup. Relinquish the CPU mmapping to force
+                        * userspace to refault in the pages and we can
+                        * then double check if the GTT mapping is still
+                        * valid for that pointer access.
+                        */
+                       i915_gem_release_mmap(obj);
+
+                       /* As we no longer need a fence for GTT access,
+                        * we can relinquish it now (and so prevent having
+                        * to steal a fence from someone else on the next
+                        * fence request). Note GPU activity would have
+                        * dropped the fence as all snoopable access is
+                        * supposed to be linear.
+                        */
                        ret = i915_gem_object_put_fence(obj);
                        if (ret)
                                return ret;
+               } else {
+                       /* We either have incoherent backing store and
+                        * so no GTT access, or the architecture is fully
+                        * coherent. In such cases, existing GTT mmaps
+                        * ignore the cache bit in the PTE and we can
+                        * rewrite it without confusing the GPU or having
+                        * to force userspace to fault back in its mmaps.
+                        */
                }
 
-               list_for_each_entry(vma, &obj->vma_list, vma_link)
-                       if (drm_mm_node_allocated(&vma->node)) {
-                               ret = i915_vma_bind(vma, cache_level,
-                                                   PIN_UPDATE);
-                               if (ret)
-                                       return ret;
-                       }
+               list_for_each_entry(vma, &obj->vma_list, vma_link) {
+                       if (!drm_mm_node_allocated(&vma->node))
+                               continue;
+
+                       ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
+                       if (ret)
+                               return ret;
+               }
        }
 
        list_for_each_entry(vma, &obj->vma_list, vma_link)
@@ -3711,6 +3764,10 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
        obj->cache_level = cache_level;
 
 out:
+       /* Flush the dirty CPU caches to the backing storage so that the
+        * object is now coherent at its new cache level (with respect
+        * to the access domain).
+        */
        if (obj->cache_dirty &&
            obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
            cpu_write_needs_clflush(obj)) {