Merge tag 'topic/drm-misc-2015-07-28' into drm-intel-next-queued
author Daniel Vetter <daniel.vetter@ffwll.ch>
Thu, 6 Aug 2015 12:27:09 +0000 (14:27 +0200)
committer Daniel Vetter <daniel.vetter@ffwll.ch>
Thu, 6 Aug 2015 12:27:09 +0000 (14:27 +0200)
We need a few core drm patches to be able to merge Maarten's series to
convert DPMS over to atomic.

Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
29 files changed:
Documentation/DocBook/drm.tmpl
drivers/gpu/drm/i915/Makefile
drivers/gpu/drm/i915/i915_cmd_parser.c
drivers/gpu/drm/i915/i915_debugfs.c
drivers/gpu/drm/i915/i915_dma.c
drivers/gpu/drm/i915/i915_drv.c
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/i915_gem_fence.c [new file with mode: 0644]
drivers/gpu/drm/i915/i915_gem_render_state.c
drivers/gpu/drm/i915/i915_gem_render_state.h
drivers/gpu/drm/i915/i915_gem_tiling.c
drivers/gpu/drm/i915/i915_guc_reg.h [new file with mode: 0644]
drivers/gpu/drm/i915/i915_irq.c
drivers/gpu/drm/i915/i915_params.c
drivers/gpu/drm/i915/i915_reg.h
drivers/gpu/drm/i915/intel_csr.c
drivers/gpu/drm/i915/intel_display.c
drivers/gpu/drm/i915/intel_dp.c
drivers/gpu/drm/i915/intel_drv.h
drivers/gpu/drm/i915/intel_fbc.c
drivers/gpu/drm/i915/intel_frontbuffer.c
drivers/gpu/drm/i915/intel_guc_fwif.h [new file with mode: 0644]
drivers/gpu/drm/i915/intel_hotplug.c
drivers/gpu/drm/i915/intel_lrc.c
drivers/gpu/drm/i915/intel_pm.c
drivers/gpu/drm/i915/intel_psr.c
drivers/gpu/drm/i915/intel_ringbuffer.c
drivers/gpu/drm/i915/intel_runtime_pm.c

index 30b3651d642b5cb68f9b2e32748d3c05d2c7e5ac..9ddf8c6cb88791e1f0f5a12629cd9ce5be30f2e7 100644
@@ -3982,7 +3982,6 @@ int num_ioctls;</synopsis>
         <title>Interrupt Handling</title>
 !Pdrivers/gpu/drm/i915/i915_irq.c interrupt handling
 !Fdrivers/gpu/drm/i915/i915_irq.c intel_irq_init intel_irq_init_hw intel_hpd_init
-!Fdrivers/gpu/drm/i915/i915_irq.c intel_irq_fini
 !Fdrivers/gpu/drm/i915/i915_irq.c intel_runtime_pm_disable_interrupts
 !Fdrivers/gpu/drm/i915/i915_irq.c intel_runtime_pm_enable_interrupts
       </sect2>
@@ -4197,6 +4196,23 @@ int num_ioctls;</synopsis>
         <title>Global GTT views</title>
 !Pdrivers/gpu/drm/i915/i915_gem_gtt.c Global GTT views
 !Idrivers/gpu/drm/i915/i915_gem_gtt.c
+      </sect2>
+      <sect2>
+        <title>GTT Fences and Swizzling</title>
+!Idrivers/gpu/drm/i915/i915_gem_fence.c
+        <sect3>
+          <title>Global GTT Fence Handling</title>
+!Pdrivers/gpu/drm/i915/i915_gem_fence.c fence register handling
+        </sect3>
+        <sect3>
+          <title>Hardware Tiling and Swizzling Details</title>
+!Pdrivers/gpu/drm/i915/i915_gem_fence.c tiling swizzling details
+        </sect3>
+      </sect2>
+      <sect2>
+        <title>Object Tiling IOCTLs</title>
+!Idrivers/gpu/drm/i915/i915_gem_tiling.c
+!Pdrivers/gpu/drm/i915/i915_gem_tiling.c buffer object tiling
       </sect2>
       <sect2>
         <title>Buffer Object Eviction</title>
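
For reference, the !P, !F and !I markers in this template are kernel-doc
DocBook directives: !P pulls in a DOC: comment section from the named file by
title, !F pulls in the kernel-doc for the listed functions only, and !I pulls
in the kernel-doc for the file's internal (non-exported) symbols. A minimal
sketch of the DOC: comment style the new !P lines above consume (illustrative
only, not taken from the patch):

	/**
	 * DOC: buffer object tiling
	 *
	 * Overview paragraph that a matching
	 * "!Pdrivers/gpu/drm/i915/i915_gem_tiling.c buffer object tiling"
	 * line copies verbatim into the generated DocBook.
	 */
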
index e52e0125164450ad7f1e2dea841000b7a24c94bb..41fb8a9c5bef4e1b6f9c2b248860e0408133cc87 100644
@@ -6,12 +6,13 @@
 
 # core driver code
 i915-y := i915_drv.o \
+         i915_irq.o \
          i915_params.o \
           i915_suspend.o \
          i915_sysfs.o \
+         intel_csr.o \
          intel_pm.o \
-         intel_runtime_pm.o \
-         intel_csr.o
+         intel_runtime_pm.o
 
 i915-$(CONFIG_COMPAT)   += i915_ioc32.o
 i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o
@@ -20,21 +21,20 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o
 i915-y += i915_cmd_parser.o \
          i915_gem_batch_pool.o \
          i915_gem_context.o \
-         i915_gem_render_state.o \
          i915_gem_debug.o \
          i915_gem_dmabuf.o \
          i915_gem_evict.o \
          i915_gem_execbuffer.o \
+         i915_gem_fence.o \
          i915_gem_gtt.o \
          i915_gem.o \
+         i915_gem_render_state.o \
          i915_gem_shrinker.o \
          i915_gem_stolen.o \
          i915_gem_tiling.o \
          i915_gem_userptr.o \
          i915_gpu_error.o \
-         i915_irq.o \
          i915_trace_points.o \
-         intel_hotplug.o \
          intel_lrc.o \
          intel_mocs.o \
          intel_ringbuffer.o \
@@ -48,11 +48,14 @@ i915-y += intel_renderstate_gen6.o \
 
 # modesetting core code
 i915-y += intel_audio.o \
+         intel_atomic.o \
+         intel_atomic_plane.o \
          intel_bios.o \
          intel_display.o \
          intel_fbc.o \
          intel_fifo_underrun.o \
          intel_frontbuffer.o \
+         intel_hotplug.o \
          intel_modes.o \
          intel_overlay.o \
          intel_psr.o \
@@ -68,15 +71,13 @@ i915-y += dvo_ch7017.o \
          dvo_ns2501.o \
          dvo_sil164.o \
          dvo_tfp410.o \
-         intel_atomic.o \
-         intel_atomic_plane.o \
          intel_crt.o \
          intel_ddi.o \
-         intel_dp.o \
          intel_dp_mst.o \
+         intel_dp.o \
          intel_dsi.o \
-         intel_dsi_pll.o \
          intel_dsi_panel_vbt.o \
+         intel_dsi_pll.o \
          intel_dvo.o \
          intel_hdmi.o \
          intel_i2c.o \
index 430571b977db9d771a2ff15ae543526e3a321418..237ff6884a2227bc9b7520ed4bcaabe75d924f74 100644
@@ -151,8 +151,8 @@ static const struct drm_i915_cmd_descriptor render_cmds[] = {
        CMD(  MI_ARB_ON_OFF,                    SMI,    F,  1,      R  ),
        CMD(  MI_PREDICATE,                     SMI,    F,  1,      S  ),
        CMD(  MI_TOPOLOGY_FILTER,               SMI,    F,  1,      S  ),
-       CMD(  MI_DISPLAY_FLIP,                  SMI,   !F,  0xFF,   R  ),
        CMD(  MI_SET_APPID,                     SMI,    F,  1,      S  ),
+       CMD(  MI_DISPLAY_FLIP,                  SMI,   !F,  0xFF,   R  ),
        CMD(  MI_SET_CONTEXT,                   SMI,   !F,  0xFF,   R  ),
        CMD(  MI_URB_CLEAR,                     SMI,   !F,  0xFF,   S  ),
        CMD(  MI_STORE_DWORD_IMM,               SMI,   !F,  0x3F,   B,
@@ -564,7 +564,7 @@ static bool validate_cmds_sorted(struct intel_engine_cs *ring,
 
                for (j = 0; j < table->count; j++) {
                        const struct drm_i915_cmd_descriptor *desc =
-                               &table->table[i];
+                               &table->table[j];
                        u32 curr = desc->cmd.value & desc->cmd.mask;
 
                        if (curr < previous) {
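
The hunk above fixes a copy-and-paste bug: the inner loop read
table->table[i] (the outer loop index) instead of table->table[j], so every
inner iteration looked at the same descriptor and the sortedness check could
never fire. A simplified sketch of the corrected check, with the driver's
error reporting trimmed:

	/* Each table must be sorted by ascending masked command value. */
	static bool cmds_sorted(const struct drm_i915_cmd_table *table)
	{
		u32 previous = 0;
		int j;

		for (j = 0; j < table->count; j++) {
			/* index with j: the entry under test, not the table */
			const struct drm_i915_cmd_descriptor *desc =
				&table->table[j];
			u32 curr = desc->cmd.value & desc->cmd.mask;

			if (curr < previous)
				return false;
			previous = curr;
		}
		return true;
	}
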
index 51580bdd587fd8e6e3ef3f4820682924bdc35003..23a69307e12ed1205a1fb93540fd475a42b8f4e4 100644
@@ -4030,24 +4030,14 @@ static ssize_t i915_displayport_test_active_write(struct file *file,
 {
        char *input_buffer;
        int status = 0;
-       struct seq_file *m;
        struct drm_device *dev;
        struct drm_connector *connector;
        struct list_head *connector_list;
        struct intel_dp *intel_dp;
        int val = 0;
 
-       m = file->private_data;
-       if (!m) {
-               status = -ENODEV;
-               return status;
-       }
-       dev = m->private;
+       dev = ((struct seq_file *)file->private_data)->private;
 
-       if (!dev) {
-               status = -ENODEV;
-               return status;
-       }
        connector_list = &dev->mode_config.connector_list;
 
        if (len == 0)
@@ -4071,9 +4061,7 @@ static ssize_t i915_displayport_test_active_write(struct file *file,
                    DRM_MODE_CONNECTOR_DisplayPort)
                        continue;
 
-               if (connector->connector_type ==
-                   DRM_MODE_CONNECTOR_DisplayPort &&
-                   connector->status == connector_status_connected &&
+               if (connector->status == connector_status_connected &&
                    connector->encoder != NULL) {
                        intel_dp = enc_to_intel_dp(connector->encoder);
                        status = kstrtoint(input_buffer, 10, &val);
@@ -4105,9 +4093,6 @@ static int i915_displayport_test_active_show(struct seq_file *m, void *data)
        struct list_head *connector_list = &dev->mode_config.connector_list;
        struct intel_dp *intel_dp;
 
-       if (!dev)
-               return -ENODEV;
-
        list_for_each_entry(connector, connector_list, head) {
 
                if (connector->connector_type !=
@@ -4152,9 +4137,6 @@ static int i915_displayport_test_data_show(struct seq_file *m, void *data)
        struct list_head *connector_list = &dev->mode_config.connector_list;
        struct intel_dp *intel_dp;
 
-       if (!dev)
-               return -ENODEV;
-
        list_for_each_entry(connector, connector_list, head) {
 
                if (connector->connector_type !=
@@ -4194,9 +4176,6 @@ static int i915_displayport_test_type_show(struct seq_file *m, void *data)
        struct list_head *connector_list = &dev->mode_config.connector_list;
        struct intel_dp *intel_dp;
 
-       if (!dev)
-               return -ENODEV;
-
        list_for_each_entry(connector, connector_list, head) {
 
                if (connector->connector_type !=
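
The checks removed in these hunks were dead code: for a debugfs file opened
through single_open(), file->private_data always points at a live struct
seq_file, and its ->private member is whatever pointer was handed to
single_open(), so neither can be NULL by the time a read or write handler
runs. A minimal sketch of that pattern, with hypothetical names:

	static int demo_show(struct seq_file *m, void *unused)
	{
		struct drm_device *dev = m->private; /* set by single_open() */

		seq_printf(m, "driver: %s\n", dev->driver->name);
		return 0;
	}

	static int demo_open(struct inode *inode, struct file *file)
	{
		/* inode->i_private was supplied when the file was created */
		return single_open(file, demo_show, inode->i_private);
	}
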
index b1f9e5561cf2c73d1a9ca62536663322cb370ec6..ab37d1121be8277728bff5d25a0cb4a4599de0aa 100644
@@ -1274,13 +1274,3 @@ const struct drm_ioctl_desc i915_ioctls[] = {
 };
 
 int i915_max_ioctl = ARRAY_SIZE(i915_ioctls);
-
-/*
- * This is really ugly: Because old userspace abused the linux agp interface to
- * manage the gtt, we need to claim that all intel devices are agp.  For
- * otherwise the drm core refuses to initialize the agp support code.
- */
-int i915_driver_device_is_agp(struct drm_device *dev)
-{
-       return 1;
-}
index 0d6775a3e88c3e09254874b3a99d0d950d5f0f5c..1d887459e37fd717992ddbfb52228d8550e55a09 100644
@@ -935,8 +935,6 @@ static int i915_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        if (PCI_FUNC(pdev->devfn))
                return -ENODEV;
 
-       driver.driver_features &= ~(DRIVER_USE_AGP);
-
        return drm_get_pci_dev(pdev, ent, &driver);
 }
 
@@ -1491,7 +1489,15 @@ static int intel_runtime_suspend(struct device *device)
         * FIXME: We really should find a document that references the arguments
         * used below!
         */
-       if (IS_HASWELL(dev)) {
+       if (IS_BROADWELL(dev)) {
+               /*
+                * On Broadwell, if we use PCI_D1 the PCH DDI ports will stop
+                * being detected, and the call we do at intel_runtime_resume()
+                * won't be able to restore them. Since PCI_D3hot matches the
+                * actual specification and appears to be working, use it.
+                */
+               intel_opregion_notify_adapter(dev, PCI_D3hot);
+       } else {
                /*
                 * current versions of firmware which depend on this opregion
                 * notification have repurposed the D1 definition to mean
@@ -1500,16 +1506,6 @@ static int intel_runtime_suspend(struct device *device)
                 * the suspend path.
                 */
                intel_opregion_notify_adapter(dev, PCI_D1);
-       } else {
-               /*
-                * On Broadwell, if we use PCI_D1 the PCH DDI ports will stop
-                * being detected, and the call we do at intel_runtime_resume()
-                * won't be able to restore them. Since PCI_D3hot matches the
-                * actual specification and appears to be working, use it. Let's
-                * assume the other non-Haswell platforms will stay the same as
-                * Broadwell.
-                */
-               intel_opregion_notify_adapter(dev, PCI_D3hot);
        }
 
        assert_forcewakes_inactive(dev_priv);
@@ -1649,7 +1645,6 @@ static struct drm_driver driver = {
         * deal with them for Intel hardware.
         */
        .driver_features =
-           DRIVER_USE_AGP |
            DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM | DRIVER_PRIME |
            DRIVER_RENDER,
        .load = i915_driver_load,
@@ -1664,7 +1659,6 @@ static struct drm_driver driver = {
        .suspend = i915_suspend_legacy,
        .resume = i915_resume_legacy,
 
-       .device_is_agp = i915_driver_device_is_agp,
 #if defined(CONFIG_DEBUG_FS)
        .debugfs_init = i915_debugfs_init,
        .debugfs_cleanup = i915_debugfs_cleanup,
index 23ce125e0298e05b430f05daf45ef56952948fed..4e9f7b16a729b0d50655035c4edf419b9ffe3647 100644
@@ -56,7 +56,7 @@
 
 #define DRIVER_NAME            "i915"
 #define DRIVER_DESC            "Intel Graphics"
-#define DRIVER_DATE            "20150717"
+#define DRIVER_DATE            "20150731"
 
 #undef WARN_ON
 /* Many gcc seem to not see through this and fall over :( */
@@ -206,11 +206,11 @@ enum intel_display_power_domain {
 
 enum hpd_pin {
        HPD_NONE = 0,
-       HPD_PORT_A = HPD_NONE, /* PORT_A is internal */
        HPD_TV = HPD_NONE,     /* TV is known to be unreliable */
        HPD_CRT,
        HPD_SDVO_B,
        HPD_SDVO_C,
+       HPD_PORT_A,
        HPD_PORT_B,
        HPD_PORT_C,
        HPD_PORT_D,
@@ -742,7 +742,7 @@ enum csr_state {
 
 struct intel_csr {
        const char *fw_path;
-       __be32 *dmc_payload;
+       uint32_t *dmc_payload;
        uint32_t dmc_fw_size;
        uint32_t mmio_count;
        uint32_t mmioaddr[8];
@@ -894,6 +894,7 @@ enum fb_op_origin {
        ORIGIN_CPU,
        ORIGIN_CS,
        ORIGIN_FLIP,
+       ORIGIN_DIRTYFB,
 };
 
 struct i915_fbc {
@@ -2610,6 +2611,8 @@ struct i915_params {
        bool reset;
        bool disable_display;
        bool disable_vtd_wa;
+       bool enable_guc_submission;
+       int guc_log_level;
        int use_mmio_flip;
        int mmio_debug;
        bool verbose_state_checks;
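
The two new members are backed by module parameters in i915_params.c, which
this merge also touches. A hedged sketch of the usual i915 pattern for such
parameters; the defaults and permission bits below are assumptions, not read
out of the patch:

	module_param_named(enable_guc_submission, i915.enable_guc_submission,
			   bool, 0400); /* assumed perms */
	MODULE_PARM_DESC(enable_guc_submission, "Enable GuC submission");

	module_param_named(guc_log_level, i915.guc_log_level, int, 0400);
	MODULE_PARM_DESC(guc_log_level,
			 "GuC firmware logging level (-1: disabled)");
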
@@ -2626,7 +2629,6 @@ extern void i915_driver_preclose(struct drm_device *dev,
                                 struct drm_file *file);
 extern void i915_driver_postclose(struct drm_device *dev,
                                  struct drm_file *file);
-extern int i915_driver_device_is_agp(struct drm_device * dev);
 #ifdef CONFIG_COMPAT
 extern long i915_compat_ioctl(struct file *filp, unsigned int cmd,
                              unsigned long arg);
@@ -2646,7 +2648,7 @@ void intel_hpd_irq_handler(struct drm_device *dev, u32 pin_mask, u32 long_mask);
 void intel_hpd_init(struct drm_i915_private *dev_priv);
 void intel_hpd_init_work(struct drm_i915_private *dev_priv);
 void intel_hpd_cancel_work(struct drm_i915_private *dev_priv);
-enum port intel_hpd_pin_to_port(enum hpd_pin pin);
+bool intel_hpd_pin_to_port(enum hpd_pin pin, enum port *port);
 
 /* i915_irq.c */
 void i915_queue_hangcheck(struct drm_device *dev);
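
Since HPD_PORT_A is now a real hotplug pin rather than an alias for HPD_NONE
(see the enum change above), intel_hpd_pin_to_port() can no longer signal
"no port" through its return value; it now returns false for pins without an
associated digital port and hands the port back through an out-parameter. An
illustrative caller, assuming a pin variable in scope:

	enum port port;

	if (!intel_hpd_pin_to_port(pin, &port))
		return; /* e.g. HPD_CRT: no digital port to report */

	DRM_DEBUG_KMS("hotplug on port %c\n", port_name(port));
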
@@ -2758,6 +2760,8 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
                         const struct drm_i915_gem_object_ops *ops);
 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
                                                  size_t size);
+struct drm_i915_gem_object *i915_gem_object_create_from_data(
+               struct drm_device *dev, const void *data, size_t size);
 void i915_init_vm(struct drm_i915_private *dev_priv,
                  struct i915_address_space *vm);
 void i915_gem_free_object(struct drm_gem_object *obj);
@@ -2864,11 +2868,6 @@ static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req,
 
 int __must_check i915_gem_get_seqno(struct drm_device *dev, u32 *seqno);
 int __must_check i915_gem_set_seqno(struct drm_device *dev, u32 seqno);
-int __must_check i915_gem_object_get_fence(struct drm_i915_gem_object *obj);
-int __must_check i915_gem_object_put_fence(struct drm_i915_gem_object *obj);
-
-bool i915_gem_object_pin_fence(struct drm_i915_gem_object *obj);
-void i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj);
 
 struct drm_i915_gem_request *
 i915_gem_find_active_request(struct intel_engine_cs *ring);
@@ -2966,8 +2965,6 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
 struct dma_buf *i915_gem_prime_export(struct drm_device *dev,
                                struct drm_gem_object *gem_obj, int flags);
 
-void i915_gem_restore_fences(struct drm_device *dev);
-
 unsigned long
 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o,
                              const struct i915_ggtt_view *view);
@@ -3062,6 +3059,19 @@ i915_gem_object_ggtt_unpin(struct drm_i915_gem_object *obj)
        i915_gem_object_ggtt_unpin_view(obj, &i915_ggtt_view_normal);
 }
 
+/* i915_gem_fence.c */
+int __must_check i915_gem_object_get_fence(struct drm_i915_gem_object *obj);
+int __must_check i915_gem_object_put_fence(struct drm_i915_gem_object *obj);
+
+bool i915_gem_object_pin_fence(struct drm_i915_gem_object *obj);
+void i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj);
+
+void i915_gem_restore_fences(struct drm_device *dev);
+
+void i915_gem_detect_bit_6_swizzle(struct drm_device *dev);
+void i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj);
+void i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj);
+
 /* i915_gem_context.c */
 int __must_check i915_gem_context_init(struct drm_device *dev);
 void i915_gem_context_fini(struct drm_device *dev);
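
These declarations move into their own block because the implementations now
live in the new i915_gem_fence.c (added below). As the kerneldoc in that file
stresses, fence state is committed to the hardware lazily, so GTT access
paths have to synchronize it explicitly. A condensed sketch of the expected
calling pattern, error handling trimmed; note that unpin copes with objects
that ended up without a fence, so the pin result need not be tracked here:

	/* Make the fence registers match the object's tiling state... */
	ret = i915_gem_object_get_fence(obj);
	if (ret)
		return ret;

	/* ...and keep the fence from being stolen while it is in use. */
	i915_gem_object_pin_fence(obj);

	/* fenced (detiled) GTT access happens here */

	i915_gem_object_unpin_fence(obj);
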
@@ -3154,10 +3164,6 @@ static inline bool i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_objec
                obj->tiling_mode != I915_TILING_NONE;
 }
 
-void i915_gem_detect_bit_6_swizzle(struct drm_device *dev);
-void i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj);
-void i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj);
-
 /* i915_gem_debug.c */
 #if WATCH_LISTS
 int i915_verify_lists(struct drm_device *dev);
index d9f2701b45932e48c77fe0826158240e53c162ca..84f91bcc12f7946de23d71632cd3fbf66f62312e 100644
@@ -46,11 +46,6 @@ static void
 i915_gem_object_retire__write(struct drm_i915_gem_object *obj);
 static void
 i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring);
-static void i915_gem_write_fence(struct drm_device *dev, int reg,
-                                struct drm_i915_gem_object *obj);
-static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
-                                        struct drm_i915_fence_reg *fence,
-                                        bool enable);
 
 static bool cpu_cache_is_coherent(struct drm_device *dev,
                                  enum i915_cache_level level)
@@ -66,18 +61,6 @@ static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
        return obj->pin_display;
 }
 
-static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
-{
-       if (obj->tiling_mode)
-               i915_gem_release_mmap(obj);
-
-       /* As we do not have an associated fence register, we will force
-        * a tiling change if we ever need to acquire one.
-        */
-       obj->fence_dirty = false;
-       obj->fence_reg = I915_FENCE_REG_NONE;
-}
-
 /* some bookkeeping */
 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
                                  size_t size)
@@ -2402,6 +2385,13 @@ i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring)
        if (obj->active)
                return;
 
+       /* Bump our place on the bound list to keep it roughly in LRU order
+        * so that we don't steal from recently used but inactive objects
+        * (unless we are forced to ofc!)
+        */
+       list_move_tail(&obj->global_list,
+                      &to_i915(obj->base.dev)->mm.bound_list);
+
        list_for_each_entry(vma, &obj->vma_list, vma_link) {
                if (!list_empty(&vma->mm_list))
                        list_move_tail(&vma->mm_list, &vma->vm->inactive_list);
@@ -2793,27 +2783,6 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
        }
 }
 
-void i915_gem_restore_fences(struct drm_device *dev)
-{
-       struct drm_i915_private *dev_priv = dev->dev_private;
-       int i;
-
-       for (i = 0; i < dev_priv->num_fence_regs; i++) {
-               struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
-
-               /*
-                * Commit delayed tiling changes if we have an object still
-                * attached to the fence, otherwise just clear the fence.
-                */
-               if (reg->obj) {
-                       i915_gem_object_update_fence(reg->obj, reg,
-                                                    reg->obj->tiling_mode);
-               } else {
-                       i915_gem_write_fence(dev, i, NULL);
-               }
-       }
-}
-
 void i915_gem_reset(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
@@ -3340,343 +3309,6 @@ int i915_gpu_idle(struct drm_device *dev)
        return 0;
 }
 
-static void i965_write_fence_reg(struct drm_device *dev, int reg,
-                                struct drm_i915_gem_object *obj)
-{
-       struct drm_i915_private *dev_priv = dev->dev_private;
-       int fence_reg;
-       int fence_pitch_shift;
-
-       if (INTEL_INFO(dev)->gen >= 6) {
-               fence_reg = FENCE_REG_SANDYBRIDGE_0;
-               fence_pitch_shift = SANDYBRIDGE_FENCE_PITCH_SHIFT;
-       } else {
-               fence_reg = FENCE_REG_965_0;
-               fence_pitch_shift = I965_FENCE_PITCH_SHIFT;
-       }
-
-       fence_reg += reg * 8;
-
-       /* To w/a incoherency with non-atomic 64-bit register updates,
-        * we split the 64-bit update into two 32-bit writes. In order
-        * for a partial fence not to be evaluated between writes, we
-        * precede the update with write to turn off the fence register,
-        * and only enable the fence as the last step.
-        *
-        * For extra levels of paranoia, we make sure each step lands
-        * before applying the next step.
-        */
-       I915_WRITE(fence_reg, 0);
-       POSTING_READ(fence_reg);
-
-       if (obj) {
-               u32 size = i915_gem_obj_ggtt_size(obj);
-               uint64_t val;
-
-               /* Adjust fence size to match tiled area */
-               if (obj->tiling_mode != I915_TILING_NONE) {
-                       uint32_t row_size = obj->stride *
-                               (obj->tiling_mode == I915_TILING_Y ? 32 : 8);
-                       size = (size / row_size) * row_size;
-               }
-
-               val = (uint64_t)((i915_gem_obj_ggtt_offset(obj) + size - 4096) &
-                                0xfffff000) << 32;
-               val |= i915_gem_obj_ggtt_offset(obj) & 0xfffff000;
-               val |= (uint64_t)((obj->stride / 128) - 1) << fence_pitch_shift;
-               if (obj->tiling_mode == I915_TILING_Y)
-                       val |= 1 << I965_FENCE_TILING_Y_SHIFT;
-               val |= I965_FENCE_REG_VALID;
-
-               I915_WRITE(fence_reg + 4, val >> 32);
-               POSTING_READ(fence_reg + 4);
-
-               I915_WRITE(fence_reg + 0, val);
-               POSTING_READ(fence_reg);
-       } else {
-               I915_WRITE(fence_reg + 4, 0);
-               POSTING_READ(fence_reg + 4);
-       }
-}
-
-static void i915_write_fence_reg(struct drm_device *dev, int reg,
-                                struct drm_i915_gem_object *obj)
-{
-       struct drm_i915_private *dev_priv = dev->dev_private;
-       u32 val;
-
-       if (obj) {
-               u32 size = i915_gem_obj_ggtt_size(obj);
-               int pitch_val;
-               int tile_width;
-
-               WARN((i915_gem_obj_ggtt_offset(obj) & ~I915_FENCE_START_MASK) ||
-                    (size & -size) != size ||
-                    (i915_gem_obj_ggtt_offset(obj) & (size - 1)),
-                    "object 0x%08lx [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n",
-                    i915_gem_obj_ggtt_offset(obj), obj->map_and_fenceable, size);
-
-               if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
-                       tile_width = 128;
-               else
-                       tile_width = 512;
-
-               /* Note: pitch better be a power of two tile widths */
-               pitch_val = obj->stride / tile_width;
-               pitch_val = ffs(pitch_val) - 1;
-
-               val = i915_gem_obj_ggtt_offset(obj);
-               if (obj->tiling_mode == I915_TILING_Y)
-                       val |= 1 << I830_FENCE_TILING_Y_SHIFT;
-               val |= I915_FENCE_SIZE_BITS(size);
-               val |= pitch_val << I830_FENCE_PITCH_SHIFT;
-               val |= I830_FENCE_REG_VALID;
-       } else
-               val = 0;
-
-       if (reg < 8)
-               reg = FENCE_REG_830_0 + reg * 4;
-       else
-               reg = FENCE_REG_945_8 + (reg - 8) * 4;
-
-       I915_WRITE(reg, val);
-       POSTING_READ(reg);
-}
-
-static void i830_write_fence_reg(struct drm_device *dev, int reg,
-                               struct drm_i915_gem_object *obj)
-{
-       struct drm_i915_private *dev_priv = dev->dev_private;
-       uint32_t val;
-
-       if (obj) {
-               u32 size = i915_gem_obj_ggtt_size(obj);
-               uint32_t pitch_val;
-
-               WARN((i915_gem_obj_ggtt_offset(obj) & ~I830_FENCE_START_MASK) ||
-                    (size & -size) != size ||
-                    (i915_gem_obj_ggtt_offset(obj) & (size - 1)),
-                    "object 0x%08lx not 512K or pot-size 0x%08x aligned\n",
-                    i915_gem_obj_ggtt_offset(obj), size);
-
-               pitch_val = obj->stride / 128;
-               pitch_val = ffs(pitch_val) - 1;
-
-               val = i915_gem_obj_ggtt_offset(obj);
-               if (obj->tiling_mode == I915_TILING_Y)
-                       val |= 1 << I830_FENCE_TILING_Y_SHIFT;
-               val |= I830_FENCE_SIZE_BITS(size);
-               val |= pitch_val << I830_FENCE_PITCH_SHIFT;
-               val |= I830_FENCE_REG_VALID;
-       } else
-               val = 0;
-
-       I915_WRITE(FENCE_REG_830_0 + reg * 4, val);
-       POSTING_READ(FENCE_REG_830_0 + reg * 4);
-}
-
-inline static bool i915_gem_object_needs_mb(struct drm_i915_gem_object *obj)
-{
-       return obj && obj->base.read_domains & I915_GEM_DOMAIN_GTT;
-}
-
-static void i915_gem_write_fence(struct drm_device *dev, int reg,
-                                struct drm_i915_gem_object *obj)
-{
-       struct drm_i915_private *dev_priv = dev->dev_private;
-
-       /* Ensure that all CPU reads are completed before installing a fence
-        * and all writes before removing the fence.
-        */
-       if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj))
-               mb();
-
-       WARN(obj && (!obj->stride || !obj->tiling_mode),
-            "bogus fence setup with stride: 0x%x, tiling mode: %i\n",
-            obj->stride, obj->tiling_mode);
-
-       if (IS_GEN2(dev))
-               i830_write_fence_reg(dev, reg, obj);
-       else if (IS_GEN3(dev))
-               i915_write_fence_reg(dev, reg, obj);
-       else if (INTEL_INFO(dev)->gen >= 4)
-               i965_write_fence_reg(dev, reg, obj);
-
-       /* And similarly be paranoid that no direct access to this region
-        * is reordered to before the fence is installed.
-        */
-       if (i915_gem_object_needs_mb(obj))
-               mb();
-}
-
-static inline int fence_number(struct drm_i915_private *dev_priv,
-                              struct drm_i915_fence_reg *fence)
-{
-       return fence - dev_priv->fence_regs;
-}
-
-static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
-                                        struct drm_i915_fence_reg *fence,
-                                        bool enable)
-{
-       struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
-       int reg = fence_number(dev_priv, fence);
-
-       i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL);
-
-       if (enable) {
-               obj->fence_reg = reg;
-               fence->obj = obj;
-               list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list);
-       } else {
-               obj->fence_reg = I915_FENCE_REG_NONE;
-               fence->obj = NULL;
-               list_del_init(&fence->lru_list);
-       }
-       obj->fence_dirty = false;
-}
-
-static int
-i915_gem_object_wait_fence(struct drm_i915_gem_object *obj)
-{
-       if (obj->last_fenced_req) {
-               int ret = i915_wait_request(obj->last_fenced_req);
-               if (ret)
-                       return ret;
-
-               i915_gem_request_assign(&obj->last_fenced_req, NULL);
-       }
-
-       return 0;
-}
-
-int
-i915_gem_object_put_fence(struct drm_i915_gem_object *obj)
-{
-       struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
-       struct drm_i915_fence_reg *fence;
-       int ret;
-
-       ret = i915_gem_object_wait_fence(obj);
-       if (ret)
-               return ret;
-
-       if (obj->fence_reg == I915_FENCE_REG_NONE)
-               return 0;
-
-       fence = &dev_priv->fence_regs[obj->fence_reg];
-
-       if (WARN_ON(fence->pin_count))
-               return -EBUSY;
-
-       i915_gem_object_fence_lost(obj);
-       i915_gem_object_update_fence(obj, fence, false);
-
-       return 0;
-}
-
-static struct drm_i915_fence_reg *
-i915_find_fence_reg(struct drm_device *dev)
-{
-       struct drm_i915_private *dev_priv = dev->dev_private;
-       struct drm_i915_fence_reg *reg, *avail;
-       int i;
-
-       /* First try to find a free reg */
-       avail = NULL;
-       for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
-               reg = &dev_priv->fence_regs[i];
-               if (!reg->obj)
-                       return reg;
-
-               if (!reg->pin_count)
-                       avail = reg;
-       }
-
-       if (avail == NULL)
-               goto deadlock;
-
-       /* None available, try to steal one or wait for a user to finish */
-       list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) {
-               if (reg->pin_count)
-                       continue;
-
-               return reg;
-       }
-
-deadlock:
-       /* Wait for completion of pending flips which consume fences */
-       if (intel_has_pending_fb_unpin(dev))
-               return ERR_PTR(-EAGAIN);
-
-       return ERR_PTR(-EDEADLK);
-}
-
-/**
- * i915_gem_object_get_fence - set up fencing for an object
- * @obj: object to map through a fence reg
- *
- * When mapping objects through the GTT, userspace wants to be able to write
- * to them without having to worry about swizzling if the object is tiled.
- * This function walks the fence regs looking for a free one for @obj,
- * stealing one if it can't find any.
- *
- * It then sets up the reg based on the object's properties: address, pitch
- * and tiling format.
- *
- * For an untiled surface, this removes any existing fence.
- */
-int
-i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
-{
-       struct drm_device *dev = obj->base.dev;
-       struct drm_i915_private *dev_priv = dev->dev_private;
-       bool enable = obj->tiling_mode != I915_TILING_NONE;
-       struct drm_i915_fence_reg *reg;
-       int ret;
-
-       /* Have we updated the tiling parameters upon the object and so
-        * will need to serialise the write to the associated fence register?
-        */
-       if (obj->fence_dirty) {
-               ret = i915_gem_object_wait_fence(obj);
-               if (ret)
-                       return ret;
-       }
-
-       /* Just update our place in the LRU if our fence is getting reused. */
-       if (obj->fence_reg != I915_FENCE_REG_NONE) {
-               reg = &dev_priv->fence_regs[obj->fence_reg];
-               if (!obj->fence_dirty) {
-                       list_move_tail(&reg->lru_list,
-                                      &dev_priv->mm.fence_list);
-                       return 0;
-               }
-       } else if (enable) {
-               if (WARN_ON(!obj->map_and_fenceable))
-                       return -EINVAL;
-
-               reg = i915_find_fence_reg(dev);
-               if (IS_ERR(reg))
-                       return PTR_ERR(reg);
-
-               if (reg->obj) {
-                       struct drm_i915_gem_object *old = reg->obj;
-
-                       ret = i915_gem_object_wait_fence(old);
-                       if (ret)
-                               return ret;
-
-                       i915_gem_object_fence_lost(old);
-               }
-       } else
-               return 0;
-
-       i915_gem_object_update_fence(obj, reg, enable);
-
-       return 0;
-}
-
 static bool i915_gem_valid_gtt_space(struct i915_vma *vma,
                                     unsigned long cache_level)
 {
@@ -4476,32 +4108,6 @@ i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
        --vma->pin_count;
 }
 
-bool
-i915_gem_object_pin_fence(struct drm_i915_gem_object *obj)
-{
-       if (obj->fence_reg != I915_FENCE_REG_NONE) {
-               struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
-               struct i915_vma *ggtt_vma = i915_gem_obj_to_ggtt(obj);
-
-               WARN_ON(!ggtt_vma ||
-                       dev_priv->fence_regs[obj->fence_reg].pin_count >
-                       ggtt_vma->pin_count);
-               dev_priv->fence_regs[obj->fence_reg].pin_count++;
-               return true;
-       } else
-               return false;
-}
-
-void
-i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj)
-{
-       if (obj->fence_reg != I915_FENCE_REG_NONE) {
-               struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
-               WARN_ON(dev_priv->fence_regs[obj->fence_reg].pin_count <= 0);
-               dev_priv->fence_regs[obj->fence_reg].pin_count--;
-       }
-}
-
 int
 i915_gem_busy_ioctl(struct drm_device *dev, void *data,
                    struct drm_file *file)
@@ -5477,3 +5083,43 @@ bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj)
 
        return false;
 }
+
+/* Allocate a new GEM object and fill it with the supplied data */
+struct drm_i915_gem_object *
+i915_gem_object_create_from_data(struct drm_device *dev,
+                                const void *data, size_t size)
+{
+       struct drm_i915_gem_object *obj;
+       struct sg_table *sg;
+       size_t bytes;
+       int ret;
+
+       obj = i915_gem_alloc_object(dev, round_up(size, PAGE_SIZE));
+       if (IS_ERR_OR_NULL(obj))
+               return obj;
+
+       ret = i915_gem_object_set_to_cpu_domain(obj, true);
+       if (ret)
+               goto fail;
+
+       ret = i915_gem_object_get_pages(obj);
+       if (ret)
+               goto fail;
+
+       i915_gem_object_pin_pages(obj);
+       sg = obj->pages;
+       bytes = sg_copy_from_buffer(sg->sgl, sg->nents, (void *)data, size);
+       i915_gem_object_unpin_pages(obj);
+
+       if (WARN_ON(bytes != size)) {
+               DRM_ERROR("Incomplete copy, wrote %zu of %zu", bytes, size);
+               ret = -EFAULT;
+               goto fail;
+       }
+
+       return obj;
+
+fail:
+       drm_gem_object_unreference(&obj->base);
+       return ERR_PTR(ret);
+}
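
This helper serves the GuC firmware loading work that the new i915_guc_reg.h
and intel_guc_fwif.h headers in this merge prepare for: it bundles
allocation, CPU-domain setup and a scatterlist copy into one call. A
hypothetical usage sketch, assuming the caller already holds a struct
firmware *fw from request_firmware():

	struct drm_i915_gem_object *obj;

	obj = i915_gem_object_create_from_data(dev, fw->data, fw->size);
	if (IS_ERR_OR_NULL(obj))
		return obj ? PTR_ERR(obj) : -ENOMEM;

	/* obj now holds a page-aligned copy of the firmware image */
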
diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence.c
new file mode 100644
index 0000000..af1f8c4
--- /dev/null
@@ -0,0 +1,787 @@
+/*
+ * Copyright © 2008-2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <drm/drmP.h>
+#include <drm/i915_drm.h>
+#include "i915_drv.h"
+
+/**
+ * DOC: fence register handling
+ *
+ * Important to avoid confusion: "fences" in the i915 driver are not execution
+ * fences used to track command completion but hardware detiler objects which
+ * wrap a given range of the global GTT. Each platform has only a fairly limited
+ * set of these objects.
+ *
+ * Fences are used to detile GTT memory mappings. They're also connected to the
+ * hardware frontbuffer render tracking and hence interact with frontbuffer
+ * compression. Furthermore on older platforms fences are required for tiled
+ * objects used by the display engine. They can also be used by the render
+ * engine - they're required for blitter commands and are optional for render
+ * commands. But on gen4+ both display (with the exception of fbc) and rendering
+ * have their own tiling state bits and don't need fences.
+ *
+ * Also note that fences only support X and Y tiling and hence can't be used for
+ * the fancier new tiling formats like W, Ys and Yf.
+ *
+ * Finally note that because fences are such a restricted resource they're
+ * dynamically associated with objects. Furthermore fence state is committed to
+ * the hardware lazily to avoid unnecessary stalls on gen2/3. Therefore code must
+ * explicitly call i915_gem_object_get_fence() to synchronize fencing status
+ * for cpu access. Also note that some code wants an unfenced view; for those
+ * cases the fence can be removed forcefully with i915_gem_object_put_fence().
+ *
+ * Internally these functions will synchronize with userspace access by removing
+ * CPU ptes into GTT mmaps (not the GTT ptes themselves) as needed.
+ */
+
+static void i965_write_fence_reg(struct drm_device *dev, int reg,
+                                struct drm_i915_gem_object *obj)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       int fence_reg;
+       int fence_pitch_shift;
+
+       if (INTEL_INFO(dev)->gen >= 6) {
+               fence_reg = FENCE_REG_SANDYBRIDGE_0;
+               fence_pitch_shift = SANDYBRIDGE_FENCE_PITCH_SHIFT;
+       } else {
+               fence_reg = FENCE_REG_965_0;
+               fence_pitch_shift = I965_FENCE_PITCH_SHIFT;
+       }
+
+       fence_reg += reg * 8;
+
+       /* To w/a incoherency with non-atomic 64-bit register updates,
+        * we split the 64-bit update into two 32-bit writes. In order
+        * for a partial fence not to be evaluated between writes, we
+        * precede the update with write to turn off the fence register,
+        * and only enable the fence as the last step.
+        *
+        * For extra levels of paranoia, we make sure each step lands
+        * before applying the next step.
+        */
+       I915_WRITE(fence_reg, 0);
+       POSTING_READ(fence_reg);
+
+       if (obj) {
+               u32 size = i915_gem_obj_ggtt_size(obj);
+               uint64_t val;
+
+               /* Adjust fence size to match tiled area */
+               if (obj->tiling_mode != I915_TILING_NONE) {
+                       uint32_t row_size = obj->stride *
+                               (obj->tiling_mode == I915_TILING_Y ? 32 : 8);
+                       size = (size / row_size) * row_size;
+               }
+
+               val = (uint64_t)((i915_gem_obj_ggtt_offset(obj) + size - 4096) &
+                                0xfffff000) << 32;
+               val |= i915_gem_obj_ggtt_offset(obj) & 0xfffff000;
+               val |= (uint64_t)((obj->stride / 128) - 1) << fence_pitch_shift;
+               if (obj->tiling_mode == I915_TILING_Y)
+                       val |= 1 << I965_FENCE_TILING_Y_SHIFT;
+               val |= I965_FENCE_REG_VALID;
+
+               I915_WRITE(fence_reg + 4, val >> 32);
+               POSTING_READ(fence_reg + 4);
+
+               I915_WRITE(fence_reg + 0, val);
+               POSTING_READ(fence_reg);
+       } else {
+               I915_WRITE(fence_reg + 4, 0);
+               POSTING_READ(fence_reg + 4);
+       }
+}
+
+static void i915_write_fence_reg(struct drm_device *dev, int reg,
+                                struct drm_i915_gem_object *obj)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       u32 val;
+
+       if (obj) {
+               u32 size = i915_gem_obj_ggtt_size(obj);
+               int pitch_val;
+               int tile_width;
+
+               WARN((i915_gem_obj_ggtt_offset(obj) & ~I915_FENCE_START_MASK) ||
+                    (size & -size) != size ||
+                    (i915_gem_obj_ggtt_offset(obj) & (size - 1)),
+                    "object 0x%08lx [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n",
+                    i915_gem_obj_ggtt_offset(obj), obj->map_and_fenceable, size);
+
+               if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
+                       tile_width = 128;
+               else
+                       tile_width = 512;
+
+               /* Note: pitch better be a power of two tile widths */
+               pitch_val = obj->stride / tile_width;
+               pitch_val = ffs(pitch_val) - 1;
+
+               val = i915_gem_obj_ggtt_offset(obj);
+               if (obj->tiling_mode == I915_TILING_Y)
+                       val |= 1 << I830_FENCE_TILING_Y_SHIFT;
+               val |= I915_FENCE_SIZE_BITS(size);
+               val |= pitch_val << I830_FENCE_PITCH_SHIFT;
+               val |= I830_FENCE_REG_VALID;
+       } else
+               val = 0;
+
+       if (reg < 8)
+               reg = FENCE_REG_830_0 + reg * 4;
+       else
+               reg = FENCE_REG_945_8 + (reg - 8) * 4;
+
+       I915_WRITE(reg, val);
+       POSTING_READ(reg);
+}
+
+static void i830_write_fence_reg(struct drm_device *dev, int reg,
+                               struct drm_i915_gem_object *obj)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       uint32_t val;
+
+       if (obj) {
+               u32 size = i915_gem_obj_ggtt_size(obj);
+               uint32_t pitch_val;
+
+               WARN((i915_gem_obj_ggtt_offset(obj) & ~I830_FENCE_START_MASK) ||
+                    (size & -size) != size ||
+                    (i915_gem_obj_ggtt_offset(obj) & (size - 1)),
+                    "object 0x%08lx not 512K or pot-size 0x%08x aligned\n",
+                    i915_gem_obj_ggtt_offset(obj), size);
+
+               pitch_val = obj->stride / 128;
+               pitch_val = ffs(pitch_val) - 1;
+
+               val = i915_gem_obj_ggtt_offset(obj);
+               if (obj->tiling_mode == I915_TILING_Y)
+                       val |= 1 << I830_FENCE_TILING_Y_SHIFT;
+               val |= I830_FENCE_SIZE_BITS(size);
+               val |= pitch_val << I830_FENCE_PITCH_SHIFT;
+               val |= I830_FENCE_REG_VALID;
+       } else
+               val = 0;
+
+       I915_WRITE(FENCE_REG_830_0 + reg * 4, val);
+       POSTING_READ(FENCE_REG_830_0 + reg * 4);
+}
+
+inline static bool i915_gem_object_needs_mb(struct drm_i915_gem_object *obj)
+{
+       return obj && obj->base.read_domains & I915_GEM_DOMAIN_GTT;
+}
+
+static void i915_gem_write_fence(struct drm_device *dev, int reg,
+                                struct drm_i915_gem_object *obj)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+
+       /* Ensure that all CPU reads are completed before installing a fence
+        * and all writes before removing the fence.
+        */
+       if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj))
+               mb();
+
+       WARN(obj && (!obj->stride || !obj->tiling_mode),
+            "bogus fence setup with stride: 0x%x, tiling mode: %i\n",
+            obj->stride, obj->tiling_mode);
+
+       if (IS_GEN2(dev))
+               i830_write_fence_reg(dev, reg, obj);
+       else if (IS_GEN3(dev))
+               i915_write_fence_reg(dev, reg, obj);
+       else if (INTEL_INFO(dev)->gen >= 4)
+               i965_write_fence_reg(dev, reg, obj);
+
+       /* And similarly be paranoid that no direct access to this region
+        * is reordered to before the fence is installed.
+        */
+       if (i915_gem_object_needs_mb(obj))
+               mb();
+}
+
+static inline int fence_number(struct drm_i915_private *dev_priv,
+                              struct drm_i915_fence_reg *fence)
+{
+       return fence - dev_priv->fence_regs;
+}
+
+static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
+                                        struct drm_i915_fence_reg *fence,
+                                        bool enable)
+{
+       struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
+       int reg = fence_number(dev_priv, fence);
+
+       i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL);
+
+       if (enable) {
+               obj->fence_reg = reg;
+               fence->obj = obj;
+               list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list);
+       } else {
+               obj->fence_reg = I915_FENCE_REG_NONE;
+               fence->obj = NULL;
+               list_del_init(&fence->lru_list);
+       }
+       obj->fence_dirty = false;
+}
+
+static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
+{
+       if (obj->tiling_mode)
+               i915_gem_release_mmap(obj);
+
+       /* As we do not have an associated fence register, we will force
+        * a tiling change if we ever need to acquire one.
+        */
+       obj->fence_dirty = false;
+       obj->fence_reg = I915_FENCE_REG_NONE;
+}
+
+static int
+i915_gem_object_wait_fence(struct drm_i915_gem_object *obj)
+{
+       if (obj->last_fenced_req) {
+               int ret = i915_wait_request(obj->last_fenced_req);
+               if (ret)
+                       return ret;
+
+               i915_gem_request_assign(&obj->last_fenced_req, NULL);
+       }
+
+       return 0;
+}
+
+/**
+ * i915_gem_object_put_fence - force-remove fence for an object
+ * @obj: object to map through a fence reg
+ *
+ * This function force-removes any fence from the given object, which is useful
+ * if the kernel wants to do untiled GTT access.
+ *
+ * Returns:
+ *
+ * 0 on success, negative error code on failure.
+ */
+int
+i915_gem_object_put_fence(struct drm_i915_gem_object *obj)
+{
+       struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
+       struct drm_i915_fence_reg *fence;
+       int ret;
+
+       ret = i915_gem_object_wait_fence(obj);
+       if (ret)
+               return ret;
+
+       if (obj->fence_reg == I915_FENCE_REG_NONE)
+               return 0;
+
+       fence = &dev_priv->fence_regs[obj->fence_reg];
+
+       if (WARN_ON(fence->pin_count))
+               return -EBUSY;
+
+       i915_gem_object_fence_lost(obj);
+       i915_gem_object_update_fence(obj, fence, false);
+
+       return 0;
+}
+
+static struct drm_i915_fence_reg *
+i915_find_fence_reg(struct drm_device *dev)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       struct drm_i915_fence_reg *reg, *avail;
+       int i;
+
+       /* First try to find a free reg */
+       avail = NULL;
+       for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
+               reg = &dev_priv->fence_regs[i];
+               if (!reg->obj)
+                       return reg;
+
+               if (!reg->pin_count)
+                       avail = reg;
+       }
+
+       if (avail == NULL)
+               goto deadlock;
+
+       /* None available, try to steal one or wait for a user to finish */
+       list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) {
+               if (reg->pin_count)
+                       continue;
+
+               return reg;
+       }
+
+deadlock:
+       /* Wait for completion of pending flips which consume fences */
+       if (intel_has_pending_fb_unpin(dev))
+               return ERR_PTR(-EAGAIN);
+
+       return ERR_PTR(-EDEADLK);
+}
+
+/**
+ * i915_gem_object_get_fence - set up fencing for an object
+ * @obj: object to map through a fence reg
+ *
+ * When mapping objects through the GTT, userspace wants to be able to write
+ * to them without having to worry about swizzling if the object is tiled.
+ * This function walks the fence regs looking for a free one for @obj,
+ * stealing one if it can't find any.
+ *
+ * It then sets up the reg based on the object's properties: address, pitch
+ * and tiling format.
+ *
+ * For an untiled surface, this removes any existing fence.
+ *
+ * Returns:
+ *
+ * 0 on success, negative error code on failure.
+ */
+int
+i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
+{
+       struct drm_device *dev = obj->base.dev;
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       bool enable = obj->tiling_mode != I915_TILING_NONE;
+       struct drm_i915_fence_reg *reg;
+       int ret;
+
+       /* Have we updated the tiling parameters upon the object and so
+        * will need to serialise the write to the associated fence register?
+        */
+       if (obj->fence_dirty) {
+               ret = i915_gem_object_wait_fence(obj);
+               if (ret)
+                       return ret;
+       }
+
+       /* Just update our place in the LRU if our fence is getting reused. */
+       if (obj->fence_reg != I915_FENCE_REG_NONE) {
+               reg = &dev_priv->fence_regs[obj->fence_reg];
+               if (!obj->fence_dirty) {
+                       list_move_tail(&reg->lru_list,
+                                      &dev_priv->mm.fence_list);
+                       return 0;
+               }
+       } else if (enable) {
+               if (WARN_ON(!obj->map_and_fenceable))
+                       return -EINVAL;
+
+               reg = i915_find_fence_reg(dev);
+               if (IS_ERR(reg))
+                       return PTR_ERR(reg);
+
+               if (reg->obj) {
+                       struct drm_i915_gem_object *old = reg->obj;
+
+                       ret = i915_gem_object_wait_fence(old);
+                       if (ret)
+                               return ret;
+
+                       i915_gem_object_fence_lost(old);
+               }
+       } else
+               return 0;
+
+       i915_gem_object_update_fence(obj, reg, enable);
+
+       return 0;
+}
+
+/**
+ * i915_gem_object_pin_fence - pin fencing state
+ * @obj: object to pin fencing for
+ *
+ * This pins the fencing state (whether tiled or untiled) to make sure the
+ * object is ready to be used as a scanout target. Fencing status must be
+ * synchronized first by calling i915_gem_object_get_fence().
+ *
+ * The resulting fence pin reference must be released again with
+ * i915_gem_object_unpin_fence().
+ *
+ * Returns:
+ *
+ * True if the object has a fence, false otherwise.
+ */
+bool
+i915_gem_object_pin_fence(struct drm_i915_gem_object *obj)
+{
+       if (obj->fence_reg != I915_FENCE_REG_NONE) {
+               struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
+               struct i915_vma *ggtt_vma = i915_gem_obj_to_ggtt(obj);
+
+               WARN_ON(!ggtt_vma ||
+                       dev_priv->fence_regs[obj->fence_reg].pin_count >
+                       ggtt_vma->pin_count);
+               dev_priv->fence_regs[obj->fence_reg].pin_count++;
+               return true;
+       } else
+               return false;
+}
+
+/**
+ * i915_gem_object_unpin_fence - unpin fencing state
+ * @obj: object to unpin fencing for
+ *
+ * This releases the fence pin reference acquired through
+ * i915_gem_object_pin_fence(). It will handle both objects with and without an
+ * attached fence correctly; callers do not need to distinguish this.
+ */
+void
+i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj)
+{
+       if (obj->fence_reg != I915_FENCE_REG_NONE) {
+               struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
+               WARN_ON(dev_priv->fence_regs[obj->fence_reg].pin_count <= 0);
+               dev_priv->fence_regs[obj->fence_reg].pin_count--;
+       }
+}
+
+/**
+ * i915_gem_restore_fences - restore fence state
+ * @dev: DRM device
+ *
+ * Restore the hw fence state to match the software tracking again, to be called
+ * after a gpu reset and on resume.
+ */
+void i915_gem_restore_fences(struct drm_device *dev)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       int i;
+
+       for (i = 0; i < dev_priv->num_fence_regs; i++) {
+               struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
+
+               /*
+                * Commit delayed tiling changes if we have an object still
+                * attached to the fence, otherwise just clear the fence.
+                */
+               if (reg->obj) {
+                       i915_gem_object_update_fence(reg->obj, reg,
+                                                    reg->obj->tiling_mode);
+               } else {
+                       i915_gem_write_fence(dev, i, NULL);
+               }
+       }
+}
+
+/**
+ * DOC: tiling swizzling details
+ *
+ * The idea behind tiling is to increase cache hit rates by rearranging
+ * pixel data so that a group of pixel accesses are in the same cacheline.
+ * Performance improvement from doing this on the back/depth buffer are on
+ * the order of 30%.
+ *
+ * Intel architectures make this somewhat more complicated, though, by
+ * adjustments made to addressing of data when the memory is in interleaved
+ * mode (matched pairs of DIMMS) to improve memory bandwidth.
+ * For interleaved memory, the CPU sends every sequential 64 bytes
+ * to an alternate memory channel so it can get the bandwidth from both.
+ *
+ * The GPU also rearranges its accesses for increased bandwidth to interleaved
+ * memory, and it matches what the CPU does for non-tiled.  However, when tiled
+ * it does it a little differently, since one walks addresses not just in the
+ * X direction but also Y.  So, along with alternating channels when bit
+ * 6 of the address flips, it also alternates when other bits flip --  Bits 9
+ * (every 512 bytes, an X tile scanline) and 10 (every two X tile scanlines)
+ * are common to both the 915 and 965-class hardware.
+ *
+ * The CPU also sometimes XORs in higher bits as well, to improve
+ * bandwidth doing strided access like we do so frequently in graphics.  This
+ * is called "Channel XOR Randomization" in the MCH documentation.  The result
+ * is that the CPU is XORing in either bit 11 or bit 17 to bit 6 of its address
+ * decode.
+ *
+ * All of this bit 6 XORing has an effect on our memory management,
+ * as we need to make sure that the 3d driver can correctly address object
+ * contents.
+ *
+ * If we don't have interleaved memory, all tiling is safe and no swizzling is
+ * required.
+ *
+ * When bit 17 is XORed in, we simply refuse to tile at all.  Bit
+ * 17 is not just a page offset, so as we page an object out and back in,
+ * individual pages in it will have different bit 17 addresses, resulting in
+ * each 64 bytes being swapped with its neighbor!
+ *
+ * Otherwise, if interleaved, we have to tell the 3d driver what the address
+ * swizzling it needs to do is, since it's writing with the CPU to the pages
+ * (bit 6 and potentially bit 11 XORed in), and the GPU is reading from the
+ * pages (bit 6, 9, and 10 XORed in), resulting in a cumulative bit swizzling
+ * required by the CPU of XORing in bit 6, 9, 10, and potentially 11, in order
+ * to match what the GPU expects.
+ */
+
+/**
+ * i915_gem_detect_bit_6_swizzle - detect bit 6 swizzling pattern
+ * @dev: DRM device
+ *
+ * Detects bit 6 swizzling of address lookup between IGD access and CPU
+ * access through main memory.
+ */
+void
+i915_gem_detect_bit_6_swizzle(struct drm_device *dev)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       uint32_t swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
+       uint32_t swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
+
+       if (INTEL_INFO(dev)->gen >= 8 || IS_VALLEYVIEW(dev)) {
+               /*
+                * On BDW+, swizzling is not used. We leave the CPU memory
+                * controller in charge of optimizing memory accesses without
+                * the extra address manipulation GPU side.
+                *
+                * VLV and CHV don't have GPU swizzling.
+                */
+               swizzle_x = I915_BIT_6_SWIZZLE_NONE;
+               swizzle_y = I915_BIT_6_SWIZZLE_NONE;
+       } else if (INTEL_INFO(dev)->gen >= 6) {
+               if (dev_priv->preserve_bios_swizzle) {
+                       if (I915_READ(DISP_ARB_CTL) &
+                           DISP_TILE_SURFACE_SWIZZLING) {
+                               swizzle_x = I915_BIT_6_SWIZZLE_9_10;
+                               swizzle_y = I915_BIT_6_SWIZZLE_9;
+                       } else {
+                               swizzle_x = I915_BIT_6_SWIZZLE_NONE;
+                               swizzle_y = I915_BIT_6_SWIZZLE_NONE;
+                       }
+               } else {
+                       uint32_t dimm_c0, dimm_c1;
+                       dimm_c0 = I915_READ(MAD_DIMM_C0);
+                       dimm_c1 = I915_READ(MAD_DIMM_C1);
+                       dimm_c0 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
+                       dimm_c1 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
+                       /* Enable swizzling when the channels are populated
+                        * with identically sized dimms. We don't need to check
+                        * the 3rd channel because no cpu with gpu attached
+                        * ships in that configuration. Also, swizzling only
+                        * makes sense for 2 channels anyway. */
+                       if (dimm_c0 == dimm_c1) {
+                               swizzle_x = I915_BIT_6_SWIZZLE_9_10;
+                               swizzle_y = I915_BIT_6_SWIZZLE_9;
+                       } else {
+                               swizzle_x = I915_BIT_6_SWIZZLE_NONE;
+                               swizzle_y = I915_BIT_6_SWIZZLE_NONE;
+                       }
+               }
+       } else if (IS_GEN5(dev)) {
+               /* On Ironlake, whatever the DRAM config, the GPU always
+                * does the same swizzling setup.
+                */
+               swizzle_x = I915_BIT_6_SWIZZLE_9_10;
+               swizzle_y = I915_BIT_6_SWIZZLE_9;
+       } else if (IS_GEN2(dev)) {
+               /* As far as we know, the 865 doesn't have these bit 6
+                * swizzling issues.
+                */
+               swizzle_x = I915_BIT_6_SWIZZLE_NONE;
+               swizzle_y = I915_BIT_6_SWIZZLE_NONE;
+       } else if (IS_MOBILE(dev) || (IS_GEN3(dev) && !IS_G33(dev))) {
+               uint32_t dcc;
+
+               /* On 9xx chipsets, channel interleave by the CPU is
+                * determined by DCC.  For single-channel, neither the CPU
+                * nor the GPU do swizzling.  For dual channel interleaved,
+                * the GPU's interleave is bit 9 and 10 for X tiled, and bit
+                * 9 for Y tiled.  The CPU's interleave is independent, and
+                * can be based on either bit 11 (haven't seen this yet) or
+                * bit 17 (common).
+                */
+               dcc = I915_READ(DCC);
+               switch (dcc & DCC_ADDRESSING_MODE_MASK) {
+               case DCC_ADDRESSING_MODE_SINGLE_CHANNEL:
+               case DCC_ADDRESSING_MODE_DUAL_CHANNEL_ASYMMETRIC:
+                       swizzle_x = I915_BIT_6_SWIZZLE_NONE;
+                       swizzle_y = I915_BIT_6_SWIZZLE_NONE;
+                       break;
+               case DCC_ADDRESSING_MODE_DUAL_CHANNEL_INTERLEAVED:
+                       if (dcc & DCC_CHANNEL_XOR_DISABLE) {
+                               /* This is the base swizzling by the GPU for
+                                * tiled buffers.
+                                */
+                               swizzle_x = I915_BIT_6_SWIZZLE_9_10;
+                               swizzle_y = I915_BIT_6_SWIZZLE_9;
+                       } else if ((dcc & DCC_CHANNEL_XOR_BIT_17) == 0) {
+                               /* Bit 11 swizzling by the CPU in addition. */
+                               swizzle_x = I915_BIT_6_SWIZZLE_9_10_11;
+                               swizzle_y = I915_BIT_6_SWIZZLE_9_11;
+                       } else {
+                               /* Bit 17 swizzling by the CPU in addition. */
+                               swizzle_x = I915_BIT_6_SWIZZLE_9_10_17;
+                               swizzle_y = I915_BIT_6_SWIZZLE_9_17;
+                       }
+                       break;
+               }
+
+               /* check for L-shaped memory aka modified enhanced addressing */
+               if (IS_GEN4(dev)) {
+                       uint32_t ddc2 = I915_READ(DCC2);
+
+                       if (!(ddc2 & DCC2_MODIFIED_ENHANCED_DISABLE))
+                               dev_priv->quirks |= QUIRK_PIN_SWIZZLED_PAGES;
+               }
+
+               if (dcc == 0xffffffff) {
+                       DRM_ERROR("Couldn't read from MCHBAR.  "
+                                 "Disabling tiling.\n");
+                       swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
+                       swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
+               }
+       } else {
+               /* The 965, G33, and newer, have a very flexible memory
+                * configuration.  It will enable dual-channel mode
+                * (interleaving) on as much memory as it can, and the GPU
+                * will additionally sometimes enable different bit 6
+                * swizzling for tiled objects from the CPU.
+                *
+                * Here's what I found on the G965:
+                *    slot fill         memory size  swizzling
+                * 0A   0B   1A   1B    1-ch   2-ch
+                * 512  0    0    0     512    0     O
+                * 512  0    512  0     16     1008  X
+                * 512  0    0    512   16     1008  X
+                * 0    512  0    512   16     1008  X
+                * 1024 1024 1024 0     2048   1024  O
+                *
+                * We could probably detect this based on either the DRB
+                * matching, which was the case for the swizzling required in
+                * the table above, or from the 1-ch value being less than
+                * the minimum size of a rank.
+                */
+               if (I915_READ16(C0DRB3) != I915_READ16(C1DRB3)) {
+                       swizzle_x = I915_BIT_6_SWIZZLE_NONE;
+                       swizzle_y = I915_BIT_6_SWIZZLE_NONE;
+               } else {
+                       swizzle_x = I915_BIT_6_SWIZZLE_9_10;
+                       swizzle_y = I915_BIT_6_SWIZZLE_9;
+               }
+       }
+
+       dev_priv->mm.bit_6_swizzle_x = swizzle_x;
+       dev_priv->mm.bit_6_swizzle_y = swizzle_y;
+}
+
+/*
+ * Swap every 64 bytes of this page around, to account for it having a new
+ * bit 17 of its physical address and therefore being interpreted differently
+ * by the GPU.
+ */
+static void
+i915_gem_swizzle_page(struct page *page)
+{
+       char temp[64];
+       char *vaddr;
+       int i;
+
+       vaddr = kmap(page);
+
+       for (i = 0; i < PAGE_SIZE; i += 128) {
+               memcpy(temp, &vaddr[i], 64);
+               memcpy(&vaddr[i], &vaddr[i + 64], 64);
+               memcpy(&vaddr[i + 64], temp, 64);
+       }
+
+       kunmap(page);
+}
+
+/**
+ * i915_gem_object_do_bit_17_swizzle - fixup bit 17 swizzling
+ * @obj: i915 GEM buffer object
+ *
+ * This function fixes up the swizzling in case any page frame number for this
+ * object has changed in bit 17 since that state has been saved with
+ * i915_gem_object_save_bit_17_swizzle().
+ *
+ * This is called when pinning backing storage again, since the kernel is free
+ * to move unpinned backing storage around (either by directly moving pages or
+ * by swapping them out and back in again).
+ */
+void
+i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj)
+{
+       struct sg_page_iter sg_iter;
+       int i;
+
+       if (obj->bit_17 == NULL)
+               return;
+
+       i = 0;
+       for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
+               struct page *page = sg_page_iter_page(&sg_iter);
+               char new_bit_17 = page_to_phys(page) >> 17;
+               if ((new_bit_17 & 0x1) !=
+                   (test_bit(i, obj->bit_17) != 0)) {
+                       i915_gem_swizzle_page(page);
+                       set_page_dirty(page);
+               }
+               i++;
+       }
+}
+
+/**
+ * i915_gem_object_save_bit_17_swizzle - save bit 17 swizzling
+ * @obj: i915 GEM buffer object
+ *
+ * This function saves the bit 17 of each page frame number so that swizzling
+ * can be fixed up later on with i915_gem_object_do_bit_17_swizzle(). This must
+ * be called before the backing storage can be unpinned.
+ */
+void
+i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj)
+{
+       struct sg_page_iter sg_iter;
+       int page_count = obj->base.size >> PAGE_SHIFT;
+       int i;
+
+       if (obj->bit_17 == NULL) {
+               obj->bit_17 = kcalloc(BITS_TO_LONGS(page_count),
+                                     sizeof(long), GFP_KERNEL);
+               if (obj->bit_17 == NULL) {
+                       DRM_ERROR("Failed to allocate memory for bit 17 "
+                                 "record\n");
+                       return;
+               }
+       }
+
+       i = 0;
+       for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
+               if (page_to_phys(sg_page_iter_page(&sg_iter)) & (1 << 17))
+                       __set_bit(i, obj->bit_17);
+               else
+                       __clear_bit(i, obj->bit_17);
+               i++;
+       }
+}
index a0201fc94d256431b41aa5ca26dbef0fae565257..5026a6267a88034b3cae18acc0674952a711521a 100644 (file)
@@ -73,6 +73,24 @@ free_gem:
        return ret;
 }
 
+/*
+ * Macro to add commands to auxiliary batch.
+ * This macro only checks for page overflow before inserting the commands;
+ * this is sufficient as the null state generator makes the final batch
+ * with two passes to build command and state separately. At this point
+ * the size of both is known and it compacts them by relocating the state
+ * right after the commands, taking care of alignment, so we should have
+ * sufficient space below them for adding new commands.
+ */
+#define OUT_BATCH(batch, i, val)                               \
+       do {                                                    \
+               if (WARN_ON((i) >= PAGE_SIZE / sizeof(u32))) {  \
+                       ret = -ENOSPC;                          \
+                       goto err_out;                           \
+               }                                               \
+               (batch)[(i)++] = (val);                         \
+       } while (0)
+
 static int render_state_setup(struct render_state *so)
 {
        const struct intel_renderstate_rodata *rodata = so->rodata;
@@ -96,8 +114,10 @@ static int render_state_setup(struct render_state *so)
                        s = lower_32_bits(r);
                        if (so->gen >= 8) {
                                if (i + 1 >= rodata->batch_items ||
-                                   rodata->batch[i + 1] != 0)
-                                       return -EINVAL;
+                                   rodata->batch[i + 1] != 0) {
+                                       ret = -EINVAL;
+                                       goto err_out;
+                               }
 
                                d[i++] = s;
                                s = upper_32_bits(r);
@@ -108,6 +128,21 @@ static int render_state_setup(struct render_state *so)
 
                d[i++] = s;
        }
+
+       while (i % CACHELINE_DWORDS)
+               OUT_BATCH(d, i, MI_NOOP);
+
+       so->aux_batch_offset = i * sizeof(u32);
+
+       OUT_BATCH(d, i, MI_BATCH_BUFFER_END);
+       so->aux_batch_size = (i * sizeof(u32)) - so->aux_batch_offset;
+
+       /*
+        * Since we are sending length, we need to strictly conform to
+        * all requirements. For Gen2 this must be a multiple of 8.
+        */
+       so->aux_batch_size = ALIGN(so->aux_batch_size, 8);
+
        kunmap(page);
 
        ret = i915_gem_object_set_to_gtt_domain(so->obj, false);
@@ -120,8 +155,14 @@ static int render_state_setup(struct render_state *so)
        }
 
        return 0;
+
+err_out:
+       kunmap(page);
+       return ret;
 }
 
+#undef OUT_BATCH
+
 void i915_gem_render_state_fini(struct render_state *so)
 {
        i915_gem_object_ggtt_unpin(so->obj);
@@ -170,6 +211,16 @@ int i915_gem_render_state_init(struct drm_i915_gem_request *req)
        if (ret)
                goto out;
 
+       if (so.aux_batch_size > 8) {
+               ret = req->ring->dispatch_execbuffer(req,
+                                                    (so.ggtt_offset +
+                                                     so.aux_batch_offset),
+                                                    so.aux_batch_size,
+                                                    I915_DISPATCH_SECURE);
+               if (ret)
+                       goto out;
+       }
+
        i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req);
 
 out:
index 7aa73728178ace3a5e03851ace0e753d0d21a4c8..e641bb093a903bba18e02cd0cd156e6070a40a62 100644 (file)
@@ -37,6 +37,8 @@ struct render_state {
        struct drm_i915_gem_object *obj;
        u64 ggtt_offset;
        int gen;
+       u32 aux_batch_size;
+       u32 aux_batch_offset;
 };
 
 int i915_gem_render_state_init(struct drm_i915_gem_request *req);
index 633bd1fcab6925881048e7310f9aa40e4f9db868..ac3eb566c9d259fa2d6b7db077a9c4faca9fc223 100644 (file)
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
 
-/** @file i915_gem_tiling.c
- *
- * Support for managing tiling state of buffer objects.
- *
- * The idea behind tiling is to increase cache hit rates by rearranging
- * pixel data so that a group of pixel accesses are in the same cacheline.
- * Performance improvement from doing this on the back/depth buffer are on
- * the order of 30%.
- *
- * Intel architectures make this somewhat more complicated, though, by
- * adjustments made to addressing of data when the memory is in interleaved
- * mode (matched pairs of DIMMS) to improve memory bandwidth.
- * For interleaved memory, the CPU sends every sequential 64 bytes
- * to an alternate memory channel so it can get the bandwidth from both.
- *
- * The GPU also rearranges its accesses for increased bandwidth to interleaved
- * memory, and it matches what the CPU does for non-tiled.  However, when tiled
- * it does it a little differently, since one walks addresses not just in the
- * X direction but also Y.  So, along with alternating channels when bit
- * 6 of the address flips, it also alternates when other bits flip --  Bits 9
- * (every 512 bytes, an X tile scanline) and 10 (every two X tile scanlines)
- * are common to both the 915 and 965-class hardware.
- *
- * The CPU also sometimes XORs in higher bits as well, to improve
- * bandwidth doing strided access like we do so frequently in graphics.  This
- * is called "Channel XOR Randomization" in the MCH documentation.  The result
- * is that the CPU is XORing in either bit 11 or bit 17 to bit 6 of its address
- * decode.
+/**
+ * DOC: buffer object tiling
  *
- * All of this bit 6 XORing has an effect on our memory management,
- * as we need to make sure that the 3d driver can correctly address object
- * contents.
+ * i915_gem_set_tiling() and i915_gem_get_tiling() are the userspace interface to
+ * declare fence register requirements.
  *
- * If we don't have interleaved memory, all tiling is safe and no swizzling is
- * required.
+ * In principle GEM doesn't care at all about the internal data layout of an
+ * object, and hence it also doesn't care about tiling or swizzling. There are
+ * two exceptions:
  *
- * When bit 17 is XORed in, we simply refuse to tile at all.  Bit
- * 17 is not just a page offset, so as we page an objet out and back in,
- * individual pages in it will have different bit 17 addresses, resulting in
- * each 64 bytes being swapped with its neighbor!
+ * - For X and Y tiling the hardware provides detilers for CPU access, so called
+ *   fences. Since there's only a limited amount of them the kernel must manage
+ *   these, and therefore userspace must tell the kernel the object tiling if it
+ *   wants to use fences for detiling.
+ * - Gen3 and gen4 platforms have a swizzling pattern for tiled objects which
+ *   depends upon the physical page frame number. When swapping such objects the
+ *   page frame number might change and the kernel must be able to fix this up
+ *   and hence must know the tiling. Note that on a subset of platforms with
+ *   asymmetric memory channel population the swizzling pattern changes in an
+ *   unknown way, and for those the kernel simply forbids swapping completely.
  *
- * Otherwise, if interleaved, we have to tell the 3d driver what the address
- * swizzling it needs to do is, since it's writing with the CPU to the pages
- * (bit 6 and potentially bit 11 XORed in), and the GPU is reading from the
- * pages (bit 6, 9, and 10 XORed in), resulting in a cumulative bit swizzling
- * required by the CPU of XORing in bit 6, 9, 10, and potentially 11, in order
- * to match what the GPU expects.
- */
-
-/**
- * Detects bit 6 swizzling of address lookup between IGD access and CPU
- * access through main memory.
+ * Since neither of these applies to the new tiling layouts on modern
+ * platforms, like W, Ys and Yf tiling, GEM only allows object tiling to be set
+ * to X or Y tiled. Anything else can be handled entirely in userspace, without
+ * the kernel's involvement.
  */
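
For illustration, a minimal userspace sketch of declaring X tiling through
this interface, assuming libdrm's drmIoctl(); the helper name is hypothetical
and error handling is elided:

#include <stdint.h>
#include <xf86drm.h>
#include <drm/i915_drm.h>

static int set_x_tiled(int fd, uint32_t handle, uint32_t stride)
{
        struct drm_i915_gem_set_tiling arg = {
                .handle = handle,
                .tiling_mode = I915_TILING_X,
                .stride = stride,
        };

        /* on success the kernel reports the required bit 6 swizzling
         * back in arg.swizzle_mode */
        return drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_TILING, &arg);
}
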
-void
-i915_gem_detect_bit_6_swizzle(struct drm_device *dev)
-{
-       struct drm_i915_private *dev_priv = dev->dev_private;
-       uint32_t swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
-       uint32_t swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
-
-       if (INTEL_INFO(dev)->gen >= 8 || IS_VALLEYVIEW(dev)) {
-               /*
-                * On BDW+, swizzling is not used. We leave the CPU memory
-                * controller in charge of optimizing memory accesses without
-                * the extra address manipulation GPU side.
-                *
-                * VLV and CHV don't have GPU swizzling.
-                */
-               swizzle_x = I915_BIT_6_SWIZZLE_NONE;
-               swizzle_y = I915_BIT_6_SWIZZLE_NONE;
-       } else if (INTEL_INFO(dev)->gen >= 6) {
-               if (dev_priv->preserve_bios_swizzle) {
-                       if (I915_READ(DISP_ARB_CTL) &
-                           DISP_TILE_SURFACE_SWIZZLING) {
-                               swizzle_x = I915_BIT_6_SWIZZLE_9_10;
-                               swizzle_y = I915_BIT_6_SWIZZLE_9;
-                       } else {
-                               swizzle_x = I915_BIT_6_SWIZZLE_NONE;
-                               swizzle_y = I915_BIT_6_SWIZZLE_NONE;
-                       }
-               } else {
-                       uint32_t dimm_c0, dimm_c1;
-                       dimm_c0 = I915_READ(MAD_DIMM_C0);
-                       dimm_c1 = I915_READ(MAD_DIMM_C1);
-                       dimm_c0 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
-                       dimm_c1 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
-                       /* Enable swizzling when the channels are populated
-                        * with identically sized dimms. We don't need to check
-                        * the 3rd channel because no cpu with gpu attached
-                        * ships in that configuration. Also, swizzling only
-                        * makes sense for 2 channels anyway. */
-                       if (dimm_c0 == dimm_c1) {
-                               swizzle_x = I915_BIT_6_SWIZZLE_9_10;
-                               swizzle_y = I915_BIT_6_SWIZZLE_9;
-                       } else {
-                               swizzle_x = I915_BIT_6_SWIZZLE_NONE;
-                               swizzle_y = I915_BIT_6_SWIZZLE_NONE;
-                       }
-               }
-       } else if (IS_GEN5(dev)) {
-               /* On Ironlake whatever DRAM config, GPU always do
-                * same swizzling setup.
-                */
-               swizzle_x = I915_BIT_6_SWIZZLE_9_10;
-               swizzle_y = I915_BIT_6_SWIZZLE_9;
-       } else if (IS_GEN2(dev)) {
-               /* As far as we know, the 865 doesn't have these bit 6
-                * swizzling issues.
-                */
-               swizzle_x = I915_BIT_6_SWIZZLE_NONE;
-               swizzle_y = I915_BIT_6_SWIZZLE_NONE;
-       } else if (IS_MOBILE(dev) || (IS_GEN3(dev) && !IS_G33(dev))) {
-               uint32_t dcc;
-
-               /* On 9xx chipsets, channel interleave by the CPU is
-                * determined by DCC.  For single-channel, neither the CPU
-                * nor the GPU do swizzling.  For dual channel interleaved,
-                * the GPU's interleave is bit 9 and 10 for X tiled, and bit
-                * 9 for Y tiled.  The CPU's interleave is independent, and
-                * can be based on either bit 11 (haven't seen this yet) or
-                * bit 17 (common).
-                */
-               dcc = I915_READ(DCC);
-               switch (dcc & DCC_ADDRESSING_MODE_MASK) {
-               case DCC_ADDRESSING_MODE_SINGLE_CHANNEL:
-               case DCC_ADDRESSING_MODE_DUAL_CHANNEL_ASYMMETRIC:
-                       swizzle_x = I915_BIT_6_SWIZZLE_NONE;
-                       swizzle_y = I915_BIT_6_SWIZZLE_NONE;
-                       break;
-               case DCC_ADDRESSING_MODE_DUAL_CHANNEL_INTERLEAVED:
-                       if (dcc & DCC_CHANNEL_XOR_DISABLE) {
-                               /* This is the base swizzling by the GPU for
-                                * tiled buffers.
-                                */
-                               swizzle_x = I915_BIT_6_SWIZZLE_9_10;
-                               swizzle_y = I915_BIT_6_SWIZZLE_9;
-                       } else if ((dcc & DCC_CHANNEL_XOR_BIT_17) == 0) {
-                               /* Bit 11 swizzling by the CPU in addition. */
-                               swizzle_x = I915_BIT_6_SWIZZLE_9_10_11;
-                               swizzle_y = I915_BIT_6_SWIZZLE_9_11;
-                       } else {
-                               /* Bit 17 swizzling by the CPU in addition. */
-                               swizzle_x = I915_BIT_6_SWIZZLE_9_10_17;
-                               swizzle_y = I915_BIT_6_SWIZZLE_9_17;
-                       }
-                       break;
-               }
-
-               /* check for L-shaped memory aka modified enhanced addressing */
-               if (IS_GEN4(dev)) {
-                       uint32_t ddc2 = I915_READ(DCC2);
-
-                       if (!(ddc2 & DCC2_MODIFIED_ENHANCED_DISABLE))
-                               dev_priv->quirks |= QUIRK_PIN_SWIZZLED_PAGES;
-               }
-
-               if (dcc == 0xffffffff) {
-                       DRM_ERROR("Couldn't read from MCHBAR.  "
-                                 "Disabling tiling.\n");
-                       swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
-                       swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
-               }
-       } else {
-               /* The 965, G33, and newer, have a very flexible memory
-                * configuration.  It will enable dual-channel mode
-                * (interleaving) on as much memory as it can, and the GPU
-                * will additionally sometimes enable different bit 6
-                * swizzling for tiled objects from the CPU.
-                *
-                * Here's what I found on the G965:
-                *    slot fill         memory size  swizzling
-                * 0A   0B   1A   1B    1-ch   2-ch
-                * 512  0    0    0     512    0     O
-                * 512  0    512  0     16     1008  X
-                * 512  0    0    512   16     1008  X
-                * 0    512  0    512   16     1008  X
-                * 1024 1024 1024 0     2048   1024  O
-                *
-                * We could probably detect this based on either the DRB
-                * matching, which was the case for the swizzling required in
-                * the table above, or from the 1-ch value being less than
-                * the minimum size of a rank.
-                */
-               if (I915_READ16(C0DRB3) != I915_READ16(C1DRB3)) {
-                       swizzle_x = I915_BIT_6_SWIZZLE_NONE;
-                       swizzle_y = I915_BIT_6_SWIZZLE_NONE;
-               } else {
-                       swizzle_x = I915_BIT_6_SWIZZLE_9_10;
-                       swizzle_y = I915_BIT_6_SWIZZLE_9;
-               }
-       }
-
-       dev_priv->mm.bit_6_swizzle_x = swizzle_x;
-       dev_priv->mm.bit_6_swizzle_y = swizzle_y;
-}
 
 /* Check pitch constraints for all chips & tiling formats */
 static bool
@@ -313,8 +144,18 @@ i915_gem_object_fence_ok(struct drm_i915_gem_object *obj, int tiling_mode)
 }
 
 /**
+ * i915_gem_set_tiling - IOCTL handler to set tiling mode
+ * @dev: DRM device
+ * @data: data pointer for the ioctl
+ * @file: DRM file for the ioctl call
+ *
  * Sets the tiling mode of an object, returning the required swizzling of
  * bit 6 of addresses in the object.
+ *
+ * Called by the user via ioctl.
+ *
+ * Returns:
+ * Zero on success, negative errno on failure.
  */
 int
 i915_gem_set_tiling(struct drm_device *dev, void *data,
@@ -432,7 +273,17 @@ err:
 }
 
 /**
+ * i915_gem_get_tiling - IOCTL handler to get tiling mode
+ * @dev: DRM device
+ * @data: data pointer for the ioctl
+ * @file: DRM file for the ioctl call
+ *
  * Returns the current tiling mode and required bit 6 swizzling for the object.
+ *
+ * Called by the user via ioctl.
+ *
+ * Returns:
+ * Zero on success, negative errno on failure.
  */
 int
 i915_gem_get_tiling(struct drm_device *dev, void *data,
@@ -475,75 +326,3 @@ i915_gem_get_tiling(struct drm_device *dev, void *data,
 
        return 0;
 }
-
-/**
- * Swap every 64 bytes of this page around, to account for it having a new
- * bit 17 of its physical address and therefore being interpreted differently
- * by the GPU.
- */
-static void
-i915_gem_swizzle_page(struct page *page)
-{
-       char temp[64];
-       char *vaddr;
-       int i;
-
-       vaddr = kmap(page);
-
-       for (i = 0; i < PAGE_SIZE; i += 128) {
-               memcpy(temp, &vaddr[i], 64);
-               memcpy(&vaddr[i], &vaddr[i + 64], 64);
-               memcpy(&vaddr[i + 64], temp, 64);
-       }
-
-       kunmap(page);
-}
-
-void
-i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj)
-{
-       struct sg_page_iter sg_iter;
-       int i;
-
-       if (obj->bit_17 == NULL)
-               return;
-
-       i = 0;
-       for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
-               struct page *page = sg_page_iter_page(&sg_iter);
-               char new_bit_17 = page_to_phys(page) >> 17;
-               if ((new_bit_17 & 0x1) !=
-                   (test_bit(i, obj->bit_17) != 0)) {
-                       i915_gem_swizzle_page(page);
-                       set_page_dirty(page);
-               }
-               i++;
-       }
-}
-
-void
-i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj)
-{
-       struct sg_page_iter sg_iter;
-       int page_count = obj->base.size >> PAGE_SHIFT;
-       int i;
-
-       if (obj->bit_17 == NULL) {
-               obj->bit_17 = kcalloc(BITS_TO_LONGS(page_count),
-                                     sizeof(long), GFP_KERNEL);
-               if (obj->bit_17 == NULL) {
-                       DRM_ERROR("Failed to allocate memory for bit 17 "
-                                 "record\n");
-                       return;
-               }
-       }
-
-       i = 0;
-       for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
-               if (page_to_phys(sg_page_iter_page(&sg_iter)) & (1 << 17))
-                       __set_bit(i, obj->bit_17);
-               else
-                       __clear_bit(i, obj->bit_17);
-               i++;
-       }
-}
diff --git a/drivers/gpu/drm/i915/i915_guc_reg.h b/drivers/gpu/drm/i915/i915_guc_reg.h
new file mode 100644 (file)
index 0000000..ccdc6c8
--- /dev/null
@@ -0,0 +1,102 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+#ifndef _I915_GUC_REG_H_
+#define _I915_GUC_REG_H_
+
+/* Definitions of GuC H/W registers, bits, etc */
+
+#define GUC_STATUS                     0xc000
+#define   GS_BOOTROM_SHIFT             1
+#define   GS_BOOTROM_MASK                (0x7F << GS_BOOTROM_SHIFT)
+#define   GS_BOOTROM_RSA_FAILED                  (0x50 << GS_BOOTROM_SHIFT)
+#define   GS_UKERNEL_SHIFT             8
+#define   GS_UKERNEL_MASK                (0xFF << GS_UKERNEL_SHIFT)
+#define   GS_UKERNEL_LAPIC_DONE                  (0x30 << GS_UKERNEL_SHIFT)
+#define   GS_UKERNEL_DPC_ERROR           (0x60 << GS_UKERNEL_SHIFT)
+#define   GS_UKERNEL_READY               (0xF0 << GS_UKERNEL_SHIFT)
+#define   GS_MIA_SHIFT                 16
+#define   GS_MIA_MASK                    (0x07 << GS_MIA_SHIFT)
+
+#define GUC_WOPCM_SIZE                 0xc050
+#define   GUC_WOPCM_SIZE_VALUE           (0x80 << 12)  /* 512KB */
+#define GUC_WOPCM_OFFSET               0x80000         /* 512KB */
+
+#define SOFT_SCRATCH(n)                        (0xc180 + ((n) * 4))
+
+#define UOS_RSA_SCRATCH_0              0xc200
+#define DMA_ADDR_0_LOW                 0xc300
+#define DMA_ADDR_0_HIGH                        0xc304
+#define DMA_ADDR_1_LOW                 0xc308
+#define DMA_ADDR_1_HIGH                        0xc30c
+#define   DMA_ADDRESS_SPACE_WOPCM        (7 << 16)
+#define   DMA_ADDRESS_SPACE_GTT                  (8 << 16)
+#define DMA_COPY_SIZE                  0xc310
+#define DMA_CTRL                       0xc314
+#define   UOS_MOVE                       (1<<4)
+#define   START_DMA                      (1<<0)
+#define DMA_GUC_WOPCM_OFFSET           0xc340
+
+#define GEN8_GT_PM_CONFIG              0x138140
+#define GEN9_GT_PM_CONFIG              0x13816c
+#define   GEN8_GT_DOORBELL_ENABLE        (1<<0)
+
+#define GEN8_GTCR                      0x4274
+#define   GEN8_GTCR_INVALIDATE           (1<<0)
+
+#define GUC_ARAT_C6DIS                 0xA178
+
+#define GUC_SHIM_CONTROL               0xc064
+#define   GUC_DISABLE_SRAM_INIT_TO_ZEROES      (1<<0)
+#define   GUC_ENABLE_READ_CACHE_LOGIC          (1<<1)
+#define   GUC_ENABLE_MIA_CACHING               (1<<2)
+#define   GUC_GEN10_MSGCH_ENABLE               (1<<4)
+#define   GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA  (1<<9)
+#define   GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA (1<<10)
+#define   GUC_ENABLE_MIA_CLOCK_GATING          (1<<15)
+#define   GUC_GEN10_SHIM_WC_ENABLE             (1<<21)
+
+#define GUC_SHIM_CONTROL_VALUE (GUC_DISABLE_SRAM_INIT_TO_ZEROES        | \
+                                GUC_ENABLE_READ_CACHE_LOGIC            | \
+                                GUC_ENABLE_MIA_CACHING                 | \
+                                GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA    | \
+                                GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA)
+
+#define HOST2GUC_INTERRUPT             0xc4c8
+#define   HOST2GUC_TRIGGER               (1<<0)
+
+#define DRBMISC1                       0x1984
+#define   DOORBELL_ENABLE                (1<<0)
+
+#define GEN8_DRBREGL(x)                        (0x1000 + (x) * 8)
+#define   GEN8_DRB_VALID                 (1<<0)
+#define GEN8_DRBREGU(x)                        (GEN8_DRBREGL(x) + 4)
+
+#define DE_GUCRMR                      0x44054
+
+#define GUC_BCS_RCS_IER                        0xC550
+#define GUC_VCS2_VCS1_IER              0xC554
+#define GUC_WD_VECS_IER                        0xC558
+#define GUC_PM_P24C_IER                        0xC55C
+
+#endif
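
As an illustration of how these definitions compose (the actual loader lands
in a later patch, so the helper below is only a plausible sketch), a firmware
load status check could look like:

static bool guc_ucode_loaded(u32 status)
{
        /* RSA validation failed during the bootrom phase? */
        if ((status & GS_BOOTROM_MASK) == GS_BOOTROM_RSA_FAILED)
                return false;

        /* the uKernel has reached its terminal "ready" state */
        return (status & GS_UKERNEL_MASK) == GS_UKERNEL_READY;
}
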
index d87f173a0179aca7e702dfcc6282aaad59fefa96..1118c39281f98cc272b23bcbded120c7ecdd4502 100644 (file)
@@ -1227,6 +1227,22 @@ static irqreturn_t gen8_gt_irq_handler(struct drm_i915_private *dev_priv,
        return ret;
 }
 
+static bool bxt_port_hotplug_long_detect(enum port port, u32 val)
+{
+       switch (port) {
+       case PORT_A:
+               return val & BXT_PORTA_HOTPLUG_LONG_DETECT;
+       case PORT_B:
+               return val & PORTB_HOTPLUG_LONG_DETECT;
+       case PORT_C:
+               return val & PORTC_HOTPLUG_LONG_DETECT;
+       case PORT_D:
+               return val & PORTD_HOTPLUG_LONG_DETECT;
+       default:
+               return false;
+       }
+}
+
 static bool pch_port_hotplug_long_detect(enum port port, u32 val)
 {
        switch (port) {
@@ -1256,9 +1272,10 @@ static bool i9xx_port_hotplug_long_detect(enum port port, u32 val)
 }
 
 /* Get a bit mask of pins that have triggered, and which ones may be long. */
-static void pch_get_hpd_pins(u32 *pin_mask, u32 *long_mask,
+static void intel_get_hpd_pins(u32 *pin_mask, u32 *long_mask,
                             u32 hotplug_trigger, u32 dig_hotplug_reg,
-                            const u32 hpd[HPD_NUM_PINS])
+                            const u32 hpd[HPD_NUM_PINS],
+                            bool long_pulse_detect(enum port port, u32 val))
 {
        enum port port;
        int i;
@@ -1272,8 +1289,10 @@ static void pch_get_hpd_pins(u32 *pin_mask, u32 *long_mask,
 
                *pin_mask |= BIT(i);
 
-               port = intel_hpd_pin_to_port(i);
-               if (pch_port_hotplug_long_detect(port, dig_hotplug_reg))
+               if (!intel_hpd_pin_to_port(i, &port))
+                       continue;
+
+               if (long_pulse_detect(port, dig_hotplug_reg))
                        *long_mask |= BIT(i);
        }
 
@@ -1282,34 +1301,6 @@ static void pch_get_hpd_pins(u32 *pin_mask, u32 *long_mask,
 
 }
 
-/* Get a bit mask of pins that have triggered, and which ones may be long. */
-static void i9xx_get_hpd_pins(u32 *pin_mask, u32 *long_mask,
-                             u32 hotplug_trigger, const u32 hpd[HPD_NUM_PINS])
-{
-       enum port port;
-       int i;
-
-       *pin_mask = 0;
-       *long_mask = 0;
-
-       if (!hotplug_trigger)
-               return;
-
-       for_each_hpd_pin(i) {
-               if ((hpd[i] & hotplug_trigger) == 0)
-                       continue;
-
-               *pin_mask |= BIT(i);
-
-               port = intel_hpd_pin_to_port(i);
-               if (i9xx_port_hotplug_long_detect(port, hotplug_trigger))
-                       *long_mask |= BIT(i);
-       }
-
-       DRM_DEBUG_DRIVER("hotplug event received, stat 0x%08x, pins 0x%08x\n",
-                        hotplug_trigger, *pin_mask);
-}
-
 static void gmbus_irq_handler(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
@@ -1547,7 +1538,9 @@ static void i9xx_hpd_irq_handler(struct drm_device *dev)
        if (IS_G4X(dev) || IS_VALLEYVIEW(dev)) {
                u32 hotplug_trigger = hotplug_status & HOTPLUG_INT_STATUS_G4X;
 
-               i9xx_get_hpd_pins(&pin_mask, &long_mask, hotplug_trigger, hpd_status_g4x);
+               intel_get_hpd_pins(&pin_mask, &long_mask, hotplug_trigger,
+                                  hotplug_trigger, hpd_status_g4x,
+                                  i9xx_port_hotplug_long_detect);
                intel_hpd_irq_handler(dev, pin_mask, long_mask);
 
                if (hotplug_status & DP_AUX_CHANNEL_MASK_INT_STATUS_G4X)
@@ -1555,7 +1548,9 @@ static void i9xx_hpd_irq_handler(struct drm_device *dev)
        } else {
                u32 hotplug_trigger = hotplug_status & HOTPLUG_INT_STATUS_I915;
 
-               i9xx_get_hpd_pins(&pin_mask, &long_mask, hotplug_trigger, hpd_status_i915);
+               intel_get_hpd_pins(&pin_mask, &long_mask, hotplug_trigger,
+                                  hotplug_trigger, hpd_status_i915,
+                                  i9xx_port_hotplug_long_detect);
                intel_hpd_irq_handler(dev, pin_mask, long_mask);
        }
 }
@@ -1662,8 +1657,9 @@ static void ibx_irq_handler(struct drm_device *dev, u32 pch_iir)
                dig_hotplug_reg = I915_READ(PCH_PORT_HOTPLUG);
                I915_WRITE(PCH_PORT_HOTPLUG, dig_hotplug_reg);
 
-               pch_get_hpd_pins(&pin_mask, &long_mask, hotplug_trigger,
-                                dig_hotplug_reg, hpd_ibx);
+               intel_get_hpd_pins(&pin_mask, &long_mask, hotplug_trigger,
+                                  dig_hotplug_reg, hpd_ibx,
+                                  pch_port_hotplug_long_detect);
                intel_hpd_irq_handler(dev, pin_mask, long_mask);
        }
 
@@ -1763,8 +1759,10 @@ static void cpt_irq_handler(struct drm_device *dev, u32 pch_iir)
 
                dig_hotplug_reg = I915_READ(PCH_PORT_HOTPLUG);
                I915_WRITE(PCH_PORT_HOTPLUG, dig_hotplug_reg);
-               pch_get_hpd_pins(&pin_mask, &long_mask, hotplug_trigger,
-                                dig_hotplug_reg, hpd_cpt);
+
+               intel_get_hpd_pins(&pin_mask, &long_mask, hotplug_trigger,
+                                  dig_hotplug_reg, hpd_cpt,
+                                  pch_port_hotplug_long_detect);
                intel_hpd_irq_handler(dev, pin_mask, long_mask);
        }
 
@@ -1981,7 +1979,8 @@ static void bxt_hpd_handler(struct drm_device *dev, uint32_t iir_status)
        /* Clear sticky bits in hpd status */
        I915_WRITE(BXT_HOTPLUG_CTL, hp_control);
 
-       pch_get_hpd_pins(&pin_mask, &long_mask, hp_trigger, hp_control, hpd_bxt);
+       intel_get_hpd_pins(&pin_mask, &long_mask, hp_trigger, hp_control,
+                          hpd_bxt, bxt_port_hotplug_long_detect);
        intel_hpd_irq_handler(dev, pin_mask, long_mask);
 }
 
index 5f4e7295295ff4904a7207f839abd912f77f0340..5ae4b0aba56412e3c7d8560c5fe1d1484ae00675 100644 (file)
@@ -52,6 +52,8 @@ struct i915_params i915 __read_mostly = {
        .mmio_debug = 0,
        .verbose_state_checks = 1,
        .edp_vswing = 0,
+       .enable_guc_submission = false,
+       .guc_log_level = -1,
 };
 
 module_param_named(modeset, i915.modeset, int, 0400);
@@ -181,3 +183,10 @@ MODULE_PARM_DESC(edp_vswing,
                 "Ignore/Override vswing pre-emph table selection from VBT "
                 "(0=use value from vbt [default], 1=low power swing(200mV),"
                 "2=default swing(400mV))");
+
+module_param_named_unsafe(enable_guc_submission, i915.enable_guc_submission, bool, 0400);
+MODULE_PARM_DESC(enable_guc_submission, "Enable GuC submission (default:false)");
+
+module_param_named(guc_log_level, i915.guc_log_level, int, 0400);
+MODULE_PARM_DESC(guc_log_level,
+       "GuC firmware logging level (-1:disabled (default), 0-3:enabled)");
index e9a95df639f0e48653c200ecd5085eb5808a71eb..0f67f3da0762a39135060ad367374400fa9a6243 100644 (file)
@@ -5985,6 +5985,11 @@ enum skl_disp_power_wells {
 
 /* digital port hotplug */
 #define PCH_PORT_HOTPLUG        0xc4030                /* SHOTPLUG_CTL */
+#define BXT_PORTA_HOTPLUG_ENABLE       (1 << 28)
+#define BXT_PORTA_HOTPLUG_STATUS_MASK  (0x3 << 24)
+#define  BXT_PORTA_HOTPLUG_NO_DETECT   (0 << 24)
+#define  BXT_PORTA_HOTPLUG_SHORT_DETECT        (1 << 24)
+#define  BXT_PORTA_HOTPLUG_LONG_DETECT (2 << 24)
 #define PORTD_HOTPLUG_ENABLE            (1 << 20)
 #define PORTD_PULSE_DURATION_2ms        (0)
 #define PORTD_PULSE_DURATION_4_5ms      (1 << 18)
@@ -6846,6 +6851,9 @@ enum skl_disp_power_wells {
 #define GEN7_MISCCPCTL                 (0x9424)
 #define   GEN7_DOP_CLOCK_GATE_ENABLE   (1<<0)
 
+#define GEN8_GARBCNTL                   0xB004
+#define   GEN9_GAPS_TSV_CREDIT_DISABLE  (1<<7)
+
 /* IVYBRIDGE DPF */
 #define GEN7_L3CDERRST1                        0xB008 /* L3CD Error Status 1 */
 #define HSW_L3CDERRST11                        0xB208 /* L3CD Error Status register 1 slice 1 */
index 6d8a7bf06dfccba963705750f3881407c78a5c78..ba1ae031e6fd47ff7873fe739a4efd5e5c5fddac 100644 (file)
@@ -244,7 +244,7 @@ void intel_csr_load_status_set(struct drm_i915_private *dev_priv,
 void intel_csr_load_program(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
-       __be32 *payload = dev_priv->csr.dmc_payload;
+       u32 *payload = dev_priv->csr.dmc_payload;
        uint32_t i, fw_size;
 
        if (!IS_GEN9(dev)) {
@@ -256,7 +256,7 @@ void intel_csr_load_program(struct drm_device *dev)
        fw_size = dev_priv->csr.dmc_fw_size;
        for (i = 0; i < fw_size; i++)
                I915_WRITE(CSR_PROGRAM_BASE + i * 4,
-                       (u32 __force)payload[i]);
+                       payload[i]);
 
        for (i = 0; i < dev_priv->csr.mmio_count; i++) {
                I915_WRITE(dev_priv->csr.mmioaddr[i],
@@ -279,7 +279,7 @@ static void finish_csr_load(const struct firmware *fw, void *context)
        char substepping = intel_get_substepping(dev);
        uint32_t dmc_offset = CSR_DEFAULT_FW_OFFSET, readcount = 0, nbytes;
        uint32_t i;
-       __be32 *dmc_payload;
+       uint32_t *dmc_payload;
        bool fw_loaded = false;
 
        if (!fw) {
@@ -375,15 +375,7 @@ static void finish_csr_load(const struct firmware *fw, void *context)
        }
 
        dmc_payload = csr->dmc_payload;
-       for (i = 0; i < dmc_header->fw_size; i++) {
-               uint32_t *tmp = (u32 *)&fw->data[readcount + i * 4];
-               /*
-                * The firmware payload is an array of 32 bit words stored in
-                * little-endian format in the firmware image and programmed
-                * as 32 bit big-endian format to memory.
-                */
-               dmc_payload[i] = cpu_to_be32(*tmp);
-       }
+       memcpy(dmc_payload, &fw->data[readcount], nbytes);
 
        /* load csr program during system boot, as needed for DC states */
        intel_csr_load_program(dev);
index 43b0f17ad1fa5c1e2c86c8c5e8381d4035edc63c..a76751633af67310750098f18e008e6f83ee09ae 100644 (file)
@@ -1936,7 +1936,9 @@ static void intel_disable_shared_dpll(struct intel_crtc *crtc)
        struct intel_shared_dpll *pll = intel_crtc_to_shared_dpll(crtc);
 
        /* PCH only available on ILK+ */
-       BUG_ON(INTEL_INFO(dev)->gen < 5);
+       if (INTEL_INFO(dev)->gen < 5)
+               return;
+
        if (pll == NULL)
                return;
 
@@ -10764,15 +10766,12 @@ static void intel_unpin_work_fn(struct work_struct *__work)
                container_of(__work, struct intel_unpin_work, work);
        struct intel_crtc *crtc = to_intel_crtc(work->crtc);
        struct drm_device *dev = crtc->base.dev;
-       struct drm_i915_private *dev_priv = dev->dev_private;
        struct drm_plane *primary = crtc->base.primary;
 
        mutex_lock(&dev->struct_mutex);
        intel_unpin_fb_obj(work->old_fb, primary->state);
        drm_gem_object_unreference(&work->pending_flip_obj->base);
 
-       intel_fbc_update(dev_priv);
-
        if (work->flip_queued_req)
                i915_gem_request_assign(&work->flip_queued_req, NULL);
        mutex_unlock(&dev->struct_mutex);
@@ -11544,7 +11543,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
                          to_intel_plane(primary)->frontbuffer_bit);
        mutex_unlock(&dev->struct_mutex);
 
-       intel_fbc_disable(dev_priv);
+       intel_fbc_disable_crtc(intel_crtc);
        intel_frontbuffer_flip_prepare(dev,
                                       to_intel_plane(primary)->frontbuffer_bit);
 
@@ -14271,7 +14270,7 @@ static int intel_user_framebuffer_dirty(struct drm_framebuffer *fb,
        struct drm_i915_gem_object *obj = intel_fb->obj;
 
        mutex_lock(&dev->struct_mutex);
-       intel_fb_obj_flush(obj, false, ORIGIN_GTT);
+       intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB);
        mutex_unlock(&dev->struct_mutex);
 
        return 0;
index f1b9f939b4352b31d00b85b46373cc0b51169acc..df7e2cfef38dfbc7ccee0d0d4123b2c94b60d9a1 100644 (file)
@@ -1409,7 +1409,10 @@ intel_dp_compute_config(struct intel_encoder *encoder,
         * bpc in between. */
        bpp = pipe_config->pipe_bpp;
        if (is_edp(intel_dp)) {
-               if (dev_priv->vbt.edp_bpp && dev_priv->vbt.edp_bpp < bpp) {
+
+               /* Get bpp from VBT only for panels that don't have bpp in EDID */
+               if (intel_connector->base.display_info.bpc == 0 &&
+                       (dev_priv->vbt.edp_bpp && dev_priv->vbt.edp_bpp < bpp)) {
                        DRM_DEBUG_KMS("clamping bpp for eDP panel to BIOS-provided %i\n",
                                      dev_priv->vbt.edp_bpp);
                        bpp = dev_priv->vbt.edp_bpp;
@@ -3958,43 +3961,67 @@ intel_dp_probe_mst(struct intel_dp *intel_dp)
        return intel_dp->is_mst;
 }
 
-int intel_dp_sink_crc(struct intel_dp *intel_dp, u8 *crc)
+static void intel_dp_sink_crc_stop(struct intel_dp *intel_dp)
 {
-       struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
-       struct drm_device *dev = intel_dig_port->base.base.dev;
-       struct intel_crtc *intel_crtc =
-               to_intel_crtc(intel_dig_port->base.base.crtc);
+       struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
+       struct intel_crtc *intel_crtc = to_intel_crtc(dig_port->base.base.crtc);
        u8 buf;
-       int test_crc_count;
-       int attempts = 6;
-       int ret = 0;
 
-       hsw_disable_ips(intel_crtc);
-
-       if (drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_SINK_MISC, &buf) < 0) {
-               ret = -EIO;
-               goto out;
+       if (drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_SINK, &buf) < 0) {
+               DRM_DEBUG_KMS("Sink CRC couldn't be stopped properly\n");
+               return;
        }
 
-       if (!(buf & DP_TEST_CRC_SUPPORTED)) {
-               ret = -ENOTTY;
-               goto out;
-       }
+       if (drm_dp_dpcd_writeb(&intel_dp->aux, DP_TEST_SINK,
+                              buf & ~DP_TEST_SINK_START) < 0)
+               DRM_DEBUG_KMS("Sink CRC couldn't be stopped properly\n");
 
-       if (drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_SINK, &buf) < 0) {
-               ret = -EIO;
-               goto out;
-       }
+       hsw_enable_ips(intel_crtc);
+}
+
+static int intel_dp_sink_crc_start(struct intel_dp *intel_dp)
+{
+       struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
+       struct intel_crtc *intel_crtc = to_intel_crtc(dig_port->base.base.crtc);
+       u8 buf;
+
+       if (drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_SINK_MISC, &buf) < 0)
+               return -EIO;
+
+       if (!(buf & DP_TEST_CRC_SUPPORTED))
+               return -ENOTTY;
+
+       if (drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_SINK, &buf) < 0)
+               return -EIO;
+
+       hsw_disable_ips(intel_crtc);
 
        if (drm_dp_dpcd_writeb(&intel_dp->aux, DP_TEST_SINK,
-                               buf | DP_TEST_SINK_START) < 0) {
-               ret = -EIO;
-               goto out;
+                              buf | DP_TEST_SINK_START) < 0) {
+               hsw_enable_ips(intel_crtc);
+               return -EIO;
        }
 
+       return 0;
+}
+
+int intel_dp_sink_crc(struct intel_dp *intel_dp, u8 *crc)
+{
+       struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
+       struct drm_device *dev = dig_port->base.base.dev;
+       struct intel_crtc *intel_crtc = to_intel_crtc(dig_port->base.base.crtc);
+       u8 buf;
+       int test_crc_count;
+       int attempts = 6;
+       int ret;
+
+       ret = intel_dp_sink_crc_start(intel_dp);
+       if (ret)
+               return ret;
+
        if (drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_SINK_MISC, &buf) < 0) {
                ret = -EIO;
-               goto out;
+               goto stop;
        }
 
        test_crc_count = buf & DP_TEST_COUNT_MASK;
@@ -4003,7 +4030,7 @@ int intel_dp_sink_crc(struct intel_dp *intel_dp, u8 *crc)
                if (drm_dp_dpcd_readb(&intel_dp->aux,
                                      DP_TEST_SINK_MISC, &buf) < 0) {
                        ret = -EIO;
-                       goto out;
+                       goto stop;
                }
                intel_wait_for_vblank(dev, intel_crtc->pipe);
        } while (--attempts && (buf & DP_TEST_COUNT_MASK) == test_crc_count);
@@ -4011,25 +4038,13 @@ int intel_dp_sink_crc(struct intel_dp *intel_dp, u8 *crc)
        if (attempts == 0) {
                DRM_DEBUG_KMS("Panel is unable to calculate CRC after 6 vblanks\n");
                ret = -ETIMEDOUT;
-               goto out;
-       }
-
-       if (drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_CRC_R_CR, crc, 6) < 0) {
-               ret = -EIO;
-               goto out;
+               goto stop;
        }
 
-       if (drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_SINK, &buf) < 0) {
+       if (drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_CRC_R_CR, crc, 6) < 0)
                ret = -EIO;
-               goto out;
-       }
-       if (drm_dp_dpcd_writeb(&intel_dp->aux, DP_TEST_SINK,
-                              buf & ~DP_TEST_SINK_START) < 0) {
-               ret = -EIO;
-               goto out;
-       }
-out:
-       hsw_enable_ips(intel_crtc);
+stop:
+       intel_dp_sink_crc_stop(intel_dp);
        return ret;
 }
 
index 320c9e6bd8484e231767534e4563047957ff2b25..34ad042f18b571e8dda9b309f42c5fc23bf2f0d8 100644 (file)
@@ -1243,7 +1243,7 @@ void intel_fbc_invalidate(struct drm_i915_private *dev_priv,
                          unsigned int frontbuffer_bits,
                          enum fb_op_origin origin);
 void intel_fbc_flush(struct drm_i915_private *dev_priv,
-                    unsigned int frontbuffer_bits);
+                    unsigned int frontbuffer_bits, enum fb_op_origin origin);
 const char *intel_no_fbc_reason_str(enum no_fbc_reason reason);
 void intel_fbc_cleanup_cfb(struct drm_i915_private *dev_priv);
 
index c271af7679815bedee3623efc869b15de1b551b2..1f97fb548c2ac6b937e2b5e8f0a7a29c9592a9b2 100644 (file)
@@ -884,22 +884,23 @@ void intel_fbc_invalidate(struct drm_i915_private *dev_priv,
 }
 
 void intel_fbc_flush(struct drm_i915_private *dev_priv,
-                    unsigned int frontbuffer_bits)
+                    unsigned int frontbuffer_bits, enum fb_op_origin origin)
 {
        if (!dev_priv->fbc.enable_fbc)
                return;
 
-       mutex_lock(&dev_priv->fbc.lock);
+       if (origin == ORIGIN_GTT)
+               return;
 
-       if (!dev_priv->fbc.busy_bits)
-               goto out;
+       mutex_lock(&dev_priv->fbc.lock);
 
        dev_priv->fbc.busy_bits &= ~frontbuffer_bits;
 
-       if (!dev_priv->fbc.busy_bits)
+       if (!dev_priv->fbc.busy_bits) {
+               __intel_fbc_disable(dev_priv);
                __intel_fbc_update(dev_priv);
+       }
 
-out:
        mutex_unlock(&dev_priv->fbc.lock);
 }
 
index 777b1d3ccd41992bacad8c75a18a0c9ec5f398a4..ac85357010b4b652ec15811d9c7d4a3b52ad8ebd 100644 (file)
@@ -129,7 +129,7 @@ static void intel_frontbuffer_flush(struct drm_device *dev,
 
        intel_edp_drrs_flush(dev, frontbuffer_bits);
        intel_psr_flush(dev, frontbuffer_bits, origin);
-       intel_fbc_flush(dev_priv, frontbuffer_bits);
+       intel_fbc_flush(dev_priv, frontbuffer_bits, origin);
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/intel_guc_fwif.h b/drivers/gpu/drm/i915/intel_guc_fwif.h
new file mode 100644 (file)
index 0000000..18d7f20
--- /dev/null
@@ -0,0 +1,245 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+#ifndef _INTEL_GUC_FWIF_H
+#define _INTEL_GUC_FWIF_H
+
+/*
+ * This file is partially autogenerated, although currently with some manual
+ * fixups afterwards. In future, it should be entirely autogenerated, in order
+ * to ensure that the definitions herein remain in sync with those used by the
+ * GuC's own firmware.
+ *
+ * EDITING THIS FILE IS THEREFORE NOT RECOMMENDED - YOUR CHANGES MAY BE LOST.
+ */
+
+#define GFXCORE_FAMILY_GEN8            11
+#define GFXCORE_FAMILY_GEN9            12
+#define GFXCORE_FAMILY_FORCE_ULONG     0x7fffffff
+
+#define GUC_CTX_PRIORITY_CRITICAL      0
+#define GUC_CTX_PRIORITY_HIGH          1
+#define GUC_CTX_PRIORITY_NORMAL                2
+#define GUC_CTX_PRIORITY_LOW           3
+
+#define GUC_MAX_GPU_CONTEXTS           1024
+#define        GUC_INVALID_CTX_ID              (GUC_MAX_GPU_CONTEXTS + 1)
+
+/* Work queue item header definitions */
+#define WQ_STATUS_ACTIVE               1
+#define WQ_STATUS_SUSPENDED            2
+#define WQ_STATUS_CMD_ERROR            3
+#define WQ_STATUS_ENGINE_ID_NOT_USED   4
+#define WQ_STATUS_SUSPENDED_FROM_RESET 5
+#define WQ_TYPE_SHIFT                  0
+#define   WQ_TYPE_BATCH_BUF            (0x1 << WQ_TYPE_SHIFT)
+#define   WQ_TYPE_PSEUDO               (0x2 << WQ_TYPE_SHIFT)
+#define   WQ_TYPE_INORDER              (0x3 << WQ_TYPE_SHIFT)
+#define WQ_TARGET_SHIFT                        10
+#define WQ_LEN_SHIFT                   16
+#define WQ_NO_WCFLUSH_WAIT             (1 << 27)
+#define WQ_PRESENT_WORKLOAD            (1 << 28)
+#define WQ_WORKLOAD_SHIFT              29
+#define   WQ_WORKLOAD_GENERAL          (0 << WQ_WORKLOAD_SHIFT)
+#define   WQ_WORKLOAD_GPGPU            (1 << WQ_WORKLOAD_SHIFT)
+#define   WQ_WORKLOAD_TOUCH            (2 << WQ_WORKLOAD_SHIFT)
+
+#define WQ_RING_TAIL_SHIFT             20
+#define WQ_RING_TAIL_MASK              (0x7FF << WQ_RING_TAIL_SHIFT)
+
+#define GUC_DOORBELL_ENABLED           1
+#define GUC_DOORBELL_DISABLED          0
+
+#define GUC_CTX_DESC_ATTR_ACTIVE       (1 << 0)
+#define GUC_CTX_DESC_ATTR_PENDING_DB   (1 << 1)
+#define GUC_CTX_DESC_ATTR_KERNEL       (1 << 2)
+#define GUC_CTX_DESC_ATTR_PREEMPT      (1 << 3)
+#define GUC_CTX_DESC_ATTR_RESET                (1 << 4)
+#define GUC_CTX_DESC_ATTR_WQLOCKED     (1 << 5)
+#define GUC_CTX_DESC_ATTR_PCH          (1 << 6)
+
+/* The guc control data is 9 DWORDs (GUC_CTL_MAX_DWORDS) */
+#define GUC_CTL_CTXINFO                        0
+#define   GUC_CTL_CTXNUM_IN16_SHIFT    0
+#define   GUC_CTL_BASE_ADDR_SHIFT      12
+#define GUC_CTL_ARAT_HIGH              1
+#define GUC_CTL_ARAT_LOW               2
+#define GUC_CTL_DEVICE_INFO            3
+#define   GUC_CTL_GTTYPE_SHIFT         0
+#define   GUC_CTL_COREFAMILY_SHIFT     7
+#define GUC_CTL_LOG_PARAMS             4
+#define   GUC_LOG_VALID                        (1 << 0)
+#define   GUC_LOG_NOTIFY_ON_HALF_FULL  (1 << 1)
+#define   GUC_LOG_ALLOC_IN_MEGABYTE    (1 << 3)
+#define   GUC_LOG_CRASH_PAGES          1
+#define   GUC_LOG_CRASH_SHIFT          4
+#define   GUC_LOG_DPC_PAGES            3
+#define   GUC_LOG_DPC_SHIFT            6
+#define   GUC_LOG_ISR_PAGES            3
+#define   GUC_LOG_ISR_SHIFT            9
+#define   GUC_LOG_BUF_ADDR_SHIFT       12
+#define GUC_CTL_PAGE_FAULT_CONTROL     5
+#define GUC_CTL_WA                     6
+#define   GUC_CTL_WA_UK_BY_DRIVER      (1 << 3)
+#define GUC_CTL_FEATURE                        7
+#define   GUC_CTL_VCS2_ENABLED         (1 << 0)
+#define   GUC_CTL_KERNEL_SUBMISSIONS   (1 << 1)
+#define   GUC_CTL_FEATURE2             (1 << 2)
+#define   GUC_CTL_POWER_GATING         (1 << 3)
+#define   GUC_CTL_DISABLE_SCHEDULER    (1 << 4)
+#define   GUC_CTL_PREEMPTION_LOG       (1 << 5)
+#define   GUC_CTL_ENABLE_SLPC          (1 << 7)
+#define GUC_CTL_DEBUG                  8
+#define   GUC_LOG_VERBOSITY_SHIFT      0
+#define   GUC_LOG_VERBOSITY_LOW                (0 << GUC_LOG_VERBOSITY_SHIFT)
+#define   GUC_LOG_VERBOSITY_MED                (1 << GUC_LOG_VERBOSITY_SHIFT)
+#define   GUC_LOG_VERBOSITY_HIGH       (2 << GUC_LOG_VERBOSITY_SHIFT)
+#define   GUC_LOG_VERBOSITY_ULTRA      (3 << GUC_LOG_VERBOSITY_SHIFT)
+/* Verbosity range-check limits, without the shift */
+#define   GUC_LOG_VERBOSITY_MIN        0
+#define   GUC_LOG_VERBOSITY_MAX        3
+
+#define GUC_CTL_MAX_DWORDS             (GUC_CTL_DEBUG + 1)
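As a sketch of how these indices and shifts compose, the driver fills a GUC_CTL_MAX_DWORDS-sized array before handing it to the firmware; the params array name, gt_type variable and chosen values below are assumptions for illustration only:

	u32 params[GUC_CTL_MAX_DWORDS] = {};

	params[GUC_CTL_DEVICE_INFO] = (gt_type << GUC_CTL_GTTYPE_SHIFT) |
				      (GFXCORE_FAMILY_GEN9 << GUC_CTL_COREFAMILY_SHIFT);
	params[GUC_CTL_DEBUG] = GUC_LOG_VERBOSITY_LOW;	/* minimum verbosity */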
+
+struct guc_doorbell_info {
+       u32 db_status;
+       u32 cookie;
+       u32 reserved[14];
+} __packed;
+
+union guc_doorbell_qw {
+       struct {
+               u32 db_status;
+               u32 cookie;
+       };
+       u64 value_qw;
+} __packed;
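The union exists so that db_status and cookie can be published in a single 64-bit access. A hedged sketch of ringing a doorbell via compare-and-exchange (the db pointer mapping the doorbell page, and the cookie variable, are assumed):

	union guc_doorbell_qw db_cmp, db_exc, db_ret;

	db_cmp.db_status = GUC_DOORBELL_ENABLED;
	db_cmp.cookie = cookie;
	db_exc.db_status = GUC_DOORBELL_ENABLED;
	db_exc.cookie = cookie + 1;
	db_ret.value_qw = atomic64_cmpxchg((atomic64_t *)&db->value_qw,
					   db_cmp.value_qw, db_exc.value_qw);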
+
+#define GUC_MAX_DOORBELLS              256
+#define GUC_INVALID_DOORBELL_ID                (GUC_MAX_DOORBELLS)
+
+#define GUC_DB_SIZE                    (PAGE_SIZE)
+#define GUC_WQ_SIZE                    (PAGE_SIZE * 2)
+
+/* Work item for submitting workloads into the GuC's work queue. */
+struct guc_wq_item {
+       u32 header;
+       u32 context_desc;
+       u32 ring_tail;
+       u32 fence_id;
+} __packed;
+
+struct guc_process_desc {
+       u32 context_id;
+       u64 db_base_addr;
+       u32 head;
+       u32 tail;
+       u32 error_offset;
+       u64 wq_base_addr;
+       u32 wq_size_bytes;
+       u32 wq_status;
+       u32 engine_presence;
+       u32 priority;
+       u32 reserved[30];
+} __packed;
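head and tail index into the shared work queue, so free space falls out of the kernel's circular-buffer helper (linux/circ_buf.h), assuming the driver produces at tail and the GuC consumes at head; enqueue_wq_item() is a hypothetical helper:

	if (CIRC_SPACE(desc->tail, desc->head, desc->wq_size_bytes) >=
	    sizeof(struct guc_wq_item))
		enqueue_wq_item(desc);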
+
+/* The engine ID and context ID are packed into guc_execlist_context.context_id */
+#define GUC_ELC_CTXID_OFFSET           0
+#define GUC_ELC_ENGINE_OFFSET          29
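For example, with illustrative ctx_index and ring_id variables, a submission path would compose the packed field as:

	u32 context_id = (ctx_index << GUC_ELC_CTXID_OFFSET) |
			 (ring_id << GUC_ELC_ENGINE_OFFSET);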
+
+/* The execlist context including software and HW information */
+struct guc_execlist_context {
+       u32 context_desc;
+       u32 context_id;
+       u32 ring_status;
+       u32 ring_lcra;
+       u32 ring_begin;
+       u32 ring_end;
+       u32 ring_next_free_location;
+       u32 ring_current_tail_pointer_value;
+       u8 engine_state_submit_value;
+       u8 engine_state_wait_value;
+       u16 pagefault_count;
+       u16 engine_submit_queue_count;
+} __packed;
+
+/* Context descriptor for communicating between uKernel and Driver */
+struct guc_context_desc {
+       u32 sched_common_area;
+       u32 context_id;
+       u32 pas_id;
+       u8 engines_used;
+       u64 db_trigger_cpu;
+       u32 db_trigger_uk;
+       u64 db_trigger_phy;
+       u16 db_id;
+
+       struct guc_execlist_context lrc[I915_NUM_RINGS];
+
+       u8 attribute;
+
+       u32 priority;
+
+       u32 wq_sampled_tail_offset;
+       u32 wq_total_submit_enqueues;
+
+       u32 process_desc;
+       u32 wq_addr;
+       u32 wq_size;
+
+       u32 engine_presence;
+
+       u32 reserved0[1];
+       u64 reserved1[1];
+
+       u64 desc_private;
+} __packed;
+
+/* This action will be programmed in C180 - SOFT_SCRATCH_0_REG */
+enum host2guc_action {
+       HOST2GUC_ACTION_DEFAULT = 0x0,
+       HOST2GUC_ACTION_SAMPLE_FORCEWAKE = 0x6,
+       HOST2GUC_ACTION_ALLOCATE_DOORBELL = 0x10,
+       HOST2GUC_ACTION_DEALLOCATE_DOORBELL = 0x20,
+       HOST2GUC_ACTION_SLPC_REQUEST = 0x3003,
+       HOST2GUC_ACTION_LIMIT
+};
+
+/*
+ * The GuC sends its response to a command by overwriting the
+ * command in SS0. The response is distinguishable from a command
+ * by the fact that all the MASK bits are set. The remaining bits
+ * give more detail.
+ */
+#define GUC2HOST_RESPONSE_MASK         ((u32)0xF0000000)
+#define GUC2HOST_IS_RESPONSE(x)        ((u32)(x) >= GUC2HOST_RESPONSE_MASK)
+#define GUC2HOST_STATUS(x)             (GUC2HOST_RESPONSE_MASK | (x))
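A sketch of the resulting host-side completion check after writing an action, assuming the SOFT_SCRATCH(0) register definition from i915_guc_reg.h and the usual i915 MMIO helpers:

	u32 status = I915_READ(SOFT_SCRATCH(0));

	if (GUC2HOST_IS_RESPONSE(status) &&
	    status != GUC2HOST_STATUS_SUCCESS)
		DRM_ERROR("GuC action failed: status=0x%08x\n", status);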
+
+/* The GuC will return status in SOFT_SCRATCH_0_REG */
+enum guc2host_status {
+       GUC2HOST_STATUS_SUCCESS = GUC2HOST_STATUS(0x0),
+       GUC2HOST_STATUS_ALLOCATE_DOORBELL_FAIL = GUC2HOST_STATUS(0x10),
+       GUC2HOST_STATUS_DEALLOCATE_DOORBELL_FAIL = GUC2HOST_STATUS(0x20),
+       GUC2HOST_STATUS_GENERIC_FAIL = GUC2HOST_STATUS(0x0000F000)
+};
+
+#endif
index 3c9171f115310209d2e725f0b899d9d825baecdb..032a0bf75f3b1d4d7fbbfd44025653865f30ac0b 100644 (file)
  * it will use i915_hotplug_work_func where this logic is handled.
  */
 
-enum port intel_hpd_pin_to_port(enum hpd_pin pin)
+bool intel_hpd_pin_to_port(enum hpd_pin pin, enum port *port)
 {
        switch (pin) {
+       case HPD_PORT_A:
+               *port = PORT_A;
+               return true;
        case HPD_PORT_B:
-               return PORT_B;
+               *port = PORT_B;
+               return true;
        case HPD_PORT_C:
-               return PORT_C;
+               *port = PORT_C;
+               return true;
        case HPD_PORT_D:
-               return PORT_D;
+               *port = PORT_D;
+               return true;
        default:
-               return PORT_A; /* no hpd */
+               return false;   /* no hpd */
        }
 }
 
@@ -369,8 +375,8 @@ void intel_hpd_irq_handler(struct drm_device *dev,
                if (!(BIT(i) & pin_mask))
                        continue;
 
-               port = intel_hpd_pin_to_port(i);
-               is_dig_port = port && dev_priv->hotplug.irq_port[port];
+               is_dig_port = intel_hpd_pin_to_port(i, &port) &&
+                             dev_priv->hotplug.irq_port[port];
 
                if (is_dig_port) {
                        bool long_hpd = long_mask & BIT(i);
index 9faad82c42ecd05fe599072bcaf25d49d05ae13b..99bba8ece464bd5f9844e28e70fb96851797dd75 100644 (file)
@@ -1740,6 +1740,12 @@ static int intel_lr_context_render_state_init(struct drm_i915_gem_request *req)
        if (ret)
                goto out;
 
+       ret = req->ring->emit_bb_start(req,
+                                      (so.ggtt_offset + so.aux_batch_offset),
+                                      I915_DISPATCH_SECURE);
+       if (ret)
+               goto out;
+
        i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req);
 
 out:
index 5004c4a46a9e30aac110ba61a7116e983e74ba01..fff0c22682ee32f947907da7bb27f4fda0463073 100644 (file)
@@ -102,6 +102,12 @@ static void skl_init_clock_gating(struct drm_device *dev)
                /* WaDisableLSQCROPERFforOCL:skl */
                I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) |
                           GEN8_LQSC_RO_PERF_DIS);
+
+       /* WaEnableGapsTsvCreditFix:skl */
+       if (IS_SKYLAKE(dev) && (INTEL_REVID(dev) >= SKL_REVID_C0)) {
+               I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
+                                          GEN9_GAPS_TSV_CREDIT_DISABLE));
+       }
 }
 
 static void bxt_init_clock_gating(struct drm_device *dev)
@@ -4266,7 +4272,7 @@ static void ironlake_enable_drps(struct drm_device *dev)
 
        if (wait_for_atomic((I915_READ(MEMSWCTL) & MEMCTL_CMD_STS) == 0, 10))
                DRM_ERROR("stuck trying to change perf mode\n");
-       msleep(1);
+       mdelay(1);
 
        ironlake_set_drps(dev, fstart);
 
@@ -4297,10 +4303,10 @@ static void ironlake_disable_drps(struct drm_device *dev)
 
        /* Go back to the starting frequency */
        ironlake_set_drps(dev, dev_priv->ips.fstart);
-       msleep(1);
+       mdelay(1);
        rgvswctl |= MEMCTL_CMD_STS;
        I915_WRITE(MEMSWCTL, rgvswctl);
-       msleep(1);
+       mdelay(1);
 
        spin_unlock_irq(&mchdev_lock);
 }
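The msleep() to mdelay() switches above are not cosmetic: these waits execute with mchdev_lock held (note the spin_unlock_irq() at the end of the hunk), and sleeping is forbidden in that atomic, IRQs-off context, so a busy-wait is the only safe option. Schematically:

	spin_lock_irq(&mchdev_lock);
	/* ... register writes ... */
	mdelay(1);	/* busy-waits: safe while holding a spinlock */
	/* msleep(1) here would sleep in atomic context - a bug */
	spin_unlock_irq(&mchdev_lock);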
index acd8ec859f71b8bd5a22dcaa8809b7bc552f29e4..a04b4dc5ed9b459ce42de39c12de74539a620066 100644 (file)
@@ -698,6 +698,7 @@ void intel_psr_flush(struct drm_device *dev,
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct drm_crtc *crtc;
        enum pipe pipe;
+       int delay_ms = HAS_DDI(dev) ? 100 : 500;
 
        mutex_lock(&dev_priv->psr.lock);
        if (!dev_priv->psr.enabled) {
@@ -733,7 +734,7 @@ void intel_psr_flush(struct drm_device *dev,
 
        if (!dev_priv->psr.active && !dev_priv->psr.busy_frontbuffer_bits)
                schedule_delayed_work(&dev_priv->psr.work,
-                                     msecs_to_jiffies(100));
+                                     msecs_to_jiffies(delay_ms));
        mutex_unlock(&dev_priv->psr.lock);
 }
 
index 177f7ed16cf0abbe6c2518b0a98ae7bf16a34b44..1c14233d179f1e85f988e603f2a6478586224ff0 100644 (file)
@@ -1041,13 +1041,6 @@ static int skl_init_workarounds(struct intel_engine_cs *ring)
                WA_SET_BIT_MASKED(HIZ_CHICKEN,
                                  BDW_HIZ_POWER_COMPILER_CLOCK_GATING_DISABLE);
 
-       if (INTEL_REVID(dev) == SKL_REVID_C0 ||
-           INTEL_REVID(dev) == SKL_REVID_D0)
-               /* WaBarrierPerformanceFixDisable:skl */
-               WA_SET_BIT_MASKED(HDC_CHICKEN0,
-                                 HDC_FENCE_DEST_SLM_DISABLE |
-                                 HDC_BARRIER_PERFORMANCE_DISABLE);
-
        if (INTEL_REVID(dev) <= SKL_REVID_D0) {
                /*
                 * Use Force Non-Coherent whenever executing a 3D context. This
index 6393b76f87ffa57e48134bab5805411ee1bd2354..821644d1b544eb618e57db8196014768ebe9d7b8 100644 (file)
 bool intel_display_power_well_is_enabled(struct drm_i915_private *dev_priv,
                                    int power_well_id);
 
+static void intel_power_well_enable(struct drm_i915_private *dev_priv,
+                                   struct i915_power_well *power_well)
+{
+       DRM_DEBUG_KMS("enabling %s\n", power_well->name);
+       power_well->ops->enable(dev_priv, power_well);
+       power_well->hw_enabled = true;
+}
+
+static void intel_power_well_disable(struct drm_i915_private *dev_priv,
+                                    struct i915_power_well *power_well)
+{
+       DRM_DEBUG_KMS("disabling %s\n", power_well->name);
+       power_well->hw_enabled = false;
+       power_well->ops->disable(dev_priv, power_well);
+}
+
 /*
  * We should only use the power well if we explicitly asked the hardware to
  * enable it, so check if it's enabled and also check if we've requested it to
@@ -1104,11 +1120,8 @@ void intel_display_power_get(struct drm_i915_private *dev_priv,
        mutex_lock(&power_domains->lock);
 
        for_each_power_well(i, power_well, BIT(domain), power_domains) {
-               if (!power_well->count++) {
-                       DRM_DEBUG_KMS("enabling %s\n", power_well->name);
-                       power_well->ops->enable(dev_priv, power_well);
-                       power_well->hw_enabled = true;
-               }
+               if (!power_well->count++)
+                       intel_power_well_enable(dev_priv, power_well);
        }
 
        power_domains->domain_use_count[domain]++;
@@ -1142,11 +1155,8 @@ void intel_display_power_put(struct drm_i915_private *dev_priv,
        for_each_power_well_rev(i, power_well, BIT(domain), power_domains) {
                WARN_ON(!power_well->count);
 
-               if (!--power_well->count && i915.disable_power_well) {
-                       DRM_DEBUG_KMS("disabling %s\n", power_well->name);
-                       power_well->hw_enabled = false;
-                       power_well->ops->disable(dev_priv, power_well);
-               }
+               if (!--power_well->count && i915.disable_power_well)
+                       intel_power_well_disable(dev_priv, power_well);
        }
 
        mutex_unlock(&power_domains->lock);