MALI: rockchip: upgrade midgard DDK to r14p0-01rel0
author    chenzhen <chenzhen@rock-chips.com>
Mon, 17 Oct 2016 11:38:36 +0000 (19:38 +0800)
committer Huang, Tao <huangtao@rock-chips.com>
Mon, 20 Feb 2017 06:27:24 +0000 (14:27 +0800)
Along with a slight modification in mali_kbase_core_linux.c,
needed to build against the rk Linux 4.4 kernel:
-#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE
+#if KERNEL_VERSION(4, 4, 0) > LINUX_VERSION_CODE

Change-Id: I34565cb975866b46c5e3a4d8e2ac5e350dcceb80
Signed-off-by: chenzhen <chenzhen@rock-chips.com>
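
The guard works because KERNEL_VERSION() packs major/minor/patch into a single
integer that the preprocessor can compare against LINUX_VERSION_CODE. Below is a
minimal user-space sketch of the adjusted check, not driver code; KERNEL_VERSION
and LINUX_VERSION_CODE normally come from <linux/version.h> and are re-defined
here only for illustration. With the original 4.6.0 threshold a 4.4 kernel
compiles the #if branch; after this change it compiles the #else branch instead.

#include <stdio.h>

/* Illustrative stand-ins for the <linux/version.h> macros */
#define KERNEL_VERSION(a, b, c) (((a) << 16) + ((b) << 8) + (c))
#define LINUX_VERSION_CODE KERNEL_VERSION(4, 4, 0)  /* pretend rk 4.4 build */

#if KERNEL_VERSION(4, 4, 0) > LINUX_VERSION_CODE
#define BRANCH "compatibility path for kernels older than the threshold"
#else
#define BRANCH "newer path (now reached on the rk 4.4 kernel)"
#endif

int main(void)
{
	printf("Compiled the %s\n", BRANCH);
	return 0;
}
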
74 files changed:
drivers/gpu/arm/midgard/Kbuild
drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c
drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c
drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c
drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c
drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c
drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c
drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h
drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c
drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h
drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c
drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h
drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c
drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h
drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c
drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c
drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c
drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c
drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h
drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c
drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h
drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c
drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c
drivers/gpu/arm/midgard/mali_base_hwconfig_features.h
drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h
drivers/gpu/arm/midgard/mali_base_kernel.h
drivers/gpu/arm/midgard/mali_kbase.h
drivers/gpu/arm/midgard/mali_kbase_config_defaults.h
drivers/gpu/arm/midgard/mali_kbase_context.c
drivers/gpu/arm/midgard/mali_kbase_context.h [new file with mode: 0644]
drivers/gpu/arm/midgard/mali_kbase_core_linux.c
drivers/gpu/arm/midgard/mali_kbase_defs.h
drivers/gpu/arm/midgard/mali_kbase_device.c
drivers/gpu/arm/midgard/mali_kbase_dma_fence.c
drivers/gpu/arm/midgard/mali_kbase_event.c
drivers/gpu/arm/midgard/mali_kbase_gator_api.c
drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h
drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_thex.h [new file with mode: 0644]
drivers/gpu/arm/midgard/mali_kbase_gpu_id.h
drivers/gpu/arm/midgard/mali_kbase_hw.c
drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h
drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h
drivers/gpu/arm/midgard/mali_kbase_jd.c
drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c
drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h
drivers/gpu/arm/midgard/mali_kbase_jm.c
drivers/gpu/arm/midgard/mali_kbase_jm.h
drivers/gpu/arm/midgard/mali_kbase_js.c
drivers/gpu/arm/midgard/mali_kbase_js.h
drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c
drivers/gpu/arm/midgard/mali_kbase_js_defs.h
drivers/gpu/arm/midgard/mali_kbase_js_policy.h
drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.c
drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.h
drivers/gpu/arm/midgard/mali_kbase_mem.c
drivers/gpu/arm/midgard/mali_kbase_mem.h
drivers/gpu/arm/midgard/mali_kbase_mem_linux.c
drivers/gpu/arm/midgard/mali_kbase_mem_pool.c
drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c
drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h
drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c
drivers/gpu/arm/midgard/mali_kbase_mmu.c
drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.c [new file with mode: 0644]
drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.h [new file with mode: 0644]
drivers/gpu/arm/midgard/mali_kbase_replay.c
drivers/gpu/arm/midgard/mali_kbase_softjobs.c
drivers/gpu/arm/midgard/mali_kbase_tlstream.c
drivers/gpu/arm/midgard/mali_kbase_tlstream.h
drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c
drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h
drivers/gpu/arm/midgard/mali_kbase_uku.h
drivers/gpu/arm/midgard/mali_kbase_vinstr.c
drivers/gpu/arm/midgard/mali_midg_regmap.h
drivers/gpu/arm/midgard/sconscript

index 8a47cdd..4348704 100644
@@ -15,7 +15,7 @@
 
 
 # Driver version string which is returned to userspace via an ioctl
-MALI_RELEASE_NAME ?= "r13p0-00rel0"
+MALI_RELEASE_NAME ?= "r14p0-01rel0"
 
 # Paths required for build
 KBASE_PATH = $(src)
@@ -113,7 +113,8 @@ SRC := \
        mali_kbase_mem_pool_debugfs.c \
        mali_kbase_tlstream.c \
        mali_kbase_strings.c \
-       mali_kbase_as_fault_debugfs.c
+       mali_kbase_as_fault_debugfs.c \
+       mali_kbase_regs_history_debugfs.c
 
 ifeq ($(MALI_UNIT_TEST),1)
        SRC += mali_kbase_tlstream_test.c
index c686253..fef9a2c 100644
@@ -21,6 +21,8 @@
 void kbase_cache_set_coherency_mode(struct kbase_device *kbdev,
                u32 mode)
 {
+       kbdev->current_gpu_coherency_mode = mode;
+
        if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG))
                kbase_reg_write(kbdev, COHERENCY_ENABLE, mode, NULL);
 }
index ad05fe5..2306c75 100644
@@ -19,6 +19,7 @@
 
 
 #include <mali_kbase.h>
+#include <mali_kbase_tlstream.h>
 #include <mali_kbase_config_defaults.h>
 #include <backend/gpu/mali_kbase_pm_internal.h>
 #ifdef CONFIG_DEVFREQ_THERMAL
@@ -108,6 +109,8 @@ kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags)
        kbdev->current_voltage = voltage;
        kbdev->current_freq = freq;
 
+       kbase_tlstream_aux_devfreq_target((u64)freq);
+
        kbase_pm_reset_dvfs_utilisation(kbdev);
 
        return err;
index b9238a3..dcdf15c 100644
 #include <backend/gpu/mali_kbase_device_internal.h>
 
 #if !defined(CONFIG_MALI_NO_MALI)
+
+
+#ifdef CONFIG_DEBUG_FS
+
+
+int kbase_io_history_resize(struct kbase_io_history *h, u16 new_size)
+{
+       struct kbase_io_access *old_buf;
+       struct kbase_io_access *new_buf;
+       unsigned long flags;
+
+       if (!new_size)
+               goto out_err; /* The new size must not be 0 */
+
+       new_buf = vmalloc(new_size * sizeof(*h->buf));
+       if (!new_buf)
+               goto out_err;
+
+       spin_lock_irqsave(&h->lock, flags);
+
+       old_buf = h->buf;
+
+       /* Note: we won't bother with copying the old data over. The dumping
+        * logic wouldn't work properly as it relies on 'count' both as a
+        * counter and as an index to the buffer which would have changed with
+        * the new array. This is a corner case that we don't need to support.
+        */
+       h->count = 0;
+       h->size = new_size;
+       h->buf = new_buf;
+
+       spin_unlock_irqrestore(&h->lock, flags);
+
+       vfree(old_buf);
+
+       return 0;
+
+out_err:
+       return -1;
+}
+
+
+int kbase_io_history_init(struct kbase_io_history *h, u16 n)
+{
+       h->enabled = false;
+       spin_lock_init(&h->lock);
+       h->count = 0;
+       h->size = 0;
+       h->buf = NULL;
+       if (kbase_io_history_resize(h, n))
+               return -1;
+
+       return 0;
+}
+
+
+void kbase_io_history_term(struct kbase_io_history *h)
+{
+       vfree(h->buf);
+       h->buf = NULL;
+}
+
+
+/* kbase_io_history_add - add new entry to the register access history
+ *
+ * @h: Pointer to the history data structure
+ * @addr: Register address
+ * @value: The value that is either read from or written to the register
+ * @write: 1 if it's a register write, 0 if it's a read
+ */
+static void kbase_io_history_add(struct kbase_io_history *h,
+               void __iomem const *addr, u32 value, u8 write)
+{
+       struct kbase_io_access *io;
+       unsigned long flags;
+
+       spin_lock_irqsave(&h->lock, flags);
+
+       io = &h->buf[h->count % h->size];
+       io->addr = (uintptr_t)addr | write;
+       io->value = value;
+       ++h->count;
+       /* If count overflows, move the index by the buffer size so the entire
+        * buffer will still be dumped later */
+       if (unlikely(!h->count))
+               h->count = h->size;
+
+       spin_unlock_irqrestore(&h->lock, flags);
+}
+
+
+void kbase_io_history_dump(struct kbase_device *kbdev)
+{
+       struct kbase_io_history *const h = &kbdev->io_history;
+       u16 i;
+       size_t iters;
+       unsigned long flags;
+
+       if (!unlikely(h->enabled))
+               return;
+
+       spin_lock_irqsave(&h->lock, flags);
+
+       dev_err(kbdev->dev, "Register IO History:");
+       iters = (h->size > h->count) ? h->count : h->size;
+       dev_err(kbdev->dev, "Last %zu register accesses of %zu total:\n", iters,
+                       h->count);
+       for (i = 0; i < iters; ++i) {
+               struct kbase_io_access *io =
+                       &h->buf[(h->count - iters + i) % h->size];
+               char const access = (io->addr & 1) ? 'w' : 'r';
+
+               dev_err(kbdev->dev, "%6i: %c: reg 0x%p val %08x\n", i, access,
+                               (void *)(io->addr & ~0x1), io->value);
+       }
+
+       spin_unlock_irqrestore(&h->lock, flags);
+}
+
+
+#endif /* CONFIG_DEBUG_FS */
+
+
 void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value,
                                                struct kbase_context *kctx)
 {
        KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
        KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID);
        KBASE_DEBUG_ASSERT(kbdev->dev != NULL);
-       dev_dbg(kbdev->dev, "w: reg %04x val %08x", offset, value);
 
        writel(value, kbdev->reg + offset);
 
+#ifdef CONFIG_DEBUG_FS
+       if (unlikely(kbdev->io_history.enabled))
+               kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset,
+                               value, 1);
+#endif /* CONFIG_DEBUG_FS */
+       dev_dbg(kbdev->dev, "w: reg %04x val %08x", offset, value);
+
        if (kctx && kctx->jctx.tb)
                kbase_device_trace_register_access(kctx, REG_WRITE, offset,
                                                                        value);
@@ -53,7 +182,13 @@ u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset,
 
        val = readl(kbdev->reg + offset);
 
+#ifdef CONFIG_DEBUG_FS
+       if (unlikely(kbdev->io_history.enabled))
+               kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset,
+                               val, 0);
+#endif /* CONFIG_DEBUG_FS */
        dev_dbg(kbdev->dev, "r: reg %04x val %08x", offset, val);
+
        if (kctx && kctx->jctx.tb)
                kbase_device_trace_register_access(kctx, REG_READ, offset, val);
        return val;
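
The hunk above adds a register I/O history: each MMIO access is stored at index
count % size, the low bit of the recorded address marks write vs. read, and
kbase_io_history_dump() replays the newest min(count, size) entries. The sketch
below is a hypothetical user-space reduction of that scheme (io_history,
history_add and history_dump are illustrative stand-ins, with calloc in place of
vmalloc and no locking), not driver code:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct io_access {
	uintptr_t addr;		/* low bit doubles as the read(0)/write(1) flag */
	uint32_t value;
};

struct io_history {
	struct io_access *buf;
	uint32_t count;		/* total accesses recorded; may wrap */
	uint16_t size;		/* ring capacity */
};

static void history_add(struct io_history *h, uintptr_t addr,
			uint32_t value, int write)
{
	struct io_access *io = &h->buf[h->count % h->size];

	io->addr = addr | (write ? 1 : 0);
	io->value = value;
	if (!++h->count)	/* counter wrapped to 0: re-seed with size */
		h->count = h->size;
}

static void history_dump(const struct io_history *h)
{
	uint32_t iters = (h->size > h->count) ? h->count : h->size;
	uint32_t i;

	printf("Last %u register accesses of %u total:\n",
	       (unsigned)iters, (unsigned)h->count);
	for (i = 0; i < iters; ++i) {
		const struct io_access *io =
			&h->buf[(h->count - iters + i) % h->size];

		printf("%6u: %c: reg 0x%lx val %08x\n", (unsigned)i,
		       (io->addr & 1) ? 'w' : 'r',
		       (unsigned long)(io->addr & ~(uintptr_t)1), io->value);
	}
}

int main(void)
{
	struct io_history h = { .buf = NULL, .count = 0, .size = 4 };
	uint32_t i;

	h.buf = calloc(h.size, sizeof(*h.buf));
	if (!h.buf)
		return 1;
	for (i = 0; i < 10; ++i)	/* record 10 accesses into a 4-entry ring */
		history_add(&h, 0x1000 + 4 * i, i, i & 1);
	history_dump(&h);		/* prints only accesses 6..9 */
	free(h.buf);
	return 0;
}

Re-seeding a wrapped counter with the buffer size, as kbase_io_history_add()
does, keeps the dump arithmetic valid without tracking a separate wrap flag.
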
index 3f06a10..7ad309e 100644
@@ -45,11 +45,11 @@ static void kbasep_instr_hwcnt_cacheclean(struct kbase_device *kbdev)
                                        KBASE_INSTR_STATE_REQUEST_CLEAN);
 
        /* Enable interrupt */
-       spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags);
+       spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
        irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
        kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
                                irq_mask | CLEAN_CACHES_COMPLETED, NULL);
-       spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags);
+       spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
 
        /* clean&invalidate the caches so we're sure the mmu tables for the dump
         * buffer is valid */
@@ -96,11 +96,11 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
        }
 
        /* Enable interrupt */
-       spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags);
+       spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
        irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
        kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask |
                                                PRFCNT_SAMPLE_COMPLETED, NULL);
-       spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags);
+       spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
 
        /* In use, this context is the owner */
        kbdev->hwcnt.kctx = kctx;
@@ -185,7 +185,9 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
        dev_dbg(kbdev->dev, "HW counters dumping set-up for context %p", kctx);
        return err;
  out_unrequest_cores:
+       spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
        kbase_pm_unrequest_cores(kbdev, true, shader_cores_needed);
+       spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
  out_err:
        return err;
 }
@@ -226,11 +228,10 @@ int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx)
        kbdev->hwcnt.backend.triggered = 0;
 
        /* Disable interrupt */
-       spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags);
+       spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
        irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
        kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
                                irq_mask & ~PRFCNT_SAMPLE_COMPLETED, NULL);
-       spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags);
 
        /* Disable the counters */
        kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0, kctx);
@@ -243,10 +244,11 @@ int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx)
        kbase_pm_unrequest_cores(kbdev, true,
                kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER));
 
-       spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
-
        kbase_pm_release_l2_caches(kbdev);
 
+       spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
+       spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
        dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p",
                                                                        kctx);
 
@@ -391,12 +393,12 @@ void kbase_clean_caches_done(struct kbase_device *kbdev)
 
                spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
                /* Disable interrupt */
-               spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags);
+               spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
                irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
                                                                        NULL);
                kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
                                irq_mask & ~CLEAN_CACHES_COMPLETED, NULL);
-               spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags);
+               spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
 
                /* Wakeup... */
                if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) {
index b891b12..8416b80 100644
@@ -148,6 +148,8 @@ static irqreturn_t kbase_gpu_irq_handler(int irq, void *data)
        return IRQ_HANDLED;
 }
 
+KBASE_EXPORT_TEST_API(kbase_gpu_irq_handler);
+
 static irq_handler_t kbase_handler_table[] = {
        [JOB_IRQ_TAG] = kbase_job_irq_handler,
        [MMU_IRQ_TAG] = kbase_mmu_irq_handler,
index f216788..202dcfa 100644
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -52,8 +52,7 @@ static void assign_and_activate_kctx_addr_space(struct kbase_device *kbdev,
 
        lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
        lockdep_assert_held(&js_devdata->runpool_mutex);
-       lockdep_assert_held(&current_as->transaction_mutex);
-       lockdep_assert_held(&js_devdata->runpool_irq.lock);
+       lockdep_assert_held(&kbdev->hwaccess_lock);
 
        js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
 
@@ -142,8 +141,7 @@ void kbase_backend_release_ctx_irq(struct kbase_device *kbdev,
                return;
        }
 
-       lockdep_assert_held(&kbdev->as[as_nr].transaction_mutex);
-       lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+       lockdep_assert_held(&kbdev->hwaccess_lock);
 
        js_per_as_data = &kbdev->js_data.runpool_irq.per_as_data[kctx->as_nr];
        if (js_per_as_data->as_busy_refcount != 0) {
@@ -219,8 +217,7 @@ static bool check_is_runpool_full(struct kbase_device *kbdev,
        is_runpool_full = (bool) (js_devdata->nr_all_contexts_running >=
                                                kbdev->nr_hw_address_spaces);
 
-       if (kctx != NULL && (kctx->jctx.sched_info.ctx.flags &
-                                       KBASE_CTX_FLAG_SUBMIT_DISABLED) == 0) {
+       if (kctx && !kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) {
                lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
                /* Contexts that submit might use less of the address spaces
                 * available, due to HW workarounds.  In which case, the runpool
@@ -267,7 +264,7 @@ int kbase_backend_find_free_address_space(struct kbase_device *kbdev,
                return i;
        }
 
-       spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+       spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 
        /* No address space currently free, see if we can release one */
        for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
@@ -281,16 +278,14 @@ int kbase_backend_find_free_address_space(struct kbase_device *kbdev,
 
                /* Don't release privileged or active contexts, or contexts with
                 * jobs running */
-               if (as_kctx && !(as_kctx->jctx.sched_info.ctx.flags &
-                                               KBASE_CTX_FLAG_PRIVILEGED) &&
+               if (as_kctx && !kbase_ctx_flag(as_kctx, KCTX_PRIVILEGED) &&
                        js_per_as_data->as_busy_refcount == 0) {
                        if (!kbasep_js_runpool_retain_ctx_nolock(kbdev,
                                                                as_kctx)) {
                                WARN(1, "Failed to retain active context\n");
 
-                               spin_unlock_irqrestore(
-                                               &js_devdata->runpool_irq.lock,
-                                                                       flags);
+                               spin_unlock_irqrestore(&kbdev->hwaccess_lock,
+                                               flags);
                                mutex_unlock(&js_devdata->runpool_mutex);
                                mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
 
@@ -303,8 +298,7 @@ int kbase_backend_find_free_address_space(struct kbase_device *kbdev,
                         * context we're about to release without violating lock
                         * ordering
                         */
-                       spin_unlock_irqrestore(&js_devdata->runpool_irq.lock,
-                                                                       flags);
+                       spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
                        mutex_unlock(&js_devdata->runpool_mutex);
                        mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
 
@@ -315,7 +309,7 @@ int kbase_backend_find_free_address_space(struct kbase_device *kbdev,
 
                        kbasep_js_runpool_release_ctx_nolock(kbdev, as_kctx);
 
-                       if (!as_js_kctx_info->ctx.is_scheduled) {
+                       if (!kbase_ctx_flag(as_kctx, KCTX_SCHEDULED)) {
                                kbasep_js_runpool_requeue_or_kill_ctx(kbdev,
                                                                as_kctx,
                                                                true);
@@ -336,11 +330,11 @@ int kbase_backend_find_free_address_space(struct kbase_device *kbdev,
 
                        mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
                        mutex_lock(&js_devdata->runpool_mutex);
-                       spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+                       spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
                }
        }
 
-       spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+       spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 
        mutex_unlock(&js_devdata->runpool_mutex);
        mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
@@ -369,12 +363,11 @@ bool kbase_backend_use_ctx(struct kbase_device *kbdev,
        new_address_space = &kbdev->as[as_nr];
 
        lockdep_assert_held(&js_devdata->runpool_mutex);
-       lockdep_assert_held(&new_address_space->transaction_mutex);
-       lockdep_assert_held(&js_devdata->runpool_irq.lock);
+       lockdep_assert_held(&kbdev->hwaccess_lock);
 
        assign_and_activate_kctx_addr_space(kbdev, kctx, new_address_space);
 
-       if ((js_kctx_info->ctx.flags & KBASE_CTX_FLAG_PRIVILEGED) != 0) {
+       if (kbase_ctx_flag(kctx, KCTX_PRIVILEGED)) {
                /* We need to retain it to keep the corresponding address space
                 */
                kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
index 83d4778..08a7400 100644
@@ -73,8 +73,7 @@ struct slot_rb {
  * @reset_timer:               Timeout for soft-stops before the reset
  * @timeouts_updated:           Have timeout values just been updated?
  *
- * The kbasep_js_device_data::runpool_irq::lock (a spinlock) must be held when
- * accessing this structure
+ * The hwaccess_lock (a spinlock) must be held when accessing this structure
  */
 struct kbase_backend_data {
        struct slot_rb slot_rb[BASE_JM_MAX_NR_SLOTS];
index 00900a9..668258b 100644
@@ -221,19 +221,40 @@ static void kbasep_job_slot_update_head_start_timestamp(
 /**
  * kbasep_trace_tl_nret_atom_lpu - Call nret_atom_lpu timeline tracepoint
  * @kbdev: kbase device
- * @i: job slot
+ * @js: job slot
  *
  * Get kbase atom by calling kbase_gpu_inspect for given job slot.
  * Then use obtained katom and name of slot associated with the given
  * job slot number in tracepoint call to the instrumentation module
  * informing that given atom is no longer executed on given lpu (job slot).
  */
-static void kbasep_trace_tl_nret_atom_lpu(struct kbase_device *kbdev, int i)
+static void kbasep_trace_tl_nret_atom_lpu(struct kbase_device *kbdev, int js)
 {
-       struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, i, 0);
+       int i;
+       for (i = 0;
+            i < kbase_backend_nr_atoms_submitted(kbdev, js);
+            i++) {
+               struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
+
+               kbase_tlstream_tl_nret_atom_lpu(katom,
+                       &kbdev->gpu_props.props.raw_props.js_features[js]);
+       }
+}
 
-       kbase_tlstream_tl_nret_atom_lpu(katom,
-               &kbdev->gpu_props.props.raw_props.js_features[i]);
+/**
+ * kbasep_trace_tl_event_lpu_softstop - Call event_lpu_softstop timeline
+ * tracepoint
+ * @kbdev: kbase device
+ * @js: job slot
+ *
+ * Make a tracepoint call to the instrumentation module informing that
+ * softstop happened on given lpu (job slot).
+ */
+static void kbasep_trace_tl_event_lpu_softstop(struct kbase_device *kbdev,
+                                       int js)
+{
+       kbase_tlstream_tl_event_lpu_softstop(
+               &kbdev->gpu_props.props.raw_props.js_features[js]);
 }
 
 void kbase_job_done(struct kbase_device *kbdev, u32 done)
@@ -262,7 +283,7 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done)
                                                irq_throttle_cycles, NULL);
        }
 
-       spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+       spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 
        while (done) {
                u32 failed = done >> 16;
@@ -297,7 +318,8 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done)
                                                                NULL, 0);
 #endif
 
-                                       kbase_tlstream_aux_job_softstop(i);
+                                       kbasep_trace_tl_event_lpu_softstop(
+                                               kbdev, i);
 
                                        kbasep_trace_tl_nret_atom_lpu(
                                                kbdev, i);
@@ -456,7 +478,7 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done)
                                                                end_timestamp);
        }
 
-       spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+       spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 #if KBASE_GPU_RESET_EN
        if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
                                                KBASE_RESET_GPU_COMMITTED) {
@@ -539,7 +561,7 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
                target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED;
 
                /* Mark the point where we issue the soft-stop command */
-               kbase_tlstream_aux_issue_job_softstop(target_katom);
+               kbase_tlstream_tl_event_atom_softstop_issue(target_katom);
 
                if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) {
                        int i;
@@ -725,7 +747,7 @@ void kbase_backend_jm_kill_jobs_from_kctx(struct kbase_context *kctx)
 
        /* Cancel any remaining running jobs for this kctx  */
        mutex_lock(&kctx->jctx.lock);
-       spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+       spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 
        /* Invalidate all jobs in context, to prevent re-submitting */
        for (i = 0; i < BASE_JD_ATOM_COUNT; i++) {
@@ -737,7 +759,7 @@ void kbase_backend_jm_kill_jobs_from_kctx(struct kbase_context *kctx)
        for (i = 0; i < kbdev->gpu_props.num_job_slots; i++)
                kbase_job_slot_hardstop(kctx, i, NULL);
 
-       spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+       spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
        mutex_unlock(&kctx->jctx.lock);
 }
 
@@ -748,12 +770,13 @@ void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
        int js = target_katom->slot_nr;
        int priority = target_katom->sched_priority;
        int i;
+       bool stop_sent = false;
 
        KBASE_DEBUG_ASSERT(kctx != NULL);
        kbdev = kctx->kbdev;
        KBASE_DEBUG_ASSERT(kbdev != NULL);
 
-       lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+       lockdep_assert_held(&kbdev->hwaccess_lock);
 
        for (i = 0; i < kbase_backend_nr_atoms_on_slot(kbdev, js); i++) {
                struct kbase_jd_atom *katom;
@@ -765,8 +788,14 @@ void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
                if (katom->kctx != kctx)
                        continue;
 
-               if (katom->sched_priority > priority)
+               if (katom->sched_priority > priority) {
+                       if (!stop_sent)
+                               kbase_tlstream_tl_attrib_atom_priority_change(
+                                               target_katom);
+
                        kbase_job_slot_softstop(kbdev, js, katom);
+                       stop_sent = true;
+               }
        }
 }
 
@@ -835,7 +864,7 @@ void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx)
         * policy queue either */
        wait_event(kctx->jctx.zero_jobs_wait, kctx->jctx.job_nr == 0);
        wait_event(kctx->jctx.sched_info.ctx.is_scheduled_wait,
-                       kctx->jctx.sched_info.ctx.is_scheduled == false);
+                  !kbase_ctx_flag(kctx, KCTX_SCHEDULED));
 
        spin_lock_irqsave(&reset_data.lock, flags);
        if (reset_data.stage == 1) {
@@ -945,7 +974,7 @@ static bool kbasep_check_for_afbc_on_slot(struct kbase_device *kbdev,
        bool ret = false;
        int i;
 
-       lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+       lockdep_assert_held(&kbdev->hwaccess_lock);
 
        /* When we have an atom the decision can be made straight away. */
        if (target_katom)
@@ -1034,7 +1063,7 @@ void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
 #if KBASE_GPU_RESET_EN
        /* We make the check for AFBC before evicting/stopping atoms.  Note
         * that no other thread can modify the slots whilst we have the
-        * runpool_irq lock. */
+        * hwaccess_lock. */
        int needs_workaround_for_afbc =
                        kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3542)
                        && kbasep_check_for_afbc_on_slot(kbdev, kctx, js,
@@ -1130,6 +1159,8 @@ static void kbase_debug_dump_registers(struct kbase_device *kbdev)
 {
        int i;
 
+       kbase_io_history_dump(kbdev);
+
        dev_err(kbdev->dev, "Register state:");
        dev_err(kbdev->dev, "  GPU_IRQ_RAWSTAT=0x%08x GPU_STATUS=0x%08x",
                kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL),
@@ -1162,13 +1193,14 @@ static void kbase_debug_dump_registers(struct kbase_device *kbdev)
 
 static void kbasep_reset_timeout_worker(struct work_struct *data)
 {
-       unsigned long flags, mmu_flags;
+       unsigned long flags;
        struct kbase_device *kbdev;
        int i;
        ktime_t end_timestamp = ktime_get();
        struct kbasep_js_device_data *js_devdata;
        bool try_schedule = false;
        bool silent = false;
+       u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS;
 
        KBASE_DEBUG_ASSERT(data);
 
@@ -1206,17 +1238,19 @@ static void kbasep_reset_timeout_worker(struct work_struct *data)
 
        KBASE_DEBUG_ASSERT(kbdev->irq_reset_flush == false);
 
-       spin_lock_irqsave(&kbdev->mmu_mask_change, mmu_flags);
+       spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+       spin_lock(&kbdev->hwaccess_lock);
+       spin_lock(&kbdev->mmu_mask_change);
        /* We're about to flush out the IRQs and their bottom half's */
        kbdev->irq_reset_flush = true;
 
        /* Disable IRQ to avoid IRQ handlers to kick in after releasing the
         * spinlock; this also clears any outstanding interrupts */
-       spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
-       kbase_pm_disable_interrupts(kbdev);
-       spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+       kbase_pm_disable_interrupts_nolock(kbdev);
 
-       spin_unlock_irqrestore(&kbdev->mmu_mask_change, mmu_flags);
+       spin_unlock(&kbdev->mmu_mask_change);
+       spin_unlock(&kbdev->hwaccess_lock);
+       spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
 
        /* Ensure that any IRQ handlers have finished
         * Must be done without any locks IRQ handlers will take */
@@ -1228,6 +1262,16 @@ static void kbasep_reset_timeout_worker(struct work_struct *data)
        /* The flush has completed so reset the active indicator */
        kbdev->irq_reset_flush = false;
 
+       if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TMIX_8463)) {
+               /* Ensure that L2 is not transitioning when we send the reset
+                * command */
+               while (--max_loops && kbase_pm_get_trans_cores(kbdev,
+                               KBASE_PM_CORE_L2))
+                       ;
+
+               WARN(!max_loops, "L2 power transition timed out while trying to reset\n");
+       }
+
        mutex_lock(&kbdev->pm.lock);
        /* We hold the pm lock, so there ought to be a current policy */
        KBASE_DEBUG_ASSERT(kbdev->pm.backend.pm_current_policy);
@@ -1250,21 +1294,19 @@ static void kbasep_reset_timeout_worker(struct work_struct *data)
        kbase_pm_init_hw(kbdev, 0);
 
        /* Complete any jobs that were still on the GPU */
-       spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+       spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
        kbase_backend_reset(kbdev, &end_timestamp);
        kbase_pm_metrics_update(kbdev, NULL);
-       spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+       spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 
        mutex_unlock(&kbdev->pm.lock);
 
        mutex_lock(&js_devdata->runpool_mutex);
 
+       mutex_lock(&kbdev->mmu_hw_mutex);
        /* Reprogram the GPU's MMU */
        for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
-               struct kbase_as *as = &kbdev->as[i];
-
-               mutex_lock(&as->transaction_mutex);
-               spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+               spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 
                if (js_devdata->runpool_irq.per_as_data[i].kctx)
                        kbase_mmu_update(
@@ -1272,9 +1314,9 @@ static void kbasep_reset_timeout_worker(struct work_struct *data)
                else
                        kbase_mmu_disable_as(kbdev, i);
 
-               spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
-               mutex_unlock(&as->transaction_mutex);
+               spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
        }
+       mutex_unlock(&kbdev->mmu_hw_mutex);
 
        kbase_pm_enable_interrupts(kbdev);
 
@@ -1382,9 +1424,9 @@ static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev)
        struct kbasep_js_device_data *js_devdata;
 
        js_devdata = &kbdev->js_data;
-       spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+       spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
        kbasep_try_reset_gpu_early_locked(kbdev);
-       spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+       spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 }
 
 /**
@@ -1429,9 +1471,9 @@ bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev)
        struct kbasep_js_device_data *js_devdata;
 
        js_devdata = &kbdev->js_data;
-       spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+       spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
        ret = kbase_prepare_to_reset_gpu_locked(kbdev);
-       spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+       spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 
        return ret;
 }
index 8f1e561..89b1288 100644
@@ -40,7 +40,7 @@
  * calling this.
  *
  * The following locking conditions are made on the caller:
- * - it must hold the kbasep_js_device_data::runpoool_irq::lock
+ * - it must hold the hwaccess_lock
  */
 void kbase_job_submit_nolock(struct kbase_device *kbdev,
                                        struct kbase_jd_atom *katom, int js);
@@ -74,7 +74,7 @@ static inline char *kbasep_make_job_slot_string(int js, char *js_string)
  * calling this.
  *
  * The following locking conditions are made on the caller:
- * - it must hold the kbasep_js_device_data::runpoool_irq::lock
+ * - it must hold the hwaccess_lock
  */
 void kbase_job_hw_submit(struct kbase_device *kbdev,
                                struct kbase_jd_atom *katom,
@@ -91,7 +91,7 @@ void kbase_job_hw_submit(struct kbase_device *kbdev,
  * @target_katom:      Atom to stop
  *
  * The following locking conditions are made on the caller:
- * - it must hold the kbasep_js_device_data::runpool_irq::lock
+ * - it must hold the hwaccess_lock
  */
 void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
                                        int js,
index da7c4df..d7b4d3f 100644
@@ -26,6 +26,7 @@
 #include <mali_kbase_js.h>
 #include <mali_kbase_tlstream.h>
 #include <mali_kbase_10969_workaround.h>
+#include <backend/gpu/mali_kbase_cache_policy_backend.h>
 #include <backend/gpu/mali_kbase_device_internal.h>
 #include <backend/gpu/mali_kbase_jm_internal.h>
 #include <backend/gpu/mali_kbase_js_affinity.h>
@@ -56,7 +57,7 @@ static void kbase_gpu_enqueue_atom(struct kbase_device *kbdev,
 
        WARN_ON(SLOT_RB_ENTRIES(rb) >= SLOT_RB_SIZE);
 
-       lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+       lockdep_assert_held(&kbdev->hwaccess_lock);
 
        rb->entries[rb->write_idx & SLOT_RB_MASK].katom = katom;
        rb->write_idx++;
@@ -88,7 +89,7 @@ static struct kbase_jd_atom *kbase_gpu_dequeue_atom(struct kbase_device *kbdev,
                return NULL;
        }
 
-       lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+       lockdep_assert_held(&kbdev->hwaccess_lock);
 
        katom = rb->entries[rb->read_idx & SLOT_RB_MASK].katom;
 
@@ -108,7 +109,7 @@ struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js,
 {
        struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js];
 
-       lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+       lockdep_assert_held(&kbdev->hwaccess_lock);
 
        if ((SLOT_RB_ENTRIES(rb) - 1) < idx)
                return NULL; /* idx out of range */
@@ -146,7 +147,7 @@ static bool kbase_gpu_atoms_submitted(struct kbase_device *kbdev, int js)
 {
        int i;
 
-       lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+       lockdep_assert_held(&kbdev->hwaccess_lock);
 
        for (i = 0; i < SLOT_RB_SIZE; i++) {
                struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
@@ -173,7 +174,7 @@ static bool kbase_gpu_atoms_submitted_any(struct kbase_device *kbdev)
        int js;
        int i;
 
-       lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+       lockdep_assert_held(&kbdev->hwaccess_lock);
 
        for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
                for (i = 0; i < SLOT_RB_SIZE; i++) {
@@ -191,7 +192,7 @@ int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js)
        int nr = 0;
        int i;
 
-       lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+       lockdep_assert_held(&kbdev->hwaccess_lock);
 
        for (i = 0; i < SLOT_RB_SIZE; i++) {
                struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
@@ -209,7 +210,7 @@ int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js)
        int nr = 0;
        int i;
 
-       lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+       lockdep_assert_held(&kbdev->hwaccess_lock);
 
        for (i = 0; i < SLOT_RB_SIZE; i++) {
                if (kbase_gpu_inspect(kbdev, js, i))
@@ -225,7 +226,7 @@ static int kbase_gpu_nr_atoms_on_slot_min(struct kbase_device *kbdev, int js,
        int nr = 0;
        int i;
 
-       lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+       lockdep_assert_held(&kbdev->hwaccess_lock);
 
        for (i = 0; i < SLOT_RB_SIZE; i++) {
                struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
@@ -237,6 +238,56 @@ static int kbase_gpu_nr_atoms_on_slot_min(struct kbase_device *kbdev, int js,
        return nr;
 }
 
+/**
+ * check_secure_atom - Check if the given atom is in the given secure state and
+ *                     has a ringbuffer state of at least
+ *                     KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION
+ * @katom:  Atom pointer
+ * @secure: Desired secure state
+ *
+ * Return: true if atom is in the given state, false otherwise
+ */
+static bool check_secure_atom(struct kbase_jd_atom *katom, bool secure)
+{
+       if (katom->gpu_rb_state >=
+                       KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION &&
+                       ((kbase_jd_katom_is_protected(katom) && secure) ||
+                       (!kbase_jd_katom_is_protected(katom) && !secure)))
+               return true;
+
+       return false;
+}
+
+/**
+ * kbase_gpu_check_secure_atoms - Check if there are any atoms in the given
+ *                                secure state in the ringbuffers of at least
+ *                                state
+ *                                KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE
+ * @kbdev:  Device pointer
+ * @secure: Desired secure state
+ *
+ * Return: true if any atoms are in the given state, false otherwise
+ */
+static bool kbase_gpu_check_secure_atoms(struct kbase_device *kbdev,
+               bool secure)
+{
+       int js, i;
+
+       for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+               for (i = 0; i < SLOT_RB_SIZE; i++) {
+                       struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev,
+                                       js, i);
+
+                       if (katom) {
+                               if (check_secure_atom(katom, secure))
+                                       return true;
+                       }
+               }
+       }
+
+       return false;
+}
+
 int kbase_backend_slot_free(struct kbase_device *kbdev, int js)
 {
        if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) !=
@@ -439,7 +490,7 @@ static bool kbasep_js_job_check_ref_cores(struct kbase_device *kbdev,
                                                recently_chosen_affinity);
 
                        /* Note: this is where the caller must've taken the
-                        * runpool_irq.lock */
+                        * hwaccess_lock */
 
                        /* Check for affinity violations - if there are any,
                         * then we just ask the caller to requeue and try again
@@ -586,15 +637,12 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev,
                kbase_pm_metrics_update(kbdev, end_timestamp);
 
                if (katom->core_req & BASE_JD_REQ_PERMON)
-                       kbase_pm_release_gpu_cycle_counter(kbdev);
+                       kbase_pm_release_gpu_cycle_counter_nolock(kbdev);
                /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
 
        case KBASE_ATOM_GPU_RB_READY:
                /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
 
-       case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_ENTRY:
-               /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
-
        case KBASE_ATOM_GPU_RB_WAITING_AFFINITY:
                kbase_js_affinity_release_slot_cores(kbdev, katom->slot_nr,
                                                        katom->affinity);
@@ -603,7 +651,21 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev,
        case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE:
                break;
 
-       case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_EXIT:
+       case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION:
+               if (katom->protected_state.enter !=
+                               KBASE_ATOM_ENTER_PROTECTED_CHECK ||
+                               katom->protected_state.exit !=
+                               KBASE_ATOM_EXIT_PROTECTED_CHECK)
+                       kbdev->protected_mode_transition = false;
+
+               if (kbase_jd_katom_is_protected(katom) &&
+                               (katom->protected_state.enter ==
+                               KBASE_ATOM_ENTER_PROTECTED_IDLE_L2))
+                       kbase_vinstr_resume(kbdev->vinstr_ctx);
+
+               /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+       case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV:
                /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
 
        case KBASE_ATOM_GPU_RB_WAITING_BLOCKED:
@@ -666,11 +728,19 @@ static int kbase_gpu_protected_mode_enter(struct kbase_device *kbdev)
 {
        int err = -EINVAL;
 
-       lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+       lockdep_assert_held(&kbdev->hwaccess_lock);
 
        WARN_ONCE(!kbdev->protected_ops,
                        "Cannot enter protected mode: protected callbacks not specified.\n");
 
+       /*
+        * When entering into protected mode, we must ensure that the
+        * GPU is not operating in coherent mode as well. This is to
+        * ensure that no protected memory can be leaked.
+        */
+       if (kbdev->system_coherency == COHERENCY_ACE)
+               kbase_cache_set_coherency_mode(kbdev, COHERENCY_ACE_LITE);
+
        if (kbdev->protected_ops) {
                /* Switch GPU to protected mode */
                err = kbdev->protected_ops->protected_mode_enter(kbdev);
@@ -687,7 +757,7 @@ static int kbase_gpu_protected_mode_enter(struct kbase_device *kbdev)
 
 static int kbase_gpu_protected_mode_reset(struct kbase_device *kbdev)
 {
-       lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+       lockdep_assert_held(&kbdev->hwaccess_lock);
 
        WARN_ONCE(!kbdev->protected_ops,
                        "Cannot exit protected mode: protected callbacks not specified.\n");
@@ -695,53 +765,146 @@ static int kbase_gpu_protected_mode_reset(struct kbase_device *kbdev)
        if (!kbdev->protected_ops)
                return -EINVAL;
 
-       kbdev->protected_mode_transition = true;
        kbase_reset_gpu_silent(kbdev);
 
        return 0;
 }
 
-static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev,
+static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev,
                struct kbase_jd_atom **katom, int idx, int js)
 {
        int err = 0;
 
-       switch (katom[idx]->exit_protected_state) {
-       case KBASE_ATOM_EXIT_PROTECTED_CHECK:
+       switch (katom[idx]->protected_state.enter) {
+       case KBASE_ATOM_ENTER_PROTECTED_CHECK:
+               /* The checks in KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV
+                * should ensure that we are not already transitiong, and that
+                * there are no atoms currently on the GPU. */
+               WARN_ON(kbdev->protected_mode_transition);
+               WARN_ON(kbase_gpu_atoms_submitted_any(kbdev));
+
+               kbdev->protected_mode_transition = true;
+               katom[idx]->protected_state.enter =
+                       KBASE_ATOM_ENTER_PROTECTED_VINSTR;
+
+               /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+       case KBASE_ATOM_ENTER_PROTECTED_VINSTR:
+               if (kbase_vinstr_try_suspend(kbdev->vinstr_ctx) < 0) {
+                       /*
+                        * We can't switch now because
+                        * the vinstr core state switch
+                        * is not done yet.
+                        */
+                       return -EAGAIN;
+               }
+
+               /* Once reaching this point GPU must be
+                * switched to protected mode or vinstr
+                * re-enabled. */
+
                /*
-                * If the atom ahead of this one hasn't got to being
-                * submitted yet then bail.
+                * Not in correct mode, begin protected mode switch.
+                * Entering protected mode requires us to power down the L2,
+                * and drop out of fully coherent mode.
                 */
-               if (idx == 1 &&
-                       (katom[0]->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED &&
-                       katom[0]->gpu_rb_state != KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB))
-                       return -EAGAIN;
+               katom[idx]->protected_state.enter =
+                       KBASE_ATOM_ENTER_PROTECTED_IDLE_L2;
+
+               kbase_pm_update_cores_state_nolock(kbdev);
+
+               /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+       case KBASE_ATOM_ENTER_PROTECTED_IDLE_L2:
+               /* Avoid unnecessary waiting on non-ACE platforms. */
+               if (kbdev->current_gpu_coherency_mode == COHERENCY_ACE) {
+                       if (kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2) ||
+                               kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_L2)) {
+                               /*
+                               * The L2 is still powered, wait for all the users to
+                               * finish with it before doing the actual reset.
+                               */
+                               return -EAGAIN;
+                       }
+               }
 
-               /* If we're not exiting protected mode then we're done here. */
-               if (!(kbase_gpu_in_protected_mode(kbdev) &&
-                               !kbase_jd_katom_is_protected(katom[idx])))
-                       return 0;
+               katom[idx]->protected_state.enter =
+                       KBASE_ATOM_ENTER_PROTECTED_FINISHED;
+
+               /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+       case KBASE_ATOM_ENTER_PROTECTED_FINISHED:
+
+               /* No jobs running, so we can switch GPU mode right now. */
+               err = kbase_gpu_protected_mode_enter(kbdev);
 
                /*
-                * If there is a transition in progress, or work still
-                * on the GPU try again later.
+                * Regardless of result, we are no longer transitioning
+                * the GPU.
                 */
-               if (kbdev->protected_mode_transition ||
-                               kbase_gpu_atoms_submitted_any(kbdev))
-                       return -EAGAIN;
+               kbdev->protected_mode_transition = false;
+
+               if (err) {
+                       /*
+                        * Failed to switch into protected mode, resume
+                        * vinstr core and fail atom.
+                        */
+                       kbase_vinstr_resume(kbdev->vinstr_ctx);
+                       katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID;
+                       kbase_gpu_mark_atom_for_return(kbdev, katom[idx]);
+                       /* Only return if head atom or previous atom
+                        * already removed - as atoms must be returned
+                        * in order. */
+                       if (idx == 0 || katom[0]->gpu_rb_state ==
+                                       KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+                               kbase_gpu_dequeue_atom(kbdev, js, NULL);
+                               kbase_jm_return_atom_to_js(kbdev, katom[idx]);
+                       }
+                       return -EINVAL;
+               }
+
+               /* Protected mode sanity checks. */
+               KBASE_DEBUG_ASSERT_MSG(
+                       kbase_jd_katom_is_protected(katom[idx]) ==
+                       kbase_gpu_in_protected_mode(kbdev),
+                       "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)",
+                       kbase_jd_katom_is_protected(katom[idx]),
+                       kbase_gpu_in_protected_mode(kbdev));
+               katom[idx]->gpu_rb_state =
+                       KBASE_ATOM_GPU_RB_READY;
+       }
+
+       return 0;
+}
+
+static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev,
+               struct kbase_jd_atom **katom, int idx, int js)
+{
+       int err = 0;
+
+
+       switch (katom[idx]->protected_state.exit) {
+       case KBASE_ATOM_EXIT_PROTECTED_CHECK:
+               /* The checks in KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV
+                * should ensure that we are not already transitiong, and that
+                * there are no atoms currently on the GPU. */
+               WARN_ON(kbdev->protected_mode_transition);
+               WARN_ON(kbase_gpu_atoms_submitted_any(kbdev));
 
                /*
                 * Exiting protected mode requires a reset, but first the L2
                 * needs to be powered down to ensure it's not active when the
                 * reset is issued.
                 */
-               katom[idx]->exit_protected_state =
+               katom[idx]->protected_state.exit =
                                KBASE_ATOM_EXIT_PROTECTED_IDLE_L2;
 
-               /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+               kbdev->protected_mode_transition = true;
+               kbase_pm_update_cores_state_nolock(kbdev);
 
+               /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
        case KBASE_ATOM_EXIT_PROTECTED_IDLE_L2:
-               if (kbase_pm_get_active_cores(kbdev, KBASE_PM_CORE_L2) ||
+               if (kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2) ||
                                kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_L2)) {
                        /*
                         * The L2 is still powered, wait for all the users to
@@ -749,7 +912,7 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev,
                         */
                        return -EAGAIN;
                }
-               katom[idx]->exit_protected_state =
+               katom[idx]->protected_state.exit =
                                KBASE_ATOM_EXIT_PROTECTED_RESET;
 
                /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
@@ -757,7 +920,10 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev,
        case KBASE_ATOM_EXIT_PROTECTED_RESET:
                /* Issue the reset to the GPU */
                err = kbase_gpu_protected_mode_reset(kbdev);
+
                if (err) {
+                       kbdev->protected_mode_transition = false;
+
                        /* Failed to exit protected mode, fail atom */
                        katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID;
                        kbase_gpu_mark_atom_for_return(kbdev, katom[idx]);
@@ -775,7 +941,7 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev,
                        return -EINVAL;
                }
 
-               katom[idx]->exit_protected_state =
+               katom[idx]->protected_state.exit =
                                KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT;
 
                /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
@@ -784,6 +950,9 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev,
                if (kbase_reset_gpu_active(kbdev))
                        return -EAGAIN;
 
+               kbdev->protected_mode_transition = false;
+               kbdev->protected_mode = false;
+
                /* protected mode sanity checks */
                KBASE_DEBUG_ASSERT_MSG(
                        kbase_jd_katom_is_protected(katom[idx]) == kbase_gpu_in_protected_mode(kbdev),
@@ -798,11 +967,11 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev,
        return 0;
 }
 
-void kbase_gpu_slot_update(struct kbase_device *kbdev)
+void kbase_backend_slot_update(struct kbase_device *kbdev)
 {
        int js;
 
-       lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+       lockdep_assert_held(&kbdev->hwaccess_lock);
 
        for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
                struct kbase_jd_atom *katom[2];
@@ -831,11 +1000,26 @@ void kbase_gpu_slot_update(struct kbase_device *kbdev)
                                        break;
 
                                katom[idx]->gpu_rb_state =
-                                       KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_EXIT;
+                               KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV;
 
                        /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
 
-                       case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_EXIT:
+                       case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV:
+                               if (kbase_gpu_check_secure_atoms(kbdev,
+                                               !kbase_jd_katom_is_protected(
+                                               katom[idx])))
+                                       break;
+
+                               if (kbdev->protected_mode_transition)
+                                       break;
+
+                               katom[idx]->gpu_rb_state =
+                                       KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION;
+
+                       /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+                       case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION:
+
                                /*
                                 * Exiting protected mode must be done before
                                 * the references on the cores are taken as
@@ -843,10 +1027,26 @@ void kbase_gpu_slot_update(struct kbase_device *kbdev)
                                 * can't happen after the references for this
                                 * atom are taken.
                                 */
-                               ret = kbase_jm_exit_protected_mode(kbdev,
-                                               katom, idx, js);
-                               if (ret)
-                                       break;
+
+                               if (!kbase_gpu_in_protected_mode(kbdev) &&
+                                       kbase_jd_katom_is_protected(katom[idx])) {
+                                       /* Atom needs to transition into protected mode. */
+                                       ret = kbase_jm_enter_protected_mode(kbdev,
+                                                       katom, idx, js);
+                                       if (ret)
+                                               break;
+                               } else if (kbase_gpu_in_protected_mode(kbdev) &&
+                                       !kbase_jd_katom_is_protected(katom[idx])) {
+                                       /* Atom needs to transition out of protected mode. */
+                                       ret = kbase_jm_exit_protected_mode(kbdev,
+                                                       katom, idx, js);
+                                       if (ret)
+                                               break;
+                               }
+                               katom[idx]->protected_state.exit =
+                                               KBASE_ATOM_EXIT_PROTECTED_CHECK;
+
+                               /* Atom needs no protected mode transition. */
 
                                katom[idx]->gpu_rb_state =
                                        KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE;
@@ -872,7 +1072,6 @@ void kbase_gpu_slot_update(struct kbase_device *kbdev)
                                        break;
                                }
 
-
                                cores_ready =
                                        kbasep_js_job_check_ref_cores(kbdev, js,
                                                                katom[idx]);
@@ -898,81 +1097,6 @@ void kbase_gpu_slot_update(struct kbase_device *kbdev)
                                if (!kbase_gpu_rmu_workaround(kbdev, js))
                                        break;
 
-                               katom[idx]->gpu_rb_state =
-                                       KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_ENTRY;
-
-                       /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
-
-                       case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_ENTRY:
-
-                               /* Only submit if head atom or previous atom
-                                * already submitted */
-                               if (idx == 1 &&
-                                       (katom[0]->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED &&
-                                       katom[0]->gpu_rb_state != KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB))
-                                       break;
-
-                               /*
-                                * If the GPU is transitioning protected mode
-                                * then bail now and we'll be called when the
-                                * new state has settled.
-                                */
-                               if (kbdev->protected_mode_transition)
-                                       break;
-
-                               if (!kbase_gpu_in_protected_mode(kbdev) && kbase_jd_katom_is_protected(katom[idx])) {
-                                       int err = 0;
-
-                                       /* Not in correct mode, take action */
-                                       if (kbase_gpu_atoms_submitted_any(kbdev)) {
-                                               /*
-                                                * We are not in the correct
-                                                * GPU mode for this job, and
-                                                * we can't switch now because
-                                                * there are jobs already
-                                                * running.
-                                                */
-                                               break;
-                                       }
-                                       if (kbase_vinstr_try_suspend(kbdev->vinstr_ctx) < 0) {
-                                               /*
-                                                * We can't switch now because
-                                                * the vinstr core state switch
-                                                * is not done yet.
-                                                */
-                                               break;
-                                       }
-                                       /* Once reaching this point GPU must be
-                                        * switched to protected mode or vinstr
-                                        * re-enabled. */
-
-                                       /* No jobs running, so we can switch GPU mode right now */
-                                       err = kbase_gpu_protected_mode_enter(kbdev);
-                                       if (err) {
-                                               /*
-                                                * Failed to switch into protected mode, resume
-                                                * vinstr core and fail atom.
-                                                */
-                                               kbase_vinstr_resume(kbdev->vinstr_ctx);
-                                               katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID;
-                                               kbase_gpu_mark_atom_for_return(kbdev, katom[idx]);
-                                               /* Only return if head atom or previous atom
-                                                * already removed - as atoms must be returned
-                                                * in order */
-                                               if (idx == 0 || katom[0]->gpu_rb_state ==
-                                                               KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
-                                                       kbase_gpu_dequeue_atom(kbdev, js, NULL);
-                                                       kbase_jm_return_atom_to_js(kbdev, katom[idx]);
-                                               }
-                                               break;
-                                       }
-                               }
-
-                               /* Protected mode sanity checks */
-                               KBASE_DEBUG_ASSERT_MSG(
-                                       kbase_jd_katom_is_protected(katom[idx]) == kbase_gpu_in_protected_mode(kbdev),
-                                       "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)",
-                                       kbase_jd_katom_is_protected(katom[idx]), kbase_gpu_in_protected_mode(kbdev));
                                katom[idx]->gpu_rb_state =
                                        KBASE_ATOM_GPU_RB_READY;
 
@@ -1037,10 +1161,9 @@ void kbase_gpu_slot_update(struct kbase_device *kbdev)
 void kbase_backend_run_atom(struct kbase_device *kbdev,
                                struct kbase_jd_atom *katom)
 {
-       lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
-
+       lockdep_assert_held(&kbdev->hwaccess_lock);
        kbase_gpu_enqueue_atom(kbdev, katom);
-       kbase_gpu_slot_update(kbdev);
+       kbase_backend_slot_update(kbdev);
 }
 
 bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js)
@@ -1048,7 +1171,7 @@ bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js)
        struct kbase_jd_atom *katom;
        struct kbase_jd_atom *next_katom;
 
-       lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+       lockdep_assert_held(&kbdev->hwaccess_lock);
 
        katom = kbase_gpu_inspect(kbdev, js, 0);
        next_katom = kbase_gpu_inspect(kbdev, js, 1);
@@ -1076,7 +1199,7 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
        struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0);
        struct kbase_context *kctx = katom->kctx;
 
-       lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+       lockdep_assert_held(&kbdev->hwaccess_lock);
 
        if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6787) &&
                        completion_code != BASE_JD_EVENT_DONE &&
@@ -1241,31 +1364,42 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
 #endif
 
        if (completion_code == BASE_JD_EVENT_STOPPED)
-               kbase_jm_return_atom_to_js(kbdev, katom);
+               katom = kbase_jm_return_atom_to_js(kbdev, katom);
        else
-               kbase_jm_complete(kbdev, katom, end_timestamp);
+               katom = kbase_jm_complete(kbdev, katom, end_timestamp);
+
+       if (katom) {
+               /* Cross-slot dependency has now become runnable. Try to submit
+                * it. */
+
+               /* Check if there are lower priority jobs to soft stop */
+               kbase_job_slot_ctx_priority_check_locked(kctx, katom);
+
+               kbase_jm_try_kick(kbdev, 1 << katom->slot_nr);
+       }
 
        /* Job completion may have unblocked other atoms. Try to update all job
         * slots */
-       kbase_gpu_slot_update(kbdev);
+       kbase_backend_slot_update(kbdev);
 }
 
 void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp)
 {
        int js;
 
-       lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+       lockdep_assert_held(&kbdev->hwaccess_lock);
 
        for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+               int atom_idx = 0;
                int idx;
 
-               for (idx = 0; idx < 2; idx++) {
+               for (idx = 0; idx < SLOT_RB_SIZE; idx++) {
                        struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev,
-                                                                       js, 0);
+                                       js, atom_idx);
                        bool keep_in_jm_rb = false;
 
                        if (!katom)
-                               continue;
+                               break;
 
                        if (katom->gpu_rb_state < KBASE_ATOM_GPU_RB_SUBMITTED)
                                keep_in_jm_rb = true;
@@ -1279,7 +1413,12 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp)
                         */
                        if (keep_in_jm_rb) {
                                katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
-                               katom->exit_protected_state = KBASE_ATOM_EXIT_PROTECTED_CHECK;
+                               katom->affinity = 0;
+                               katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK;
+                               /* As the atom was not removed, increment the
+                                * index so that we read the correct atom in the
+                                * next iteration. */
+                               atom_idx++;
                                continue;
                        }
 
@@ -1292,6 +1431,9 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp)
                        kbase_jm_complete(kbdev, katom, end_timestamp);
                }
        }
+
+       kbdev->protected_mode_transition = false;
+       kbdev->protected_mode = false;
 }
 
 static inline void kbase_gpu_stop_atom(struct kbase_device *kbdev,
@@ -1361,7 +1503,7 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
 
        int stop_x_dep_idx0 = -1, stop_x_dep_idx1 = -1;
 
-       lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+       lockdep_assert_held(&kbdev->hwaccess_lock);
 
        katom_idx0 = kbase_gpu_inspect(kbdev, js, 0);
        katom_idx1 = kbase_gpu_inspect(kbdev, js, 1);
@@ -1548,6 +1690,7 @@ void kbase_gpu_cacheclean(struct kbase_device *kbdev,
        /* Limit the number of loops to avoid a hang if the interrupt is missed
         */
        u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS;
+       unsigned long flags;
 
        mutex_lock(&kbdev->cacheclean_lock);
 
@@ -1574,8 +1717,10 @@ void kbase_gpu_cacheclean(struct kbase_device *kbdev,
 
        mutex_unlock(&kbdev->cacheclean_lock);
 
+       spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
        kbase_pm_unrequest_cores(kbdev, false,
                                        katom->need_cache_flush_cores_retained);
+       spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 }
 
 void kbase_backend_complete_wq(struct kbase_device *kbdev,
@@ -1620,8 +1765,12 @@ void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev,
                base_jd_core_req core_req, u64 affinity,
                enum kbase_atom_coreref_state coreref_state)
 {
+       unsigned long flags;
+
+       spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
        kbasep_js_job_check_deref_cores_nokatom(kbdev, core_req, affinity,
                        coreref_state);
+       spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 
        if (!kbdev->pm.active_count) {
                mutex_lock(&kbdev->js_data.runpool_mutex);
@@ -1640,7 +1789,7 @@ void kbase_gpu_dump_slots(struct kbase_device *kbdev)
 
        js_devdata = &kbdev->js_data;
 
-       spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+       spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 
        dev_info(kbdev->dev, "kbase_gpu_dump_slots:\n");
 
@@ -1662,7 +1811,7 @@ void kbase_gpu_dump_slots(struct kbase_device *kbdev)
                }
        }
 
-       spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+       spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 }
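
For illustration only: a stripped-down sketch of the ringbuffer state-machine shape that kbase_backend_slot_update() follows after this change. Each case either breaks out when its precondition is not yet met (the atom is retried on the next update) or advances the state and deliberately falls through, so an atom can move several states in one pass. All demo_* names below are invented and this is not part of the commit; only the control-flow shape is taken from the hunks above.

#include <linux/types.h>

enum demo_rb_state {
	DEMO_RB_WAITING_PROTECTED_PREV,
	DEMO_RB_WAITING_PROTECTED_TRANSITION,
	DEMO_RB_WAITING_CORES,
	DEMO_RB_READY,
};

struct demo_dev {
	bool protected_mode_transition;	/* a mode switch is in flight */
	bool in_protected_mode;
	bool cores_ready;
};

struct demo_atom {
	enum demo_rb_state state;
	bool wants_protected;
};

/* Stubs standing in for the real checks and actions */
static bool demo_mode_switch_needed(const struct demo_dev *dev,
		const struct demo_atom *atom)
{
	return atom->wants_protected != dev->in_protected_mode;
}

static int demo_switch_mode(struct demo_dev *dev, struct demo_atom *atom)
{
	/* pretend the switch completes immediately */
	dev->in_protected_mode = atom->wants_protected;
	return 0;
}

static void demo_submit(struct demo_dev *dev, struct demo_atom *atom)
{
}

static void demo_slot_update(struct demo_dev *dev, struct demo_atom *atom)
{
	switch (atom->state) {
	case DEMO_RB_WAITING_PROTECTED_PREV:
		if (dev->protected_mode_transition)
			break;	/* a previous switch must settle first */
		atom->state = DEMO_RB_WAITING_PROTECTED_TRANSITION;
		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */

	case DEMO_RB_WAITING_PROTECTED_TRANSITION:
		if (demo_mode_switch_needed(dev, atom) &&
		    demo_switch_mode(dev, atom))
			break;	/* switch failed or still in progress */
		atom->state = DEMO_RB_WAITING_CORES;
		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */

	case DEMO_RB_WAITING_CORES:
		if (!dev->cores_ready)
			break;
		atom->state = DEMO_RB_READY;
		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */

	case DEMO_RB_READY:
		demo_submit(dev, atom);
		break;
	}
}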
 
 
index 102d94b..1e0e05a 100644 (file)
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -66,17 +66,6 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
 struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js,
                                        int idx);
 
-/**
- * kbase_gpu_slot_update - Update state based on slot ringbuffers
- *
- * @kbdev:  Device pointer
- *
- * Inspect the jobs in the slot ringbuffers and update state.
- *
- * This will cause jobs to be submitted to hardware if they are unblocked
- */
-void kbase_gpu_slot_update(struct kbase_device *kbdev);
-
 /**
  * kbase_gpu_dump_slots - Print the contents of the slot ringbuffers
  *
index d665420..54d8ddd 100644 (file)
@@ -94,9 +94,8 @@ bool kbase_js_choose_affinity(u64 * const affinity,
        base_jd_core_req core_req = katom->core_req;
        unsigned int num_core_groups = kbdev->gpu_props.num_core_groups;
        u64 core_availability_mask;
-       unsigned long flags;
 
-       spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+       lockdep_assert_held(&kbdev->hwaccess_lock);
 
        core_availability_mask = kbase_pm_ca_get_core_mask(kbdev);
 
@@ -105,7 +104,6 @@ bool kbase_js_choose_affinity(u64 * const affinity,
         * transitioning) then fail.
         */
        if (0 == core_availability_mask) {
-               spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
                *affinity = 0;
                return false;
        }
@@ -114,7 +112,6 @@ bool kbase_js_choose_affinity(u64 * const affinity,
 
        if ((core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) ==
                                                                BASE_JD_REQ_T) {
-               spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
                 /* If the hardware supports XAFFINITY then we'll only enable
                  * the tiler (which is the default so this is a no-op),
                  * otherwise enable shader core 0. */
@@ -169,8 +166,6 @@ bool kbase_js_choose_affinity(u64 * const affinity,
                }
        }
 
-       spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
-
        /*
         * If no cores are currently available in the desired core group(s)
         * (core availability policy is transitioning) then fail.
index fbffa3b..35d9781 100644 (file)
  * violated.
  *
  * The following locking conditions are made on the caller
- * - it must hold kbasep_js_device_data.runpool_irq.lock
+ * - it must hold hwaccess_lock
  */
-bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev,
-                                                                       int js);
+bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev, int js);
 
 /**
  * kbase_js_choose_affinity - Compute affinity for a given job.
@@ -71,7 +70,7 @@ bool kbase_js_choose_affinity(u64 * const affinity,
  * @affinity: The affinity mask to test
  *
  * The following locks must be held by the caller
- * - kbasep_js_device_data.runpool_irq.lock
+ * - hwaccess_lock
  *
  * Return: true if the affinity would violate the restrictions
  */
@@ -87,7 +86,7 @@ bool kbase_js_affinity_would_violate(struct kbase_device *kbdev, int js,
  * @affinity: The cores to retain
  *
  * The following locks must be held by the caller
- * - kbasep_js_device_data.runpool_irq.lock
+ * - hwaccess_lock
  */
 void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js,
                                                                u64 affinity);
@@ -106,7 +105,7 @@ void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js,
  * %BASE_JM_SUBMIT_SLOTS.
  *
  * The following locks must be held by the caller
- * - kbasep_js_device_data.runpool_irq.lock
+ * - hwaccess_lock
  */
 void kbase_js_affinity_release_slot_cores(struct kbase_device *kbdev, int js,
                                                                u64 affinity);
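
For illustration only: a minimal sketch of the locking-convention change applied throughout these affinity files. Helpers no longer take a private spinlock internally; they assert that the caller already holds hwaccess_lock, and the caller takes that lock once around the whole sequence. The demo_* names and values are invented; only the convention itself comes from the hunks above.

#include <linux/lockdep.h>
#include <linux/printk.h>
#include <linux/spinlock.h>
#include <linux/types.h>

static DEFINE_SPINLOCK(demo_hwaccess_lock);
static u64 demo_core_mask = 0xf;

/* Caller must hold demo_hwaccess_lock (previously this took a private lock) */
static u64 demo_get_core_mask(void)
{
	lockdep_assert_held(&demo_hwaccess_lock);
	return demo_core_mask;
}

/* Caller must hold demo_hwaccess_lock */
static bool demo_choose_affinity(u64 *affinity)
{
	u64 mask;

	lockdep_assert_held(&demo_hwaccess_lock);

	mask = demo_get_core_mask();	/* no nested lock/unlock needed */
	if (!mask) {
		*affinity = 0;
		return false;
	}

	*affinity = mask;
	return true;
}

static void demo_caller(void)
{
	unsigned long flags;
	u64 affinity;

	/* The caller takes the lock once around the whole sequence */
	spin_lock_irqsave(&demo_hwaccess_lock, flags);
	if (demo_choose_affinity(&affinity))
		pr_debug("affinity %llx\n", (unsigned long long)affinity);
	spin_unlock_irqrestore(&demo_hwaccess_lock, flags);
}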
index a23deb4..b09d491 100644 (file)
@@ -104,7 +104,7 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer)
        js_devdata = &kbdev->js_data;
 
        /* Loop through the slots */
-       spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+       spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
        for (s = 0; s < kbdev->gpu_props.num_job_slots; s++) {
                struct kbase_jd_atom *atom = NULL;
 
@@ -168,8 +168,8 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer)
                                         * However, if it's about to be
                                         * increased then the new context can't
                                         * run any jobs until they take the
-                                        * runpool_irq lock, so it's OK to
-                                        * observe the older value.
+                                        * hwaccess_lock, so it's OK to observe
+                                        * the older value.
                                         *
                                         * Similarly, if it's about to be
                                         * decreased, the last job from another
@@ -270,7 +270,7 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer)
 
        backend->timeouts_updated = false;
 
-       spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+       spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 
        return HRTIMER_NORESTART;
 }
@@ -285,9 +285,9 @@ void kbase_backend_ctx_count_changed(struct kbase_device *kbdev)
 
        if (!timer_callback_should_run(kbdev)) {
                /* Take spinlock to force synchronisation with timer */
-               spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+               spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
                backend->timer_running = false;
-               spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+               spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
                /* From now on, return value of timer_callback_should_run() will
                 * also cause the timer to not requeue itself. Its return value
                 * cannot change, because it depends on variables updated with
@@ -298,9 +298,9 @@ void kbase_backend_ctx_count_changed(struct kbase_device *kbdev)
 
        if (timer_callback_should_run(kbdev) && !backend->timer_running) {
                /* Take spinlock to force synchronisation with timer */
-               spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+               spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
                backend->timer_running = true;
-               spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+               spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
                hrtimer_start(&backend->scheduling_timer,
                        HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns),
                                                        HRTIMER_MODE_REL);
index 4a3572d..08eea1c 100644 (file)
@@ -97,6 +97,30 @@ static int write_cmd(struct kbase_device *kbdev, int as_nr, u32 cmd,
        return status;
 }
 
+static void validate_protected_page_fault(struct kbase_device *kbdev,
+               struct kbase_context *kctx)
+{
+       /* GPUs which support (native) protected mode shall not report page
+        * fault addresses unless they have protected debug mode and protected
+        * debug mode is turned on */
+       u32 protected_debug_mode = 0;
+
+       if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE))
+               return;
+
+       if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) {
+               protected_debug_mode = kbase_reg_read(kbdev,
+                               GPU_CONTROL_REG(GPU_STATUS),
+                               kctx) & GPU_DBGEN;
+       }
+
+       if (!protected_debug_mode) {
+               /* fault_addr should never be reported in protected mode.
+                * However, we just continue by printing an error message */
+               dev_err(kbdev->dev, "Fault address reported in protected mode\n");
+       }
+}
+
 void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat)
 {
        const int num_as = 16;
@@ -141,6 +165,7 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat)
                 */
                kctx = kbasep_js_runpool_lookup_ctx(kbdev, as_no);
 
+
                /* find faulting address */
                as->fault_addr = kbase_reg_read(kbdev,
                                                MMU_AS_REG(as_no,
@@ -152,6 +177,15 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat)
                                                        AS_FAULTADDRESS_LO),
                                                kctx);
 
+               /* Mark the fault protected or not */
+               as->protected_mode = kbdev->protected_mode;
+
+               if (kbdev->protected_mode && as->fault_addr) {
+                       /* check if address reporting is allowed */
+                       validate_protected_page_fault(kbdev, kctx);
+               }
+
                /* report the fault to debugfs */
                kbase_as_fault_debugfs_new(kbdev, as_no);
 
@@ -195,10 +229,9 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat)
                }
 
                /* Process the interrupt for this address space */
-               spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags);
+               spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
                kbase_mmu_interrupt_process(kbdev, kctx, as);
-               spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock,
-                               flags);
+               spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
        }
 
        /* reenable interrupts */
@@ -268,6 +301,8 @@ int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as,
 {
        int ret;
 
+       lockdep_assert_held(&kbdev->mmu_hw_mutex);
+
        if (op == AS_COMMAND_UNLOCK) {
                /* Unlock doesn't require a lock first */
                ret = write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx);
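
For illustration only: the decision that the new validate_protected_page_fault() encodes, reduced to a predicate. The demo_* names and the DBGEN bit position below are invented for the example; only the rule itself (fault addresses must not be reported in protected mode unless protected debug mode exists and is currently enabled) comes from the hunks above.

#include <linux/types.h>

#define DEMO_DBGEN	(1u << 8)	/* invented bit position */

struct demo_gpu {
	bool has_protected_mode;	/* BASE_HW_FEATURE_PROTECTED_MODE */
	bool has_protected_debug_mode;	/* BASE_HW_FEATURE_PROTECTED_DEBUG_MODE */
	u32 gpu_status;			/* cached GPU_STATUS register value */
};

static bool demo_fault_addr_allowed(const struct demo_gpu *gpu)
{
	if (!gpu->has_protected_mode)
		return true;	/* no protected mode, nothing to hide */

	if (!gpu->has_protected_debug_mode)
		return false;	/* protected mode only: addresses must stay hidden */

	/* protected debug mode present: allowed only while DBGEN is set */
	return (gpu->gpu_status & DEMO_DBGEN) != 0;
}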
index 711e44c..7690ec5 100644 (file)
 #endif /* CONFIG_MALI_PLATFORM_DEVICETREE */
 
 #include <mali_kbase_pm.h>
-#include <backend/gpu/mali_kbase_jm_internal.h>
+#include <mali_kbase_hwaccess_jm.h>
 #include <backend/gpu/mali_kbase_js_internal.h>
 #include <backend/gpu/mali_kbase_pm_internal.h>
 
+static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data);
+
 void kbase_pm_register_access_enable(struct kbase_device *kbdev)
 {
        struct kbase_pm_callback_conf *callbacks;
@@ -65,6 +67,14 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev)
 
        mutex_init(&kbdev->pm.lock);
 
+       kbdev->pm.backend.gpu_poweroff_wait_wq = alloc_workqueue("kbase_pm_poweroff_wait",
+                       WQ_HIGHPRI | WQ_UNBOUND, 1);
+       if (!kbdev->pm.backend.gpu_poweroff_wait_wq)
+               return -ENOMEM;
+
+       INIT_WORK(&kbdev->pm.backend.gpu_poweroff_wait_work,
+                       kbase_pm_gpu_poweroff_wait_wq);
+
        kbdev->pm.backend.gpu_powered = false;
        kbdev->pm.suspending = false;
 #ifdef CONFIG_MALI_DEBUG
@@ -119,10 +129,11 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev)
        init_waitqueue_head(&kbdev->pm.zero_active_count_wait);
        kbdev->pm.active_count = 0;
 
-       spin_lock_init(&kbdev->pm.power_change_lock);
        spin_lock_init(&kbdev->pm.backend.gpu_cycle_counter_requests_lock);
        spin_lock_init(&kbdev->pm.backend.gpu_powered_lock);
 
+       init_waitqueue_head(&kbdev->pm.backend.poweroff_wait);
+
        if (kbase_pm_ca_init(kbdev) != 0)
                goto workq_fail;
 
@@ -157,47 +168,121 @@ void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume)
         * will wait for that state to be reached anyway */
 }
 
-bool kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend)
+static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data)
 {
+       struct kbase_device *kbdev = container_of(data, struct kbase_device,
+                       pm.backend.gpu_poweroff_wait_work);
+       struct kbase_pm_device_data *pm = &kbdev->pm;
+       struct kbase_pm_backend_data *backend = &pm->backend;
+       struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
        unsigned long flags;
-       bool cores_are_available;
-
-       lockdep_assert_held(&kbdev->pm.lock);
-
-       spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
-
-       /* Force all cores off */
-       kbdev->pm.backend.desired_shader_state = 0;
-       kbdev->pm.backend.desired_tiler_state = 0;
-
-       /* Force all cores to be unavailable, in the situation where
-        * transitions are in progress for some cores but not others,
-        * and kbase_pm_check_transitions_nolock can not immediately
-        * power off the cores */
-       kbdev->shader_available_bitmap = 0;
-       kbdev->tiler_available_bitmap = 0;
-       kbdev->l2_available_bitmap = 0;
 
+       /* Wait for power transitions to complete. We do this with no locks held
+        * so that we don't deadlock with any pending workqueues */
        KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
                                SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_START);
-       cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+       kbase_pm_check_transitions_sync(kbdev);
        KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
                                SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_END);
-       /* Don't need 'cores_are_available', because we don't return anything */
-       CSTD_UNUSED(cores_are_available);
 
-       spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+       mutex_lock(&js_devdata->runpool_mutex);
+       mutex_lock(&kbdev->pm.lock);
+
+       if (!backend->poweron_required) {
+               WARN_ON(kbdev->l2_available_bitmap ||
+                               kbdev->shader_available_bitmap ||
+                               kbdev->tiler_available_bitmap);
+
+               /* Consume any change-state events */
+               kbase_timeline_pm_check_handle_event(kbdev,
+                                       KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+
+               /* Disable interrupts and turn the clock off */
+               if (!kbase_pm_clock_off(kbdev, backend->poweroff_is_suspend)) {
+                       /*
+                        * Page/bus faults are pending, must drop locks to
+                        * process.  Interrupts are disabled so no more faults
+                        * should be generated at this point.
+                        */
+                       mutex_unlock(&kbdev->pm.lock);
+                       mutex_unlock(&js_devdata->runpool_mutex);
+                       kbase_flush_mmu_wqs(kbdev);
+                       mutex_lock(&js_devdata->runpool_mutex);
+                       mutex_lock(&kbdev->pm.lock);
+
+                       /* Turn off the clock now that the faults have been
+                        * handled. We dropped locks, so poweron_required may
+                        * have changed - power back on if this is the case. */
+                       if (backend->poweron_required)
+                               kbase_pm_clock_on(kbdev, false);
+                       else
+                               WARN_ON(!kbase_pm_clock_off(kbdev,
+                                               backend->poweroff_is_suspend));
+               }
+       }
 
-       /* NOTE: We won't wait to reach the core's desired state, even if we're
-        * powering off the GPU itself too. It's safe to cut the power whilst
-        * they're transitioning to off, because the cores should be idle and
-        * all cache flushes should already have occurred */
+       spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+       backend->poweroff_wait_in_progress = false;
+       if (backend->poweron_required) {
+               backend->poweron_required = false;
+               kbase_pm_update_cores_state_nolock(kbdev);
+       }
+       spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 
-       /* Consume any change-state events */
-       kbase_timeline_pm_check_handle_event(kbdev,
-                               KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
-       /* Disable interrupts and turn the clock off */
-       return kbase_pm_clock_off(kbdev, is_suspend);
+       mutex_unlock(&kbdev->pm.lock);
+       mutex_unlock(&js_devdata->runpool_mutex);
+
+       wake_up(&kbdev->pm.backend.poweroff_wait);
+}
+
+void kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend)
+{
+       unsigned long flags;
+
+       lockdep_assert_held(&kbdev->pm.lock);
+
+       spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+       if (!kbdev->pm.backend.poweroff_wait_in_progress) {
+               /* Force all cores off */
+               kbdev->pm.backend.desired_shader_state = 0;
+               kbdev->pm.backend.desired_tiler_state = 0;
+
+               /* Force all cores to be unavailable, in the situation where
+                * transitions are in progress for some cores but not others,
+                * and kbase_pm_check_transitions_nolock can not immediately
+                * power off the cores */
+               kbdev->shader_available_bitmap = 0;
+               kbdev->tiler_available_bitmap = 0;
+               kbdev->l2_available_bitmap = 0;
+
+               kbdev->pm.backend.poweroff_wait_in_progress = true;
+               kbdev->pm.backend.poweroff_is_suspend = is_suspend;
+
+               spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+               /* Kick off wq here. Callers will have to wait */
+               queue_work(kbdev->pm.backend.gpu_poweroff_wait_wq,
+                               &kbdev->pm.backend.gpu_poweroff_wait_work);
+       } else {
+               spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+       }
+}
+
+static bool is_poweroff_in_progress(struct kbase_device *kbdev)
+{
+       bool ret;
+       unsigned long flags;
+
+       spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+       ret = (kbdev->pm.backend.poweroff_wait_in_progress == false);
+       spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+       return ret;
+}
+
+void kbase_pm_wait_for_poweroff_complete(struct kbase_device *kbdev)
+{
+       wait_event_killable(kbdev->pm.backend.poweroff_wait,
+                       is_poweroff_in_progress(kbdev));
 }
 
 int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev,
@@ -269,15 +354,7 @@ void kbase_hwaccess_pm_halt(struct kbase_device *kbdev)
 
        mutex_lock(&kbdev->pm.lock);
        kbase_pm_cancel_deferred_poweroff(kbdev);
-       if (!kbase_pm_do_poweroff(kbdev, false)) {
-               /* Page/bus faults are pending, must drop pm.lock to process.
-                * Interrupts are disabled so no more faults should be
-                * generated at this point */
-               mutex_unlock(&kbdev->pm.lock);
-               kbase_flush_mmu_wqs(kbdev);
-               mutex_lock(&kbdev->pm.lock);
-               WARN_ON(!kbase_pm_do_poweroff(kbdev, false));
-       }
+       kbase_pm_do_poweroff(kbdev, false);
        mutex_unlock(&kbdev->pm.lock);
 }
 
@@ -295,6 +372,8 @@ void kbase_hwaccess_pm_term(struct kbase_device *kbdev)
 
        /* Shut down the metrics subsystem */
        kbasep_pm_metrics_term(kbdev);
+
+       destroy_workqueue(kbdev->pm.backend.gpu_poweroff_wait_wq);
 }
 
 void kbase_pm_power_changed(struct kbase_device *kbdev)
@@ -304,9 +383,8 @@ void kbase_pm_power_changed(struct kbase_device *kbdev)
 
        KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
                                SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_START);
-       spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+       spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
        cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
-       spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
        KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
                                SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_END);
 
@@ -316,10 +394,9 @@ void kbase_pm_power_changed(struct kbase_device *kbdev)
                kbase_timeline_pm_handle_event(kbdev,
                                KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
 
-               spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags);
-               kbase_gpu_slot_update(kbdev);
-               spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags);
+               kbase_backend_slot_update(kbdev);
        }
+       spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 }
 
 void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev,
@@ -354,21 +431,16 @@ void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev)
         * off prematurely) */
        mutex_lock(&js_devdata->runpool_mutex);
        mutex_lock(&kbdev->pm.lock);
+
        kbase_pm_cancel_deferred_poweroff(kbdev);
-       if (!kbase_pm_do_poweroff(kbdev, true)) {
-               /* Page/bus faults are pending, must drop pm.lock to process.
-                * Interrupts are disabled so no more faults should be
-                * generated at this point */
-               mutex_unlock(&kbdev->pm.lock);
-               kbase_flush_mmu_wqs(kbdev);
-               mutex_lock(&kbdev->pm.lock);
-               WARN_ON(!kbase_pm_do_poweroff(kbdev, false));
-       }
+       kbase_pm_do_poweroff(kbdev, true);
 
        kbase_backend_timer_suspend(kbdev);
 
        mutex_unlock(&kbdev->pm.lock);
        mutex_unlock(&js_devdata->runpool_mutex);
+
+       kbase_pm_wait_for_poweroff_complete(kbdev);
 }
 
 void kbase_hwaccess_pm_resume(struct kbase_device *kbdev)
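
For illustration only: a minimal, self-contained sketch of the deferred power-off scheme these hunks introduce. The request path only latches poweroff_wait_in_progress and queues work, the worker does the slow part (waiting for transitions, switching the clock off) with no spinlocks held, and waiters block with wait_event_killable() until the worker clears the flag and wakes them. All demo_* names are invented; only the shape of the mechanism comes from the commit.

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/spinlock.h>
#include <linux/wait.h>
#include <linux/workqueue.h>

struct demo_dev {
	spinlock_t lock;			/* stands in for hwaccess_lock */
	bool poweroff_wait_in_progress;
	struct workqueue_struct *wq;
	struct work_struct poweroff_work;
	wait_queue_head_t poweroff_wait;
};

static void demo_poweroff_worker(struct work_struct *work)
{
	struct demo_dev *dev = container_of(work, struct demo_dev,
			poweroff_work);
	unsigned long flags;

	/* The slow part (waiting for core transitions, turning the clock
	 * off) happens here, with no spinlocks held. */

	spin_lock_irqsave(&dev->lock, flags);
	dev->poweroff_wait_in_progress = false;
	spin_unlock_irqrestore(&dev->lock, flags);

	wake_up(&dev->poweroff_wait);
}

static int demo_init(struct demo_dev *dev)
{
	spin_lock_init(&dev->lock);
	init_waitqueue_head(&dev->poweroff_wait);
	INIT_WORK(&dev->poweroff_work, demo_poweroff_worker);
	dev->wq = alloc_workqueue("demo_poweroff_wait",
			WQ_HIGHPRI | WQ_UNBOUND, 1);
	return dev->wq ? 0 : -ENOMEM;
}

static void demo_request_poweroff(struct demo_dev *dev)
{
	unsigned long flags;

	spin_lock_irqsave(&dev->lock, flags);
	if (!dev->poweroff_wait_in_progress) {
		/* Latch the flag, then hand the slow work to the wq */
		dev->poweroff_wait_in_progress = true;
		spin_unlock_irqrestore(&dev->lock, flags);
		queue_work(dev->wq, &dev->poweroff_work);
	} else {
		spin_unlock_irqrestore(&dev->lock, flags);
	}
}

static bool demo_poweroff_done(struct demo_dev *dev)
{
	unsigned long flags;
	bool done;

	spin_lock_irqsave(&dev->lock, flags);
	done = !dev->poweroff_wait_in_progress;
	spin_unlock_irqrestore(&dev->lock, flags);

	return done;
}

static void demo_wait_for_poweroff(struct demo_dev *dev)
{
	wait_event_killable(dev->poweroff_wait, demo_poweroff_done(dev));
}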
index 4b903cc..e8cd8cb 100644 (file)
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2013-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -95,10 +95,10 @@ void kbase_pm_ca_set_policy(struct kbase_device *kbdev,
        mutex_lock(&kbdev->pm.lock);
 
        /* Remove the policy to prevent IRQ handlers from working on it */
-       spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+       spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
        old_policy = kbdev->pm.backend.ca_current_policy;
        kbdev->pm.backend.ca_current_policy = NULL;
-       spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+       spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 
        if (old_policy->term)
                old_policy->term(kbdev);
@@ -106,7 +106,7 @@ void kbase_pm_ca_set_policy(struct kbase_device *kbdev,
        if (new_policy->init)
                new_policy->init(kbdev);
 
-       spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+       spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
        kbdev->pm.backend.ca_current_policy = new_policy;
 
        /* If any core power state changes were previously attempted, but
@@ -118,7 +118,7 @@ void kbase_pm_ca_set_policy(struct kbase_device *kbdev,
                                        kbdev->shader_ready_bitmap,
                                        kbdev->shader_transitioning_bitmap);
 
-       spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+       spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 
        mutex_unlock(&kbdev->pm.lock);
 
@@ -131,7 +131,7 @@ KBASE_EXPORT_TEST_API(kbase_pm_ca_set_policy);
 
 u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev)
 {
-       lockdep_assert_held(&kbdev->pm.power_change_lock);
+       lockdep_assert_held(&kbdev->hwaccess_lock);
 
        /* All cores must be enabled when instrumentation is in use */
        if (kbdev->pm.backend.instr_enabled)
@@ -151,7 +151,7 @@ KBASE_EXPORT_TEST_API(kbase_pm_ca_get_core_mask);
 void kbase_pm_ca_update_core_status(struct kbase_device *kbdev, u64 cores_ready,
                                                        u64 cores_transitioning)
 {
-       lockdep_assert_held(&kbdev->pm.power_change_lock);
+       lockdep_assert_held(&kbdev->hwaccess_lock);
 
        if (kbdev->pm.backend.ca_current_policy != NULL)
                kbdev->pm.backend.ca_current_policy->update_core_status(kbdev,
@@ -163,20 +163,17 @@ void kbase_pm_ca_instr_enable(struct kbase_device *kbdev)
 {
        unsigned long flags;
 
-       spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+       spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
        kbdev->pm.backend.instr_enabled = true;
 
        kbase_pm_update_cores_state_nolock(kbdev);
-       spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+       spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 }
 
 void kbase_pm_ca_instr_disable(struct kbase_device *kbdev)
 {
-       unsigned long flags;
-
-       spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+       lockdep_assert_held(&kbdev->hwaccess_lock);
        kbdev->pm.backend.instr_enabled = false;
 
        kbase_pm_update_cores_state_nolock(kbdev);
-       spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
 }
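
For illustration only: the policy-swap pattern visible in kbase_pm_ca_set_policy() above, now done under hwaccess_lock. The policy pointer is detached under the spinlock so IRQ paths stop using it, the potentially slow term/init callbacks run with the lock dropped, and the new policy is then published under the lock again. The demo_* names are invented; only the pattern comes from the code above.

#include <linux/spinlock.h>
#include <linux/stddef.h>

struct demo_policy {
	void (*init)(void);
	void (*term)(void);
};

static DEFINE_SPINLOCK(demo_hwaccess_lock);
static struct demo_policy *demo_current_policy;

static void demo_set_policy(struct demo_policy *new_policy)
{
	struct demo_policy *old_policy;
	unsigned long flags;

	/* Detach the old policy so IRQ handlers see "no policy" */
	spin_lock_irqsave(&demo_hwaccess_lock, flags);
	old_policy = demo_current_policy;
	demo_current_policy = NULL;
	spin_unlock_irqrestore(&demo_hwaccess_lock, flags);

	/* Potentially slow callbacks run with the lock dropped */
	if (old_policy && old_policy->term)
		old_policy->term();
	if (new_policy->init)
		new_policy->init();

	/* Publish the new policy */
	spin_lock_irqsave(&demo_hwaccess_lock, flags);
	demo_current_policy = new_policy;
	spin_unlock_irqrestore(&demo_hwaccess_lock, flags);
}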
index e8f96fe..99fb62d 100644 (file)
@@ -207,6 +207,17 @@ union kbase_pm_ca_policy_data {
  *                          power_change_lock should be held when accessing,
  *                          unless there is no way the timer can be running (eg
  *                          hrtimer_cancel() was called immediately before)
+ * @poweroff_wait_in_progress: true if a wait for GPU power off is in progress.
+ *                             hwaccess_lock must be held when accessing
+ * @poweron_required: true if a GPU power on is required. Should only be set
+ *                    when poweroff_wait_in_progress is true, and therefore the
+ *                    GPU can not immediately be powered on. pm.lock must be
+ *                    held when accessing
+ * @poweroff_is_suspend: true if the GPU is being powered off due to a suspend
+ *                       request. pm.lock must be held when accessing
+ * @gpu_poweroff_wait_wq: workqueue for waiting for GPU to power off
+ * @gpu_poweroff_wait_work: work item for use with @gpu_poweroff_wait_wq
+ * @poweroff_wait: waitqueue for waiting for @gpu_poweroff_wait_work to complete
  * @callback_power_on: Callback when the GPU needs to be turned on. See
  *                     &struct kbase_pm_callback_conf
  * @callback_power_off: Callback when the GPU may be turned off. See
@@ -281,6 +292,15 @@ struct kbase_pm_backend_data {
        bool poweroff_timer_needed;
        bool poweroff_timer_running;
 
+       bool poweroff_wait_in_progress;
+       bool poweron_required;
+       bool poweroff_is_suspend;
+
+       struct workqueue_struct *gpu_poweroff_wait_wq;
+       struct work_struct gpu_poweroff_wait_work;
+
+       wait_queue_head_t poweroff_wait;
+
        int (*callback_power_on)(struct kbase_device *kbdev);
        void (*callback_power_off)(struct kbase_device *kbdev);
        void (*callback_power_suspend)(struct kbase_device *kbdev);
index 03ba23d..9271314 100644 (file)
@@ -174,7 +174,7 @@ static void kbase_pm_invoke(struct kbase_device *kbdev,
        u32 lo = cores & 0xFFFFFFFF;
        u32 hi = (cores >> 32) & 0xFFFFFFFF;
 
-       lockdep_assert_held(&kbdev->pm.power_change_lock);
+       lockdep_assert_held(&kbdev->hwaccess_lock);
 
        /*-------------------------------------------------------*/
 
@@ -449,7 +449,7 @@ static bool kbase_pm_transition_core_type(struct kbase_device *kbdev,
        u64 powering_on_trans;
        u64 desired_state_in_use;
 
-       lockdep_assert_held(&kbdev->pm.power_change_lock);
+       lockdep_assert_held(&kbdev->hwaccess_lock);
 
        /* Get current state */
        present = kbase_pm_get_present_cores(kbdev, type);
@@ -493,7 +493,7 @@ static bool kbase_pm_transition_core_type(struct kbase_device *kbdev,
                        kbdev->pm.backend.l2_powered = 0;
        }
 
-       if (desired_state_in_use == ready && (trans == 0))
+       if (desired_state == ready && (trans == 0))
                return true;
 
        /* Restrict the cores to those that are actually present */
@@ -604,7 +604,7 @@ MOCKABLE(kbase_pm_check_transitions_nolock) (struct kbase_device *kbdev)
        u64 prev_l2_available_bitmap;
 
        KBASE_DEBUG_ASSERT(NULL != kbdev);
-       lockdep_assert_held(&kbdev->pm.power_change_lock);
+       lockdep_assert_held(&kbdev->hwaccess_lock);
 
        spin_lock(&kbdev->pm.backend.gpu_powered_lock);
        if (kbdev->pm.backend.gpu_powered == false) {
@@ -776,6 +776,7 @@ MOCKABLE(kbase_pm_check_transitions_nolock) (struct kbase_device *kbdev)
                                KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
                /* Wake slow-path waiters. Job scheduler does not use this. */
                KBASE_TRACE_ADD(kbdev, PM_WAKE_WAITERS, NULL, NULL, 0u, 0);
+
                wake_up(&kbdev->pm.backend.gpu_in_desired_state_wait);
        }
 
@@ -832,11 +833,13 @@ void kbase_pm_check_transitions_sync(struct kbase_device *kbdev)
 
        /* Force the transition to be checked and reported - the cores may be
         * 'available' (for job submission) but not fully powered up. */
-       spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+       spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
        cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+
        /* Don't need 'cores_are_available', because we don't return anything */
        CSTD_UNUSED(cores_are_available);
-       spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+       spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 
        timeout = jiffies + PM_TIMEOUT;
 
@@ -907,12 +910,12 @@ void kbase_pm_enable_interrupts(struct kbase_device *kbdev)
         * Clear all interrupts,
         * and unmask them all.
         */
-       spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+       spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
        kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL,
                                                                        NULL);
        kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), GPU_IRQ_REG_ALL,
                                                                        NULL);
-       spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+       spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 
        kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF,
                                                                        NULL);
@@ -924,21 +927,18 @@ void kbase_pm_enable_interrupts(struct kbase_device *kbdev)
 
 KBASE_EXPORT_TEST_API(kbase_pm_enable_interrupts);
 
-void kbase_pm_disable_interrupts(struct kbase_device *kbdev)
+void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev)
 {
-       unsigned long flags;
-
        KBASE_DEBUG_ASSERT(NULL != kbdev);
        /*
         * Mask all interrupts,
         * and clear them all.
         */
-       spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+       lockdep_assert_held(&kbdev->hwaccess_lock);
+
        kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), 0, NULL);
        kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL,
                                                                        NULL);
-       spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
-
        kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0, NULL);
        kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF,
                                                                        NULL);
@@ -947,8 +947,18 @@ void kbase_pm_disable_interrupts(struct kbase_device *kbdev)
        kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF, NULL);
 }
 
+void kbase_pm_disable_interrupts(struct kbase_device *kbdev)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+       kbase_pm_disable_interrupts_nolock(kbdev);
+       spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
 KBASE_EXPORT_TEST_API(kbase_pm_disable_interrupts);
 
+
 /*
  * pmu layout:
  * 0x0000: PMU TAG (RO) (0xCAFECAFE)
@@ -1001,12 +1011,10 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume)
                kbase_pm_init_hw(kbdev, PM_ENABLE_IRQS);
        }
 
+       mutex_lock(&kbdev->mmu_hw_mutex);
        /* Reprogram the GPU's MMU */
        for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
-               struct kbase_as *as = &kbdev->as[i];
-
-               mutex_lock(&as->transaction_mutex);
-               spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+               spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 
                if (js_devdata->runpool_irq.per_as_data[i].kctx)
                        kbase_mmu_update(
@@ -1014,9 +1022,9 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume)
                else
                        kbase_mmu_disable_as(kbdev, i);
 
-               spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
-               mutex_unlock(&as->transaction_mutex);
+               spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
        }
+       mutex_unlock(&kbdev->mmu_hw_mutex);
 
        /* Lastly, enable the interrupts */
        kbase_pm_enable_interrupts(kbdev);
@@ -1253,7 +1261,7 @@ static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev)
 
 void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev)
 {
-       if ((kbdev->system_coherency == COHERENCY_ACE) &&
+       if ((kbdev->current_gpu_coherency_mode == COHERENCY_ACE) &&
                !kbdev->cci_snoop_enabled) {
 #ifdef CONFIG_ARM64
                if (kbdev->snoop_enable_smc != 0)
@@ -1266,8 +1274,7 @@ void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev)
 
 void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev)
 {
-       if ((kbdev->system_coherency == COHERENCY_ACE) &&
-               kbdev->cci_snoop_enabled) {
+       if (kbdev->cci_snoop_enabled) {
 #ifdef CONFIG_ARM64
                if (kbdev->snoop_disable_smc != 0) {
                        mali_cci_flush_l2(kbdev);
@@ -1397,7 +1404,7 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags)
        kbdev->pm.backend.reset_done = false;
 
        /* The cores should be made unavailable due to the reset */
-       spin_lock_irqsave(&kbdev->pm.power_change_lock, irq_flags);
+       spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
        if (kbdev->shader_available_bitmap != 0u)
                        KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL,
                                                NULL, 0u, (u32)0u);
@@ -1407,7 +1414,7 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags)
        kbdev->shader_available_bitmap = 0u;
        kbdev->tiler_available_bitmap = 0u;
        kbdev->l2_available_bitmap = 0u;
-       spin_unlock_irqrestore(&kbdev->pm.power_change_lock, irq_flags);
+       spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
 
        /* Soft reset the GPU */
        if (kbdev->protected_mode_support &&
@@ -1416,12 +1423,11 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags)
        else
                err = kbase_pm_reset_do_normal(kbdev);
 
-       spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, irq_flags);
+       spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
        if (kbdev->protected_mode)
                resume_vinstr = true;
-       kbdev->protected_mode_transition = false;
        kbdev->protected_mode = false;
-       spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, irq_flags);
+       spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
 
        if (err)
                goto exit;
@@ -1430,7 +1436,6 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags)
                kbase_pm_hw_issues_detect(kbdev);
 
        kbase_pm_hw_issues_apply(kbdev);
-
        kbase_cache_set_coherency_mode(kbdev, kbdev->system_coherency);
 
        /* Sanity check protected mode was left after reset */
@@ -1460,7 +1465,10 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags)
                        &kbdev->pm.backend.gpu_cycle_counter_requests_lock,
                                                                irq_flags);
 
+               spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
                kbase_pm_release_l2_caches(kbdev);
+               spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
+
                kbase_pm_disable_interrupts(kbdev);
        }
 
@@ -1542,12 +1550,14 @@ void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev)
 
 KBASE_EXPORT_TEST_API(kbase_pm_request_gpu_cycle_counter_l2_is_on);
 
-void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev)
+void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev)
 {
        unsigned long flags;
 
        KBASE_DEBUG_ASSERT(kbdev != NULL);
 
+       lockdep_assert_held(&kbdev->hwaccess_lock);
+
        spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
                                                                        flags);
 
@@ -1566,4 +1576,15 @@ void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev)
        kbase_pm_release_l2_caches(kbdev);
 }
 
+void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+       kbase_pm_release_gpu_cycle_counter_nolock(kbdev);
+
+       spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
 KBASE_EXPORT_TEST_API(kbase_pm_release_gpu_cycle_counter);
index aa51b8c..ad2667a 100644 (file)
@@ -167,6 +167,16 @@ void kbase_pm_enable_interrupts(struct kbase_device *kbdev);
  */
 void kbase_pm_disable_interrupts(struct kbase_device *kbdev);
 
+/**
+ * kbase_pm_disable_interrupts_nolock - Version of kbase_pm_disable_interrupts()
+ *                                      that does not take the hwaccess_lock
+ *
+ * Caller must hold the hwaccess_lock.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev);
+
 /**
  * kbase_pm_init_hw - Initialize the hardware.
  * @kbdev: The kbase device structure for the device (must be a valid pointer)
@@ -373,14 +383,35 @@ void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev);
  * kbase_pm_release_gpu_cycle_counter - Mark that the GPU cycle counter is no
  *                                      longer in use
  *
- * If the caller is the
- * last caller then the GPU cycle counters will be disabled. A request must have
- * been made before a call to this.
+ * If the caller is the last caller then the GPU cycle counters will be
+ * disabled. A request must have been made before a call to this.
+ *
+ * Caller must not hold the hwaccess_lock, as it will be taken in this function.
+ * If the caller is already holding this lock then
+ * kbase_pm_release_gpu_cycle_counter_nolock() must be used instead.
  *
  * @kbdev: The kbase device structure for the device (must be a valid pointer)
  */
 void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev);
 
+/**
+ * kbase_pm_release_gpu_cycle_counter_nolock - Version of kbase_pm_release_gpu_cycle_counter()
+ *                                             that does not take hwaccess_lock
+ *
+ * Caller must hold the hwaccess_lock.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_wait_for_poweroff_complete - Wait for the poweroff workqueue to
+ *                                       complete
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_wait_for_poweroff_complete(struct kbase_device *kbdev);
+
 /**
  * kbase_pm_register_access_enable - Enable access to GPU registers
  *
@@ -454,12 +485,8 @@ void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume);
  *              pointer)
  * @is_suspend: true if power off due to suspend,
  *              false otherwise
- * Return:
- *         true      if power was turned off, else
- *         false     if power can not be turned off due to pending page/bus
- *                   fault workers. Caller must flush MMU workqueues and retry
  */
-bool kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend);
+void kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend);
 
 #ifdef CONFIG_PM_DEVFREQ
 void kbase_pm_get_dvfs_utilisation(struct kbase_device *kbdev,
@@ -496,7 +523,7 @@ void kbase_pm_power_changed(struct kbase_device *kbdev);
  * @kbdev: The kbase device structure for the device (must be a valid pointer)
  * @now:   Pointer to the timestamp of the change, or NULL to use current time
  *
- * Caller must hold runpool_irq.lock
+ * Caller must hold hwaccess_lock
  */
 void kbase_pm_metrics_update(struct kbase_device *kbdev,
                                ktime_t *now);
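
For illustration only: a small usage sketch of the two cycle-counter release variants documented above. The calling functions here are invented and the usual driver includes (e.g. mali_kbase.h) are assumed; the locking rules are exactly the ones stated in the kernel-doc.

static void demo_release_unlocked_path(struct kbase_device *kbdev)
{
	/* Not holding hwaccess_lock here, so use the locking variant */
	kbase_pm_release_gpu_cycle_counter(kbdev);
}

static void demo_release_locked_path(struct kbase_device *kbdev)
{
	unsigned long flags;

	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
	/* Already under hwaccess_lock: the _nolock variant must be used */
	kbase_pm_release_gpu_cycle_counter_nolock(kbdev);
	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}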
index ae63256..7613e1d 100644 (file)
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -362,14 +362,15 @@ static void kbase_pm_metrics_active_calc(struct kbase_device *kbdev)
                                int device_nr = (katom->core_req &
                                        BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)
                                                ? katom->device_nr : 0;
-                               WARN_ON(device_nr >= 2);
-                               kbdev->pm.backend.metrics.active_cl_ctx[
-                                               device_nr] = 1;
+                               if (!WARN_ON(device_nr >= 2))
+                                       kbdev->pm.backend.metrics.
+                                               active_cl_ctx[device_nr] = 1;
                        } else {
                                /* Slot 2 should not be running non-compute
                                 * atoms */
-                               WARN_ON(js >= 2);
-                               kbdev->pm.backend.metrics.active_gl_ctx[js] = 1;
+                               if (!WARN_ON(js >= 2))
+                                       kbdev->pm.backend.metrics.
+                                               active_gl_ctx[js] = 1;
                        }
                        kbdev->pm.backend.metrics.gpu_active = true;
                }
@@ -382,7 +383,7 @@ void kbase_pm_metrics_update(struct kbase_device *kbdev, ktime_t *timestamp)
        unsigned long flags;
        ktime_t now;
 
-       lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+       lockdep_assert_held(&kbdev->hwaccess_lock);
 
        spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
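
For illustration only: the point of the WARN_ON() change in kbase_pm_metrics_active_calc() above, in isolation. WARN_ON() returns the condition it evaluated, so it can both log the warning and guard the store that would otherwise write out of bounds. The demo_* names are invented.

#include <linux/bug.h>
#include <linux/kernel.h>
#include <linux/types.h>

static u8 demo_active[2];

static void demo_mark_active(unsigned int idx)
{
	/* Before: WARN_ON(idx >= 2); demo_active[idx] = 1;
	 * warned, but still performed the out-of-bounds store. */
	if (!WARN_ON(idx >= ARRAY_SIZE(demo_active)))
		demo_active[idx] = 1;
}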
 
index 4d00602..92457e8 100644 (file)
@@ -156,7 +156,7 @@ static void kbasep_pm_do_poweroff_cores(struct kbase_device *kbdev)
        u64 prev_shader_state = kbdev->pm.backend.desired_shader_state;
        u64 prev_tiler_state = kbdev->pm.backend.desired_tiler_state;
 
-       lockdep_assert_held(&kbdev->pm.power_change_lock);
+       lockdep_assert_held(&kbdev->hwaccess_lock);
 
        kbdev->pm.backend.desired_shader_state &=
                        ~kbdev->pm.backend.shader_poweroff_pending;
@@ -193,7 +193,7 @@ kbasep_pm_do_gpu_poweroff_callback(struct hrtimer *timer)
        kbdev = container_of(timer, struct kbase_device,
                                                pm.backend.gpu_poweroff_timer);
 
-       spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+       spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 
        /* It is safe for this call to do nothing if the work item is already
         * queued. The worker function will read the most up-to-date state of
@@ -220,7 +220,7 @@ kbasep_pm_do_gpu_poweroff_callback(struct hrtimer *timer)
        }
 
        if (kbdev->pm.backend.poweroff_timer_needed) {
-               spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+               spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 
                hrtimer_add_expires(timer, kbdev->pm.gpu_poweroff_time);
 
@@ -228,7 +228,7 @@ kbasep_pm_do_gpu_poweroff_callback(struct hrtimer *timer)
        }
 
        kbdev->pm.backend.poweroff_timer_running = false;
-       spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+       spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 
        return HRTIMER_NORESTART;
 }
@@ -258,13 +258,13 @@ static void kbasep_pm_do_gpu_poweroff_wq(struct work_struct *data)
 
        KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_poweroff_pending == 0);
 
-       spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+       spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 
        /* Only power off the GPU if a request is still pending */
        if (!kbdev->pm.backend.pm_current_policy->get_core_active(kbdev))
                do_poweroff = true;
 
-       spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+       spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 
        if (do_poweroff) {
                kbdev->pm.backend.poweroff_timer_needed = false;
@@ -272,14 +272,7 @@ static void kbasep_pm_do_gpu_poweroff_wq(struct work_struct *data)
                kbdev->pm.backend.poweroff_timer_running = false;
 
                /* Power off the GPU */
-               if (!kbase_pm_do_poweroff(kbdev, false)) {
-                       /* GPU can not be powered off at present */
-                       kbdev->pm.backend.poweroff_timer_needed = true;
-                       kbdev->pm.backend.poweroff_timer_running = true;
-                       hrtimer_start(&kbdev->pm.backend.gpu_poweroff_timer,
-                                       kbdev->pm.gpu_poweroff_time,
-                                       HRTIMER_MODE_REL);
-               }
+               kbase_pm_do_poweroff(kbdev, false);
        }
 
        mutex_unlock(&kbdev->pm.lock);
@@ -325,7 +318,7 @@ void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev)
 
        kbdev->pm.backend.poweroff_timer_needed = false;
        hrtimer_cancel(&kbdev->pm.backend.gpu_poweroff_timer);
-       spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+       spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
        kbdev->pm.backend.poweroff_timer_running = false;
 
        /* If wq is already running but is held off by pm.lock, make sure it has
@@ -336,7 +329,7 @@ void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev)
        kbdev->pm.backend.tiler_poweroff_pending = 0;
        kbdev->pm.backend.shader_poweroff_pending_time = 0;
 
-       spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+       spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 }
 
 void kbase_pm_update_active(struct kbase_device *kbdev)
@@ -351,7 +344,7 @@ void kbase_pm_update_active(struct kbase_device *kbdev)
        /* pm_current_policy will never be NULL while pm.lock is held */
        KBASE_DEBUG_ASSERT(backend->pm_current_policy);
 
-       spin_lock_irqsave(&pm->power_change_lock, flags);
+       spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 
        active = backend->pm_current_policy->get_core_active(kbdev);
 
@@ -363,7 +356,7 @@ void kbase_pm_update_active(struct kbase_device *kbdev)
                        /* If a request was pending then the GPU was still
                         * powered, so no need to continue */
                        if (!kbdev->poweroff_pending) {
-                               spin_unlock_irqrestore(&pm->power_change_lock,
+                               spin_unlock_irqrestore(&kbdev->hwaccess_lock,
                                                flags);
                                return;
                        }
@@ -379,10 +372,14 @@ void kbase_pm_update_active(struct kbase_device *kbdev)
                                        HRTIMER_MODE_REL);
                }
 
-               spin_unlock_irqrestore(&pm->power_change_lock, flags);
-
                /* Power on the GPU and any cores requested by the policy */
-               kbase_pm_do_poweron(kbdev, false);
+               if (pm->backend.poweroff_wait_in_progress) {
+                       pm->backend.poweron_required = true;
+                       spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+               } else {
+                       spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+                       kbase_pm_do_poweron(kbdev, false);
+               }
        } else {
                /* It is an error for the power policy to power off the GPU
                 * when there are contexts active */
@@ -414,35 +411,17 @@ void kbase_pm_update_active(struct kbase_device *kbdev)
                                                pm->gpu_poweroff_time,
                                                HRTIMER_MODE_REL);
                                }
-                               spin_unlock_irqrestore(&pm->power_change_lock,
+                               spin_unlock_irqrestore(&kbdev->hwaccess_lock,
                                                flags);
                        } else {
-                               spin_unlock_irqrestore(&pm->power_change_lock,
+                               spin_unlock_irqrestore(&kbdev->hwaccess_lock,
                                                flags);
 
                                /* Power off the GPU immediately */
-                               if (!kbase_pm_do_poweroff(kbdev, false)) {
-                                       /* GPU can not be powered off at present
-                                        */
-                                       spin_lock_irqsave(
-                                                       &pm->power_change_lock,
-                                                       flags);
-                                       backend->poweroff_timer_needed = true;
-                                       if (!backend->poweroff_timer_running) {
-                                               backend->poweroff_timer_running
-                                                               = true;
-                                               hrtimer_start(
-                                               &backend->gpu_poweroff_timer,
-                                                       pm->gpu_poweroff_time,
-                                                       HRTIMER_MODE_REL);
-                                       }
-                                       spin_unlock_irqrestore(
-                                                       &pm->power_change_lock,
-                                                       flags);
-                               }
+                               kbase_pm_do_poweroff(kbdev, false);
                        }
                } else {
-                       spin_unlock_irqrestore(&pm->power_change_lock, flags);
+                       spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
                }
        }
 }
@@ -454,25 +433,37 @@ void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev)
        bool cores_are_available;
        bool do_poweroff = false;
 
-       lockdep_assert_held(&kbdev->pm.power_change_lock);
+       lockdep_assert_held(&kbdev->hwaccess_lock);
 
        if (kbdev->pm.backend.pm_current_policy == NULL)
                return;
+       if (kbdev->pm.backend.poweroff_wait_in_progress)
+               return;
 
-       desired_bitmap =
-               kbdev->pm.backend.pm_current_policy->get_core_mask(kbdev);
-       desired_bitmap &= kbase_pm_ca_get_core_mask(kbdev);
-
-       if (kbdev->tiler_needed_cnt > 0 || kbdev->tiler_inuse_cnt > 0)
-               desired_tiler_bitmap = 1;
-       else
+       if (kbdev->protected_mode_transition && !kbdev->shader_needed_bitmap &&
+                       !kbdev->shader_inuse_bitmap && !kbdev->tiler_needed_cnt
+                       && !kbdev->tiler_inuse_cnt) {
+               /* We are trying to change in/out of protected mode - force all
+                * cores off so that the L2 powers down */
+               desired_bitmap = 0;
                desired_tiler_bitmap = 0;
+       } else {
+               desired_bitmap =
+               kbdev->pm.backend.pm_current_policy->get_core_mask(kbdev);
+               desired_bitmap &= kbase_pm_ca_get_core_mask(kbdev);
 
-       if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY)) {
-               /* Unless XAFFINITY is supported, enable core 0 if tiler
-                * required, regardless of core availability */
                if (kbdev->tiler_needed_cnt > 0 || kbdev->tiler_inuse_cnt > 0)
-                       desired_bitmap |= 1;
+                       desired_tiler_bitmap = 1;
+               else
+                       desired_tiler_bitmap = 0;
+
+               if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY)) {
+                       /* Unless XAFFINITY is supported, enable core 0 if tiler
+                        * required, regardless of core availability */
+                       if (kbdev->tiler_needed_cnt > 0 ||
+                                       kbdev->tiler_inuse_cnt > 0)
+                               desired_bitmap |= 1;
+               }
        }
 
        if (kbdev->pm.backend.desired_shader_state != desired_bitmap)
@@ -495,7 +486,8 @@ void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev)
                                (kbdev->pm.backend.desired_tiler_state &
                                                        ~desired_tiler_bitmap);
 
-                       if (kbdev->pm.poweroff_shader_ticks)
+                       if (kbdev->pm.poweroff_shader_ticks &&
+                                       !kbdev->protected_mode_transition)
                                kbdev->pm.backend.shader_poweroff_pending_time =
                                                kbdev->pm.poweroff_shader_ticks;
                        else
@@ -517,7 +509,8 @@ void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev)
                kbdev->pm.backend.tiler_poweroff_pending |=
                                (kbdev->pm.backend.desired_tiler_state &
                                                        ~desired_tiler_bitmap);
-               if (kbdev->pm.poweroff_shader_ticks)
+               if (kbdev->pm.poweroff_shader_ticks &&
+                               !kbdev->protected_mode_transition)
                        kbdev->pm.backend.shader_poweroff_pending_time =
                                        kbdev->pm.poweroff_shader_ticks;
                else
@@ -563,11 +556,11 @@ void kbase_pm_update_cores_state(struct kbase_device *kbdev)
 {
        unsigned long flags;
 
-       spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+       spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 
        kbase_pm_update_cores_state_nolock(kbdev);
 
-       spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+       spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 }
 
 int kbase_pm_list_policies(const struct kbase_pm_policy * const **list)
@@ -612,10 +605,10 @@ void kbase_pm_set_policy(struct kbase_device *kbdev,
        mutex_lock(&kbdev->pm.lock);
 
        /* Remove the policy to prevent IRQ handlers from working on it */
-       spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+       spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
        old_policy = kbdev->pm.backend.pm_current_policy;
        kbdev->pm.backend.pm_current_policy = NULL;
-       spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+       spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 
        KBASE_TRACE_ADD(kbdev, PM_CURRENT_POLICY_TERM, NULL, NULL, 0u,
                                                                old_policy->id);
@@ -627,9 +620,9 @@ void kbase_pm_set_policy(struct kbase_device *kbdev,
        if (new_policy->init)
                new_policy->init(kbdev);
 
-       spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+       spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
        kbdev->pm.backend.pm_current_policy = new_policy;
-       spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+       spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 
        /* If any core power state changes were previously attempted, but
         * couldn't be made because the policy was changing (current_policy was
@@ -664,14 +657,13 @@ kbase_pm_trace_check_and_finish_state_change(struct kbase_device *kbdev)
 void kbase_pm_request_cores(struct kbase_device *kbdev,
                                bool tiler_required, u64 shader_cores)
 {
-       unsigned long flags;
        u64 cores;
 
        kbase_pm_change_state change_gpu_state = 0u;
 
        KBASE_DEBUG_ASSERT(kbdev != NULL);
 
-       spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+       lockdep_assert_held(&kbdev->hwaccess_lock);
 
        cores = shader_cores;
        while (cores) {
@@ -714,8 +706,6 @@ void kbase_pm_request_cores(struct kbase_device *kbdev,
                                        KBASE_PM_FUNC_ID_REQUEST_CORES_END,
                                                        change_gpu_state);
        }
-
-       spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
 }
 
 KBASE_EXPORT_TEST_API(kbase_pm_request_cores);
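
kbase_pm_request_cores() and its siblings now assert that hwaccess_lock is held instead of taking the old power_change_lock internally, so the locking moves to the caller. An illustrative sketch of the new convention (kbase_pm_request_cores_sync() further below follows the same shape):

/* Illustrative caller only; real call sites live in the job slot backend */
static void example_request_shader_cores(struct kbase_device *kbdev,
		u64 shader_cores)
{
	unsigned long flags;

	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
	kbase_pm_request_cores(kbdev, true, shader_cores);
	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}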
@@ -723,13 +713,11 @@ KBASE_EXPORT_TEST_API(kbase_pm_request_cores);
 void kbase_pm_unrequest_cores(struct kbase_device *kbdev,
                                bool tiler_required, u64 shader_cores)
 {
-       unsigned long flags;
-
        kbase_pm_change_state change_gpu_state = 0u;
 
        KBASE_DEBUG_ASSERT(kbdev != NULL);
 
-       spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+       lockdep_assert_held(&kbdev->hwaccess_lock);
 
        while (shader_cores) {
                int bitnum = fls64(shader_cores) - 1;
@@ -770,8 +758,6 @@ void kbase_pm_unrequest_cores(struct kbase_device *kbdev,
                 * - no-one will wait on the state change */
                kbase_pm_trace_check_and_finish_state_change(kbdev);
        }
-
-       spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
 }
 
 KBASE_EXPORT_TEST_API(kbase_pm_unrequest_cores);
@@ -780,11 +766,10 @@ enum kbase_pm_cores_ready
 kbase_pm_register_inuse_cores(struct kbase_device *kbdev,
                                bool tiler_required, u64 shader_cores)
 {
-       unsigned long flags;
        u64 prev_shader_needed; /* Just for tracing */
        u64 prev_shader_inuse;  /* Just for tracing */
 
-       spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+       lockdep_assert_held(&kbdev->hwaccess_lock);
 
        prev_shader_needed = kbdev->shader_needed_bitmap;
        prev_shader_inuse = kbdev->shader_inuse_bitmap;
@@ -795,16 +780,15 @@ kbase_pm_register_inuse_cores(struct kbase_device *kbdev,
         * be chosen */
        if ((kbdev->pm.backend.desired_shader_state & shader_cores) !=
                                                        shader_cores) {
-               spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
-
-               return KBASE_NEW_AFFINITY;
+               return (kbdev->pm.backend.poweroff_wait_in_progress ||
+                               kbdev->pm.backend.pm_current_policy == NULL) ?
+                               KBASE_CORES_NOT_READY : KBASE_NEW_AFFINITY;
        }
 
        if ((kbdev->shader_available_bitmap & shader_cores) != shader_cores ||
            (tiler_required && !kbdev->tiler_available_bitmap)) {
                /* Trace ongoing core transition */
                kbase_timeline_pm_l2_transition_start(kbdev);
-               spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
                return KBASE_CORES_NOT_READY;
        }
 
@@ -853,8 +837,6 @@ kbase_pm_register_inuse_cores(struct kbase_device *kbdev,
                KBASE_TRACE_ADD(kbdev, PM_REGISTER_CHANGE_SHADER_INUSE, NULL,
                                NULL, 0u, (u32) kbdev->shader_inuse_bitmap);
 
-       spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
-
        return KBASE_CORES_READY;
 }
 
@@ -863,12 +845,11 @@ KBASE_EXPORT_TEST_API(kbase_pm_register_inuse_cores);
 void kbase_pm_release_cores(struct kbase_device *kbdev,
                                bool tiler_required, u64 shader_cores)
 {
-       unsigned long flags;
        kbase_pm_change_state change_gpu_state = 0u;
 
        KBASE_DEBUG_ASSERT(kbdev != NULL);
 
-       spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+       lockdep_assert_held(&kbdev->hwaccess_lock);
 
        while (shader_cores) {
                int bitnum = fls64(shader_cores) - 1;
@@ -913,8 +894,6 @@ void kbase_pm_release_cores(struct kbase_device *kbdev,
                /* Trace that any state change completed immediately */
                kbase_pm_trace_check_and_finish_state_change(kbdev);
        }
-
-       spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
 }
 
 KBASE_EXPORT_TEST_API(kbase_pm_release_cores);
@@ -923,7 +902,13 @@ void kbase_pm_request_cores_sync(struct kbase_device *kbdev,
                                        bool tiler_required,
                                        u64 shader_cores)
 {
+       unsigned long flags;
+
+       kbase_pm_wait_for_poweroff_complete(kbdev);
+
+       spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
        kbase_pm_request_cores(kbdev, tiler_required, shader_cores);
+       spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 
        kbase_pm_check_transitions_sync(kbdev);
 }
@@ -935,7 +920,7 @@ void kbase_pm_request_l2_caches(struct kbase_device *kbdev)
        unsigned long flags;
        u32 prior_l2_users_count;
 
-       spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+       spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 
        prior_l2_users_count = kbdev->l2_users_count++;
 
@@ -947,7 +932,7 @@ void kbase_pm_request_l2_caches(struct kbase_device *kbdev)
        if (!prior_l2_users_count || !kbdev->l2_available_bitmap)
                kbase_pm_check_transitions_nolock(kbdev);
 
-       spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+       spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
        wait_event(kbdev->pm.backend.l2_powered_wait,
                                        kbdev->pm.backend.l2_powered == 1);
 
@@ -959,22 +944,16 @@ KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches);
 
 void kbase_pm_request_l2_caches_l2_is_on(struct kbase_device *kbdev)
 {
-       unsigned long flags;
-
-       spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+       lockdep_assert_held(&kbdev->hwaccess_lock);
 
        kbdev->l2_users_count++;
-
-       spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
 }
 
 KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches_l2_is_on);
 
 void kbase_pm_release_l2_caches(struct kbase_device *kbdev)
 {
-       unsigned long flags;
-
-       spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+       lockdep_assert_held(&kbdev->hwaccess_lock);
 
        KBASE_DEBUG_ASSERT(kbdev->l2_users_count > 0);
 
@@ -985,8 +964,6 @@ void kbase_pm_release_l2_caches(struct kbase_device *kbdev)
                /* Trace that any state change completed immediately */
                kbase_pm_trace_check_and_finish_state_change(kbdev);
        }
-
-       spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
 }
 
 KBASE_EXPORT_TEST_API(kbase_pm_release_l2_caches);
index f7c0ff6..8b07cbc 100644 (file)
@@ -50,6 +50,7 @@ enum base_hw_feature {
        BASE_HW_FEATURE_FLUSH_REDUCTION,
        BASE_HW_FEATURE_PROTECTED_MODE,
        BASE_HW_FEATURE_COHERENCY_REG,
+       BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
        BASE_HW_FEATURE_END
 };
 
@@ -192,6 +193,31 @@ static const enum base_hw_feature base_hw_features_tMIx[] = {
        BASE_HW_FEATURE_END
 };
 
+static const enum base_hw_feature base_hw_features_tHEx[] = {
+       BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+       BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+       BASE_HW_FEATURE_XAFFINITY,
+       BASE_HW_FEATURE_WARPING,
+       BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+       BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+       BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+       BASE_HW_FEATURE_BRNDOUT_CC,
+       BASE_HW_FEATURE_BRNDOUT_KILL,
+       BASE_HW_FEATURE_LD_ST_LEA_TEX,
+       BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+       BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+       BASE_HW_FEATURE_MRT,
+       BASE_HW_FEATURE_MSAA_16X,
+       BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+       BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+       BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+       BASE_HW_FEATURE_TEST4_DATUM_MODE,
+       BASE_HW_FEATURE_FLUSH_REDUCTION,
+       BASE_HW_FEATURE_PROTECTED_MODE,
+       BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+       BASE_HW_FEATURE_COHERENCY_REG,
+       BASE_HW_FEATURE_END
+};
 
 
 #endif /* _BASE_HWCONFIG_FEATURES_H_ */
index 149f44c..4d95b4f 100644 (file)
@@ -113,6 +113,8 @@ enum base_hw_issue {
        BASE_HW_ISSUE_TMIX_8138,
        BASE_HW_ISSUE_TMIX_8206,
        BASE_HW_ISSUE_TMIX_8343,
+       BASE_HW_ISSUE_TMIX_8463,
+       BASE_HW_ISSUE_TMIX_8456,
        GPUCORE_1619,
        BASE_HW_ISSUE_END
 };
@@ -944,6 +946,8 @@ static const enum base_hw_issue base_hw_issues_tMIx_r0p0_05dev0[] = {
        BASE_HW_ISSUE_TMIX_8133,
        BASE_HW_ISSUE_TMIX_8138,
        BASE_HW_ISSUE_TMIX_8343,
+       BASE_HW_ISSUE_TMIX_8463,
+       BASE_HW_ISSUE_TMIX_8456,
        BASE_HW_ISSUE_END
 };
 
@@ -961,6 +965,8 @@ static const enum base_hw_issue base_hw_issues_tMIx_r0p0[] = {
        BASE_HW_ISSUE_TMIX_8138,
        BASE_HW_ISSUE_TMIX_8206,
        BASE_HW_ISSUE_TMIX_8343,
+       BASE_HW_ISSUE_TMIX_8463,
+       BASE_HW_ISSUE_TMIX_8456,
        BASE_HW_ISSUE_END
 };
 
@@ -976,11 +982,32 @@ static const enum base_hw_issue base_hw_issues_model_tMIx[] = {
        BASE_HW_ISSUE_TMIX_8138,
        BASE_HW_ISSUE_TMIX_8206,
        BASE_HW_ISSUE_TMIX_8343,
+       BASE_HW_ISSUE_TMIX_8456,
        GPUCORE_1619,
        BASE_HW_ISSUE_END
 };
 
+static const enum base_hw_issue base_hw_issues_tHEx_r0p0[] = {
+       BASE_HW_ISSUE_9435,
+       BASE_HW_ISSUE_10682,
+       BASE_HW_ISSUE_10821,
+       BASE_HW_ISSUE_T76X_3700,
+       BASE_HW_ISSUE_TMIX_7891,
+       BASE_HW_ISSUE_TMIX_8042,
+       BASE_HW_ISSUE_TMIX_8133,
+       BASE_HW_ISSUE_END
+};
 
+static const enum base_hw_issue base_hw_issues_model_tHEx[] = {
+       BASE_HW_ISSUE_5736,
+       BASE_HW_ISSUE_9435,
+       BASE_HW_ISSUE_T76X_3700,
+       BASE_HW_ISSUE_TMIX_7891,
+       BASE_HW_ISSUE_TMIX_8042,
+       BASE_HW_ISSUE_TMIX_8133,
+       GPUCORE_1619,
+       BASE_HW_ISSUE_END
+};
 
 
 
index 749dd9a..bcb05e4 100644 (file)
@@ -45,6 +45,9 @@
 /* Support UK10_2 IOCTLS */
 #define BASE_LEGACY_UK10_2_SUPPORT 1
 
+/* Support UK10_4 IOCTLS */
+#define BASE_LEGACY_UK10_4_SUPPORT 1
+
 typedef struct base_mem_handle {
        struct {
                u64 handle;
@@ -1807,4 +1810,10 @@ typedef struct base_profiling_controls {
        u32 profiling_controls[FBDUMP_CONTROL_MAX];
 } base_profiling_controls;
 
+/* Enable additional tracepoints for latency measurements (TL_ATOM_READY,
+ * TL_ATOM_DONE, TL_ATOM_PRIO_CHANGE, TL_ATOM_EVENT_POST) */
+#define BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS (1 << 0)
+
+#define BASE_TLSTREAM_FLAGS_MASK (BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS)
+
 #endif                         /* _BASE_KERNEL_H_ */
index b6d28fe..443d4b1 100644 (file)
 #include <mali_kbase_uku.h>
 #include <mali_kbase_linux.h>
 
+/*
+ * Include mali_kbase_defs.h first as this provides types needed by other local
+ * header files.
+ */
+#include "mali_kbase_defs.h"
+
+#include "mali_kbase_context.h"
 #include "mali_kbase_strings.h"
-#include "mali_kbase_pm.h"
 #include "mali_kbase_mem_lowlevel.h"
-#include "mali_kbase_defs.h"
 #include "mali_kbase_trace_timeline.h"
 #include "mali_kbase_js.h"
 #include "mali_kbase_mem.h"
@@ -105,7 +110,6 @@ u32 kbase_get_profiling_control(struct kbase_device *kbdev, u32 control);
 struct kbase_context *
 kbase_create_context(struct kbase_device *kbdev, bool is_compat);
 void kbase_destroy_context(struct kbase_context *kctx);
-int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags);
 
 int kbase_jd_init(struct kbase_context *kctx);
 void kbase_jd_exit(struct kbase_context *kctx);
@@ -163,7 +167,7 @@ void kbase_gpu_cacheclean(struct kbase_device *kbdev,
  * than @katom will be soft stopped and put back in the queue, so that atoms
  * with higher priority can run.
  *
- * The js_data.runpool_irq.lock must be held when calling this function.
+ * The hwaccess_lock must be held when calling this function.
  */
 void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
                                struct kbase_jd_atom *katom);
@@ -546,4 +550,58 @@ void kbasep_trace_dump(struct kbase_device *kbdev);
 void kbase_set_driver_inactive(struct kbase_device *kbdev, bool inactive);
 #endif /* CONFIG_MALI_DEBUG */
 
+
+#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI)
+
+/* kbase_io_history_init - initialize data struct for register access history
+ *
+ * @h The register history to initialize
+ * @n The number of register accesses the buffer can hold
+ *
+ * @return 0 if successfully initialized, failure otherwise
+ */
+int kbase_io_history_init(struct kbase_io_history *h, u16 n);
+
+/* kbase_io_history_term - uninit all resources for the register access history
+ *
+ * @h The register history to terminate
+ */
+void kbase_io_history_term(struct kbase_io_history *h);
+
+/* kbase_io_history_dump - print the register history to the kernel ring buffer
+ *
+ * @kbdev Pointer to kbase_device containing the register history to dump
+ */
+void kbase_io_history_dump(struct kbase_device *kbdev);
+
+/**
+ * kbase_io_history_resize - resize the register access history buffer.
+ *
+ * @h: Pointer to a valid register history to resize
+ * @new_size: Number of accesses the buffer could hold
+ *
+ * A successful resize will clear all recent register accesses.
+ * If resizing fails for any reason (e.g., could not allocate memory, invalid
+ * buffer size) then the original buffer will be kept intact.
+ *
+ * @return 0 if the buffer was resized, failure otherwise
+ */
+int kbase_io_history_resize(struct kbase_io_history *h, u16 new_size);
+
+#else /* CONFIG_DEBUG_FS */
+
+#define kbase_io_history_init(...) ((int)0)
+
+#define kbase_io_history_term CSTD_NOP
+
+#define kbase_io_history_dump CSTD_NOP
+
+#define kbase_io_history_resize CSTD_NOP
+
+#endif /* CONFIG_DEBUG_FS */
+
+
 #endif
+
+
+
index 9b00cce..e674cc2 100644 (file)
@@ -201,13 +201,13 @@ enum {
 /*
  * Default minimum number of scheduling ticks before jobs are hard-stopped
  */
-#define DEFAULT_JS_HARD_STOP_TICKS_SS    (50) /* 5s */
+#define DEFAULT_JS_HARD_STOP_TICKS_SS    (100) /* 10s */
 #define DEFAULT_JS_HARD_STOP_TICKS_SS_8408  (300) /* 30s */
 
 /*
  * Default minimum number of scheduling ticks before CL jobs are hard-stopped.
  */
-#define DEFAULT_JS_HARD_STOP_TICKS_CL    (50) /* 5s */
+#define DEFAULT_JS_HARD_STOP_TICKS_CL    (100) /* 10s */
 
 /*
  * Default minimum number of scheduling ticks before jobs are hard-stopped
@@ -225,14 +225,14 @@ enum {
  * Default minimum number of scheduling ticks before the GPU is reset to clear a
  * "stuck" job
  */
-#define DEFAULT_JS_RESET_TICKS_SS           (55) /* 5.5s */
+#define DEFAULT_JS_RESET_TICKS_SS           (105) /* 10.5s */
 #define DEFAULT_JS_RESET_TICKS_SS_8408     (450) /* 45s */
 
 /*
  * Default minimum number of scheduling ticks before the GPU is reset to clear a
  * "stuck" CL job.
  */
-#define DEFAULT_JS_RESET_TICKS_CL        (55) /* 5.5s */
+#define DEFAULT_JS_RESET_TICKS_CL        (105) /* 10.5s */
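
The doubled values follow directly from the default scheduling period of 100 ms per tick, as the inline comments show (50 ticks gave 5 s, 100 ticks give 10 s); a trivial helper makes the conversion explicit, assuming that 100 ms default:

/* Assumes the driver's default 100 ms scheduling period */
static inline u32 example_js_ticks_to_ms(u32 ticks)
{
	return ticks * 100;	/* e.g. 100 ticks -> 10000 ms = 10 s */
}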
 
 /*
  * Default minimum number of scheduling ticks before the GPU is reset to clear a
index 344a1f1..55c5ef6 100644 (file)
@@ -53,13 +53,13 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat)
 
        kctx->kbdev = kbdev;
        kctx->as_nr = KBASEP_AS_NR_INVALID;
-       kctx->is_compat = is_compat;
+       if (is_compat)
+               kbase_ctx_flag_set(kctx, KCTX_COMPAT);
 #ifdef CONFIG_MALI_TRACE_TIMELINE
        kctx->timeline.owner_tgid = task_tgid_nr(current);
 #endif
        atomic_set(&kctx->setup_complete, 0);
        atomic_set(&kctx->setup_in_progress, 0);
-       kctx->infinite_cache_active = 0;
        spin_lock_init(&kctx->mm_update_lock);
        kctx->process_mm = NULL;
        atomic_set(&kctx->nonmapped_pages, 0);
@@ -108,11 +108,15 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat)
        if (err)
                goto term_dma_fence;
 
-       kctx->pgd = kbase_mmu_alloc_pgd(kctx);
-       if (!kctx->pgd)
-               goto free_mmu;
+       do {
+               err = kbase_mem_pool_grow(&kctx->mem_pool,
+                               MIDGARD_MMU_BOTTOMLEVEL);
+               if (err)
+                       goto pgd_no_mem;
+               kctx->pgd = kbase_mmu_alloc_pgd(kctx);
+       } while (!kctx->pgd);
 
-       kctx->aliasing_sink_page = kbase_mem_pool_alloc(&kctx->mem_pool);
+       kctx->aliasing_sink_page = kbase_mem_alloc_page(kctx->kbdev);
        if (!kctx->aliasing_sink_page)
                goto no_sink_page;
 
@@ -162,7 +166,7 @@ no_sink_page:
        kbase_gpu_vm_lock(kctx);
        kbase_mmu_free_pgd(kctx);
        kbase_gpu_vm_unlock(kctx);
-free_mmu:
+pgd_no_mem:
        kbase_mmu_term(kctx);
 term_dma_fence:
        kbase_dma_fence_term(kctx);
@@ -300,17 +304,16 @@ int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags)
        }
 
        mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
-       spin_lock_irqsave(&kctx->kbdev->js_data.runpool_irq.lock, irq_flags);
+       spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, irq_flags);
 
        /* Translate the flags */
        if ((flags & BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) == 0)
-               js_kctx_info->ctx.flags &= ~((u32) KBASE_CTX_FLAG_SUBMIT_DISABLED);
+               kbase_ctx_flag_clear(kctx, KCTX_SUBMIT_DISABLED);
 
        /* Latch the initial attributes into the Job Scheduler */
        kbasep_js_ctx_attr_set_initial_attrs(kctx->kbdev, kctx);
 
-       spin_unlock_irqrestore(&kctx->kbdev->js_data.runpool_irq.lock,
-                       irq_flags);
+       spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, irq_flags);
        mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
  out:
        return err;
diff --git a/drivers/gpu/arm/midgard/mali_kbase_context.h b/drivers/gpu/arm/midgard/mali_kbase_context.h
new file mode 100644 (file)
index 0000000..a3f5bb0
--- /dev/null
@@ -0,0 +1,90 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_CONTEXT_H_
+#define _KBASE_CONTEXT_H_
+
+#include <linux/atomic.h>
+
+
+int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags);
+
+/**
+ * kbase_ctx_flag - Check if @flag is set on @kctx
+ * @kctx: Pointer to kbase context to check
+ * @flag: Flag to check
+ *
+ * Return: true if @flag is set on @kctx, false if not.
+ */
+static inline bool kbase_ctx_flag(struct kbase_context *kctx,
+                                     enum kbase_context_flags flag)
+{
+       return atomic_read(&kctx->flags) & flag;
+}
+
+/**
+ * kbase_ctx_flag_clear - Clear @flag on @kctx
+ * @kctx: Pointer to kbase context
+ * @flag: Flag to clear
+ *
+ * Clear the @flag on @kctx. This is done atomically, so other flags being
+ * cleared or set at the same time will be safe.
+ *
+ * Some flags have locking requirements, check the documentation for the
+ * respective flags.
+ */
+static inline void kbase_ctx_flag_clear(struct kbase_context *kctx,
+                                       enum kbase_context_flags flag)
+{
+#if KERNEL_VERSION(4, 3, 0) > LINUX_VERSION_CODE
+       /*
+        * Earlier kernel versions don't have atomic_andnot() or
+        * atomic_and(). atomic_clear_mask() was only available on some
+        * architectures and was removed in v3.13 on arm and arm64.
+        *
+        * Use a compare-exchange loop to clear the flag on pre-4.3 kernels,
+        * where atomic_andnot() is not yet available.
+        */
+       int old, new;
+
+       do {
+               old = atomic_read(&kctx->flags);
+               new = old & ~flag;
+
+       } while (atomic_cmpxchg(&kctx->flags, old, new) != old);
+#else
+       atomic_andnot(flag, &kctx->flags);
+#endif
+}
+
+/**
+ * kbase_ctx_flag_set - Set @flag on @kctx
+ * @kctx: Pointer to kbase context
+ * @flag: Flag to set
+ *
+ * Set the @flag on @kctx. This is done atomically, so other flags being
+ * cleared or set at the same time will be safe.
+ *
+ * Some flags have locking requirements, check the documentation for the
+ * respective flags.
+ */
+static inline void kbase_ctx_flag_set(struct kbase_context *kctx,
+                                     enum kbase_context_flags flag)
+{
+       atomic_or(flag, &kctx->flags);
+}
+#endif /* _KBASE_CONTEXT_H_ */
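
A short usage sketch of these helpers, mirroring the open-coded flag accesses they replace in kbase_context.c above (KCTX_COMPAT and KCTX_SUBMIT_DISABLED are flag values used elsewhere in this patch):

/* Illustrative only */
static void example_ctx_flags(struct kbase_context *kctx, bool is_compat)
{
	if (is_compat)
		kbase_ctx_flag_set(kctx, KCTX_COMPAT);

	/* Allow job submission for this context */
	kbase_ctx_flag_clear(kctx, KCTX_SUBMIT_DISABLED);

	if (kbase_ctx_flag(kctx, KCTX_COMPAT))
		dev_dbg(kctx->kbdev->dev, "32-bit client context\n");
}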
index ee59504..205d44e 100644 (file)
@@ -35,6 +35,7 @@
 #if !MALI_CUSTOMER_RELEASE
 #include "mali_kbase_regs_dump_debugfs.h"
 #endif /* !MALI_CUSTOMER_RELEASE */
+#include "mali_kbase_regs_history_debugfs.h"
 #include <mali_kbase_hwaccess_backend.h>
 #include <mali_kbase_hwaccess_jm.h>
 #include <backend/gpu/mali_kbase_device_internal.h>
@@ -117,39 +118,6 @@ static inline void __compile_time_asserts(void)
        CSTD_COMPILE_TIME_ASSERT(sizeof(KERNEL_SIDE_DDK_VERSION_STRING) <= KBASE_GET_VERSION_BUFFER_SIZE);
 }
 
-#ifdef CONFIG_KDS
-
-struct kbasep_kds_resource_set_file_data {
-       struct kds_resource_set *lock;
-};
-
-static int kds_resource_release(struct inode *inode, struct file *file);
-
-static const struct file_operations kds_resource_fops = {
-       .release = kds_resource_release
-};
-
-struct kbase_kds_resource_list_data {
-       struct kds_resource **kds_resources;
-       unsigned long *kds_access_bitmap;
-       int num_elems;
-};
-
-static int kds_resource_release(struct inode *inode, struct file *file)
-{
-       struct kbasep_kds_resource_set_file_data *data;
-
-       data = (struct kbasep_kds_resource_set_file_data *)file->private_data;
-       if (NULL != data) {
-               if (NULL != data->lock)
-                       kds_resource_set_release(&data->lock);
-
-               kfree(data);
-       }
-       return 0;
-}
-#endif /* CONFIG_KDS */
-
 static void kbase_create_timeline_objects(struct kbase_context *kctx)
 {
        struct kbase_device             *kbdev = kctx->kbdev;
@@ -291,6 +259,7 @@ enum {
        inited_debugfs = (1u << 15),
        inited_gpu_device = (1u << 16),
        inited_registers_map = (1u << 17),
+       inited_io_history = (1u << 18),
        inited_power_control = (1u << 19),
        inited_buslogger = (1u << 20)
 };
@@ -402,7 +371,7 @@ static int kbase_dispatch(struct kbase_context *kctx, void * const args, u32 arg
                                goto bad_size;
 
 #if defined(CONFIG_64BIT)
-                       if (!kctx->is_compat) {
+                       if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
                                /* force SAME_VA if a 64-bit client */
                                mem->flags |= BASE_MEM_SAME_VA;
                        }
@@ -423,7 +392,7 @@ static int kbase_dispatch(struct kbase_context *kctx, void * const args, u32 arg
                        if (sizeof(*mem_import) != args_size)
                                goto bad_size;
 #ifdef CONFIG_COMPAT
-                       if (kctx->is_compat)
+                       if (kbase_ctx_flag(kctx, KCTX_COMPAT))
                                phandle = compat_ptr(mem_import->phandle.compat_value);
                        else
 #endif
@@ -464,7 +433,7 @@ static int kbase_dispatch(struct kbase_context *kctx, void * const args, u32 arg
                        }
 
 #ifdef CONFIG_COMPAT
-                       if (kctx->is_compat)
+                       if (kbase_ctx_flag(kctx, KCTX_COMPAT))
                                user_ai = compat_ptr(alias->ai.compat_value);
                        else
 #endif
@@ -902,14 +871,14 @@ copy_failed:
                        }
 
 #ifdef CONFIG_COMPAT
-                       if (kctx->is_compat)
+                       if (kbase_ctx_flag(kctx, KCTX_COMPAT))
                                user_buf = compat_ptr(add_data->buf.compat_value);
                        else
 #endif
                                user_buf = add_data->buf.value;
 
                        buf = kmalloc(add_data->len, GFP_KERNEL);
-                       if (!buf)
+                       if (ZERO_OR_NULL_PTR(buf))
                                goto out_bad;
 
                        if (0 != copy_from_user(buf, user_buf, add_data->len)) {
@@ -940,7 +909,28 @@ copy_failed:
                        break;
                }
 #endif /* CONFIG_MALI_NO_MALI */
+#ifdef BASE_LEGACY_UK10_4_SUPPORT
+       case KBASE_FUNC_TLSTREAM_ACQUIRE_V10_4:
+               {
+                       struct kbase_uk_tlstream_acquire_v10_4 *tlstream_acquire
+                                       = args;
 
+                       if (sizeof(*tlstream_acquire) != args_size)
+                               goto bad_size;
+
+                       if (0 != kbase_tlstream_acquire(
+                                               kctx,
+                                               &tlstream_acquire->fd, 0)) {
+                               ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+                       } else if (0 <= tlstream_acquire->fd) {
+                               /* Summary stream was cleared during acquire.
+                                * Create static timeline objects that will be
+                                * read by client. */
+                               kbase_create_timeline_objects(kctx);
+                       }
+                       break;
+               }
+#endif /* BASE_LEGACY_UK10_4_SUPPORT */
        case KBASE_FUNC_TLSTREAM_ACQUIRE:
                {
                        struct kbase_uk_tlstream_acquire *tlstream_acquire =
@@ -949,9 +939,13 @@ copy_failed:
                        if (sizeof(*tlstream_acquire) != args_size)
                                goto bad_size;
 
+                       if (tlstream_acquire->flags & ~BASE_TLSTREAM_FLAGS_MASK)
+                               goto out_bad;
+
                        if (0 != kbase_tlstream_acquire(
                                                kctx,
-                                               &tlstream_acquire->fd)) {
+                                               &tlstream_acquire->fd,
+                                               tlstream_acquire->flags)) {
                                ukh->ret = MALI_ERROR_FUNCTION_FAILED;
                        } else if (0 <= tlstream_acquire->fd) {
                                /* Summary stream was cleared during acquire.
@@ -1134,6 +1128,63 @@ void kbase_release_device(struct kbase_device *kbdev)
 }
 EXPORT_SYMBOL(kbase_release_device);
 
+#if KERNEL_VERSION(4, 4, 0) > LINUX_VERSION_CODE
+/*
+ * Kernel versions before v4.6 don't have
+ * kstrtobool_from_user().
+ */
+static int kstrtobool_from_user(const char __user *s, size_t count, bool *res)
+{
+       char buf[32];
+
+       count = min(count, sizeof(buf) - 1);
+
+       if (copy_from_user(buf, s, count))
+               return -EFAULT;
+       buf[count] = '\0';
+
+       return strtobool(buf, res);
+}
+#endif
+
+static ssize_t write_ctx_infinite_cache(struct file *f, const char __user *ubuf, size_t size, loff_t *off)
+{
+       struct kbase_context *kctx = f->private_data;
+       int err;
+       bool value;
+
+       err = kstrtobool_from_user(ubuf, size, &value);
+       if (err)
+               return err;
+
+       if (value)
+               kbase_ctx_flag_set(kctx, KCTX_INFINITE_CACHE);
+       else
+               kbase_ctx_flag_clear(kctx, KCTX_INFINITE_CACHE);
+
+       return size;
+}
+
+static ssize_t read_ctx_infinite_cache(struct file *f, char __user *ubuf, size_t size, loff_t *off)
+{
+       struct kbase_context *kctx = f->private_data;
+       char buf[32];
+       int count;
+       bool value;
+
+       value = kbase_ctx_flag(kctx, KCTX_INFINITE_CACHE);
+
+       count = scnprintf(buf, sizeof(buf), "%s\n", value ? "Y" : "N");
+
+       return simple_read_from_buffer(ubuf, size, off, buf, count);
+}
+
+static const struct file_operations kbase_infinite_cache_fops = {
+       .open = simple_open,
+       .write = write_ctx_infinite_cache,
+       .read = read_ctx_infinite_cache,
+};
+
 static int kbase_open(struct inode *inode, struct file *filp)
 {
        struct kbase_device *kbdev = NULL;
@@ -1158,7 +1209,8 @@ static int kbase_open(struct inode *inode, struct file *filp)
        filp->private_data = kctx;
        kctx->filp = filp;
 
-       kctx->infinite_cache_active = kbdev->infinite_cache_active_default;
+       if (kbdev->infinite_cache_active_default)
+               kbase_ctx_flag_set(kctx, KCTX_INFINITE_CACHE);
 
 #ifdef CONFIG_DEBUG_FS
        snprintf(kctx_name, 64, "%d_%d", kctx->tgid, kctx->id);
@@ -1176,20 +1228,20 @@ static int kbase_open(struct inode *inode, struct file *filp)
          * infinite cache control support from debugfs.
          */
 #else
-       debugfs_create_bool("infinite_cache", 0644, kctx->kctx_dentry,
-                       (bool*)&(kctx->infinite_cache_active));
+       debugfs_create_file("infinite_cache", 0644, kctx->kctx_dentry,
+                           kctx, &kbase_infinite_cache_fops);
 #endif /* CONFIG_MALI_COH_USER */
 
        mutex_init(&kctx->mem_profile_lock);
 
-       kbasep_jd_debugfs_ctx_add(kctx);
+       kbasep_jd_debugfs_ctx_init(kctx);
        kbase_debug_mem_view_init(filp);
 
        kbase_debug_job_fault_context_init(kctx);
 
-       kbase_mem_pool_debugfs_add(kctx->kctx_dentry, &kctx->mem_pool);
+       kbase_mem_pool_debugfs_init(kctx->kctx_dentry, &kctx->mem_pool);
 
-       kbase_jit_debugfs_add(kctx);
+       kbase_jit_debugfs_init(kctx);
 #endif /* CONFIG_DEBUG_FS */
 
        dev_dbg(kbdev->dev, "created base context\n");
@@ -1486,7 +1538,7 @@ static unsigned long kbase_get_unmapped_area(struct file *filp,
        if (len > TASK_SIZE - SZ_2M)
                return -ENOMEM;
 
-       if (kctx->is_compat)
+       if (kbase_ctx_flag(kctx, KCTX_COMPAT))
                return current->mm->get_unmapped_area(filp, addr, len, pgoff,
                                flags);
 
@@ -1836,13 +1888,12 @@ static ssize_t set_core_mask(struct device *dev, struct device_attribute *attr,
                                                new_core_mask[2]) {
                        unsigned long flags;
 
-                       spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+                       spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 
                        kbase_pm_set_debug_core_mask(kbdev, new_core_mask[0],
                                        new_core_mask[1], new_core_mask[2]);
 
-                       spin_unlock_irqrestore(&kbdev->pm.power_change_lock,
-                                       flags);
+                       spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
                }
 
                return count;
@@ -1992,7 +2043,7 @@ static ssize_t set_js_timeouts(struct device *dev, struct device_attribute *attr
                struct kbasep_js_device_data *js_data = &kbdev->js_data;
                unsigned long flags;
 
-               spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags);
+               spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 
 #define UPDATE_TIMEOUT(ticks_name, ms_name, default) do {\
        js_data->ticks_name = timeout_ms_to_ticks(kbdev, ms_name, \
@@ -2027,7 +2078,7 @@ static ssize_t set_js_timeouts(struct device *dev, struct device_attribute *attr
 
                kbase_js_set_timeouts(kbdev);
 
-               spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags);
+               spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 
                return count;
        }
@@ -2176,7 +2227,7 @@ static ssize_t set_js_scheduling_period(struct device *dev,
 
        /* Update scheduling timeouts */
        mutex_lock(&js_data->runpool_mutex);
-       spin_lock_irqsave(&js_data->runpool_irq.lock, flags);
+       spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 
        /* If no contexts have been scheduled since js_timeouts was last written
         * to, the new timeouts might not have been latched yet. So check if an
@@ -2206,7 +2257,7 @@ static ssize_t set_js_scheduling_period(struct device *dev,
 
        kbase_js_set_timeouts(kbdev);
 
-       spin_unlock_irqrestore(&js_data->runpool_irq.lock, flags);
+       spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
        mutex_unlock(&js_data->runpool_mutex);
 
        dev_dbg(kbdev->dev, "JS scheduling period: %dms\n",
@@ -2540,6 +2591,8 @@ static ssize_t kbase_show_gpuinfo(struct device *dev,
                { .id = GPU_ID_PI_TFRX, .name = "Mali-T88x" },
                { .id = GPU_ID2_PRODUCT_TMIX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
                  .name = "Mali-G71" },
+               { .id = GPU_ID2_PRODUCT_THEX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+                 .name = "Mali-THEx" },
        };
        const char *product_name = "(Unknown Mali GPU)";
        struct kbase_device *kbdev;
@@ -3029,7 +3082,8 @@ static int power_control_init(struct platform_device *pdev)
 
 #if defined(CONFIG_OF) && defined(CONFIG_PM_OPP)
        /* Register the OPPs if they are available in device tree */
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0))
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) \
+       || defined(LSK_OPPV2_BACKPORT)
        err = dev_pm_opp_of_add_table(kbdev->dev);
 #elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
        err = of_init_opp_table(kbdev->dev);
@@ -3121,6 +3175,48 @@ MAKE_QUIRK_ACCESSORS(mmu);
 
 #endif /* KBASE_GPU_RESET_EN */
 
+/**
+ * debugfs_protected_debug_mode_read - "protected_debug_mode" debugfs read
+ * @file: File object the read is for
+ * @buf:  User buffer to populate with data
+ * @len:  Length of user buffer
+ * @ppos: Offset within file object
+ *
+ * Retrieves the current status of protected debug mode
+ * (0 = disabled, 1 = enabled)
+ *
+ * Return: Number of bytes added to user buffer
+ */
+static ssize_t debugfs_protected_debug_mode_read(struct file *file,
+                               char __user *buf, size_t len, loff_t *ppos)
+{
+       struct kbase_device *kbdev = (struct kbase_device *)file->private_data;
+       u32 gpu_status;
+       ssize_t ret_val;
+
+       kbase_pm_context_active(kbdev);
+       gpu_status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS), NULL);
+       kbase_pm_context_idle(kbdev);
+
+       if (gpu_status & GPU_DBGEN)
+               ret_val = simple_read_from_buffer(buf, len, ppos, "1\n", 2);
+       else
+               ret_val = simple_read_from_buffer(buf, len, ppos, "0\n", 2);
+
+       return ret_val;
+}
+
+/*
+ * struct fops_protected_debug_mode - "protected_debug_mode" debugfs fops
+ *
+ * Contains the file operations for the "protected_debug_mode" debugfs file
+ */
+static const struct file_operations fops_protected_debug_mode = {
+       .open = simple_open,
+       .read = debugfs_protected_debug_mode_read,
+       .llseek = default_llseek,
+};
+
 static int kbase_device_debugfs_init(struct kbase_device *kbdev)
 {
        struct dentry *debugfs_ctx_defaults_directory;
@@ -3151,8 +3247,9 @@ static int kbase_device_debugfs_init(struct kbase_device *kbdev)
        }
 
 #if !MALI_CUSTOMER_RELEASE
-       kbasep_regs_dump_debugfs_add(kbdev);
+       kbasep_regs_dump_debugfs_init(kbdev);
 #endif /* !MALI_CUSTOMER_RELEASE */
+       kbasep_regs_history_debugfs_init(kbdev);
 
        kbase_debug_job_fault_debugfs_init(kbdev);
        kbasep_gpu_memory_debugfs_init(kbdev);
@@ -3179,6 +3276,12 @@ static int kbase_device_debugfs_init(struct kbase_device *kbdev)
                        debugfs_ctx_defaults_directory,
                        &kbdev->mem_pool_max_size_default);
 
+       if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) {
+               debugfs_create_file("protected_debug_mode", S_IRUGO,
+                               kbdev->mali_debugfs_directory, kbdev,
+                               &fops_protected_debug_mode);
+       }
+
 #if KBASE_TRACE_ENABLE
        kbasep_trace_debugfs_init(kbdev);
 #endif /* KBASE_TRACE_ENABLE */
@@ -3401,6 +3504,11 @@ static int kbase_platform_device_remove(struct platform_device *pdev)
                kbdev->inited_subsys &= ~inited_backend_early;
        }
 
+       if (kbdev->inited_subsys & inited_io_history) {
+               kbase_io_history_term(&kbdev->io_history);
+               kbdev->inited_subsys &= ~inited_io_history;
+       }
+
        if (kbdev->inited_subsys & inited_power_control) {
                power_control_term(kbdev);
                kbdev->inited_subsys &= ~inited_power_control;
@@ -3434,6 +3542,10 @@ static void kbase_platform_device_shutdown(struct platform_device *pdev)
        kbase_platform_rk_shutdown(kbdev);
 }
 
+/* Number of register accesses the register access history buffer can hold;
+ * it is allocated at initialization time and can be resized later via
+ * debugfs. */
+#define KBASEP_DEFAULT_REGISTER_HISTORY_SIZE ((u16)512)
+
 static int kbase_platform_device_probe(struct platform_device *pdev)
 {
        struct kbase_device *kbdev;
@@ -3494,6 +3606,15 @@ static int kbase_platform_device_probe(struct platform_device *pdev)
        }
        kbdev->inited_subsys |= inited_power_control;
 
+       err = kbase_io_history_init(&kbdev->io_history,
+                       KBASEP_DEFAULT_REGISTER_HISTORY_SIZE);
+       if (err) {
+               dev_err(&pdev->dev, "Register access history initialization failed\n");
+               kbase_platform_device_remove(pdev);
+               return -ENOMEM;
+       }
+       kbdev->inited_subsys |= inited_io_history;
+
        err = kbase_backend_early_init(kbdev);
        if (err) {
                dev_err(kbdev->dev, "Early backend initialization failed\n");
@@ -3673,6 +3794,9 @@ static int kbase_platform_device_probe(struct platform_device *pdev)
        return err;
 }
 
+#undef KBASEP_DEFAULT_REGISTER_HISTORY_SIZE
+
+
 /** Suspend callback from the OS.
  *
  * This is called by Linux when the device should suspend.
index 4bb8c2c..edd6711 100644 (file)
@@ -34,6 +34,7 @@
 #include <mali_kbase_mmu_hw.h>
 #include <mali_kbase_mmu_mode.h>
 #include <mali_kbase_instr_defs.h>
+#include <mali_kbase_pm.h>
 
 #include <linux/atomic.h>
 #include <linux/mempool.h>
@@ -227,6 +228,39 @@ struct kbase_jd_atom_dependency {
        u8 dep_type;
 };
 
+/**
+ * struct kbase_io_access - holds information about 1 register access
+ *
+ * @addr: accessed register address; bit 0 indicates r/w (0 = read, 1 = write)
+ * @value: value written or read
+ */
+struct kbase_io_access {
+       uintptr_t addr;
+       u32 value;
+};
+
+/**
+ * struct kbase_io_history - keeps track of all recent register accesses
+ *
+ * @enabled: true if register accesses are recorded, false otherwise
+ * @lock: spinlock protecting kbase_io_access array
+ * @count: number of registers read/written
+ * @size: number of elements in kbase_io_access array
+ * @buf: array of kbase_io_access
+ */
+struct kbase_io_history {
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0))
+       bool enabled;
+#else
+       u32 enabled;
+#endif
+
+       spinlock_t lock;
+       size_t count;
+       u16 size;
+       struct kbase_io_access *buf;
+};
+
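+
The recording side of this history is not part of this hunk; a rough sketch of how an access would be logged into the ring buffer, based only on the fields documented above (the helper name is hypothetical):

/* Hypothetical helper; the driver's own implementation may differ */
static void example_io_history_add(struct kbase_io_history *h,
		void __iomem *addr, u32 value, u8 write)
{
	struct kbase_io_access *io;
	unsigned long flags;

	spin_lock_irqsave(&h->lock, flags);
	io = &h->buf[h->count % h->size];
	io->addr = (uintptr_t)addr | write;	/* bit 0: 0 = read, 1 = write */
	io->value = value;
	++h->count;
	spin_unlock_irqrestore(&h->lock, flags);
}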
 /**
  * @brief The function retrieves a read-only reference to the atom field from
  * the  kbase_jd_atom_dependency structure
@@ -301,15 +335,17 @@ enum kbase_atom_gpu_rb_state {
        KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB,
        /* Atom is in slot ringbuffer but is blocked on a previous atom */
        KBASE_ATOM_GPU_RB_WAITING_BLOCKED,
-       /* Atom is in slot ringbuffer but is waiting for proected mode exit */
-       KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_EXIT,
+       /* Atom is in slot ringbuffer but is waiting for a previous protected
+        * mode transition to complete */
+       KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV,
+       /* Atom is in slot ringbuffer but is waiting for a protected mode
+        * transition */
+       KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION,
        /* Atom is in slot ringbuffer but is waiting for cores to become
         * available */
        KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE,
        /* Atom is in slot ringbuffer but is blocked on affinity */
        KBASE_ATOM_GPU_RB_WAITING_AFFINITY,
-       /* Atom is in slot ringbuffer but is waiting for protected mode entry */
-       KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_ENTRY,
        /* Atom is in slot ringbuffer and ready to run */
        KBASE_ATOM_GPU_RB_READY,
        /* Atom is in slot ringbuffer and has been submitted to the GPU */
@@ -319,20 +355,41 @@ enum kbase_atom_gpu_rb_state {
        KBASE_ATOM_GPU_RB_RETURN_TO_JS
 };
 
+enum kbase_atom_enter_protected_state {
+       /*
+        * Starting state:
+        * Check if a transition into protected mode is required.
+        *
+        * NOTE: The integer value of this must
+        *       match KBASE_ATOM_EXIT_PROTECTED_CHECK.
+        */
+       KBASE_ATOM_ENTER_PROTECTED_CHECK = 0,
+       /* Wait for vinstr to suspend. */
+       KBASE_ATOM_ENTER_PROTECTED_VINSTR,
+       /* Wait for the L2 to become idle in preparation for
+        * the coherency change. */
+       KBASE_ATOM_ENTER_PROTECTED_IDLE_L2,
+       /* End state;
+        * Prepare coherency change. */
+       KBASE_ATOM_ENTER_PROTECTED_FINISHED,
+};
+
 enum kbase_atom_exit_protected_state {
        /*
         * Starting state:
         * Check if a transition out of protected mode is required.
+        *
+        * NOTE: The integer value of this must
+        *       match KBASE_ATOM_ENTER_PROTECTED_CHECK.
         */
-       KBASE_ATOM_EXIT_PROTECTED_CHECK,
-       /* Wait for the L2 to become idle in preparation for the reset. */
+       KBASE_ATOM_EXIT_PROTECTED_CHECK = 0,
+       /* Wait for the L2 to become idle in preparation
+        * for the reset. */
        KBASE_ATOM_EXIT_PROTECTED_IDLE_L2,
        /* Issue the protected reset. */
        KBASE_ATOM_EXIT_PROTECTED_RESET,
-       /*
-        * End state;
-        * Wait for the reset to complete.
-        */
+       /* End state;
+        * Wait for the reset to complete. */
        KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT,
 };
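
The job manager backend advances an atom through these states one step at a time while it sits in the slot ringbuffer. A deliberately simplified sketch of the enter-side progression, using the protected_state union added below; example_vinstr_suspended() and example_l2_is_off() are hypothetical placeholders for the real checks in the backend:

/* Simplified illustration only; not the driver's actual transition code */
static bool example_vinstr_suspended(struct kbase_device *kbdev);
static bool example_l2_is_off(struct kbase_device *kbdev);

static bool example_protected_enter_step(struct kbase_device *kbdev,
		struct kbase_jd_atom *katom)
{
	switch (katom->protected_state.enter) {
	case KBASE_ATOM_ENTER_PROTECTED_CHECK:
		katom->protected_state.enter = KBASE_ATOM_ENTER_PROTECTED_VINSTR;
		return false;	/* come back once vinstr suspension is requested */
	case KBASE_ATOM_ENTER_PROTECTED_VINSTR:
		if (!example_vinstr_suspended(kbdev))
			return false;	/* keep waiting for counter collection */
		katom->protected_state.enter = KBASE_ATOM_ENTER_PROTECTED_IDLE_L2;
		return false;
	case KBASE_ATOM_ENTER_PROTECTED_IDLE_L2:
		if (!example_l2_is_off(kbdev))
			return false;	/* keep waiting for the L2 to power down */
		katom->protected_state.enter = KBASE_ATOM_ENTER_PROTECTED_FINISHED;
		return false;
	case KBASE_ATOM_ENTER_PROTECTED_FINISHED:
	default:
		return true;	/* ready to enter protected mode */
	}
}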
 
@@ -497,7 +554,13 @@ struct kbase_jd_atom {
         * event_code when the atom is processed. */
        enum base_jd_event_code will_fail_event_code;
 
-       enum kbase_atom_exit_protected_state exit_protected_state;
+       /* An atom will only ever be transitioning into, or out of,
+        * protected mode, so we do not need two separate fields.
+        */
+       union {
+               enum kbase_atom_enter_protected_state enter;
+               enum kbase_atom_exit_protected_state exit;
+       } protected_state;
 
        struct rb_node runnable_tree_node;
 
@@ -600,19 +663,19 @@ struct kbase_as {
        struct work_struct work_pagefault;
        struct work_struct work_busfault;
        enum kbase_mmu_fault_type fault_type;
+       bool protected_mode;
        u32 fault_status;
        u64 fault_addr;
        u64 fault_extra_addr;
-       struct mutex transaction_mutex;
 
        struct kbase_mmu_setup current_setup;
 
        /* BASE_HW_ISSUE_8316  */
        struct workqueue_struct *poke_wq;
        struct work_struct poke_work;
-       /** Protected by kbasep_js_device_data::runpool_irq::lock */
+       /** Protected by hwaccess_lock */
        int poke_refcount;
-       /** Protected by kbasep_js_device_data::runpool_irq::lock */
+       /** Protected by hwaccess_lock */
        kbase_as_poke_state poke_state;
        struct hrtimer poke_timer;
 };
@@ -733,8 +796,7 @@ struct kbase_trace_kbdev_timeline {
         * But it's kept as an example of how to add global timeline tracking
         * information
         *
-        * The caller must hold kbasep_js_device_data::runpool_irq::lock when
-        * accessing this */
+        * The caller must hold hwaccess_lock when accessing this */
        u8 slot_atoms_submitted[BASE_JM_MAX_NR_SLOTS];
 
        /* Last UID for each PM event */
@@ -743,7 +805,7 @@ struct kbase_trace_kbdev_timeline {
        atomic_t pm_event_uid_counter;
        /*
         * L2 transition state - true indicates that the transition is ongoing
-        * Expected to be protected by pm.power_change_lock */
+        * Expected to be protected by hwaccess_lock */
        bool l2_transitioning;
 };
 #endif /* CONFIG_MALI_TRACE_TIMELINE */
@@ -784,19 +846,6 @@ struct kbase_pm_device_data {
        u64 debug_core_mask[BASE_JM_MAX_NR_SLOTS];
        u64 debug_core_mask_all;
 
-       /**
-        * Lock protecting the power state of the device.
-        *
-        * This lock must be held when accessing the shader_available_bitmap,
-        * tiler_available_bitmap, l2_available_bitmap, shader_inuse_bitmap and
-        * tiler_inuse_bitmap fields of kbase_device, and the ca_in_transition
-        * and shader_poweroff_pending fields of kbase_pm_device_data. It is
-        * also held when the hardware power registers are being written to, to
-        * ensure that two threads do not conflict over the power transitions
-        * that the hardware should make.
-        */
-       spinlock_t power_change_lock;
-
        /**
         * Callback for initializing the runtime power management.
         *
@@ -1108,6 +1157,11 @@ struct kbase_device {
        /* Total number of created contexts */
        atomic_t ctx_num;
 
+#ifdef CONFIG_DEBUG_FS
+       /* Holds the most recent register accesses */
+       struct kbase_io_history io_history;
+#endif /* CONFIG_DEBUG_FS */
+
        struct kbase_hwaccess_data hwaccess;
 
        /* Count of page/bus faults waiting for workqueues to process */
@@ -1125,6 +1179,8 @@ struct kbase_device {
 #endif
        size_t mem_pool_max_size_default;
 
+       /* current gpu coherency mode */
+       u32 current_gpu_coherency_mode;
        /* system coherency mode  */
        u32 system_coherency;
        /* Flag to track when cci snoops have been enabled on the interface */
@@ -1171,6 +1227,11 @@ struct kbase_device {
 
        /* list of inited sub systems. Used during terminate/error recovery */
        u32 inited_subsys;
+
+       spinlock_t hwaccess_lock;
+
+       /* Protects access to MMU operations */
+       struct mutex mmu_hw_mutex;
 };
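Two new locks appear here: hwaccess_lock, the IRQ-safe spinlock that replaces kbasep_js_device_data::runpool_irq::lock throughout the rest of the patch, and mmu_hw_mutex, which takes over from the removed per-address-space transaction_mutex. When both are needed, as in kbase_backend_release_ctx_irq() further down, the usual Linux rule of taking the sleeping lock before the spinlock applies. A minimal sketch of that nesting, with the protected work left as a placeholder:

	unsigned long flags;

	mutex_lock(&kbdev->mmu_hw_mutex);	/* sleeping lock first */
	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);

	/* ... MMU and job-slot state guarded by both locks ... */

	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
	mutex_unlock(&kbdev->mmu_hw_mutex);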
 
 /**
@@ -1181,7 +1242,7 @@ struct kbase_device {
  *                 dependencies. Atoms on this list will be moved to the
  *                 runnable_tree when the blocking atom completes.
  *
- * runpool_irq.lock must be held when accessing this structure.
+ * hwaccess_lock must be held when accessing this structure.
  */
 struct jsctx_queue {
        struct rb_root runnable_tree;
@@ -1193,6 +1254,52 @@ struct jsctx_queue {
                                         (((minor) & 0xFFF) << 8) | \
                                         ((0 & 0xFF) << 0))
 
+/**
+ * enum kbase_context_flags - Flags for kbase contexts
+ *
+ * @KCTX_COMPAT: Set when the context process is a compat process, 32-bit
+ * process on a 64-bit kernel.
+ *
+ * @KCTX_RUNNABLE_REF: Set when context is counted in
+ * kbdev->js_data.nr_contexts_runnable. Must hold queue_mutex when accessing.
+ *
+ * @KCTX_ACTIVE: Set when the context is active.
+ *
+ * @KCTX_PULLED: Set when last kick() caused atoms to be pulled from this
+ * context.
+ *
+ * @KCTX_MEM_PROFILE_INITIALIZED: Set when the context's memory profile has been
+ * initialized.
+ *
+ * @KCTX_INFINITE_CACHE: Set when infinite cache is to be enabled for new
+ * allocations. Existing allocations will not change.
+ *
+ * @KCTX_SUBMIT_DISABLED: Set to prevent context from submitting any jobs.
+ *
+ * @KCTX_PRIVILEGED: Set if the context uses an address space and should be kept
+ * scheduled in.
+ *
+ * @KCTX_SCHEDULED: Set when the context is scheduled on the Run Pool.
+ * This is only ever updated whilst the jsctx_mutex is held.
+ *
+ * @KCTX_DYING: Set when the context process is in the process of being evicted.
+ *
+ * All members need to be separate bits. This enum is intended for use in a
+ * bitmask where multiple values get OR-ed together.
+ */
+enum kbase_context_flags {
+       KCTX_COMPAT = 1U << 0,
+       KCTX_RUNNABLE_REF = 1U << 1,
+       KCTX_ACTIVE = 1U << 2,
+       KCTX_PULLED = 1U << 3,
+       KCTX_MEM_PROFILE_INITIALIZED = 1U << 4,
+       KCTX_INFINITE_CACHE = 1U << 5,
+       KCTX_SUBMIT_DISABLED = 1U << 6,
+       KCTX_PRIVILEGED = 1U << 7,
+       KCTX_SCHEDULED = 1U << 8,
+       KCTX_DYING = 1U << 9,
+};
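The per-context booleans that used to live in kbase_context (is_compat, ctx_active, pulled, ctx_runnable_ref, infinite_cache_active, ...) are folded into a single atomic flags word added to the struct in a later hunk, and the rest of the patch reads and writes them through kbase_ctx_flag() and kbase_ctx_flag_set(). A minimal sketch of what such helpers can look like, assuming an atomic_t bitmask; the real helpers live in the new mali_kbase_context.h and may differ in detail, and kbase_ctx_flag_clear() is included here only for symmetry:

static inline bool kbase_ctx_flag(struct kbase_context *kctx,
				  enum kbase_context_flags flag)
{
	return atomic_read(&kctx->flags) & flag;
}

static inline void kbase_ctx_flag_set(struct kbase_context *kctx,
				      enum kbase_context_flags flag)
{
	/* atomic_or()/atomic_andnot() are available from Linux 4.3, so a
	 * 4.4 target kernel can use them for lock-free updates. */
	atomic_or(flag, &kctx->flags);
}

static inline void kbase_ctx_flag_clear(struct kbase_context *kctx,
					enum kbase_context_flags flag)
{
	atomic_andnot(flag, &kctx->flags);
}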
+
 struct kbase_context {
        struct file *filp;
        struct kbase_device *kbdev;
@@ -1207,7 +1314,7 @@ struct kbase_context {
        atomic_t event_count;
        int event_coalesce_count;
 
-       bool is_compat;
+       atomic_t flags;
 
        atomic_t                setup_complete;
        atomic_t                setup_in_progress;
@@ -1251,12 +1358,11 @@ struct kbase_context {
        /** This is effectively part of the Run Pool, because it only has a valid
         * setting (!=KBASEP_AS_NR_INVALID) whilst the context is scheduled in
         *
-        * The kbasep_js_device_data::runpool_irq::lock must be held whilst accessing
-        * this.
+        * The hwaccess_lock must be held whilst accessing this.
         *
         * If the context relating to this as_nr is required, you must use
         * kbasep_js_runpool_retain_ctx() to ensure that the context doesn't disappear
-        * whilst you're using it. Alternatively, just hold the kbasep_js_device_data::runpool_irq::lock
+        * whilst you're using it. Alternatively, just hold the hwaccess_lock
         * to ensure the context doesn't disappear (but this has restrictions on what other locks
         * you can take whilst doing this) */
        int as_nr;
@@ -1282,8 +1388,7 @@ struct kbase_context {
        size_t mem_profile_size;
        /* Mutex guarding memory profile state */
        struct mutex mem_profile_lock;
-       /* Memory profile file created */
-       bool mem_profile_initialized;
+       /* Memory profile directory under debugfs */
        struct dentry *kctx_dentry;
 
        /* for job fault debug */
@@ -1303,15 +1408,6 @@ struct kbase_context {
        atomic_t atoms_pulled;
        /* Number of atoms currently pulled from this context, per slot */
        atomic_t atoms_pulled_slot[BASE_JM_MAX_NR_SLOTS];
-       /* true if last kick() caused atoms to be pulled from this context */
-       bool pulled;
-       /* true if infinite cache is to be enabled for new allocations. Existing
-        * allocations will not change. bool stored as a u32 per Linux API */
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0))
-       bool infinite_cache_active;
-#else
-       u32 infinite_cache_active;
-#endif
        /* Bitmask of slots that can be pulled from */
        u32 slots_pullable;
 
@@ -1325,17 +1421,11 @@ struct kbase_context {
        struct kbase_vinstr_client *vinstr_cli;
        struct mutex vinstr_cli_lock;
 
-       /* Must hold queue_mutex when accessing */
-       bool ctx_active;
-
        /* List of completed jobs waiting for events to be posted */
        struct list_head completed_jobs;
        /* Number of work items currently pending on job_done_wq */
        atomic_t work_count;
 
-       /* true if context is counted in kbdev->js_data.nr_contexts_runnable */
-       bool ctx_runnable_ref;
-
        /* Waiting soft-jobs will fail when this timer expires */
        struct timer_list soft_job_timeout;
 
index 62ab0ca..7484eec 100644
@@ -82,7 +82,6 @@ static int kbase_device_as_init(struct kbase_device *kbdev, int i)
        if (!kbdev->as[i].pf_wq)
                return -EINVAL;
 
-       mutex_init(&kbdev->as[i].transaction_mutex);
        INIT_WORK(&kbdev->as[i].work_pagefault, page_fault_worker);
        INIT_WORK(&kbdev->as[i].work_busfault, bus_fault_worker);
 
@@ -151,6 +150,7 @@ int kbase_device_init(struct kbase_device * const kbdev)
 #endif /* CONFIG_ARM64 */
 
        spin_lock_init(&kbdev->mmu_mask_change);
+       mutex_init(&kbdev->mmu_hw_mutex);
 #ifdef CONFIG_ARM64
        kbdev->cci_snoop_enabled = false;
        np = kbdev->dev->of_node;
index 4d3836a..97bb6c5 100644
@@ -38,6 +38,8 @@
 /* Spin lock protecting all Mali fences as fence->lock. */
 static DEFINE_SPINLOCK(kbase_dma_fence_lock);
 
+static void
+kbase_dma_fence_work(struct work_struct *pwork);
 
 static void
 kbase_dma_fence_waiters_add(struct kbase_jd_atom *katom)
@@ -167,9 +169,30 @@ kbase_dma_fence_unlock_reservations(struct kbase_dma_fence_resv_info *info,
        ww_acquire_fini(ctx);
 }
 
+/**
+ * kbase_dma_fence_queue_work() - Queue work to handle @katom
+ * @katom: Pointer to atom for which to queue work
+ *
+ * Queue kbase_dma_fence_work() for @katom to clean up the fence callbacks and
+ * submit the atom.
+ */
+static void
+kbase_dma_fence_queue_work(struct kbase_jd_atom *katom)
+{
+       struct kbase_context *kctx = katom->kctx;
+       bool ret;
+
+       INIT_WORK(&katom->work, kbase_dma_fence_work);
+       ret = queue_work(kctx->dma_fence.wq, &katom->work);
+       /* Warn if work was already queued, that should not happen. */
+       WARN_ON(!ret);
+}
+
 /**
  * kbase_dma_fence_free_callbacks - Free dma-fence callbacks on a katom
  * @katom: Pointer to katom
+ * @queue_worker: Boolean indicating if fence worker is to be queued when
+ *                dep_count reaches 0.
  *
  * This function will free all fence callbacks on the katom's list of
  * callbacks. Callbacks that have not yet been called, because their fence
@@ -178,7 +201,7 @@ kbase_dma_fence_unlock_reservations(struct kbase_dma_fence_resv_info *info,
  * Locking: katom->dma_fence.callbacks list assumes jctx.lock is held.
  */
 static void
-kbase_dma_fence_free_callbacks(struct kbase_jd_atom *katom)
+kbase_dma_fence_free_callbacks(struct kbase_jd_atom *katom, bool queue_worker)
 {
        struct kbase_dma_fence_cb *cb, *tmp;
 
@@ -191,10 +214,21 @@ kbase_dma_fence_free_callbacks(struct kbase_jd_atom *katom)
                /* Cancel callbacks that hasn't been called yet. */
                ret = fence_remove_callback(cb->fence, &cb->fence_cb);
                if (ret) {
+                       int ret;
+
                        /* Fence had not signaled, clean up after
                         * canceling.
                         */
-                       atomic_dec(&katom->dma_fence.dep_count);
+                       ret = atomic_dec_return(&katom->dma_fence.dep_count);
+
+                       if (unlikely(queue_worker && ret == 0)) {
+                               /*
+                                * dep_count went to zero and queue_worker is
+                                * true. Queue the worker to handle the
+                                * completion of the katom.
+                                */
+                               kbase_dma_fence_queue_work(katom);
+                       }
                }
 
                /*
@@ -219,7 +253,7 @@ kbase_dma_fence_cancel_atom(struct kbase_jd_atom *katom)
        lockdep_assert_held(&katom->kctx->jctx.lock);
 
        /* Cancel callbacks and clean up. */
-       kbase_dma_fence_free_callbacks(katom);
+       kbase_dma_fence_free_callbacks(katom, false);
 
        KBASE_DEBUG_ASSERT(atomic_read(&katom->dma_fence.dep_count) == 0);
 
@@ -264,9 +298,15 @@ kbase_dma_fence_work(struct work_struct *pwork)
        /* Remove atom from list of dma-fence waiting atoms. */
        kbase_dma_fence_waiters_remove(katom);
        /* Cleanup callbacks. */
-       kbase_dma_fence_free_callbacks(katom);
-       /* Queue atom on GPU. */
-       kbase_jd_dep_clear_locked(katom);
+       kbase_dma_fence_free_callbacks(katom, false);
+       /*
+        * Queue atom on GPU, unless it has already completed due to a failing
+        * dependency. Run jd_done_nolock() on the katom if it is completed.
+        */
+       if (unlikely(katom->status == KBASE_JD_ATOM_STATE_COMPLETED))
+               jd_done_nolock(katom, NULL);
+       else
+               kbase_jd_dep_clear_locked(katom);
 
 out:
        mutex_unlock(&ctx->lock);
@@ -332,20 +372,13 @@ kbase_dma_fence_cb(struct fence *fence, struct fence_cb *cb)
                                struct kbase_dma_fence_cb,
                                fence_cb);
        struct kbase_jd_atom *katom = kcb->katom;
-       struct kbase_context *kctx = katom->kctx;
 
        /* If the atom is zapped dep_count will be forced to a negative number
         * preventing this callback from ever scheduling work. Which in turn
         * would reschedule the atom.
         */
-       if (atomic_dec_and_test(&katom->dma_fence.dep_count)) {
-               bool ret;
-
-               INIT_WORK(&katom->work, kbase_dma_fence_work);
-               ret = queue_work(kctx->dma_fence.wq, &katom->work);
-               /* Warn if work was already queued, that should not happen. */
-               WARN_ON(!ret);
-       }
+       if (atomic_dec_and_test(&katom->dma_fence.dep_count))
+               kbase_dma_fence_queue_work(katom);
 }
 
 static int
@@ -406,7 +439,7 @@ out:
                 * On error, cancel and clean up all callbacks that was set up
                 * before the error.
                 */
-               kbase_dma_fence_free_callbacks(katom);
+               kbase_dma_fence_free_callbacks(katom, false);
        }
 
        return err;
@@ -499,7 +532,7 @@ end:
                /* Test if the callbacks are already triggered */
                if (atomic_dec_and_test(&katom->dma_fence.dep_count)) {
                        atomic_set(&katom->dma_fence.dep_count, -1);
-                       kbase_dma_fence_free_callbacks(katom);
+                       kbase_dma_fence_free_callbacks(katom, false);
                } else {
                        /* Add katom to the list of dma-buf fence waiting atoms
                         * only if it is still waiting.
@@ -512,7 +545,7 @@ end:
                 * kill it for us), signal the fence, free callbacks and the
                 * fence.
                 */
-               kbase_dma_fence_free_callbacks(katom);
+               kbase_dma_fence_free_callbacks(katom, false);
                atomic_set(&katom->dma_fence.dep_count, -1);
                kbase_dma_fence_signal(katom);
        }
@@ -522,10 +555,12 @@ end:
 
 void kbase_dma_fence_cancel_all_atoms(struct kbase_context *kctx)
 {
-       struct kbase_jd_atom *katom, *katom_tmp;
+       struct list_head *list = &kctx->dma_fence.waiting_resource;
+
+       while (!list_empty(list)) {
+               struct kbase_jd_atom *katom;
 
-       list_for_each_entry_safe(katom, katom_tmp,
-                                &kctx->dma_fence.waiting_resource, queue) {
+               katom = list_first_entry(list, struct kbase_jd_atom, queue);
                kbase_dma_fence_waiters_remove(katom);
                kbase_dma_fence_cancel_atom(katom);
        }
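The cancel-all loop is rewritten as a list drain rather than a list_for_each_entry_safe() walk; a drain stays correct even when handling one atom removes other entries from the waiting_resource list, which would invalidate the safe iterator's cached next pointer. A generic sketch of the pattern, with hypothetical types and a hypothetical handler:

	/* handle() must always unlink 'it' from the list (directly or
	 * indirectly), otherwise this loop never terminates. It is also
	 * free to unlink any other entries. */
	while (!list_empty(head)) {
		struct item *it = list_first_entry(head, struct item, node);

		handle(it);
	}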
@@ -534,7 +569,7 @@ void kbase_dma_fence_cancel_all_atoms(struct kbase_context *kctx)
 void kbase_dma_fence_cancel_callbacks(struct kbase_jd_atom *katom)
 {
        /* Cancel callbacks and clean up. */
-       kbase_dma_fence_free_callbacks(katom);
+       kbase_dma_fence_free_callbacks(katom, true);
 }
 
 void kbase_dma_fence_signal(struct kbase_jd_atom *katom)
@@ -549,7 +584,7 @@ void kbase_dma_fence_signal(struct kbase_jd_atom *katom)
        fence_put(katom->dma_fence.fence);
        katom->dma_fence.fence = NULL;
 
-       kbase_dma_fence_free_callbacks(katom);
+       kbase_dma_fence_free_callbacks(katom, false);
 }
 
 void kbase_dma_fence_term(struct kbase_context *kctx)
index bf8c304..f07406c 100644
@@ -180,7 +180,7 @@ void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *atom)
                kbase_event_process_noreport(ctx, atom);
                return;
        }
-
+       kbase_tlstream_tl_attrib_atom_state(atom, TL_ATOM_STATE_POSTED);
        if (atom->core_req & BASE_JD_REQ_EVENT_COALESCE) {
                /* Don't report the event until other event(s) have completed */
                mutex_lock(&ctx->event_mutex);
index 4af3e48..3292fa9 100644
@@ -41,7 +41,6 @@ const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_counters)
 {
        const char * const *hardware_counters;
        struct kbase_device *kbdev;
-       uint32_t gpu_id;
        uint32_t product_id;
        uint32_t count;
 
@@ -53,25 +52,27 @@ const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_counters)
        if (!kbdev)
                return NULL;
 
-       gpu_id = kbdev->gpu_props.props.core_props.product_id;
-       product_id = gpu_id & GPU_ID_VERSION_PRODUCT_ID;
-       product_id >>= GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+       product_id = kbdev->gpu_props.props.core_props.product_id;
 
        if (GPU_ID_IS_NEW_FORMAT(product_id)) {
-               switch (gpu_id & GPU_ID2_PRODUCT_MODEL) {
+               switch (GPU_ID2_MODEL_MATCH_VALUE(product_id)) {
                case GPU_ID2_PRODUCT_TMIX:
                        hardware_counters = hardware_counters_mali_tMIx;
                        count = ARRAY_SIZE(hardware_counters_mali_tMIx);
                        break;
+               case GPU_ID2_PRODUCT_THEX:
+                       hardware_counters = hardware_counters_mali_tHEx;
+                       count = ARRAY_SIZE(hardware_counters_mali_tHEx);
+                       break;
                default:
                        hardware_counters = NULL;
                        count = 0;
-                       dev_err(kbdev->dev, "Unrecognized gpu ID: %u\n",
-                               gpu_id);
+                       dev_err(kbdev->dev, "Unrecognized product ID: %u\n",
+                               product_id);
                        break;
                }
        } else {
-               switch (gpu_id) {
+               switch (product_id) {
                        /* If we are using a Mali-T60x device */
                case GPU_ID_PI_T60X:
                        hardware_counters = hardware_counters_mali_t60x;
@@ -115,8 +116,8 @@ const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_counters)
                default:
                        hardware_counters = NULL;
                        count = 0;
-                       dev_err(kbdev->dev, "Unrecognized gpu ID: %u\n",
-                               gpu_id);
+                       dev_err(kbdev->dev, "Unrecognized product ID: %u\n",
+                               product_id);
                        break;
                }
        }
index c247dd6..7ec05c1 100644
@@ -2158,6 +2158,7 @@ static const char * const hardware_counters_mali_t88x[] = {
 
 #include "mali_kbase_gator_hwcnt_names_tmix.h"
 
+#include "mali_kbase_gator_hwcnt_names_thex.h"
 
 
 #endif
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_thex.h b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_thex.h
new file mode 100644
index 0000000..bcceef4
--- /dev/null
@@ -0,0 +1,291 @@
+/*
+ *
+ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * This header was autogenerated, it should not be edited.
+ */
+
+#ifndef _KBASE_GATOR_HWCNT_NAMES_THEX_H_
+#define _KBASE_GATOR_HWCNT_NAMES_THEX_H_
+
+static const char * const hardware_counters_mali_tHEx[] = {
+       /* Performance counters for the Job Manager */
+       "",
+       "",
+       "",
+       "",
+       "THEx_MESSAGES_SENT",
+       "THEx_MESSAGES_RECEIVED",
+       "THEx_GPU_ACTIVE",
+       "THEx_IRQ_ACTIVE",
+       "THEx_JS0_JOBS",
+       "THEx_JS0_TASKS",
+       "THEx_JS0_ACTIVE",
+       "",
+       "THEx_JS0_WAIT_READ",
+       "THEx_JS0_WAIT_ISSUE",
+       "THEx_JS0_WAIT_DEPEND",
+       "THEx_JS0_WAIT_FINISH",
+       "THEx_JS1_JOBS",
+       "THEx_JS1_TASKS",
+       "THEx_JS1_ACTIVE",
+       "",
+       "THEx_JS1_WAIT_READ",
+       "THEx_JS1_WAIT_ISSUE",
+       "THEx_JS1_WAIT_DEPEND",
+       "THEx_JS1_WAIT_FINISH",
+       "THEx_JS2_JOBS",
+       "THEx_JS2_TASKS",
+       "THEx_JS2_ACTIVE",
+       "",
+       "THEx_JS2_WAIT_READ",
+       "THEx_JS2_WAIT_ISSUE",
+       "THEx_JS2_WAIT_DEPEND",
+       "THEx_JS2_WAIT_FINISH",
+       "",
+       "",
+       "",
+       "",
+       "",
+       "",
+       "",
+       "",
+       "",
+       "",
+       "",
+       "",
+       "",
+       "",
+       "",
+       "",
+       "",
+       "",
+       "",
+       "",
+       "",
+       "",
+       "",
+       "",
+       "",
+       "",
+       "",
+       "",
+       "",
+       "",
+       "",
+       "",
+
+       /* Performance counters for the Tiler */
+       "",
+       "",
+       "",
+       "",
+       "THEx_TILER_ACTIVE",
+       "THEx_JOBS_PROCESSED",
+       "THEx_TRIANGLES",
+       "THEx_LINES",
+       "THEx_POINTS",
+       "THEx_FRONT_FACING",
+       "THEx_BACK_FACING",
+       "THEx_PRIM_VISIBLE",
+       "THEx_PRIM_CULLED",
+       "THEx_PRIM_CLIPPED",
+       "THEx_PRIM_SAT_CULLED",
+       "",
+       "",
+       "THEx_BUS_READ",
+       "",
+       "THEx_BUS_WRITE",
+       "THEx_LOADING_DESC",
+       "THEx_IDVS_POS_SHAD_REQ",
+       "THEx_IDVS_POS_SHAD_WAIT",
+       "THEx_IDVS_POS_SHAD_STALL",
+       "THEx_IDVS_POS_FIFO_FULL",
+       "THEx_PREFETCH_STALL",
+       "THEx_VCACHE_HIT",
+       "THEx_VCACHE_MISS",
+       "THEx_VCACHE_LINE_WAIT",
+       "THEx_VFETCH_POS_READ_WAIT",
+       "THEx_VFETCH_VERTEX_WAIT",
+       "THEx_VFETCH_STALL",
+       "THEx_PRIMASSY_STALL",
+       "THEx_BBOX_GEN_STALL",
+       "THEx_IDVS_VBU_HIT",
+       "THEx_IDVS_VBU_MISS",
+       "THEx_IDVS_VBU_LINE_DEALLOCATE",
+       "THEx_IDVS_VAR_SHAD_REQ",
+       "THEx_IDVS_VAR_SHAD_STALL",
+       "THEx_BINNER_STALL",
+       "THEx_ITER_STALL",
+       "THEx_COMPRESS_MISS",
+       "THEx_COMPRESS_STALL",
+       "THEx_PCACHE_HIT",
+       "THEx_PCACHE_MISS",
+       "THEx_PCACHE_MISS_STALL",
+       "THEx_PCACHE_EVICT_STALL",
+       "THEx_PMGR_PTR_WR_STALL",
+       "THEx_PMGR_PTR_RD_STALL",
+       "THEx_PMGR_CMD_WR_STALL",
+       "THEx_WRBUF_ACTIVE",
+       "THEx_WRBUF_HIT",
+       "THEx_WRBUF_MISS",
+       "THEx_WRBUF_NO_FREE_LINE_STALL",
+       "THEx_WRBUF_NO_AXI_ID_STALL",
+       "THEx_WRBUF_AXI_STALL",
+       "",
+       "",
+       "",
+       "THEx_UTLB_TRANS",
+       "THEx_UTLB_TRANS_HIT",
+       "THEx_UTLB_TRANS_STALL",
+       "THEx_UTLB_TRANS_MISS_DELAY",
+       "THEx_UTLB_MMU_REQ",
+
+       /* Performance counters for the Shader Core */
+       "",
+       "",
+       "",
+       "",
+       "THEx_FRAG_ACTIVE",
+       "THEx_FRAG_PRIMITIVES",
+       "THEx_FRAG_PRIM_RAST",
+       "THEx_FRAG_FPK_ACTIVE",
+       "THEx_FRAG_STARVING",
+       "THEx_FRAG_WARPS",
+       "THEx_FRAG_PARTIAL_WARPS",
+       "THEx_FRAG_QUADS_RAST",
+       "THEx_FRAG_QUADS_EZS_TEST",
+       "THEx_FRAG_QUADS_EZS_UPDATE",
+       "THEx_FRAG_QUADS_EZS_KILL",
+       "THEx_FRAG_LZS_TEST",
+       "THEx_FRAG_LZS_KILL",
+       "",
+       "THEx_FRAG_PTILES",
+       "THEx_FRAG_TRANS_ELIM",
+       "THEx_QUAD_FPK_KILLER",
+       "",
+       "THEx_COMPUTE_ACTIVE",
+       "THEx_COMPUTE_TASKS",
+       "THEx_COMPUTE_WARPS",
+       "THEx_COMPUTE_STARVING",
+       "THEx_EXEC_CORE_ACTIVE",
+       "THEx_EXEC_ACTIVE",
+       "THEx_EXEC_INSTR_COUNT",
+       "THEx_EXEC_INSTR_DIVERGED",
+       "THEx_EXEC_INSTR_STARVING",
+       "THEx_ARITH_INSTR_SINGLE_FMA",
+       "THEx_ARITH_INSTR_DOUBLE",
+       "THEx_ARITH_INSTR_MSG",
+       "THEx_ARITH_INSTR_MSG_ONLY",
+       "THEx_TEX_INSTR",
+       "THEx_TEX_INSTR_MIPMAP",
+       "THEx_TEX_INSTR_COMPRESSED",
+       "THEx_TEX_INSTR_3D",
+       "THEx_TEX_INSTR_TRILINEAR",
+       "THEx_TEX_COORD_ISSUE",
+       "THEx_TEX_COORD_STALL",
+       "THEx_TEX_STARVE_CACHE",
+       "THEx_TEX_STARVE_FILTER",
+       "THEx_LS_MEM_READ_FULL",
+       "THEx_LS_MEM_READ_SHORT",
+       "THEx_LS_MEM_WRITE_FULL",
+       "THEx_LS_MEM_WRITE_SHORT",
+       "THEx_LS_MEM_ATOMIC",
+       "THEx_VARY_INSTR",
+       "THEx_VARY_SLOT_32",
+       "THEx_VARY_SLOT_16",
+       "THEx_ATTR_INSTR",
+       "THEx_ARITH_INSTR_FP_MUL",
+       "THEx_BEATS_RD_FTC",
+       "THEx_BEATS_RD_FTC_EXT",
+       "THEx_BEATS_RD_LSC",
+       "THEx_BEATS_RD_LSC_EXT",
+       "THEx_BEATS_RD_TEX",
+       "THEx_BEATS_RD_TEX_EXT",
+       "THEx_BEATS_RD_OTHER",
+       "THEx_BEATS_WR_LSC",
+       "THEx_BEATS_WR_TIB",
+       "",
+
+       /* Performance counters for the Memory System */
+       "",
+       "",
+       "",
+       "",
+       "THEx_MMU_REQUESTS",
+       "",
+       "",
+       "",
+       "",
+       "",
+       "",
+       "",
+       "",
+       "",
+       "",
+       "",
+       "THEx_L2_RD_MSG_IN",
+       "THEx_L2_RD_MSG_IN_STALL",
+       "THEx_L2_WR_MSG_IN",
+       "THEx_L2_WR_MSG_IN_STALL",
+       "THEx_L2_SNP_MSG_IN",
+       "THEx_L2_SNP_MSG_IN_STALL",
+       "THEx_L2_RD_MSG_OUT",
+       "THEx_L2_RD_MSG_OUT_STALL",
+       "THEx_L2_WR_MSG_OUT",
+       "THEx_L2_ANY_LOOKUP",
+       "THEx_L2_READ_LOOKUP",
+       "THEx_L2_WRITE_LOOKUP",
+       "THEx_L2_EXT_SNOOP_LOOKUP",
+       "THEx_L2_EXT_READ",
+       "THEx_L2_EXT_READ_NOSNP",
+       "THEx_L2_EXT_READ_UNIQUE",
+       "THEx_L2_EXT_READ_BEATS",
+       "THEx_L2_EXT_AR_STALL",
+       "THEx_L2_EXT_AR_CNT_Q1",
+       "THEx_L2_EXT_AR_CNT_Q2",
+       "THEx_L2_EXT_AR_CNT_Q3",
+       "THEx_L2_EXT_RRESP_0_127",
+       "THEx_L2_EXT_RRESP_128_191",
+       "THEx_L2_EXT_RRESP_192_255",
+       "THEx_L2_EXT_RRESP_256_319",
+       "THEx_L2_EXT_RRESP_320_383",
+       "THEx_L2_EXT_WRITE",
+       "THEx_L2_EXT_WRITE_NOSNP_FULL",
+       "THEx_L2_EXT_WRITE_NOSNP_PTL",
+       "THEx_L2_EXT_WRITE_SNP_FULL",
+       "THEx_L2_EXT_WRITE_SNP_PTL",
+       "THEx_L2_EXT_WRITE_BEATS",
+       "THEx_L2_EXT_W_STALL",
+       "THEx_L2_EXT_AW_CNT_Q1",
+       "THEx_L2_EXT_AW_CNT_Q2",
+       "THEx_L2_EXT_AW_CNT_Q3",
+       "THEx_L2_EXT_SNOOP",
+       "THEx_L2_EXT_SNOOP_STALL",
+       "THEx_L2_EXT_SNOOP_RESP_CLEAN",
+       "THEx_L2_EXT_SNOOP_RESP_DATA",
+       "THEx_L2_EXT_SNOOP_INTERNAL",
+       "",
+       "",
+       "",
+       "",
+       "",
+       "",
+       "",
+};
+
+#endif /* _KBASE_GATOR_HWCNT_NAMES_THEX_H_ */
index a962ecb..a3377b2 100644
@@ -96,6 +96,7 @@
                    GPU_ID2_PRODUCT_MODEL)
 
 #define GPU_ID2_PRODUCT_TMIX              GPU_ID2_MODEL_MAKE(6, 0)
+#define GPU_ID2_PRODUCT_THEX              GPU_ID2_MODEL_MAKE(6, 1)
 
 /* Values for GPU_ID_VERSION_STATUS field for PRODUCT_ID GPU_ID_PI_T60X */
 #define GPU_ID_S_15DEV0                   0x1
index de2461f..1d7e5e9 100644
@@ -42,6 +42,9 @@ void kbase_hw_set_features_mask(struct kbase_device *kbdev)
                case GPU_ID2_PRODUCT_TMIX:
                        features = base_hw_features_tMIx;
                        break;
+               case GPU_ID2_PRODUCT_THEX:
+                       features = base_hw_features_tHEx;
+                       break;
                default:
                        features = base_hw_features_generic;
                        break;
@@ -106,6 +109,9 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev)
                                if ((gpu_id & GPU_ID2_PRODUCT_MODEL) ==
                                                        GPU_ID2_PRODUCT_TMIX) {
                                        issues = base_hw_issues_tMIx_r0p0;
+                               } else if ((gpu_id & GPU_ID2_PRODUCT_MODEL) ==
+                                                       GPU_ID2_PRODUCT_THEX) {
+                                       issues = base_hw_issues_tHEx_r0p0;
                                } else {
                                        dev_err(kbdev->dev,
                                                "Unknown GPU ID %x", gpu_id);
@@ -215,6 +221,9 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev)
                        case GPU_ID2_PRODUCT_TMIX:
                                issues = base_hw_issues_model_tMIx;
                                break;
+                       case GPU_ID2_PRODUCT_THEX:
+                               issues = base_hw_issues_model_tHEx;
+                               break;
                        default:
                                dev_err(kbdev->dev,
                                        "Unknown GPU ID %x", gpu_id);
index 261453e..0acf297 100644
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014, 2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -26,8 +26,7 @@
 
 #include <mali_kbase_jm_defs.h>
 
-/* The kbasep_js_device_data::runpool_irq::lock (a spinlock) must be held when
- * accessing this structure */
+/* The hwaccess_lock (a spinlock) must be held when accessing this structure */
 struct kbase_hwaccess_data {
        struct kbase_context *active_kctx;
 
index abe6607..c2c3909 100644
 void kbase_backend_run_atom(struct kbase_device *kbdev,
                                struct kbase_jd_atom *katom);
 
+/**
+ * kbase_backend_slot_update - Update state based on slot ringbuffers
+ *
+ * @kbdev:  Device pointer
+ *
+ * Inspect the jobs in the slot ringbuffers and update state.
+ *
+ * This will cause jobs to be submitted to hardware if they are unblocked
+ */
+void kbase_backend_slot_update(struct kbase_device *kbdev);
+
 /**
  * kbase_backend_find_free_address_space() - Find a free address space.
  * @kbdev:     Device pointer
@@ -88,7 +99,7 @@ bool kbase_backend_use_ctx(struct kbase_device *kbdev,
  * the context is not scheduled, then kbase_gpu_use_ctx() should be used
  * instead.
  *
- * Caller must hold runpool_irq.lock
+ * Caller must hold hwaccess_lock
  *
  * Return: true if context is now active, false otherwise (ie if context does
  *        not have an address space assigned)
@@ -102,7 +113,7 @@ bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev,
  * @kbdev: Device pointer
  * @kctx:  Context pointer
  *
- * Caller must hold as->transaction_mutex and runpool_irq.lock
+ * Caller must hold kbase_device->mmu_hw_mutex and hwaccess_lock
  */
 void kbase_backend_release_ctx_irq(struct kbase_device *kbdev,
                                struct kbase_context *kctx);
@@ -113,7 +124,7 @@ void kbase_backend_release_ctx_irq(struct kbase_device *kbdev,
  * @kbdev: Device pointer
  * @kctx:  Context pointer
  *
- * Caller must hold as->transaction_mutex
+ * Caller must hold kbase_device->mmu_hw_mutex
  *
  * This function must perform any operations that could not be performed in IRQ
  * context by kbase_backend_release_ctx_irq().
index 3e0a589..81952e2 100644
@@ -61,7 +61,7 @@ static void __user *
 get_compat_pointer(struct kbase_context *kctx, const union kbase_pointer *p)
 {
 #ifdef CONFIG_COMPAT
-       if (kctx->is_compat)
+       if (kbase_ctx_flag(kctx, KCTX_COMPAT))
                return compat_ptr(p->compat_value);
 #endif
        return p->value;
@@ -581,24 +581,20 @@ static inline void jd_resolve_dep(struct list_head *out_list,
                        dep_count = atomic_read(&dep_atom->dma_fence.dep_count);
                        if (likely(dep_count == -1)) {
                                dep_satisfied = true;
-                       } else if (dep_count == 0) {
+                       } else {
                                /*
-                                * All fences for this atom has signaled, but
-                                * the worker that will queue the atom has not
-                                * yet run.
+                                * There are either still active callbacks, or
+                                * all fences for this @dep_atom have signaled,
+                                * but the worker that will queue the atom has
+                                * not yet run.
                                 *
-                                * Mark the atom as handled by setting
-                                * dep_count to -1 so that the worker doesn't
-                                * queue the atom again.
-                                */
-                               atomic_set(&dep_atom->dma_fence.dep_count, -1);
-                               /*
-                                * Remove the atom from the list of dma-fence
-                                * waiting atoms.
+                                * Wait for the fences to signal and the fence
+                                * worker to run and handle @dep_atom. If
+                                * @dep_atom was completed due to error on
+                                * @katom, then the fence worker will pick up
+                                * the complete status and error code set on
+                                * @dep_atom above.
                                 */
-                               kbase_dma_fence_waiters_remove(dep_atom);
-                               dep_satisfied = true;
-                       } else {
                                dep_satisfied = false;
                        }
 #endif /* CONFIG_MALI_DMA_FENCE */
@@ -665,6 +661,40 @@ static void jd_check_force_failure(struct kbase_jd_atom *katom)
 }
 #endif
 
+/**
+ * is_dep_valid - Validate that a dependency is valid for early dependency
+ *                submission
+ * @katom: Dependency atom to validate
+ *
+ * A dependency is valid if any of the following are true:
+ * - It does not exist (a non-existent dependency does not block submission)
+ * - It is in the job scheduler
+ * - It has completed, does not have a failure event code, and has not been
+ *   marked to fail in the future
+ *
+ * Return: true if valid, false otherwise
+ */
+static bool is_dep_valid(struct kbase_jd_atom *katom)
+{
+       /* If there's no dependency then this is 'valid' from the perspective of
+        * early dependency submission */
+       if (!katom)
+               return true;
+
+       /* Dependency must have reached the job scheduler */
+       if (katom->status < KBASE_JD_ATOM_STATE_IN_JS)
+               return false;
+
+       /* If dependency has completed and has failed or will fail then it is
+        * not valid */
+       if (katom->status >= KBASE_JD_ATOM_STATE_HW_COMPLETED &&
+                       (katom->event_code != BASE_JD_EVENT_DONE ||
+                       katom->will_fail_event_code))
+               return false;
+
+       return true;
+}
+
 static void jd_try_submitting_deps(struct list_head *out_list,
                struct kbase_jd_atom *node)
 {
@@ -679,14 +709,41 @@ static void jd_try_submitting_deps(struct list_head *out_list,
 
                        if (IS_GPU_ATOM(dep_atom) && !dep_atom->in_jd_list) {
                                /*Check if atom deps look sane*/
-                               bool dep0_valid = !dep_atom->dep[0].atom ||
-                                               (dep_atom->dep[0].atom->status
-                                               >= KBASE_JD_ATOM_STATE_IN_JS);
-                               bool dep1_valid = !dep_atom->dep[1].atom ||
-                                               (dep_atom->dep[1].atom->status
-                                               >= KBASE_JD_ATOM_STATE_IN_JS);
-
-                               if (dep0_valid && dep1_valid) {
+                               bool dep0_valid = is_dep_valid(
+                                               dep_atom->dep[0].atom);
+                               bool dep1_valid = is_dep_valid(
+                                               dep_atom->dep[1].atom);
+                               bool dep_satisfied = true;
+#ifdef CONFIG_MALI_DMA_FENCE
+                               int dep_count;
+
+                               dep_count = atomic_read(
+                                               &dep_atom->dma_fence.dep_count);
+                               if (likely(dep_count == -1)) {
+                                       dep_satisfied = true;
+                               } else {
+                               /*
+                                * There are either still active callbacks, or
+                                * all fences for this @dep_atom have signaled,
+                                * but the worker that will queue the atom has
+                                * not yet run.
+                                *
+                                * Wait for the fences to signal and the fence
+                                * worker to run and handle @dep_atom. If
+                                * @dep_atom was completed due to error on
+                                * @katom, then the fence worker will pick up
+                                * the complete status and error code set on
+                                * @dep_atom above.
+                                */
+                                       dep_satisfied = false;
+                               }
+#endif /* CONFIG_MALI_DMA_FENCE */
+#ifdef CONFIG_KDS
+                               dep_satisfied = dep_satisfied &&
+                                               dep_atom->kds_dep_satisfied;
+#endif
+
+                               if (dep0_valid && dep1_valid && dep_satisfied) {
                                        dep_atom->in_jd_list = true;
                                        list_add(&dep_atom->jd_item, out_list);
                                }
@@ -758,7 +815,7 @@ bool jd_done_nolock(struct kbase_jd_atom *katom,
 
                for (i = 0; i < 2; i++)
                        jd_resolve_dep(&runnable_jobs, katom, i,
-                                       kctx->jctx.sched_info.ctx.is_dying);
+                                       kbase_ctx_flag(kctx, KCTX_DYING));
 
                if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES)
                        kbase_jd_post_external_resources(katom);
@@ -774,7 +831,7 @@ bool jd_done_nolock(struct kbase_jd_atom *katom,
                        KBASE_DEBUG_ASSERT(node->status != KBASE_JD_ATOM_STATE_UNUSED);
 
                        if (node->status != KBASE_JD_ATOM_STATE_COMPLETED &&
-                                       !kctx->jctx.sched_info.ctx.is_dying) {
+                                       !kbase_ctx_flag(kctx, KCTX_DYING)) {
                                need_to_try_schedule_context |= jd_run_atom(node);
                        } else {
                                node->event_code = katom->event_code;
@@ -919,7 +976,10 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us
        katom->x_pre_dep = NULL;
        katom->x_post_dep = NULL;
        katom->will_fail_event_code = BASE_JD_EVENT_NOT_STARTED;
-       katom->exit_protected_state = KBASE_ATOM_EXIT_PROTECTED_CHECK;
+
+       /* Implicitly sets katom->protected_state.enter as well. */
+       katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK;
+
        katom->age = kctx->age_count++;
 
        INIT_LIST_HEAD(&katom->jd_item);
@@ -933,6 +993,8 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us
        atomic_set(&katom->dma_fence.dep_count, -1);
 #endif
 
+       kbase_tlstream_tl_attrib_atom_state(katom, TL_ATOM_STATE_IDLE);
+
        /* Don't do anything if there is a mess up with dependencies.
           This is done in a separate cycle to check both the dependencies at ones, otherwise
           it will be extra complexity to deal with 1st dependency ( just added to the list )
@@ -1037,10 +1099,17 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us
                katom->status = KBASE_JD_ATOM_STATE_QUEUED;
        }
 
+       /* For invalid priority, be most lenient and choose the default */
+       sched_prio = kbasep_js_atom_prio_to_sched_prio(user_atom->prio);
+       if (sched_prio == KBASE_JS_ATOM_SCHED_PRIO_INVALID)
+               sched_prio = KBASE_JS_ATOM_SCHED_PRIO_DEFAULT;
+       katom->sched_priority = sched_prio;
+
        /* Create a new atom recording all dependencies it was set up with. */
        kbase_tlstream_tl_new_atom(
                        katom,
                        kbase_jd_atom_id(kctx, katom));
+       kbase_tlstream_tl_attrib_atom_priority(katom, katom->sched_priority);
        kbase_tlstream_tl_ret_atom_ctx(katom, kctx);
        for (i = 0; i < 2; i++)
                if (BASE_JD_DEP_TYPE_INVALID != kbase_jd_katom_dep_type(
@@ -1092,12 +1161,6 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us
                goto out;
        }
 
-       /* For invalid priority, be most lenient and choose the default */
-       sched_prio = kbasep_js_atom_prio_to_sched_prio(user_atom->prio);
-       if (sched_prio == KBASE_JS_ATOM_SCHED_PRIO_INVALID)
-               sched_prio = KBASE_JS_ATOM_SCHED_PRIO_DEFAULT;
-       katom->sched_priority = sched_prio;
-
        if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) {
                /* handle what we need to do to access the external resources */
                if (kbase_jd_pre_external_resources(katom, user_atom) != 0) {
@@ -1212,7 +1275,7 @@ int kbase_jd_submit(struct kbase_context *kctx,
 
        beenthere(kctx, "%s", "Enter");
 
-       if ((kctx->jctx.sched_info.ctx.flags & KBASE_CTX_FLAG_SUBMIT_DISABLED) != 0) {
+       if (kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) {
                dev_err(kbdev->dev, "Attempt to submit to a context that has SUBMIT_DISABLED set on it");
                return -EINVAL;
        }
@@ -1374,7 +1437,6 @@ void kbase_jd_done_worker(struct work_struct *data)
        struct kbasep_js_device_data *js_devdata;
        u64 cache_jc = katom->jc;
        struct kbasep_js_atom_retained_state katom_retained_state;
-       bool schedule = false;
        bool context_idle;
        base_jd_core_req core_req = katom->core_req;
        u64 affinity = katom->affinity;
@@ -1397,6 +1459,7 @@ void kbase_jd_done_worker(struct work_struct *data)
         * Begin transaction on JD context and JS context
         */
        mutex_lock(&jctx->lock);
+       kbase_tlstream_tl_attrib_atom_state(katom, TL_ATOM_STATE_DONE);
        mutex_lock(&js_devdata->queue_mutex);
        mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
 
@@ -1404,7 +1467,7 @@ void kbase_jd_done_worker(struct work_struct *data)
         * because it only happens in response to an IRQ from a job that was
         * running.
         */
-       KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled);
+       KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
 
        if (katom->event_code == BASE_JD_EVENT_STOPPED) {
                /* Atom has been promoted to stopped */
@@ -1413,12 +1476,12 @@ void kbase_jd_done_worker(struct work_struct *data)
                mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
                mutex_unlock(&js_devdata->queue_mutex);
 
-               spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+               spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 
                katom->status = KBASE_JD_ATOM_STATE_IN_JS;
                kbase_js_unpull(kctx, katom);
 
-               spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+               spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
                mutex_unlock(&jctx->lock);
 
                return;
@@ -1436,19 +1499,6 @@ void kbase_jd_done_worker(struct work_struct *data)
        /* Retain state before the katom disappears */
        kbasep_js_atom_retained_state_copy(&katom_retained_state, katom);
 
-       if (!kbasep_js_has_atom_finished(&katom_retained_state)) {
-               mutex_lock(&js_devdata->runpool_mutex);
-               kbasep_js_clear_job_retry_submit(katom);
-               /* An atom that has been hard-stopped might have previously
-                * been soft-stopped and has just finished before the hard-stop
-                * occurred. For this reason, clear the hard-stopped flag */
-               katom->atom_flags &= ~(KBASE_KATOM_FLAG_BEEN_HARD_STOPPED);
-               mutex_unlock(&js_devdata->runpool_mutex);
-       }
-
-       if (kbasep_js_has_atom_finished(&katom_retained_state))
-               schedule = true;
-
        context_idle = kbase_js_complete_atom_wq(kctx, katom);
 
        KBASE_DEBUG_ASSERT(kbasep_js_has_atom_finished(&katom_retained_state));
@@ -1458,54 +1508,53 @@ void kbase_jd_done_worker(struct work_struct *data)
        mutex_unlock(&js_devdata->queue_mutex);
        katom->atom_flags &= ~KBASE_KATOM_FLAG_HOLDING_CTX_REF;
        /* jd_done_nolock() requires the jsctx_mutex lock to be dropped */
-       schedule |= jd_done_nolock(katom, &kctx->completed_jobs);
+       jd_done_nolock(katom, &kctx->completed_jobs);
 
        /* katom may have been freed now, do not use! */
 
        if (context_idle) {
                unsigned long flags;
 
+               context_idle = false;
                mutex_lock(&js_devdata->queue_mutex);
-               spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+               spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 
                /* If kbase_sched() has scheduled this context back in then
-                * ctx_active will have been set after we marked it as inactive,
-                * and another pm reference will have been taken, so drop our
-                * reference. But do not call kbase_jm_idle_ctx(), as the
-                * context is active and fast-starting is allowed.
+                * KCTX_ACTIVE will have been set after we marked it as
+                * inactive, and another pm reference will have been taken, so
+                * drop our reference. But do not call kbase_jm_idle_ctx(), as
+                * the context is active and fast-starting is allowed.
                 *
                 * If an atom has been fast-started then kctx->atoms_pulled will
-                * be non-zero but ctx_active will still be false (as the
+                * be non-zero but KCTX_ACTIVE will still be false (as the
                 * previous pm reference has been inherited). Do NOT drop our
                 * reference, as it has been re-used, and leave the context as
                 * active.
                 *
-                * If no new atoms have been started then ctx_active will still
+                * If no new atoms have been started then KCTX_ACTIVE will still
                 * be false and atoms_pulled will be zero, so drop the reference
                 * and call kbase_jm_idle_ctx().
                 *
                 * As the checks are done under both the queue_mutex and
-                * runpool_irq.lock is should be impossible for this to race
+                * hwaccess_lock it should be impossible for this to race
                 * with the scheduler code.
                 */
-               if (kctx->ctx_active || !atomic_read(&kctx->atoms_pulled)) {
+               if (kbase_ctx_flag(kctx, KCTX_ACTIVE) ||
+                   !atomic_read(&kctx->atoms_pulled)) {
                        /* Calling kbase_jm_idle_ctx() here will ensure that
                         * atoms are not fast-started when we drop the
-                        * runpool_irq.lock. This is not performed if ctx_active
-                        * is set as in that case another pm reference has been
-                        * taken and a fast-start would be valid.
+                        * hwaccess_lock. This is not performed if
+                        * KCTX_ACTIVE is set as in that case another pm
+                        * reference has been taken and a fast-start would be
+                        * valid.
                         */
-                       if (!kctx->ctx_active)
+                       if (!kbase_ctx_flag(kctx, KCTX_ACTIVE))
                                kbase_jm_idle_ctx(kbdev, kctx);
-                       spin_unlock_irqrestore(&js_devdata->runpool_irq.lock,
-                                       flags);
-
-                       kbase_pm_context_idle(kbdev);
+                       context_idle = true;
                } else {
-                       kctx->ctx_active = true;
-                       spin_unlock_irqrestore(&js_devdata->runpool_irq.lock,
-                                       flags);
+                       kbase_ctx_flag_set(kctx, KCTX_ACTIVE);
                }
+               spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
                mutex_unlock(&js_devdata->queue_mutex);
        }
 
@@ -1519,8 +1568,7 @@ void kbase_jd_done_worker(struct work_struct *data)
 
        kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx, &katom_retained_state);
 
-       if (schedule)
-               kbase_js_sched_all(kbdev);
+       kbase_js_sched_all(kbdev);
 
        if (!atomic_dec_return(&kctx->work_count)) {
                /* If worker now idle then post all events that jd_done_nolock()
@@ -1540,6 +1588,9 @@ void kbase_jd_done_worker(struct work_struct *data)
        kbase_backend_complete_wq_post_sched(kbdev, core_req, affinity,
                        coreref_state);
 
+       if (context_idle)
+               kbase_pm_context_idle(kbdev);
+
        KBASE_TRACE_ADD(kbdev, JD_DONE_WORKER_END, kctx, NULL, cache_jc, 0);
 }
 
@@ -1582,7 +1633,7 @@ static void jd_cancel_worker(struct work_struct *data)
         * any), nor must we try to schedule out the context (it's already
         * scheduled out).
         */
-       KBASE_DEBUG_ASSERT(!js_kctx_info->ctx.is_scheduled);
+       KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED));
 
        /* Scheduler: Remove the job from the system */
        mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
@@ -1620,7 +1671,7 @@ static void jd_cancel_worker(struct work_struct *data)
  *
  * Context:
  *   This can be called safely from atomic context.
- *   The caller must hold kbasep_js_device_data.runpool_irq.lock
+ *   The caller must hold kbdev->hwaccess_lock
  */
 void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr,
                ktime_t *end_timestamp, kbasep_js_atom_done_code done_code)
@@ -1675,7 +1726,7 @@ void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom)
        KBASE_TRACE_ADD(kbdev, JD_CANCEL, kctx, katom, katom->jc, 0);
 
        /* This should only be done from a context that is not scheduled */
-       KBASE_DEBUG_ASSERT(!js_kctx_info->ctx.is_scheduled);
+       KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED));
 
        WARN_ON(work_pending(&katom->work));
 
index 0cf75f5..6437e42 100644
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -57,7 +57,7 @@ static int kbasep_jd_debugfs_atoms_show(struct seq_file *sfile, void *data)
        /* General atom states */
        mutex_lock(&kctx->jctx.lock);
        /* JS-related states */
-       spin_lock_irqsave(&kctx->kbdev->js_data.runpool_irq.lock, irq_flags);
+       spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, irq_flags);
        for (i = 0; i != BASE_JD_ATOM_COUNT; ++i) {
                struct kbase_jd_atom *atom = &atoms[i];
                s64 start_timestamp = 0;
@@ -84,7 +84,7 @@ static int kbasep_jd_debugfs_atoms_show(struct seq_file *sfile, void *data)
                                        atom->time_spent_us * 1000 : start_timestamp)
                                );
        }
-       spin_unlock_irqrestore(&kctx->kbdev->js_data.runpool_irq.lock, irq_flags);
+       spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, irq_flags);
        mutex_unlock(&kctx->jctx.lock);
 
        return 0;
@@ -110,7 +110,7 @@ static const struct file_operations kbasep_jd_debugfs_atoms_fops = {
        .release = single_release,
 };
 
-void kbasep_jd_debugfs_ctx_add(struct kbase_context *kctx)
+void kbasep_jd_debugfs_ctx_init(struct kbase_context *kctx)
 {
        KBASE_DEBUG_ASSERT(kctx != NULL);
 
index bc1878f..090f816 100644
 #define MALI_JD_DEBUGFS_VERSION 1
 
 /**
- * kbasep_jd_debugfs_ctx_add() - Add debugfs entries for JD system
+ * kbasep_jd_debugfs_ctx_init() - Add debugfs entries for JD system
  *
  * @kctx Pointer to kbase_context
  */
-void kbasep_jd_debugfs_ctx_add(struct kbase_context *kctx);
+void kbasep_jd_debugfs_ctx_init(struct kbase_context *kctx);
 
 #endif  /*_KBASE_JD_DEBUGFS_H*/
index 6342532..0c5c6a6 100644
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -60,7 +60,7 @@ u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask)
 {
        u32 ret_mask = 0;
 
-       lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+       lockdep_assert_held(&kbdev->hwaccess_lock);
 
        while (js_mask) {
                int js = ffs(js_mask) - 1;
@@ -79,7 +79,7 @@ void kbase_jm_try_kick(struct kbase_device *kbdev, u32 js_mask)
 {
        struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
 
-       lockdep_assert_held(&js_devdata->runpool_irq.lock);
+       lockdep_assert_held(&kbdev->hwaccess_lock);
 
        if (!down_trylock(&js_devdata->schedule_sem)) {
                kbase_jm_kick(kbdev, js_mask);
@@ -91,7 +91,7 @@ void kbase_jm_try_kick_all(struct kbase_device *kbdev)
 {
        struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
 
-       lockdep_assert_held(&js_devdata->runpool_irq.lock);
+       lockdep_assert_held(&kbdev->hwaccess_lock);
 
        if (!down_trylock(&js_devdata->schedule_sem)) {
                kbase_jm_kick_all(kbdev);
@@ -101,30 +101,31 @@ void kbase_jm_try_kick_all(struct kbase_device *kbdev)
 
 void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
 {
-       lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+       lockdep_assert_held(&kbdev->hwaccess_lock);
 
        if (kbdev->hwaccess.active_kctx == kctx)
                kbdev->hwaccess.active_kctx = NULL;
 }
 
-void kbase_jm_return_atom_to_js(struct kbase_device *kbdev,
+struct kbase_jd_atom *kbase_jm_return_atom_to_js(struct kbase_device *kbdev,
                                struct kbase_jd_atom *katom)
 {
-       lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+       lockdep_assert_held(&kbdev->hwaccess_lock);
 
        if (katom->event_code != BASE_JD_EVENT_STOPPED &&
                        katom->event_code != BASE_JD_EVENT_REMOVED_FROM_NEXT) {
-               kbase_js_complete_atom(katom, NULL);
+               return kbase_js_complete_atom(katom, NULL);
        } else {
                kbase_js_unpull(katom->kctx, katom);
+               return NULL;
        }
 }
 
-void kbase_jm_complete(struct kbase_device *kbdev, struct kbase_jd_atom *katom,
-                       ktime_t *end_timestamp)
+struct kbase_jd_atom *kbase_jm_complete(struct kbase_device *kbdev,
+               struct kbase_jd_atom *katom, ktime_t *end_timestamp)
 {
-       lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+       lockdep_assert_held(&kbdev->hwaccess_lock);
 
-       kbase_js_complete_atom(katom, end_timestamp);
+       return kbase_js_complete_atom(katom, end_timestamp);
 }
 
index 27aca3a..a74ee24 100644
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014, 2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -28,7 +28,7 @@
  * @kbdev:     Device pointer
  * @js_mask:   Mask of the job slots that can be pulled from.
  *
- * Caller must hold the runpool_irq lock and schedule_sem semaphore
+ * Caller must hold the hwaccess_lock and schedule_sem semaphore
  *
  * Return: Mask of the job slots that can still be submitted to.
  */
@@ -39,7 +39,7 @@ u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask);
  *                      slots.
  * @kbdev:     Device pointer
  *
- * Caller must hold the runpool_irq lock and schedule_sem semaphore
+ * Caller must hold the hwaccess_lock and schedule_sem semaphore
  *
  * Return: Mask of the job slots that can still be submitted to.
  */
@@ -52,7 +52,7 @@ static inline u32 kbase_jm_kick_all(struct kbase_device *kbdev)
  * kbase_jm_try_kick - Attempt to call kbase_jm_kick
  * @kbdev:   Device pointer
  * @js_mask: Mask of the job slots that can be pulled from
- * Context: Caller must hold runpool_irq lock
+ * Context: Caller must hold hwaccess_lock
  *
  * If schedule_sem can be immediately obtained then this function will call
  * kbase_jm_kick() otherwise it will do nothing.
@@ -62,7 +62,7 @@ void kbase_jm_try_kick(struct kbase_device *kbdev, u32 js_mask);
 /**
  * kbase_jm_try_kick_all() - Attempt to call kbase_jm_kick_all
  * @kbdev:  Device pointer
- * Context: Caller must hold runpool_irq lock
+ * Context: Caller must hold hwaccess_lock
  *
  * If schedule_sem can be immediately obtained then this function will call
  * kbase_jm_kick_all() otherwise it will do nothing.
@@ -80,7 +80,7 @@ void kbase_jm_try_kick_all(struct kbase_device *kbdev);
  * The context should have no atoms currently pulled from it
  * (kctx->atoms_pulled == 0).
  *
- * Caller must hold the runpool_irq lock
+ * Caller must hold the hwaccess_lock
  */
 void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
 
@@ -90,17 +90,21 @@ void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
  *                               dependency
  * @kbdev:     Device pointer
  * @katom:     Atom that has been stopped or will be failed
+ *
+ * Return: Atom that has now been unblocked and can now be run, or NULL if none
  */
-void kbase_jm_return_atom_to_js(struct kbase_device *kbdev,
-                               struct kbase_jd_atom *katom);
+struct kbase_jd_atom *kbase_jm_return_atom_to_js(struct kbase_device *kbdev,
+                       struct kbase_jd_atom *katom);
 
 /**
  * kbase_jm_complete() - Complete an atom
  * @kbdev:             Device pointer
  * @katom:             Atom that has completed
  * @end_timestamp:     Timestamp of atom completion
+ *
+ * Return: Atom that has now been unblocked and can now be run, or NULL if none
  */
-void kbase_jm_complete(struct kbase_device *kbdev, struct kbase_jd_atom *katom,
-                       ktime_t *end_timestamp);
+struct kbase_jd_atom *kbase_jm_complete(struct kbase_device *kbdev,
+               struct kbase_jd_atom *katom, ktime_t *end_timestamp);
 
 #endif /* _KBASE_JM_H_ */
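
All of the kernel-doc above now names kbdev->hwaccess_lock as the lock callers must hold, in place of the old js_data.runpool_irq.lock; schedule_sem is still only tried, never blocked on. A hedged sketch of the documented calling convention (the example_ wrapper is hypothetical; real callers sit in the JS core and the backend):

    #include "mali_kbase.h"
    #include "mali_kbase_jm.h"

    /* Hypothetical caller: hold hwaccess_lock (IRQ-safe) around the kick.
     * kbase_jm_try_kick_all() only proceeds if schedule_sem can be taken
     * with down_trylock(), so nothing sleeps under the spinlock. */
    static void example_kick_all_slots(struct kbase_device *kbdev)
    {
            unsigned long flags;

            spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
            kbase_jm_try_kick_all(kbdev);
            spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
    }
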
index c591ebb..60a7373 100644 (file)
@@ -89,7 +89,7 @@ static int kbasep_js_trace_get_refcnt(struct kbase_device *kbdev,
 
        js_devdata = &kbdev->js_data;
 
-       spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+       spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
        as_nr = kctx->as_nr;
        if (as_nr != KBASEP_AS_NR_INVALID) {
                struct kbasep_js_per_as_data *js_per_as_data;
@@ -98,7 +98,7 @@ static int kbasep_js_trace_get_refcnt(struct kbase_device *kbdev,
 
                refcnt = js_per_as_data->as_busy_refcount;
        }
-       spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+       spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 
        return refcnt;
 }
@@ -198,7 +198,7 @@ static void kbase_js_sync_timers(struct kbase_device *kbdev)
        mutex_unlock(&kbdev->js_data.runpool_mutex);
 }
 
-/* Hold the kbasep_js_device_data::runpool_irq::lock for this */
+/* Hold the hwaccess_lock for this */
 bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev,
                struct kbase_context *kctx)
 {
@@ -248,7 +248,7 @@ jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, int js, int prio)
 {
        struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
 
-       lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+       lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
 
        return RB_EMPTY_ROOT(&rb->runnable_tree);
 }
@@ -259,7 +259,7 @@ jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, int js, int prio)
  * @kctx: Pointer to kbase context with ring buffer.
  * @js:   Job slot id to check.
  *
- * Caller must hold runpool_irq.lock
+ * Caller must hold hwaccess_lock
  *
  * Return: true if the ring buffers for all priorities have no pullable atoms,
  *        false otherwise.
@@ -269,7 +269,7 @@ jsctx_rb_none_to_pull(struct kbase_context *kctx, int js)
 {
        int prio;
 
-       lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+       lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
 
        for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) {
                if (!jsctx_rb_none_to_pull_prio(kctx, js, prio))
@@ -294,8 +294,7 @@ jsctx_rb_none_to_pull(struct kbase_context *kctx, int js)
  * enumerated when this function returns jsctx->lock must be held when calling
  * this function.
  *
- * The HW access lock, js_data.runpool_irq.lock, must always be held when
- * calling this function.
+ * The HW access lock must always be held when calling this function.
  */
 static void
 jsctx_queue_foreach_prio(struct kbase_context *kctx, int js, int prio,
@@ -303,7 +302,7 @@ jsctx_queue_foreach_prio(struct kbase_context *kctx, int js, int prio,
 {
        struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js];
 
-       lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+       lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
 
        while (!RB_EMPTY_ROOT(&queue->runnable_tree)) {
                struct rb_node *node = rb_first(&queue->runnable_tree);
@@ -361,7 +360,7 @@ jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio)
        struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
        struct rb_node *node;
 
-       lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+       lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
 
        node = rb_first(&rb->runnable_tree);
        if (!node)
@@ -379,7 +378,7 @@ jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio)
  * KBASE_JS_ATOM_SCHED_PRIO_HIGH, for the specified @js and @prio and return a
  * pointer to the next atom, unless all the priority's ring buffers are empty.
  *
- * Caller must hold the runpool_irq.lock.
+ * Caller must hold the hwaccess_lock.
  *
  * Return: Pointer to next atom in buffer, or NULL if there is no atom.
  */
@@ -388,7 +387,7 @@ jsctx_rb_peek(struct kbase_context *kctx, int js)
 {
        int prio;
 
-       lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+       lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
 
        for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) {
                struct kbase_jd_atom *katom;
@@ -417,7 +416,7 @@ jsctx_rb_pull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
        int js = katom->slot_nr;
        struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
 
-       lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+       lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
 
        /* Atoms must be pulled in the correct order. */
        WARN_ON(katom != jsctx_rb_peek_prio(kctx, js, prio));
@@ -435,7 +434,7 @@ jsctx_tree_add(struct kbase_context *kctx, struct kbase_jd_atom *katom)
        struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js];
        struct rb_node **new = &(queue->runnable_tree.rb_node), *parent = NULL;
 
-       lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+       lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
 
        while (*new) {
                struct kbase_jd_atom *entry = container_of(*new,
@@ -466,7 +465,7 @@ jsctx_tree_add(struct kbase_context *kctx, struct kbase_jd_atom *katom)
 static inline void
 jsctx_rb_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
 {
-       lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+       lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
 
        jsctx_tree_add(kctx, katom);
 }
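
jsctx_rb_peek() and jsctx_rb_pull() together form the pull interface over the per-priority runnable trees, and every helper in this group now asserts hwaccess_lock instead of runpool_irq.lock. A simplified, hypothetical sketch of a pull loop over one slot, written as if it were another static helper inside mali_kbase_js.c (the real path is kbase_js_pull(), which also checks dependencies, core requirements and context state omitted here):

    /* Hypothetical helper: drain every currently-runnable atom from one
     * job slot of a context, highest priority first.  Sketch only; real
     * pulling goes through kbase_js_pull() with many more checks. */
    static void example_drain_slot(struct kbase_context *kctx, int js)
    {
            struct kbase_jd_atom *katom;

            lockdep_assert_held(&kctx->kbdev->hwaccess_lock);

            while ((katom = jsctx_rb_peek(kctx, js)) != NULL)
                    jsctx_rb_pull(kctx, katom);
    }
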
@@ -630,7 +629,7 @@ int kbasep_js_devdata_init(struct kbase_device * const kbdev)
 
        mutex_init(&jsdd->runpool_mutex);
        mutex_init(&jsdd->queue_mutex);
-       spin_lock_init(&jsdd->runpool_irq.lock);
+       spin_lock_init(&kbdev->hwaccess_lock);
        sema_init(&jsdd->schedule_sem, 1);
 
        err = kbasep_js_policy_init(kbdev);
@@ -699,14 +698,14 @@ int kbasep_js_kctx_init(struct kbase_context * const kctx)
        KBASE_DEBUG_ASSERT(js_kctx_info->init_status == JS_KCTX_INIT_NONE);
 
        js_kctx_info->ctx.nr_jobs = 0;
-       js_kctx_info->ctx.is_scheduled = false;
-       js_kctx_info->ctx.is_dying = false;
+       kbase_ctx_flag_clear(kctx, KCTX_SCHEDULED);
+       kbase_ctx_flag_clear(kctx, KCTX_DYING);
        memset(js_kctx_info->ctx.ctx_attr_ref_count, 0,
                        sizeof(js_kctx_info->ctx.ctx_attr_ref_count));
 
        /* Initially, the context is disabled from submission until the create
         * flags are set */
-       js_kctx_info->ctx.flags = KBASE_CTX_FLAG_SUBMIT_DISABLED;
+       kbase_ctx_flag_set(kctx, KCTX_SUBMIT_DISABLED);
 
        js_kctx_info->init_status |= JS_KCTX_INIT_CONSTANTS;
 
@@ -753,7 +752,7 @@ void kbasep_js_kctx_term(struct kbase_context *kctx)
 
        if ((js_kctx_info->init_status & JS_KCTX_INIT_CONSTANTS)) {
                /* The caller must de-register all jobs before calling this */
-               KBASE_DEBUG_ASSERT(!js_kctx_info->ctx.is_scheduled);
+               KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED));
                KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs == 0);
        }
 
@@ -763,11 +762,11 @@ void kbasep_js_kctx_term(struct kbase_context *kctx)
        for (js = 0; js < kbdev->gpu_props.num_job_slots; js++)
                list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
 
-       if (kctx->ctx_runnable_ref) {
+       if (kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)) {
                WARN_ON(atomic_read(&kbdev->js_data.nr_contexts_runnable) <= 0);
                atomic_dec(&kbdev->js_data.nr_contexts_runnable);
                update_ctx_count = true;
-               kctx->ctx_runnable_ref = false;
+               kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF);
        }
 
        mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
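
The boolean fields is_scheduled and is_dying, the ctx_runnable_ref flag and the KBASE_CTX_FLAG_SUBMIT_DISABLED bit are all folded here into context flags (KCTX_SCHEDULED, KCTX_DYING, KCTX_RUNNABLE_REF, KCTX_SUBMIT_DISABLED) accessed through kbase_ctx_flag(), kbase_ctx_flag_set() and kbase_ctx_flag_clear(). Their real definitions live in the newly added mali_kbase_context.h, which is not shown in this part of the diff; the following is only a plausible sketch of such helpers, assuming the flags sit in an atomic_t on the context:

    #include <linux/atomic.h>

    /* Sketch under stated assumptions: kctx->flags is an atomic_t bit
     * mask.  The authoritative helpers and flag values are the ones in
     * mali_kbase_context.h. */
    static inline bool example_ctx_flag(struct kbase_context *kctx, int flag)
    {
            return atomic_read(&kctx->flags) & flag;
    }

    static inline void example_ctx_flag_set(struct kbase_context *kctx, int flag)
    {
            atomic_or(flag, &kctx->flags);
    }

    static inline void example_ctx_flag_clear(struct kbase_context *kctx, int flag)
    {
            atomic_andnot(flag, &kctx->flags);
    }
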
@@ -789,12 +788,12 @@ void kbasep_js_kctx_term(struct kbase_context *kctx)
  * kbase_js_ctx_list_add_pullable_nolock - Variant of
  *                                         kbase_jd_ctx_list_add_pullable()
  *                                         where the caller must hold
- *                                         runpool_irq.lock
+ *                                         hwaccess_lock
  * @kbdev:  Device pointer
  * @kctx:   Context to add to queue
  * @js:     Job slot to use
  *
- * Caller must hold runpool_irq.lock
+ * Caller must hold hwaccess_lock
  *
  * Return: true if caller should call kbase_backend_ctx_count_changed()
  */
@@ -804,7 +803,7 @@ static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev,
 {
        bool ret = false;
 
-       lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+       lockdep_assert_held(&kbdev->hwaccess_lock);
 
        if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]))
                list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
@@ -816,8 +815,8 @@ static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev,
                kbdev->js_data.nr_contexts_pullable++;
                ret = true;
                if (!atomic_read(&kctx->atoms_pulled)) {
-                       WARN_ON(kctx->ctx_runnable_ref);
-                       kctx->ctx_runnable_ref = true;
+                       WARN_ON(kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
+                       kbase_ctx_flag_set(kctx, KCTX_RUNNABLE_REF);
                        atomic_inc(&kbdev->js_data.nr_contexts_runnable);
                }
        }
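
Per the kernel-doc above, the _nolock variants return true when the caller should go on to call kbase_backend_ctx_count_changed(). A hedged sketch of that calling pattern, written as if it were another helper inside mali_kbase_js.c (the real locked wrappers, such as kbase_js_ctx_list_add_pullable_head() further down, take hwaccess_lock in the same way; exactly where the backend notification happens in the real driver is not shown in this hunk):

    /* Hypothetical wrapper: take hwaccess_lock, call the _nolock variant,
     * then act on the boolean it returned, as the kernel-doc requests. */
    static void example_make_ctx_pullable(struct kbase_device *kbdev,
                    struct kbase_context *kctx, int js)
    {
            bool count_changed;
            unsigned long flags;

            spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
            count_changed = kbase_js_ctx_list_add_pullable_nolock(kbdev, kctx, js);
            spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);

            if (count_changed)
                    kbase_backend_ctx_count_changed(kbdev);
    }
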
@@ -830,12 +829,12 @@ static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev,
  * kbase_js_ctx_list_add_pullable_head_nolock - Variant of
  *                                              kbase_js_ctx_list_add_pullable_head()
  *                                              where the caller must hold
- *                                              runpool_irq.lock
+ *                                              hwaccess_lock
  * @kbdev:  Device pointer
  * @kctx:   Context to add to queue
  * @js:     Job slot to use
  *
- * Caller must hold runpool_irq.lock
+ * Caller must hold hwaccess_lock
  *
  * Return:  true if caller should call kbase_backend_ctx_count_changed()
  */
@@ -844,7 +843,7 @@ static bool kbase_js_ctx_list_add_pullable_head_nolock(
 {
        bool ret = false;
 
-       lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+       lockdep_assert_held(&kbdev->hwaccess_lock);
 
        if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]))
                list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
@@ -856,8 +855,8 @@ static bool kbase_js_ctx_list_add_pullable_head_nolock(
                kbdev->js_data.nr_contexts_pullable++;
                ret = true;
                if (!atomic_read(&kctx->atoms_pulled)) {
-                       WARN_ON(kctx->ctx_runnable_ref);
-                       kctx->ctx_runnable_ref = true;
+                       WARN_ON(kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
+                       kbase_ctx_flag_set(kctx, KCTX_RUNNABLE_REF);
                        atomic_inc(&kbdev->js_data.nr_contexts_runnable);
                }
        }
@@ -888,9 +887,9 @@ static bool kbase_js_ctx_list_add_pullable_head(struct kbase_device *kbdev,
        bool ret;
        unsigned long flags;
 
-       spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags);
+       spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
        ret = kbase_js_ctx_list_add_pullable_head_nolock(kbdev, kctx, js);
-       spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags);
+       spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);