From 7f166ae12d17f9dd42874b5182f3ad0cf1e95ff2 Mon Sep 17 00:00:00 2001 From: chenzhen Date: Thu, 12 Jan 2017 17:18:47 +0800 Subject: [PATCH] Revert "MALI: rockchip: upgrade midgard DDK to r14p0-01rel0" This reverts commit d1637ff80953fd46692f923f3ee7b656fb917081. Change-Id: Ib99bae99fe7246142bfa7369b8e79ebbfae1e736 Signed-off-by: chenzhen --- drivers/gpu/arm/midgard/Kbuild | 5 +- .../gpu/mali_kbase_cache_policy_backend.c | 2 - .../midgard/backend/gpu/mali_kbase_devfreq.c | 3 - .../backend/gpu/mali_kbase_device_hw.c | 137 +----- .../backend/gpu/mali_kbase_instr_backend.c | 22 +- .../backend/gpu/mali_kbase_irq_linux.c | 2 - .../midgard/backend/gpu/mali_kbase_jm_as.c | 35 +- .../midgard/backend/gpu/mali_kbase_jm_defs.h | 3 +- .../midgard/backend/gpu/mali_kbase_jm_hw.c | 110 ++--- .../backend/gpu/mali_kbase_jm_internal.h | 6 +- .../midgard/backend/gpu/mali_kbase_jm_rb.c | 429 ++++++------------ .../midgard/backend/gpu/mali_kbase_jm_rb.h | 13 +- .../backend/gpu/mali_kbase_js_affinity.c | 7 +- .../backend/gpu/mali_kbase_js_affinity.h | 11 +- .../backend/gpu/mali_kbase_js_backend.c | 16 +- .../backend/gpu/mali_kbase_mmu_hw_direct.c | 41 +- .../backend/gpu/mali_kbase_pm_backend.c | 182 +++----- .../midgard/backend/gpu/mali_kbase_pm_ca.c | 23 +- .../midgard/backend/gpu/mali_kbase_pm_defs.h | 20 - .../backend/gpu/mali_kbase_pm_driver.c | 81 ++-- .../backend/gpu/mali_kbase_pm_internal.h | 45 +- .../backend/gpu/mali_kbase_pm_metrics.c | 15 +- .../backend/gpu/mali_kbase_pm_policy.c | 169 ++++--- .../arm/midgard/mali_base_hwconfig_features.h | 26 -- .../arm/midgard/mali_base_hwconfig_issues.h | 27 -- drivers/gpu/arm/midgard/mali_base_kernel.h | 9 - drivers/gpu/arm/midgard/mali_kbase.h | 66 +-- .../arm/midgard/mali_kbase_config_defaults.h | 8 +- drivers/gpu/arm/midgard/mali_kbase_context.c | 25 +- drivers/gpu/arm/midgard/mali_kbase_context.h | 90 ---- .../gpu/arm/midgard/mali_kbase_core_linux.c | 234 +++------- drivers/gpu/arm/midgard/mali_kbase_defs.h | 194 +++----- drivers/gpu/arm/midgard/mali_kbase_device.c | 2 +- .../gpu/arm/midgard/mali_kbase_dma_fence.c | 81 +--- drivers/gpu/arm/midgard/mali_kbase_event.c | 2 +- .../gpu/arm/midgard/mali_kbase_gator_api.c | 21 +- .../midgard/mali_kbase_gator_hwcnt_names.h | 1 - .../mali_kbase_gator_hwcnt_names_thex.h | 291 ------------ drivers/gpu/arm/midgard/mali_kbase_gpu_id.h | 1 - drivers/gpu/arm/midgard/mali_kbase_hw.c | 9 - .../arm/midgard/mali_kbase_hwaccess_defs.h | 5 +- .../gpu/arm/midgard/mali_kbase_hwaccess_jm.h | 17 +- drivers/gpu/arm/midgard/mali_kbase_jd.c | 205 ++++----- .../gpu/arm/midgard/mali_kbase_jd_debugfs.c | 8 +- .../gpu/arm/midgard/mali_kbase_jd_debugfs.h | 4 +- drivers/gpu/arm/midgard/mali_kbase_jm.c | 25 +- drivers/gpu/arm/midgard/mali_kbase_jm.h | 24 +- drivers/gpu/arm/midgard/mali_kbase_js.c | 334 +++++++------- drivers/gpu/arm/midgard/mali_kbase_js.h | 74 +-- .../gpu/arm/midgard/mali_kbase_js_ctx_attr.c | 20 +- drivers/gpu/arm/midgard/mali_kbase_js_defs.h | 54 ++- .../gpu/arm/midgard/mali_kbase_js_policy.h | 12 +- .../arm/midgard/mali_kbase_js_policy_cfs.c | 6 +- .../arm/midgard/mali_kbase_js_policy_cfs.h | 12 +- drivers/gpu/arm/midgard/mali_kbase_mem.c | 8 +- drivers/gpu/arm/midgard/mali_kbase_mem.h | 47 +- .../gpu/arm/midgard/mali_kbase_mem_linux.c | 168 +++---- drivers/gpu/arm/midgard/mali_kbase_mem_pool.c | 47 +- .../arm/midgard/mali_kbase_mem_pool_debugfs.c | 4 +- .../arm/midgard/mali_kbase_mem_pool_debugfs.h | 6 +- .../midgard/mali_kbase_mem_profile_debugfs.c | 13 +- drivers/gpu/arm/midgard/mali_kbase_mmu.c | 199 +++----- 
.../midgard/mali_kbase_regs_history_debugfs.c | 130 ------ .../midgard/mali_kbase_regs_history_debugfs.h | 50 -- drivers/gpu/arm/midgard/mali_kbase_replay.c | 3 +- drivers/gpu/arm/midgard/mali_kbase_softjobs.c | 41 +- drivers/gpu/arm/midgard/mali_kbase_tlstream.c | 295 ++++-------- drivers/gpu/arm/midgard/mali_kbase_tlstream.h | 119 ++--- .../arm/midgard/mali_kbase_trace_timeline.c | 12 +- .../arm/midgard/mali_kbase_trace_timeline.h | 17 +- drivers/gpu/arm/midgard/mali_kbase_uku.h | 38 +- drivers/gpu/arm/midgard/mali_kbase_vinstr.c | 11 +- drivers/gpu/arm/midgard/mali_midg_regmap.h | 1 - drivers/gpu/arm/midgard/sconscript | 98 ++-- 74 files changed, 1462 insertions(+), 3109 deletions(-) delete mode 100644 drivers/gpu/arm/midgard/mali_kbase_context.h delete mode 100644 drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_thex.h delete mode 100644 drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.c delete mode 100644 drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.h diff --git a/drivers/gpu/arm/midgard/Kbuild b/drivers/gpu/arm/midgard/Kbuild index 4348704e7640..8a47cddf7936 100644 --- a/drivers/gpu/arm/midgard/Kbuild +++ b/drivers/gpu/arm/midgard/Kbuild @@ -15,7 +15,7 @@ # Driver version string which is returned to userspace via an ioctl -MALI_RELEASE_NAME ?= "r14p0-01rel0" +MALI_RELEASE_NAME ?= "r13p0-00rel0" # Paths required for build KBASE_PATH = $(src) @@ -113,8 +113,7 @@ SRC := \ mali_kbase_mem_pool_debugfs.c \ mali_kbase_tlstream.c \ mali_kbase_strings.c \ - mali_kbase_as_fault_debugfs.c \ - mali_kbase_regs_history_debugfs.c + mali_kbase_as_fault_debugfs.c ifeq ($(MALI_UNIT_TEST),1) SRC += mali_kbase_tlstream_test.c diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c index fef9a2cb743e..c6862539c8dd 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c @@ -21,8 +21,6 @@ void kbase_cache_set_coherency_mode(struct kbase_device *kbdev, u32 mode) { - kbdev->current_gpu_coherency_mode = mode; - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG)) kbase_reg_write(kbdev, COHERENCY_ENABLE, mode, NULL); } diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c index 805dbd658906..083810f853ae 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c @@ -19,7 +19,6 @@ #include -#include #include #include #ifdef CONFIG_DEVFREQ_THERMAL @@ -124,8 +123,6 @@ kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags) kbdev->current_voltage = voltage; - kbase_tlstream_aux_devfreq_target((u64)freq); - kbase_pm_reset_dvfs_utilisation(kbdev); return err; diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c index dcdf15cdc3e8..b9238a305177 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c @@ -26,145 +26,16 @@ #include #if !defined(CONFIG_MALI_NO_MALI) - - -#ifdef CONFIG_DEBUG_FS - - -int kbase_io_history_resize(struct kbase_io_history *h, u16 new_size) -{ - struct kbase_io_access *old_buf; - struct kbase_io_access *new_buf; - unsigned long flags; - - if (!new_size) - goto out_err; /* The new size must not be 0 */ - - new_buf = vmalloc(new_size * 
sizeof(*h->buf)); - if (!new_buf) - goto out_err; - - spin_lock_irqsave(&h->lock, flags); - - old_buf = h->buf; - - /* Note: we won't bother with copying the old data over. The dumping - * logic wouldn't work properly as it relies on 'count' both as a - * counter and as an index to the buffer which would have changed with - * the new array. This is a corner case that we don't need to support. - */ - h->count = 0; - h->size = new_size; - h->buf = new_buf; - - spin_unlock_irqrestore(&h->lock, flags); - - vfree(old_buf); - - return 0; - -out_err: - return -1; -} - - -int kbase_io_history_init(struct kbase_io_history *h, u16 n) -{ - h->enabled = false; - spin_lock_init(&h->lock); - h->count = 0; - h->size = 0; - h->buf = NULL; - if (kbase_io_history_resize(h, n)) - return -1; - - return 0; -} - - -void kbase_io_history_term(struct kbase_io_history *h) -{ - vfree(h->buf); - h->buf = NULL; -} - - -/* kbase_io_history_add - add new entry to the register access history - * - * @h: Pointer to the history data structure - * @addr: Register address - * @value: The value that is either read from or written to the register - * @write: 1 if it's a register write, 0 if it's a read - */ -static void kbase_io_history_add(struct kbase_io_history *h, - void __iomem const *addr, u32 value, u8 write) -{ - struct kbase_io_access *io; - unsigned long flags; - - spin_lock_irqsave(&h->lock, flags); - - io = &h->buf[h->count % h->size]; - io->addr = (uintptr_t)addr | write; - io->value = value; - ++h->count; - /* If count overflows, move the index by the buffer size so the entire - * buffer will still be dumped later */ - if (unlikely(!h->count)) - h->count = h->size; - - spin_unlock_irqrestore(&h->lock, flags); -} - - -void kbase_io_history_dump(struct kbase_device *kbdev) -{ - struct kbase_io_history *const h = &kbdev->io_history; - u16 i; - size_t iters; - unsigned long flags; - - if (!unlikely(h->enabled)) - return; - - spin_lock_irqsave(&h->lock, flags); - - dev_err(kbdev->dev, "Register IO History:"); - iters = (h->size > h->count) ? h->count : h->size; - dev_err(kbdev->dev, "Last %zu register accesses of %zu total:\n", iters, - h->count); - for (i = 0; i < iters; ++i) { - struct kbase_io_access *io = - &h->buf[(h->count - iters + i) % h->size]; - char const access = (io->addr & 1) ? 
'w' : 'r'; - - dev_err(kbdev->dev, "%6i: %c: reg 0x%p val %08x\n", i, access, - (void *)(io->addr & ~0x1), io->value); - } - - spin_unlock_irqrestore(&h->lock, flags); -} - - -#endif /* CONFIG_DEBUG_FS */ - - void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value, struct kbase_context *kctx) { KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered); KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID); KBASE_DEBUG_ASSERT(kbdev->dev != NULL); + dev_dbg(kbdev->dev, "w: reg %04x val %08x", offset, value); writel(value, kbdev->reg + offset); -#ifdef CONFIG_DEBUG_FS - if (unlikely(kbdev->io_history.enabled)) - kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset, - value, 1); -#endif /* CONFIG_DEBUG_FS */ - dev_dbg(kbdev->dev, "w: reg %04x val %08x", offset, value); - if (kctx && kctx->jctx.tb) kbase_device_trace_register_access(kctx, REG_WRITE, offset, value); @@ -182,13 +53,7 @@ u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset, val = readl(kbdev->reg + offset); -#ifdef CONFIG_DEBUG_FS - if (unlikely(kbdev->io_history.enabled)) - kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset, - val, 0); -#endif /* CONFIG_DEBUG_FS */ dev_dbg(kbdev->dev, "r: reg %04x val %08x", offset, val); - if (kctx && kctx->jctx.tb) kbase_device_trace_register_access(kctx, REG_READ, offset, val); return val; diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c index 7ad309e8d7f4..3f06a10f7fed 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c @@ -45,11 +45,11 @@ static void kbasep_instr_hwcnt_cacheclean(struct kbase_device *kbdev) KBASE_INSTR_STATE_REQUEST_CLEAN); /* Enable interrupt */ - spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags); + spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags); irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL); kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask | CLEAN_CACHES_COMPLETED, NULL); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags); /* clean&invalidate the caches so we're sure the mmu tables for the dump * buffer is valid */ @@ -96,11 +96,11 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, } /* Enable interrupt */ - spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags); + spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags); irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL); kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask | PRFCNT_SAMPLE_COMPLETED, NULL); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags); /* In use, this context is the owner */ kbdev->hwcnt.kctx = kctx; @@ -185,9 +185,7 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, dev_dbg(kbdev->dev, "HW counters dumping set-up for context %p", kctx); return err; out_unrequest_cores: - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbase_pm_unrequest_cores(kbdev, true, shader_cores_needed); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); out_err: return err; } @@ -228,10 +226,11 @@ int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx) kbdev->hwcnt.backend.triggered = 0; /* Disable interrupt */ - spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags); + spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags); 
irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL); kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask & ~PRFCNT_SAMPLE_COMPLETED, NULL); + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags); /* Disable the counters */ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0, kctx); @@ -244,11 +243,10 @@ int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx) kbase_pm_unrequest_cores(kbdev, true, kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER)); - kbase_pm_release_l2_caches(kbdev); - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + kbase_pm_release_l2_caches(kbdev); + dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p", kctx); @@ -393,12 +391,12 @@ void kbase_clean_caches_done(struct kbase_device *kbdev) spin_lock_irqsave(&kbdev->hwcnt.lock, flags); /* Disable interrupt */ - spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags); + spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags); irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL); kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask & ~CLEAN_CACHES_COMPLETED, NULL); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags); /* Wakeup... */ if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) { diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c index 8416b80e8b77..b891b12a3299 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c @@ -148,8 +148,6 @@ static irqreturn_t kbase_gpu_irq_handler(int irq, void *data) return IRQ_HANDLED; } -KBASE_EXPORT_TEST_API(kbase_gpu_irq_handler); - static irq_handler_t kbase_handler_table[] = { [JOB_IRQ_TAG] = kbase_job_irq_handler, [MMU_IRQ_TAG] = kbase_mmu_irq_handler, diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c index 202dcfa384a2..f2167887229b 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -52,7 +52,8 @@ static void assign_and_activate_kctx_addr_space(struct kbase_device *kbdev, lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex); lockdep_assert_held(&js_devdata->runpool_mutex); - lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(¤t_as->transaction_mutex); + lockdep_assert_held(&js_devdata->runpool_irq.lock); js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr]; @@ -141,7 +142,8 @@ void kbase_backend_release_ctx_irq(struct kbase_device *kbdev, return; } - lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->as[as_nr].transaction_mutex); + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); js_per_as_data = &kbdev->js_data.runpool_irq.per_as_data[kctx->as_nr]; if (js_per_as_data->as_busy_refcount != 0) { @@ -217,7 +219,8 @@ static bool check_is_runpool_full(struct kbase_device *kbdev, is_runpool_full = (bool) (js_devdata->nr_all_contexts_running >= kbdev->nr_hw_address_spaces); - if (kctx && !kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) { + if (kctx != NULL && (kctx->jctx.sched_info.ctx.flags & + KBASE_CTX_FLAG_SUBMIT_DISABLED) == 0) { lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex); /* Contexts that submit might use less of the address spaces * available, due to HW workarounds. In which case, the runpool @@ -264,7 +267,7 @@ int kbase_backend_find_free_address_space(struct kbase_device *kbdev, return i; } - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); /* No address space currently free, see if we can release one */ for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { @@ -278,14 +281,16 @@ int kbase_backend_find_free_address_space(struct kbase_device *kbdev, /* Don't release privileged or active contexts, or contexts with * jobs running */ - if (as_kctx && !kbase_ctx_flag(as_kctx, KCTX_PRIVILEGED) && + if (as_kctx && !(as_kctx->jctx.sched_info.ctx.flags & + KBASE_CTX_FLAG_PRIVILEGED) && js_per_as_data->as_busy_refcount == 0) { if (!kbasep_js_runpool_retain_ctx_nolock(kbdev, as_kctx)) { WARN(1, "Failed to retain active context\n"); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, - flags); + spin_unlock_irqrestore( + &js_devdata->runpool_irq.lock, + flags); mutex_unlock(&js_devdata->runpool_mutex); mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); @@ -298,7 +303,8 @@ int kbase_backend_find_free_address_space(struct kbase_device *kbdev, * context we're about to release without violating lock * ordering */ - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, + flags); mutex_unlock(&js_devdata->runpool_mutex); mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); @@ -309,7 +315,7 @@ int kbase_backend_find_free_address_space(struct kbase_device *kbdev, kbasep_js_runpool_release_ctx_nolock(kbdev, as_kctx); - if (!kbase_ctx_flag(as_kctx, KCTX_SCHEDULED)) { + if (!as_js_kctx_info->ctx.is_scheduled) { kbasep_js_runpool_requeue_or_kill_ctx(kbdev, as_kctx, true); @@ -330,11 +336,11 @@ int kbase_backend_find_free_address_space(struct kbase_device *kbdev, mutex_lock(&js_kctx_info->ctx.jsctx_mutex); mutex_lock(&js_devdata->runpool_mutex); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); } } - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, 
flags); mutex_unlock(&js_devdata->runpool_mutex); mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); @@ -363,11 +369,12 @@ bool kbase_backend_use_ctx(struct kbase_device *kbdev, new_address_space = &kbdev->as[as_nr]; lockdep_assert_held(&js_devdata->runpool_mutex); - lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&new_address_space->transaction_mutex); + lockdep_assert_held(&js_devdata->runpool_irq.lock); assign_and_activate_kctx_addr_space(kbdev, kctx, new_address_space); - if (kbase_ctx_flag(kctx, KCTX_PRIVILEGED)) { + if ((js_kctx_info->ctx.flags & KBASE_CTX_FLAG_PRIVILEGED) != 0) { /* We need to retain it to keep the corresponding address space */ kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx); diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h index 08a7400e66d5..83d477898c5e 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h @@ -73,7 +73,8 @@ struct slot_rb { * @reset_timer: Timeout for soft-stops before the reset * @timeouts_updated: Have timeout values just been updated? * - * The hwaccess_lock (a spinlock) must be held when accessing this structure + * The kbasep_js_device_data::runpool_irq::lock (a spinlock) must be held when + * accessing this structure */ struct kbase_backend_data { struct slot_rb slot_rb[BASE_JM_MAX_NR_SLOTS]; diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c index 668258b4f303..00900a99a898 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c @@ -221,40 +221,19 @@ static void kbasep_job_slot_update_head_start_timestamp( /** * kbasep_trace_tl_nret_atom_lpu - Call nret_atom_lpu timeline tracepoint * @kbdev: kbase device - * @js: job slot + * @i: job slot * * Get kbase atom by calling kbase_gpu_inspect for given job slot. * Then use obtained katom and name of slot associated with the given * job slot number in tracepoint call to the instrumentation module * informing that given atom is no longer executed on given lpu (job slot). */ -static void kbasep_trace_tl_nret_atom_lpu(struct kbase_device *kbdev, int js) +static void kbasep_trace_tl_nret_atom_lpu(struct kbase_device *kbdev, int i) { - int i; - for (i = 0; - i < kbase_backend_nr_atoms_submitted(kbdev, js); - i++) { - struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); - - kbase_tlstream_tl_nret_atom_lpu(katom, - &kbdev->gpu_props.props.raw_props.js_features[js]); - } -} + struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, i, 0); -/** - * kbasep_trace_tl_event_lpu_softstop - Call event_lpu_softstop timeline - * tracepoint - * @kbdev: kbase device - * @js: job slot - * - * Make a tracepoint call to the instrumentation module informing that - * softstop happened on given lpu (job slot). 
- */ -static void kbasep_trace_tl_event_lpu_softstop(struct kbase_device *kbdev, - int js) -{ - kbase_tlstream_tl_event_lpu_softstop( - &kbdev->gpu_props.props.raw_props.js_features[js]); + kbase_tlstream_tl_nret_atom_lpu(katom, + &kbdev->gpu_props.props.raw_props.js_features[i]); } void kbase_job_done(struct kbase_device *kbdev, u32 done) @@ -283,7 +262,7 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) irq_throttle_cycles, NULL); } - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); while (done) { u32 failed = done >> 16; @@ -318,8 +297,7 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) NULL, 0); #endif - kbasep_trace_tl_event_lpu_softstop( - kbdev, i); + kbase_tlstream_aux_job_softstop(i); kbasep_trace_tl_nret_atom_lpu( kbdev, i); @@ -478,7 +456,7 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) end_timestamp); } - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); #if KBASE_GPU_RESET_EN if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) == KBASE_RESET_GPU_COMMITTED) { @@ -561,7 +539,7 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED; /* Mark the point where we issue the soft-stop command */ - kbase_tlstream_tl_event_atom_softstop_issue(target_katom); + kbase_tlstream_aux_issue_job_softstop(target_katom); if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) { int i; @@ -747,7 +725,7 @@ void kbase_backend_jm_kill_jobs_from_kctx(struct kbase_context *kctx) /* Cancel any remaining running jobs for this kctx */ mutex_lock(&kctx->jctx.lock); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); /* Invalidate all jobs in context, to prevent re-submitting */ for (i = 0; i < BASE_JD_ATOM_COUNT; i++) { @@ -759,7 +737,7 @@ void kbase_backend_jm_kill_jobs_from_kctx(struct kbase_context *kctx) for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) kbase_job_slot_hardstop(kctx, i, NULL); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); mutex_unlock(&kctx->jctx.lock); } @@ -770,13 +748,12 @@ void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx, int js = target_katom->slot_nr; int priority = target_katom->sched_priority; int i; - bool stop_sent = false; KBASE_DEBUG_ASSERT(kctx != NULL); kbdev = kctx->kbdev; KBASE_DEBUG_ASSERT(kbdev != NULL); - lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); for (i = 0; i < kbase_backend_nr_atoms_on_slot(kbdev, js); i++) { struct kbase_jd_atom *katom; @@ -788,14 +765,8 @@ void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx, if (katom->kctx != kctx) continue; - if (katom->sched_priority > priority) { - if (!stop_sent) - kbase_tlstream_tl_attrib_atom_priority_change( - target_katom); - + if (katom->sched_priority > priority) kbase_job_slot_softstop(kbdev, js, katom); - stop_sent = true; - } } } @@ -864,7 +835,7 @@ void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx) * policy queue either */ wait_event(kctx->jctx.zero_jobs_wait, kctx->jctx.job_nr == 0); wait_event(kctx->jctx.sched_info.ctx.is_scheduled_wait, - !kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + kctx->jctx.sched_info.ctx.is_scheduled == false); spin_lock_irqsave(&reset_data.lock, flags); if (reset_data.stage == 1) { @@ -974,7 +945,7 @@ static 
bool kbasep_check_for_afbc_on_slot(struct kbase_device *kbdev, bool ret = false; int i; - lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); /* When we have an atom the decision can be made straight away. */ if (target_katom) @@ -1063,7 +1034,7 @@ void kbase_job_slot_hardstop(struct kbase_context *kctx, int js, #if KBASE_GPU_RESET_EN /* We make the check for AFBC before evicting/stopping atoms. Note * that no other thread can modify the slots whilst we have the - * hwaccess_lock. */ + * runpool_irq lock. */ int needs_workaround_for_afbc = kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3542) && kbasep_check_for_afbc_on_slot(kbdev, kctx, js, @@ -1159,8 +1130,6 @@ static void kbase_debug_dump_registers(struct kbase_device *kbdev) { int i; - kbase_io_history_dump(kbdev); - dev_err(kbdev->dev, "Register state:"); dev_err(kbdev->dev, " GPU_IRQ_RAWSTAT=0x%08x GPU_STATUS=0x%08x", kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL), @@ -1193,14 +1162,13 @@ static void kbase_debug_dump_registers(struct kbase_device *kbdev) static void kbasep_reset_timeout_worker(struct work_struct *data) { - unsigned long flags; + unsigned long flags, mmu_flags; struct kbase_device *kbdev; int i; ktime_t end_timestamp = ktime_get(); struct kbasep_js_device_data *js_devdata; bool try_schedule = false; bool silent = false; - u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS; KBASE_DEBUG_ASSERT(data); @@ -1238,20 +1206,18 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) KBASE_DEBUG_ASSERT(kbdev->irq_reset_flush == false); - spin_lock_irqsave(&kbdev->hwcnt.lock, flags); - spin_lock(&kbdev->hwaccess_lock); - spin_lock(&kbdev->mmu_mask_change); + spin_lock_irqsave(&kbdev->mmu_mask_change, mmu_flags); /* We're about to flush out the IRQs and their bottom half's */ kbdev->irq_reset_flush = true; /* Disable IRQ to avoid IRQ handlers to kick in after releasing the * spinlock; this also clears any outstanding interrupts */ - kbase_pm_disable_interrupts_nolock(kbdev); - - spin_unlock(&kbdev->mmu_mask_change); - spin_unlock(&kbdev->hwaccess_lock); + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + kbase_pm_disable_interrupts(kbdev); spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + spin_unlock_irqrestore(&kbdev->mmu_mask_change, mmu_flags); + /* Ensure that any IRQ handlers have finished * Must be done without any locks IRQ handlers will take */ kbase_synchronize_irqs(kbdev); @@ -1262,16 +1228,6 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) /* The flush has completed so reset the active indicator */ kbdev->irq_reset_flush = false; - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TMIX_8463)) { - /* Ensure that L2 is not transitioning when we send the reset - * command */ - while (--max_loops && kbase_pm_get_trans_cores(kbdev, - KBASE_PM_CORE_L2)) - ; - - WARN(!max_loops, "L2 power transition timed out while trying to reset\n"); - } - mutex_lock(&kbdev->pm.lock); /* We hold the pm lock, so there ought to be a current policy */ KBASE_DEBUG_ASSERT(kbdev->pm.backend.pm_current_policy); @@ -1294,19 +1250,21 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) kbase_pm_init_hw(kbdev, 0); /* Complete any jobs that were still on the GPU */ - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); kbase_backend_reset(kbdev, &end_timestamp); kbase_pm_metrics_update(kbdev, NULL); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + 
spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); mutex_unlock(&kbdev->pm.lock); mutex_lock(&js_devdata->runpool_mutex); - mutex_lock(&kbdev->mmu_hw_mutex); /* Reprogram the GPU's MMU */ for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + struct kbase_as *as = &kbdev->as[i]; + + mutex_lock(&as->transaction_mutex); + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); if (js_devdata->runpool_irq.per_as_data[i].kctx) kbase_mmu_update( @@ -1314,9 +1272,9 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) else kbase_mmu_disable_as(kbdev, i); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + mutex_unlock(&as->transaction_mutex); } - mutex_unlock(&kbdev->mmu_hw_mutex); kbase_pm_enable_interrupts(kbdev); @@ -1424,9 +1382,9 @@ static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev) struct kbasep_js_device_data *js_devdata; js_devdata = &kbdev->js_data; - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); kbasep_try_reset_gpu_early_locked(kbdev); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); } /** @@ -1471,9 +1429,9 @@ bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev) struct kbasep_js_device_data *js_devdata; js_devdata = &kbdev->js_data; - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); ret = kbase_prepare_to_reset_gpu_locked(kbdev); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); return ret; } diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h index 89b1288a1f88..8f1e5615ea43 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h @@ -40,7 +40,7 @@ * calling this. * * The following locking conditions are made on the caller: - * - it must hold the hwaccess_lock + * - it must hold the kbasep_js_device_data::runpoool_irq::lock */ void kbase_job_submit_nolock(struct kbase_device *kbdev, struct kbase_jd_atom *katom, int js); @@ -74,7 +74,7 @@ static inline char *kbasep_make_job_slot_string(int js, char *js_string) * calling this. 
* * The following locking conditions are made on the caller: - * - it must hold the hwaccess_lock + * - it must hold the kbasep_js_device_data::runpoool_irq::lock */ void kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, @@ -91,7 +91,7 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, * @target_katom: Atom to stop * * The following locking conditions are made on the caller: - * - it must hold the hwaccess_lock + * - it must hold the kbasep_js_device_data::runpool_irq::lock */ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, int js, diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c index d7b4d3f10592..da7c4df7d277 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include @@ -57,7 +56,7 @@ static void kbase_gpu_enqueue_atom(struct kbase_device *kbdev, WARN_ON(SLOT_RB_ENTRIES(rb) >= SLOT_RB_SIZE); - lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); rb->entries[rb->write_idx & SLOT_RB_MASK].katom = katom; rb->write_idx++; @@ -89,7 +88,7 @@ static struct kbase_jd_atom *kbase_gpu_dequeue_atom(struct kbase_device *kbdev, return NULL; } - lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); katom = rb->entries[rb->read_idx & SLOT_RB_MASK].katom; @@ -109,7 +108,7 @@ struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js, { struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js]; - lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); if ((SLOT_RB_ENTRIES(rb) - 1) < idx) return NULL; /* idx out of range */ @@ -147,7 +146,7 @@ static bool kbase_gpu_atoms_submitted(struct kbase_device *kbdev, int js) { int i; - lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); for (i = 0; i < SLOT_RB_SIZE; i++) { struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); @@ -174,7 +173,7 @@ static bool kbase_gpu_atoms_submitted_any(struct kbase_device *kbdev) int js; int i; - lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { for (i = 0; i < SLOT_RB_SIZE; i++) { @@ -192,7 +191,7 @@ int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js) int nr = 0; int i; - lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); for (i = 0; i < SLOT_RB_SIZE; i++) { struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); @@ -210,7 +209,7 @@ int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js) int nr = 0; int i; - lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); for (i = 0; i < SLOT_RB_SIZE; i++) { if (kbase_gpu_inspect(kbdev, js, i)) @@ -226,7 +225,7 @@ static int kbase_gpu_nr_atoms_on_slot_min(struct kbase_device *kbdev, int js, int nr = 0; int i; - lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); for (i = 0; i < SLOT_RB_SIZE; i++) { struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); @@ -238,56 +237,6 @@ static int kbase_gpu_nr_atoms_on_slot_min(struct kbase_device *kbdev, int js, return nr; } -/** - * check_secure_atom - Check 
if the given atom is in the given secure state and - * has a ringbuffer state of at least - * KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION - * @katom: Atom pointer - * @secure: Desired secure state - * - * Return: true if atom is in the given state, false otherwise - */ -static bool check_secure_atom(struct kbase_jd_atom *katom, bool secure) -{ - if (katom->gpu_rb_state >= - KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION && - ((kbase_jd_katom_is_protected(katom) && secure) || - (!kbase_jd_katom_is_protected(katom) && !secure))) - return true; - - return false; -} - -/** - * kbase_gpu_check_secure_atoms - Check if there are any atoms in the given - * secure state in the ringbuffers of at least - * state - * KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE - * @kbdev: Device pointer - * @secure: Desired secure state - * - * Return: true if any atoms are in the given state, false otherwise - */ -static bool kbase_gpu_check_secure_atoms(struct kbase_device *kbdev, - bool secure) -{ - int js, i; - - for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { - for (i = 0; i < SLOT_RB_SIZE; i++) { - struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, - js, i); - - if (katom) { - if (check_secure_atom(katom, secure)) - return true; - } - } - } - - return false; -} - int kbase_backend_slot_free(struct kbase_device *kbdev, int js) { if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) != @@ -490,7 +439,7 @@ static bool kbasep_js_job_check_ref_cores(struct kbase_device *kbdev, recently_chosen_affinity); /* Note: this is where the caller must've taken the - * hwaccess_lock */ + * runpool_irq.lock */ /* Check for affinity violations - if there are any, * then we just ask the caller to requeue and try again @@ -637,12 +586,15 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev, kbase_pm_metrics_update(kbdev, end_timestamp); if (katom->core_req & BASE_JD_REQ_PERMON) - kbase_pm_release_gpu_cycle_counter_nolock(kbdev); + kbase_pm_release_gpu_cycle_counter(kbdev); /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ case KBASE_ATOM_GPU_RB_READY: /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ + case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_ENTRY: + /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ + case KBASE_ATOM_GPU_RB_WAITING_AFFINITY: kbase_js_affinity_release_slot_cores(kbdev, katom->slot_nr, katom->affinity); @@ -651,21 +603,7 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev, case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE: break; - case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION: - if (katom->protected_state.enter != - KBASE_ATOM_ENTER_PROTECTED_CHECK || - katom->protected_state.exit != - KBASE_ATOM_EXIT_PROTECTED_CHECK) - kbdev->protected_mode_transition = false; - - if (kbase_jd_katom_is_protected(katom) && - (katom->protected_state.enter == - KBASE_ATOM_ENTER_PROTECTED_IDLE_L2)) - kbase_vinstr_resume(kbdev->vinstr_ctx); - - /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ - - case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV: + case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_EXIT: /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ case KBASE_ATOM_GPU_RB_WAITING_BLOCKED: @@ -728,19 +666,11 @@ static int kbase_gpu_protected_mode_enter(struct kbase_device *kbdev) { int err = -EINVAL; - lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); WARN_ONCE(!kbdev->protected_ops, "Cannot enter protected mode: protected callbacks not specified.\n"); - /* - * When entering into protected mode, we must ensure that 
the - * GPU is not operating in coherent mode as well. This is to - * ensure that no protected memory can be leaked. - */ - if (kbdev->system_coherency == COHERENCY_ACE) - kbase_cache_set_coherency_mode(kbdev, COHERENCY_ACE_LITE); - if (kbdev->protected_ops) { /* Switch GPU to protected mode */ err = kbdev->protected_ops->protected_mode_enter(kbdev); @@ -757,7 +687,7 @@ static int kbase_gpu_protected_mode_enter(struct kbase_device *kbdev) static int kbase_gpu_protected_mode_reset(struct kbase_device *kbdev) { - lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); WARN_ONCE(!kbdev->protected_ops, "Cannot exit protected mode: protected callbacks not specified.\n"); @@ -765,146 +695,53 @@ static int kbase_gpu_protected_mode_reset(struct kbase_device *kbdev) if (!kbdev->protected_ops) return -EINVAL; + kbdev->protected_mode_transition = true; kbase_reset_gpu_silent(kbdev); return 0; } -static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev, +static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, struct kbase_jd_atom **katom, int idx, int js) { int err = 0; - switch (katom[idx]->protected_state.enter) { - case KBASE_ATOM_ENTER_PROTECTED_CHECK: - /* The checks in KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV - * should ensure that we are not already transitiong, and that - * there are no atoms currently on the GPU. */ - WARN_ON(kbdev->protected_mode_transition); - WARN_ON(kbase_gpu_atoms_submitted_any(kbdev)); - - kbdev->protected_mode_transition = true; - katom[idx]->protected_state.enter = - KBASE_ATOM_ENTER_PROTECTED_VINSTR; - - /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ - - case KBASE_ATOM_ENTER_PROTECTED_VINSTR: - if (kbase_vinstr_try_suspend(kbdev->vinstr_ctx) < 0) { - /* - * We can't switch now because - * the vinstr core state switch - * is not done yet. - */ - return -EAGAIN; - } - - /* Once reaching this point GPU must be - * switched to protected mode or vinstr - * re-enabled. */ - + switch (katom[idx]->exit_protected_state) { + case KBASE_ATOM_EXIT_PROTECTED_CHECK: /* - * Not in correct mode, begin protected mode switch. - * Entering protected mode requires us to power down the L2, - * and drop out of fully coherent mode. + * If the atom ahead of this one hasn't got to being + * submitted yet then bail. */ - katom[idx]->protected_state.enter = - KBASE_ATOM_ENTER_PROTECTED_IDLE_L2; - - kbase_pm_update_cores_state_nolock(kbdev); - - /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ - - case KBASE_ATOM_ENTER_PROTECTED_IDLE_L2: - /* Avoid unnecessary waiting on non-ACE platforms. */ - if (kbdev->current_gpu_coherency_mode == COHERENCY_ACE) { - if (kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2) || - kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_L2)) { - /* - * The L2 is still powered, wait for all the users to - * finish with it before doing the actual reset. - */ - return -EAGAIN; - } - } - - katom[idx]->protected_state.enter = - KBASE_ATOM_ENTER_PROTECTED_FINISHED; - - /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ - - case KBASE_ATOM_ENTER_PROTECTED_FINISHED: + if (idx == 1 && + (katom[0]->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED && + katom[0]->gpu_rb_state != KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB)) + return -EAGAIN; - /* No jobs running, so we can switch GPU mode right now. */ - err = kbase_gpu_protected_mode_enter(kbdev); + /* If we're not exiting protected mode then we're done here. 
*/ + if (!(kbase_gpu_in_protected_mode(kbdev) && + !kbase_jd_katom_is_protected(katom[idx]))) + return 0; /* - * Regardless of result, we are no longer transitioning - * the GPU. + * If there is a transition in progress, or work still + * on the GPU try again later. */ - kbdev->protected_mode_transition = false; - - if (err) { - /* - * Failed to switch into protected mode, resume - * vinstr core and fail atom. - */ - kbase_vinstr_resume(kbdev->vinstr_ctx); - katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID; - kbase_gpu_mark_atom_for_return(kbdev, katom[idx]); - /* Only return if head atom or previous atom - * already removed - as atoms must be returned - * in order. */ - if (idx == 0 || katom[0]->gpu_rb_state == - KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { - kbase_gpu_dequeue_atom(kbdev, js, NULL); - kbase_jm_return_atom_to_js(kbdev, katom[idx]); - } - return -EINVAL; - } - - /* Protected mode sanity checks. */ - KBASE_DEBUG_ASSERT_MSG( - kbase_jd_katom_is_protected(katom[idx]) == - kbase_gpu_in_protected_mode(kbdev), - "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)", - kbase_jd_katom_is_protected(katom[idx]), - kbase_gpu_in_protected_mode(kbdev)); - katom[idx]->gpu_rb_state = - KBASE_ATOM_GPU_RB_READY; - } - - return 0; -} - -static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, - struct kbase_jd_atom **katom, int idx, int js) -{ - int err = 0; - - - switch (katom[idx]->protected_state.exit) { - case KBASE_ATOM_EXIT_PROTECTED_CHECK: - /* The checks in KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV - * should ensure that we are not already transitiong, and that - * there are no atoms currently on the GPU. */ - WARN_ON(kbdev->protected_mode_transition); - WARN_ON(kbase_gpu_atoms_submitted_any(kbdev)); + if (kbdev->protected_mode_transition || + kbase_gpu_atoms_submitted_any(kbdev)) + return -EAGAIN; /* * Exiting protected mode requires a reset, but first the L2 * needs to be powered down to ensure it's not active when the * reset is issued. 
*/ - katom[idx]->protected_state.exit = + katom[idx]->exit_protected_state = KBASE_ATOM_EXIT_PROTECTED_IDLE_L2; - kbdev->protected_mode_transition = true; - kbase_pm_update_cores_state_nolock(kbdev); - /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ + case KBASE_ATOM_EXIT_PROTECTED_IDLE_L2: - if (kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2) || + if (kbase_pm_get_active_cores(kbdev, KBASE_PM_CORE_L2) || kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_L2)) { /* * The L2 is still powered, wait for all the users to @@ -912,7 +749,7 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, */ return -EAGAIN; } - katom[idx]->protected_state.exit = + katom[idx]->exit_protected_state = KBASE_ATOM_EXIT_PROTECTED_RESET; /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ @@ -920,10 +757,7 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, case KBASE_ATOM_EXIT_PROTECTED_RESET: /* Issue the reset to the GPU */ err = kbase_gpu_protected_mode_reset(kbdev); - if (err) { - kbdev->protected_mode_transition = false; - /* Failed to exit protected mode, fail atom */ katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID; kbase_gpu_mark_atom_for_return(kbdev, katom[idx]); @@ -941,7 +775,7 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, return -EINVAL; } - katom[idx]->protected_state.exit = + katom[idx]->exit_protected_state = KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT; /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ @@ -950,9 +784,6 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, if (kbase_reset_gpu_active(kbdev)) return -EAGAIN; - kbdev->protected_mode_transition = false; - kbdev->protected_mode = false; - /* protected mode sanity checks */ KBASE_DEBUG_ASSERT_MSG( kbase_jd_katom_is_protected(katom[idx]) == kbase_gpu_in_protected_mode(kbdev), @@ -967,11 +798,11 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, return 0; } -void kbase_backend_slot_update(struct kbase_device *kbdev) +void kbase_gpu_slot_update(struct kbase_device *kbdev) { int js; - lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { struct kbase_jd_atom *katom[2]; @@ -1000,26 +831,11 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) break; katom[idx]->gpu_rb_state = - KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV; + KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_EXIT; /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ - case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV: - if (kbase_gpu_check_secure_atoms(kbdev, - !kbase_jd_katom_is_protected( - katom[idx]))) - break; - - if (kbdev->protected_mode_transition) - break; - - katom[idx]->gpu_rb_state = - KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION; - - /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ - - case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION: - + case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_EXIT: /* * Exiting protected mode must be done before * the references on the cores are taken as @@ -1027,26 +843,10 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) * can't happen after the references for this * atom are taken. */ - - if (!kbase_gpu_in_protected_mode(kbdev) && - kbase_jd_katom_is_protected(katom[idx])) { - /* Atom needs to transition into protected mode. 
*/ - ret = kbase_jm_enter_protected_mode(kbdev, - katom, idx, js); - if (ret) - break; - } else if (kbase_gpu_in_protected_mode(kbdev) && - !kbase_jd_katom_is_protected(katom[idx])) { - /* Atom needs to transition out of protected mode. */ - ret = kbase_jm_exit_protected_mode(kbdev, - katom, idx, js); - if (ret) - break; - } - katom[idx]->protected_state.exit = - KBASE_ATOM_EXIT_PROTECTED_CHECK; - - /* Atom needs no protected mode transition. */ + ret = kbase_jm_exit_protected_mode(kbdev, + katom, idx, js); + if (ret) + break; katom[idx]->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE; @@ -1072,6 +872,7 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) break; } + cores_ready = kbasep_js_job_check_ref_cores(kbdev, js, katom[idx]); @@ -1097,6 +898,81 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) if (!kbase_gpu_rmu_workaround(kbdev, js)) break; + katom[idx]->gpu_rb_state = + KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_ENTRY; + + /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ + + case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_ENTRY: + + /* Only submit if head atom or previous atom + * already submitted */ + if (idx == 1 && + (katom[0]->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED && + katom[0]->gpu_rb_state != KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB)) + break; + + /* + * If the GPU is transitioning protected mode + * then bail now and we'll be called when the + * new state has settled. + */ + if (kbdev->protected_mode_transition) + break; + + if (!kbase_gpu_in_protected_mode(kbdev) && kbase_jd_katom_is_protected(katom[idx])) { + int err = 0; + + /* Not in correct mode, take action */ + if (kbase_gpu_atoms_submitted_any(kbdev)) { + /* + * We are not in the correct + * GPU mode for this job, and + * we can't switch now because + * there are jobs already + * running. + */ + break; + } + if (kbase_vinstr_try_suspend(kbdev->vinstr_ctx) < 0) { + /* + * We can't switch now because + * the vinstr core state switch + * is not done yet. + */ + break; + } + /* Once reaching this point GPU must be + * switched to protected mode or vinstr + * re-enabled. */ + + /* No jobs running, so we can switch GPU mode right now */ + err = kbase_gpu_protected_mode_enter(kbdev); + if (err) { + /* + * Failed to switch into protected mode, resume + * vinstr core and fail atom. 
+ */ + kbase_vinstr_resume(kbdev->vinstr_ctx); + katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID; + kbase_gpu_mark_atom_for_return(kbdev, katom[idx]); + /* Only return if head atom or previous atom + * already removed - as atoms must be returned + * in order */ + if (idx == 0 || katom[0]->gpu_rb_state == + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { + kbase_gpu_dequeue_atom(kbdev, js, NULL); + kbase_jm_return_atom_to_js(kbdev, katom[idx]); + } + break; + } + } + + /* Protected mode sanity checks */ + KBASE_DEBUG_ASSERT_MSG( + kbase_jd_katom_is_protected(katom[idx]) == kbase_gpu_in_protected_mode(kbdev), + "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)", + kbase_jd_katom_is_protected(katom[idx]), kbase_gpu_in_protected_mode(kbdev)); katom[idx]->gpu_rb_state = KBASE_ATOM_GPU_RB_READY; @@ -1161,9 +1037,10 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) void kbase_backend_run_atom(struct kbase_device *kbdev, struct kbase_jd_atom *katom) { - lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + kbase_gpu_enqueue_atom(kbdev, katom); - kbase_backend_slot_update(kbdev); + kbase_gpu_slot_update(kbdev); } bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js) @@ -1171,7 +1048,7 @@ bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js) struct kbase_jd_atom *katom; struct kbase_jd_atom *next_katom; - lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); katom = kbase_gpu_inspect(kbdev, js, 0); next_katom = kbase_gpu_inspect(kbdev, js, 1); @@ -1199,7 +1076,7 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0); struct kbase_context *kctx = katom->kctx; - lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6787) && completion_code != BASE_JD_EVENT_DONE && @@ -1364,42 +1241,31 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, #endif if (completion_code == BASE_JD_EVENT_STOPPED) - katom = kbase_jm_return_atom_to_js(kbdev, katom); + kbase_jm_return_atom_to_js(kbdev, katom); else - katom = kbase_jm_complete(kbdev, katom, end_timestamp); - - if (katom) { - /* Cross-slot dependency has now become runnable. Try to submit - * it. */ - - /* Check if there are lower priority jobs to soft stop */ - kbase_job_slot_ctx_priority_check_locked(kctx, katom); - - kbase_jm_try_kick(kbdev, 1 << katom->slot_nr); - } + kbase_jm_complete(kbdev, katom, end_timestamp); /* Job completion may have unblocked other atoms. 
Try to update all job * slots */ - kbase_backend_slot_update(kbdev); + kbase_gpu_slot_update(kbdev); } void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp) { int js; - lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { - int atom_idx = 0; int idx; - for (idx = 0; idx < SLOT_RB_SIZE; idx++) { + for (idx = 0; idx < 2; idx++) { struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, - js, atom_idx); + js, 0); bool keep_in_jm_rb = false; if (!katom) - break; + continue; if (katom->gpu_rb_state < KBASE_ATOM_GPU_RB_SUBMITTED) keep_in_jm_rb = true; @@ -1413,12 +1279,7 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp) */ if (keep_in_jm_rb) { katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED; - katom->affinity = 0; - katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK; - /* As the atom was not removed, increment the - * index so that we read the correct atom in the - * next iteration. */ - atom_idx++; + katom->exit_protected_state = KBASE_ATOM_EXIT_PROTECTED_CHECK; continue; } @@ -1431,9 +1292,6 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp) kbase_jm_complete(kbdev, katom, end_timestamp); } } - - kbdev->protected_mode_transition = false; - kbdev->protected_mode = false; } static inline void kbase_gpu_stop_atom(struct kbase_device *kbdev, @@ -1503,7 +1361,7 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, int stop_x_dep_idx0 = -1, stop_x_dep_idx1 = -1; - lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); katom_idx0 = kbase_gpu_inspect(kbdev, js, 0); katom_idx1 = kbase_gpu_inspect(kbdev, js, 1); @@ -1690,7 +1548,6 @@ void kbase_gpu_cacheclean(struct kbase_device *kbdev, /* Limit the number of loops to avoid a hang if the interrupt is missed */ u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS; - unsigned long flags; mutex_lock(&kbdev->cacheclean_lock); @@ -1717,10 +1574,8 @@ void kbase_gpu_cacheclean(struct kbase_device *kbdev, mutex_unlock(&kbdev->cacheclean_lock); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbase_pm_unrequest_cores(kbdev, false, katom->need_cache_flush_cores_retained); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } void kbase_backend_complete_wq(struct kbase_device *kbdev, @@ -1765,12 +1620,8 @@ void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev, base_jd_core_req core_req, u64 affinity, enum kbase_atom_coreref_state coreref_state) { - unsigned long flags; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbasep_js_job_check_deref_cores_nokatom(kbdev, core_req, affinity, coreref_state); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); if (!kbdev->pm.active_count) { mutex_lock(&kbdev->js_data.runpool_mutex); @@ -1789,7 +1640,7 @@ void kbase_gpu_dump_slots(struct kbase_device *kbdev) js_devdata = &kbdev->js_data; - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); dev_info(kbdev->dev, "kbase_gpu_dump_slots:\n"); @@ -1811,7 +1662,7 @@ void kbase_gpu_dump_slots(struct kbase_device *kbdev) } } - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); } diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h index 1e0e05ad3ea4..102d94be93d1 100644 --- 
a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -66,6 +66,17 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js, int idx); +/** + * kbase_gpu_slot_update - Update state based on slot ringbuffers + * + * @kbdev: Device pointer + * + * Inspect the jobs in the slot ringbuffers and update state. + * + * This will cause jobs to be submitted to hardware if they are unblocked + */ +void kbase_gpu_slot_update(struct kbase_device *kbdev); + /** * kbase_gpu_dump_slots - Print the contents of the slot ringbuffers * diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c index 54d8ddd80097..d665420ab380 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c @@ -94,8 +94,9 @@ bool kbase_js_choose_affinity(u64 * const affinity, base_jd_core_req core_req = katom->core_req; unsigned int num_core_groups = kbdev->gpu_props.num_core_groups; u64 core_availability_mask; + unsigned long flags; - lockdep_assert_held(&kbdev->hwaccess_lock); + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); core_availability_mask = kbase_pm_ca_get_core_mask(kbdev); @@ -104,6 +105,7 @@ bool kbase_js_choose_affinity(u64 * const affinity, * transitioning) then fail. */ if (0 == core_availability_mask) { + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); *affinity = 0; return false; } @@ -112,6 +114,7 @@ bool kbase_js_choose_affinity(u64 * const affinity, if ((core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) == BASE_JD_REQ_T) { + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); /* If the hardware supports XAFFINITY then we'll only enable * the tiler (which is the default so this is a no-op), * otherwise enable shader core 0. */ @@ -166,6 +169,8 @@ bool kbase_js_choose_affinity(u64 * const affinity, } } + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + /* * If no cores are currently available in the desired core group(s) * (core availability policy is transitioning) then fail. diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h index 35d9781ae092..fbffa3b40962 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h @@ -38,9 +38,10 @@ * violated. * * The following locking conditions are made on the caller - * - it must hold hwaccess_lock + * - it must hold kbasep_js_device_data.runpool_irq.lock */ -bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev, int js); +bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev, + int js); /** * kbase_js_choose_affinity - Compute affinity for a given job. 
@@ -70,7 +71,7 @@ bool kbase_js_choose_affinity(u64 * const affinity, * @affinity: The affinity mask to test * * The following locks must be held by the caller - * - hwaccess_lock + * - kbasep_js_device_data.runpool_irq.lock * * Return: true if the affinity would violate the restrictions */ @@ -86,7 +87,7 @@ bool kbase_js_affinity_would_violate(struct kbase_device *kbdev, int js, * @affinity: The cores to retain * * The following locks must be held by the caller - * - hwaccess_lock + * - kbasep_js_device_data.runpool_irq.lock */ void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js, u64 affinity); @@ -105,7 +106,7 @@ void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js, * %BASE_JM_SUBMIT_SLOTS. * * The following locks must be held by the caller - * - hwaccess_lock + * - kbasep_js_device_data.runpool_irq.lock */ void kbase_js_affinity_release_slot_cores(struct kbase_device *kbdev, int js, u64 affinity); diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c index b09d491c0ec5..a23deb4ca20c 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c @@ -104,7 +104,7 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer) js_devdata = &kbdev->js_data; /* Loop through the slots */ - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); for (s = 0; s < kbdev->gpu_props.num_job_slots; s++) { struct kbase_jd_atom *atom = NULL; @@ -168,8 +168,8 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer) * However, if it's about to be * increased then the new context can't * run any jobs until they take the - * hwaccess_lock, so it's OK to observe - * the older value. + * runpool_irq lock, so it's OK to + * observe the older value. * * Similarly, if it's about to be * decreased, the last job from another @@ -270,7 +270,7 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer) backend->timeouts_updated = false; - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); return HRTIMER_NORESTART; } @@ -285,9 +285,9 @@ void kbase_backend_ctx_count_changed(struct kbase_device *kbdev) if (!timer_callback_should_run(kbdev)) { /* Take spinlock to force synchronisation with timer */ - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); backend->timer_running = false; - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); /* From now on, return value of timer_callback_should_run() will * also cause the timer to not requeue itself. 
Its return value * cannot change, because it depends on variables updated with @@ -298,9 +298,9 @@ void kbase_backend_ctx_count_changed(struct kbase_device *kbdev) if (timer_callback_should_run(kbdev) && !backend->timer_running) { /* Take spinlock to force synchronisation with timer */ - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); backend->timer_running = true; - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); hrtimer_start(&backend->scheduling_timer, HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns), HRTIMER_MODE_REL); diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c index 08eea1c104e0..4a3572d971a6 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c @@ -97,30 +97,6 @@ static int write_cmd(struct kbase_device *kbdev, int as_nr, u32 cmd, return status; } -static void validate_protected_page_fault(struct kbase_device *kbdev, - struct kbase_context *kctx) -{ - /* GPUs which support (native) protected mode shall not report page - * fault addresses unless it has protected debug mode and protected - * debug mode is turned on */ - u32 protected_debug_mode = 0; - - if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) - return; - - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) { - protected_debug_mode = kbase_reg_read(kbdev, - GPU_CONTROL_REG(GPU_STATUS), - kctx) & GPU_DBGEN; - } - - if (!protected_debug_mode) { - /* fault_addr should never be reported in protected mode. - * However, we just continue by printing an error message */ - dev_err(kbdev->dev, "Fault address reported in protected mode\n"); - } -} - void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) { const int num_as = 16; @@ -165,7 +141,6 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) */ kctx = kbasep_js_runpool_lookup_ctx(kbdev, as_no); - /* find faulting address */ as->fault_addr = kbase_reg_read(kbdev, MMU_AS_REG(as_no, @@ -177,15 +152,6 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) AS_FAULTADDRESS_LO), kctx); - /* Mark the fault protected or not */ - as->protected_mode = kbdev->protected_mode; - - if (kbdev->protected_mode && as->fault_addr) - { - /* check if address reporting is allowed */ - validate_protected_page_fault(kbdev, kctx); - } - /* report the fault to debugfs */ kbase_as_fault_debugfs_new(kbdev, as_no); @@ -229,9 +195,10 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) } /* Process the interrupt for this address space */ - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags); kbase_mmu_interrupt_process(kbdev, kctx, as); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, + flags); } /* reenable interrupts */ @@ -301,8 +268,6 @@ int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as, { int ret; - lockdep_assert_held(&kbdev->mmu_hw_mutex); - if (op == AS_COMMAND_UNLOCK) { /* Unlock doesn't require a lock first */ ret = write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx); diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c index f36fdeafdb1e..0919969da523 100644 
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c @@ -31,7 +31,7 @@ #endif /* CONFIG_MALI_PLATFORM_DEVICETREE */ #include -#include +#include #include #include @@ -65,8 +65,6 @@ static int rk_restore_clk_gpu(struct kbase_device *kbdev) return ret; } -static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data); - void kbase_pm_register_access_enable(struct kbase_device *kbdev) { struct kbase_pm_callback_conf *callbacks; @@ -100,14 +98,6 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev) mutex_init(&kbdev->pm.lock); - kbdev->pm.backend.gpu_poweroff_wait_wq = alloc_workqueue("kbase_pm_poweroff_wait", - WQ_HIGHPRI | WQ_UNBOUND, 1); - if (!kbdev->pm.backend.gpu_poweroff_wait_wq) - return -ENOMEM; - - INIT_WORK(&kbdev->pm.backend.gpu_poweroff_wait_work, - kbase_pm_gpu_poweroff_wait_wq); - kbdev->pm.backend.gpu_powered = false; kbdev->pm.suspending = false; #ifdef CONFIG_MALI_DEBUG @@ -162,11 +152,10 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev) init_waitqueue_head(&kbdev->pm.zero_active_count_wait); kbdev->pm.active_count = 0; + spin_lock_init(&kbdev->pm.power_change_lock); spin_lock_init(&kbdev->pm.backend.gpu_cycle_counter_requests_lock); spin_lock_init(&kbdev->pm.backend.gpu_powered_lock); - init_waitqueue_head(&kbdev->pm.backend.poweroff_wait); - if (kbase_pm_ca_init(kbdev) != 0) goto workq_fail; @@ -204,126 +193,50 @@ void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume) rk_restore_clk_gpu(kbdev); } -static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data) +bool kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend) { - struct kbase_device *kbdev = container_of(data, struct kbase_device, - pm.backend.gpu_poweroff_wait_work); - struct kbase_pm_device_data *pm = &kbdev->pm; - struct kbase_pm_backend_data *backend = &pm->backend; - struct kbasep_js_device_data *js_devdata = &kbdev->js_data; unsigned long flags; + bool cores_are_available; lockdep_assert_held(&kbdev->pm.lock); D("to slowdown clk_gpu before poweroff pm_cores."); rk_slowdown_clk_gpu_before_poweroff_cores(kbdev); - /* Wait for power transitions to complete. 
We do this with no locks held - * so that we don't deadlock with any pending workqueues */ + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + + /* Force all cores off */ + kbdev->pm.backend.desired_shader_state = 0; + kbdev->pm.backend.desired_tiler_state = 0; + + /* Force all cores to be unavailable, in the situation where + * transitions are in progress for some cores but not others, + * and kbase_pm_check_transitions_nolock can not immediately + * power off the cores */ + kbdev->shader_available_bitmap = 0; + kbdev->tiler_available_bitmap = 0; + kbdev->l2_available_bitmap = 0; + KBASE_TIMELINE_PM_CHECKTRANS(kbdev, SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_START); - kbase_pm_check_transitions_sync(kbdev); + cores_are_available = kbase_pm_check_transitions_nolock(kbdev); KBASE_TIMELINE_PM_CHECKTRANS(kbdev, SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_END); + /* Don't need 'cores_are_available', because we don't return anything */ + CSTD_UNUSED(cores_are_available); - mutex_lock(&js_devdata->runpool_mutex); - mutex_lock(&kbdev->pm.lock); + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); - if (!backend->poweron_required) { - WARN_ON(kbdev->l2_available_bitmap || - kbdev->shader_available_bitmap || - kbdev->tiler_available_bitmap); - - /* Consume any change-state events */ - kbase_timeline_pm_check_handle_event(kbdev, - KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED); - - /* Disable interrupts and turn the clock off */ - if (!kbase_pm_clock_off(kbdev, backend->poweroff_is_suspend)) { - /* - * Page/bus faults are pending, must drop locks to - * process. Interrupts are disabled so no more faults - * should be generated at this point. - */ - mutex_unlock(&kbdev->pm.lock); - mutex_unlock(&js_devdata->runpool_mutex); - kbase_flush_mmu_wqs(kbdev); - mutex_lock(&js_devdata->runpool_mutex); - mutex_lock(&kbdev->pm.lock); - - /* Turn off clock now that fault have been handled. We - * dropped locks so poweron_required may have changed - - * power back on if this is the case.*/ - if (backend->poweron_required) - kbase_pm_clock_on(kbdev, false); - else - WARN_ON(!kbase_pm_clock_off(kbdev, - backend->poweroff_is_suspend)); - } - } - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - backend->poweroff_wait_in_progress = false; - if (backend->poweron_required) { - backend->poweron_required = false; - kbase_pm_update_cores_state_nolock(kbdev); - } - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - mutex_unlock(&kbdev->pm.lock); - mutex_unlock(&js_devdata->runpool_mutex); - - wake_up(&kbdev->pm.backend.poweroff_wait); -} - -void kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend) -{ - unsigned long flags; + /* NOTE: We won't wait to reach the core's desired state, even if we're + * powering off the GPU itself too. 
It's safe to cut the power whilst + * they're transitioning to off, because the cores should be idle and + * all cache flushes should already have occurred */ - lockdep_assert_held(&kbdev->pm.lock); - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - if (!kbdev->pm.backend.poweroff_wait_in_progress) { - /* Force all cores off */ - kbdev->pm.backend.desired_shader_state = 0; - kbdev->pm.backend.desired_tiler_state = 0; - - /* Force all cores to be unavailable, in the situation where - * transitions are in progress for some cores but not others, - * and kbase_pm_check_transitions_nolock can not immediately - * power off the cores */ - kbdev->shader_available_bitmap = 0; - kbdev->tiler_available_bitmap = 0; - kbdev->l2_available_bitmap = 0; - - kbdev->pm.backend.poweroff_wait_in_progress = true; - kbdev->pm.backend.poweroff_is_suspend = is_suspend; - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - /*Kick off wq here. Callers will have to wait*/ - queue_work(kbdev->pm.backend.gpu_poweroff_wait_wq, - &kbdev->pm.backend.gpu_poweroff_wait_work); - } else { - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - } -} - -static bool is_poweroff_in_progress(struct kbase_device *kbdev) -{ - bool ret; - unsigned long flags; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - ret = (kbdev->pm.backend.poweroff_wait_in_progress == false); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - return ret; -} - -void kbase_pm_wait_for_poweroff_complete(struct kbase_device *kbdev) -{ - wait_event_killable(kbdev->pm.backend.poweroff_wait, - is_poweroff_in_progress(kbdev)); + /* Consume any change-state events */ + kbase_timeline_pm_check_handle_event(kbdev, + KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED); + /* Disable interrupts and turn the clock off */ + return kbase_pm_clock_off(kbdev, is_suspend); } int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev, @@ -395,7 +308,15 @@ void kbase_hwaccess_pm_halt(struct kbase_device *kbdev) mutex_lock(&kbdev->pm.lock); kbase_pm_cancel_deferred_poweroff(kbdev); - kbase_pm_do_poweroff(kbdev, false); + if (!kbase_pm_do_poweroff(kbdev, false)) { + /* Page/bus faults are pending, must drop pm.lock to process. 
+ * Interrupts are disabled so no more faults should be + * generated at this point */ + mutex_unlock(&kbdev->pm.lock); + kbase_flush_mmu_wqs(kbdev); + mutex_lock(&kbdev->pm.lock); + WARN_ON(!kbase_pm_do_poweroff(kbdev, false)); + } mutex_unlock(&kbdev->pm.lock); } @@ -413,8 +334,6 @@ void kbase_hwaccess_pm_term(struct kbase_device *kbdev) /* Shut down the metrics subsystem */ kbasep_pm_metrics_term(kbdev); - - destroy_workqueue(kbdev->pm.backend.gpu_poweroff_wait_wq); } void kbase_pm_power_changed(struct kbase_device *kbdev) @@ -424,8 +343,9 @@ void kbase_pm_power_changed(struct kbase_device *kbdev) KBASE_TIMELINE_PM_CHECKTRANS(kbdev, SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_START); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); cores_are_available = kbase_pm_check_transitions_nolock(kbdev); + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); KBASE_TIMELINE_PM_CHECKTRANS(kbdev, SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_END); @@ -435,9 +355,10 @@ void kbase_pm_power_changed(struct kbase_device *kbdev) kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED); - kbase_backend_slot_update(kbdev); + spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags); + kbase_gpu_slot_update(kbdev); + spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags); } - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, @@ -472,16 +393,21 @@ void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev) * off prematurely) */ mutex_lock(&js_devdata->runpool_mutex); mutex_lock(&kbdev->pm.lock); - kbase_pm_cancel_deferred_poweroff(kbdev); - kbase_pm_do_poweroff(kbdev, true); + if (!kbase_pm_do_poweroff(kbdev, true)) { + /* Page/bus faults are pending, must drop pm.lock to process. + * Interrupts are disabled so no more faults should be + * generated at this point */ + mutex_unlock(&kbdev->pm.lock); + kbase_flush_mmu_wqs(kbdev); + mutex_lock(&kbdev->pm.lock); + WARN_ON(!kbase_pm_do_poweroff(kbdev, false)); + } kbase_backend_timer_suspend(kbdev); mutex_unlock(&kbdev->pm.lock); mutex_unlock(&js_devdata->runpool_mutex); - - kbase_pm_wait_for_poweroff_complete(kbdev); } void kbase_hwaccess_pm_resume(struct kbase_device *kbdev) diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c index e8cd8cbd35af..4b903cca020b 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2013-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -95,10 +95,10 @@ void kbase_pm_ca_set_policy(struct kbase_device *kbdev, mutex_lock(&kbdev->pm.lock); /* Remove the policy to prevent IRQ handlers from working on it */ - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); old_policy = kbdev->pm.backend.ca_current_policy; kbdev->pm.backend.ca_current_policy = NULL; - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); if (old_policy->term) old_policy->term(kbdev); @@ -106,7 +106,7 @@ void kbase_pm_ca_set_policy(struct kbase_device *kbdev, if (new_policy->init) new_policy->init(kbdev); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); kbdev->pm.backend.ca_current_policy = new_policy; /* If any core power state changes were previously attempted, but @@ -118,7 +118,7 @@ void kbase_pm_ca_set_policy(struct kbase_device *kbdev, kbdev->shader_ready_bitmap, kbdev->shader_transitioning_bitmap); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); mutex_unlock(&kbdev->pm.lock); @@ -131,7 +131,7 @@ KBASE_EXPORT_TEST_API(kbase_pm_ca_set_policy); u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev) { - lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->pm.power_change_lock); /* All cores must be enabled when instrumentation is in use */ if (kbdev->pm.backend.instr_enabled) @@ -151,7 +151,7 @@ KBASE_EXPORT_TEST_API(kbase_pm_ca_get_core_mask); void kbase_pm_ca_update_core_status(struct kbase_device *kbdev, u64 cores_ready, u64 cores_transitioning) { - lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->pm.power_change_lock); if (kbdev->pm.backend.ca_current_policy != NULL) kbdev->pm.backend.ca_current_policy->update_core_status(kbdev, @@ -163,17 +163,20 @@ void kbase_pm_ca_instr_enable(struct kbase_device *kbdev) { unsigned long flags; - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); kbdev->pm.backend.instr_enabled = true; kbase_pm_update_cores_state_nolock(kbdev); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); } void kbase_pm_ca_instr_disable(struct kbase_device *kbdev) { - lockdep_assert_held(&kbdev->hwaccess_lock); + unsigned long flags; + + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); kbdev->pm.backend.instr_enabled = false; kbase_pm_update_cores_state_nolock(kbdev); + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); } diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h index 99fb62d006bc..e8f96fe6c514 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h @@ -207,17 +207,6 @@ union kbase_pm_ca_policy_data { * power_change_lock should be held when accessing, * unless there is no way the timer can be running (eg * hrtimer_cancel() was called immediately before) - * @poweroff_wait_in_progress: true if a wait for GPU power off is in progress. - * hwaccess_lock must be held when accessing - * @poweron_required: true if a GPU power on is required. 
Should only be set - * when poweroff_wait_in_progress is true, and therefore the - * GPU can not immediately be powered on. pm.lock must be - * held when accessing - * @poweroff_is_suspend: true if the GPU is being powered off due to a suspend - * request. pm.lock must be held when accessing - * @gpu_poweroff_wait_wq: workqueue for waiting for GPU to power off - * @gpu_poweroff_wait_work: work item for use with @gpu_poweroff_wait_wq - * @poweroff_wait: waitqueue for waiting for @gpu_poweroff_wait_work to complete * @callback_power_on: Callback when the GPU needs to be turned on. See * &struct kbase_pm_callback_conf * @callback_power_off: Callback when the GPU may be turned off. See @@ -292,15 +281,6 @@ struct kbase_pm_backend_data { bool poweroff_timer_needed; bool poweroff_timer_running; - bool poweroff_wait_in_progress; - bool poweron_required; - bool poweroff_is_suspend; - - struct workqueue_struct *gpu_poweroff_wait_wq; - struct work_struct gpu_poweroff_wait_work; - - wait_queue_head_t poweroff_wait; - int (*callback_power_on)(struct kbase_device *kbdev); void (*callback_power_off)(struct kbase_device *kbdev); void (*callback_power_suspend)(struct kbase_device *kbdev); diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c index a162ff8bc0e4..046ebcb7b508 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c @@ -151,7 +151,7 @@ static void kbase_pm_invoke(struct kbase_device *kbdev, u32 lo = cores & 0xFFFFFFFF; u32 hi = (cores >> 32) & 0xFFFFFFFF; - lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->pm.power_change_lock); reg = core_type_to_reg(core_type, action); @@ -407,7 +407,7 @@ static bool kbase_pm_transition_core_type(struct kbase_device *kbdev, u64 powering_on_trans; u64 desired_state_in_use; - lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->pm.power_change_lock); /* Get current state */ present = kbase_pm_get_present_cores(kbdev, type); @@ -451,7 +451,7 @@ static bool kbase_pm_transition_core_type(struct kbase_device *kbdev, kbdev->pm.backend.l2_powered = 0; } - if (desired_state == ready && (trans == 0)) + if (desired_state_in_use == ready && (trans == 0)) return true; /* Restrict the cores to those that are actually present */ @@ -562,7 +562,7 @@ MOCKABLE(kbase_pm_check_transitions_nolock) (struct kbase_device *kbdev) u64 prev_l2_available_bitmap; KBASE_DEBUG_ASSERT(NULL != kbdev); - lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->pm.power_change_lock); spin_lock(&kbdev->pm.backend.gpu_powered_lock); if (kbdev->pm.backend.gpu_powered == false) { @@ -734,7 +734,6 @@ MOCKABLE(kbase_pm_check_transitions_nolock) (struct kbase_device *kbdev) KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED); /* Wake slow-path waiters. Job scheduler does not use this. */ KBASE_TRACE_ADD(kbdev, PM_WAKE_WAITERS, NULL, NULL, 0u, 0); - wake_up(&kbdev->pm.backend.gpu_in_desired_state_wait); } @@ -791,13 +790,11 @@ void kbase_pm_check_transitions_sync(struct kbase_device *kbdev) /* Force the transition to be checked and reported - the cores may be * 'available' (for job submission) but not fully powered up. 
*/ - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); cores_are_available = kbase_pm_check_transitions_nolock(kbdev); - /* Don't need 'cores_are_available', because we don't return anything */ CSTD_UNUSED(cores_are_available); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); timeout = jiffies + PM_TIMEOUT; @@ -868,12 +865,12 @@ void kbase_pm_enable_interrupts(struct kbase_device *kbdev) * Clear all interrupts, * and unmask them all. */ - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL, NULL); kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), GPU_IRQ_REG_ALL, NULL); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF, NULL); @@ -885,18 +882,21 @@ void kbase_pm_enable_interrupts(struct kbase_device *kbdev) KBASE_EXPORT_TEST_API(kbase_pm_enable_interrupts); -void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev) +void kbase_pm_disable_interrupts(struct kbase_device *kbdev) { + unsigned long flags; + KBASE_DEBUG_ASSERT(NULL != kbdev); /* * Mask all interrupts, * and clear them all. */ - lockdep_assert_held(&kbdev->hwaccess_lock); - + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), 0, NULL); kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL, NULL); + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0, NULL); kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF, NULL); @@ -905,18 +905,8 @@ void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev) kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF, NULL); } -void kbase_pm_disable_interrupts(struct kbase_device *kbdev) -{ - unsigned long flags; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbase_pm_disable_interrupts_nolock(kbdev); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -} - KBASE_EXPORT_TEST_API(kbase_pm_disable_interrupts); - /* * pmu layout: * 0x0000: PMU TAG (RO) (0xCAFECAFE) @@ -969,10 +959,12 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume) kbase_pm_init_hw(kbdev, PM_ENABLE_IRQS); } - mutex_lock(&kbdev->mmu_hw_mutex); /* Reprogram the GPU's MMU */ for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + struct kbase_as *as = &kbdev->as[i]; + + mutex_lock(&as->transaction_mutex); + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); if (js_devdata->runpool_irq.per_as_data[i].kctx) kbase_mmu_update( @@ -980,9 +972,9 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume) else kbase_mmu_disable_as(kbdev, i); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + mutex_unlock(&as->transaction_mutex); } - mutex_unlock(&kbdev->mmu_hw_mutex); /* Lastly, enable the interrupts */ kbase_pm_enable_interrupts(kbdev); @@ -1219,7 +1211,7 @@ static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev) void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev) { - if ((kbdev->current_gpu_coherency_mode == COHERENCY_ACE) && + if ((kbdev->system_coherency == COHERENCY_ACE) && 
!kbdev->cci_snoop_enabled) { #ifdef CONFIG_ARM64 if (kbdev->snoop_enable_smc != 0) @@ -1232,7 +1224,8 @@ void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev) void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev) { - if (kbdev->cci_snoop_enabled) { + if ((kbdev->system_coherency == COHERENCY_ACE) && + kbdev->cci_snoop_enabled) { #ifdef CONFIG_ARM64 if (kbdev->snoop_disable_smc != 0) { mali_cci_flush_l2(kbdev); @@ -1362,7 +1355,7 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) kbdev->pm.backend.reset_done = false; /* The cores should be made unavailable due to the reset */ - spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); + spin_lock_irqsave(&kbdev->pm.power_change_lock, irq_flags); if (kbdev->shader_available_bitmap != 0u) KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL, NULL, 0u, (u32)0u); @@ -1372,7 +1365,7 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) kbdev->shader_available_bitmap = 0u; kbdev->tiler_available_bitmap = 0u; kbdev->l2_available_bitmap = 0u; - spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, irq_flags); /* Soft reset the GPU */ if (kbdev->protected_mode_support && @@ -1381,11 +1374,12 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) else err = kbase_pm_reset_do_normal(kbdev); - spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); + spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, irq_flags); if (kbdev->protected_mode) resume_vinstr = true; + kbdev->protected_mode_transition = false; kbdev->protected_mode = false; - spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); + spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, irq_flags); if (err) goto exit; @@ -1394,6 +1388,7 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) kbase_pm_hw_issues_detect(kbdev); kbase_pm_hw_issues_apply(kbdev); + kbase_cache_set_coherency_mode(kbdev, kbdev->system_coherency); /* Sanity check protected mode was left after reset */ @@ -1423,10 +1418,7 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) &kbdev->pm.backend.gpu_cycle_counter_requests_lock, irq_flags); - spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); kbase_pm_release_l2_caches(kbdev); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); - kbase_pm_disable_interrupts(kbdev); } @@ -1508,14 +1500,12 @@ void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev) KBASE_EXPORT_TEST_API(kbase_pm_request_gpu_cycle_counter_l2_is_on); -void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev) +void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev) { unsigned long flags; KBASE_DEBUG_ASSERT(kbdev != NULL); - lockdep_assert_held(&kbdev->hwaccess_lock); - spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock, flags); @@ -1534,15 +1524,4 @@ void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev) kbase_pm_release_l2_caches(kbdev); } -void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev) -{ - unsigned long flags; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - - kbase_pm_release_gpu_cycle_counter_nolock(kbdev); - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -} - KBASE_EXPORT_TEST_API(kbase_pm_release_gpu_cycle_counter); diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h index ad2667ae7c21..aa51b8cdef8f 100644 --- 
a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h @@ -167,16 +167,6 @@ void kbase_pm_enable_interrupts(struct kbase_device *kbdev); */ void kbase_pm_disable_interrupts(struct kbase_device *kbdev); -/** - * kbase_pm_disable_interrupts_nolock - Version of kbase_pm_disable_interrupts() - * that does not take the hwaccess_lock - * - * Caller must hold the hwaccess_lock. - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) - */ -void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev); - /** * kbase_pm_init_hw - Initialize the hardware. * @kbdev: The kbase device structure for the device (must be a valid pointer) @@ -383,35 +373,14 @@ void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev); * kbase_pm_release_gpu_cycle_counter - Mark that the GPU cycle counter is no * longer in use * - * If the caller is the last caller then the GPU cycle counters will be - * disabled. A request must have been made before a call to this. - * - * Caller must not hold the hwaccess_lock, as it will be taken in this function. - * If the caller is already holding this lock then - * kbase_pm_release_gpu_cycle_counter_nolock() must be used instead. + * If the caller is the + * last caller then the GPU cycle counters will be disabled. A request must have + * been made before a call to this. * * @kbdev: The kbase device structure for the device (must be a valid pointer) */ void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev); -/** - * kbase_pm_release_gpu_cycle_counter_nolock - Version of kbase_pm_release_gpu_cycle_counter() - * that does not take hwaccess_lock - * - * Caller must hold the hwaccess_lock. - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) - */ -void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev); - -/** - * kbase_pm_wait_for_poweroff_complete - Wait for the poweroff workqueue to - * complete - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) - */ -void kbase_pm_wait_for_poweroff_complete(struct kbase_device *kbdev); - /** * kbase_pm_register_access_enable - Enable access to GPU registers * @@ -485,8 +454,12 @@ void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume); * pointer) * @is_suspend: true if power off due to suspend, * false otherwise + * Return: + * true if power was turned off, else + * false if power can not be turned off due to pending page/bus + * fault workers. 
Caller must flush MMU workqueues and retry */ -void kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend); +bool kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend); #ifdef CONFIG_PM_DEVFREQ void kbase_pm_get_dvfs_utilisation(struct kbase_device *kbdev, @@ -523,7 +496,7 @@ void kbase_pm_power_changed(struct kbase_device *kbdev); * @kbdev: The kbase device structure for the device (must be a valid pointer) * @now: Pointer to the timestamp of the change, or NULL to use current time * - * Caller must hold hwaccess_lock + * Caller must hold runpool_irq.lock */ void kbase_pm_metrics_update(struct kbase_device *kbdev, ktime_t *now); diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c index 7613e1d39fdf..ae632564b96a 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -362,15 +362,14 @@ static void kbase_pm_metrics_active_calc(struct kbase_device *kbdev) int device_nr = (katom->core_req & BASE_JD_REQ_SPECIFIC_COHERENT_GROUP) ? katom->device_nr : 0; - if (!WARN_ON(device_nr >= 2)) - kbdev->pm.backend.metrics. - active_cl_ctx[device_nr] = 1; + WARN_ON(device_nr >= 2); + kbdev->pm.backend.metrics.active_cl_ctx[ + device_nr] = 1; } else { /* Slot 2 should not be running non-compute * atoms */ - if (!WARN_ON(js >= 2)) - kbdev->pm.backend.metrics. - active_gl_ctx[js] = 1; + WARN_ON(js >= 2); + kbdev->pm.backend.metrics.active_gl_ctx[js] = 1; } kbdev->pm.backend.metrics.gpu_active = true; } @@ -383,7 +382,7 @@ void kbase_pm_metrics_update(struct kbase_device *kbdev, ktime_t *timestamp) unsigned long flags; ktime_t now; - lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c index 92457e8c0054..4d006028089a 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c @@ -156,7 +156,7 @@ static void kbasep_pm_do_poweroff_cores(struct kbase_device *kbdev) u64 prev_shader_state = kbdev->pm.backend.desired_shader_state; u64 prev_tiler_state = kbdev->pm.backend.desired_tiler_state; - lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->pm.power_change_lock); kbdev->pm.backend.desired_shader_state &= ~kbdev->pm.backend.shader_poweroff_pending; @@ -193,7 +193,7 @@ kbasep_pm_do_gpu_poweroff_callback(struct hrtimer *timer) kbdev = container_of(timer, struct kbase_device, pm.backend.gpu_poweroff_timer); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); /* It is safe for this call to do nothing if the work item is already * queued. 
The worker function will read the must up-to-date state of @@ -220,7 +220,7 @@ kbasep_pm_do_gpu_poweroff_callback(struct hrtimer *timer) } if (kbdev->pm.backend.poweroff_timer_needed) { - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); hrtimer_add_expires(timer, kbdev->pm.gpu_poweroff_time); @@ -228,7 +228,7 @@ kbasep_pm_do_gpu_poweroff_callback(struct hrtimer *timer) } kbdev->pm.backend.poweroff_timer_running = false; - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); return HRTIMER_NORESTART; } @@ -258,13 +258,13 @@ static void kbasep_pm_do_gpu_poweroff_wq(struct work_struct *data) KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_poweroff_pending == 0); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); /* Only power off the GPU if a request is still pending */ if (!kbdev->pm.backend.pm_current_policy->get_core_active(kbdev)) do_poweroff = true; - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); if (do_poweroff) { kbdev->pm.backend.poweroff_timer_needed = false; @@ -272,7 +272,14 @@ static void kbasep_pm_do_gpu_poweroff_wq(struct work_struct *data) kbdev->pm.backend.poweroff_timer_running = false; /* Power off the GPU */ - kbase_pm_do_poweroff(kbdev, false); + if (!kbase_pm_do_poweroff(kbdev, false)) { + /* GPU can not be powered off at present */ + kbdev->pm.backend.poweroff_timer_needed = true; + kbdev->pm.backend.poweroff_timer_running = true; + hrtimer_start(&kbdev->pm.backend.gpu_poweroff_timer, + kbdev->pm.gpu_poweroff_time, + HRTIMER_MODE_REL); + } } mutex_unlock(&kbdev->pm.lock); @@ -318,7 +325,7 @@ void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev) kbdev->pm.backend.poweroff_timer_needed = false; hrtimer_cancel(&kbdev->pm.backend.gpu_poweroff_timer); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); kbdev->pm.backend.poweroff_timer_running = false; /* If wq is already running but is held off by pm.lock, make sure it has @@ -329,7 +336,7 @@ void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev) kbdev->pm.backend.tiler_poweroff_pending = 0; kbdev->pm.backend.shader_poweroff_pending_time = 0; - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); } void kbase_pm_update_active(struct kbase_device *kbdev) @@ -344,7 +351,7 @@ void kbase_pm_update_active(struct kbase_device *kbdev) /* pm_current_policy will never be NULL while pm.lock is held */ KBASE_DEBUG_ASSERT(backend->pm_current_policy); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock_irqsave(&pm->power_change_lock, flags); active = backend->pm_current_policy->get_core_active(kbdev); @@ -356,7 +363,7 @@ void kbase_pm_update_active(struct kbase_device *kbdev) /* If a request was pending then the GPU was still * powered, so no need to continue */ if (!kbdev->poweroff_pending) { - spin_unlock_irqrestore(&kbdev->hwaccess_lock, + spin_unlock_irqrestore(&pm->power_change_lock, flags); return; } @@ -372,14 +379,10 @@ void kbase_pm_update_active(struct kbase_device *kbdev) HRTIMER_MODE_REL); } + spin_unlock_irqrestore(&pm->power_change_lock, flags); + /* Power on the GPU and any cores requested by the policy */ - if (pm->backend.poweroff_wait_in_progress) { - pm->backend.poweron_required = true; - 
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - } else { - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - kbase_pm_do_poweron(kbdev, false); - } + kbase_pm_do_poweron(kbdev, false); } else { /* It is an error for the power policy to power off the GPU * when there are contexts active */ @@ -411,17 +414,35 @@ void kbase_pm_update_active(struct kbase_device *kbdev) pm->gpu_poweroff_time, HRTIMER_MODE_REL); } - spin_unlock_irqrestore(&kbdev->hwaccess_lock, + spin_unlock_irqrestore(&pm->power_change_lock, flags); } else { - spin_unlock_irqrestore(&kbdev->hwaccess_lock, + spin_unlock_irqrestore(&pm->power_change_lock, flags); /* Power off the GPU immediately */ - kbase_pm_do_poweroff(kbdev, false); + if (!kbase_pm_do_poweroff(kbdev, false)) { + /* GPU can not be powered off at present + */ + spin_lock_irqsave( + &pm->power_change_lock, + flags); + backend->poweroff_timer_needed = true; + if (!backend->poweroff_timer_running) { + backend->poweroff_timer_running + = true; + hrtimer_start( + &backend->gpu_poweroff_timer, + pm->gpu_poweroff_time, + HRTIMER_MODE_REL); + } + spin_unlock_irqrestore( + &pm->power_change_lock, + flags); + } } } else { - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(&pm->power_change_lock, flags); } } } @@ -433,37 +454,25 @@ void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev) bool cores_are_available; bool do_poweroff = false; - lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->pm.power_change_lock); if (kbdev->pm.backend.pm_current_policy == NULL) return; - if (kbdev->pm.backend.poweroff_wait_in_progress) - return; - if (kbdev->protected_mode_transition && !kbdev->shader_needed_bitmap && - !kbdev->shader_inuse_bitmap && !kbdev->tiler_needed_cnt - && !kbdev->tiler_inuse_cnt) { - /* We are trying to change in/out of protected mode - force all - * cores off so that the L2 powers down */ - desired_bitmap = 0; - desired_tiler_bitmap = 0; - } else { - desired_bitmap = + desired_bitmap = kbdev->pm.backend.pm_current_policy->get_core_mask(kbdev); - desired_bitmap &= kbase_pm_ca_get_core_mask(kbdev); + desired_bitmap &= kbase_pm_ca_get_core_mask(kbdev); + + if (kbdev->tiler_needed_cnt > 0 || kbdev->tiler_inuse_cnt > 0) + desired_tiler_bitmap = 1; + else + desired_tiler_bitmap = 0; + if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY)) { + /* Unless XAFFINITY is supported, enable core 0 if tiler + * required, regardless of core availability */ if (kbdev->tiler_needed_cnt > 0 || kbdev->tiler_inuse_cnt > 0) - desired_tiler_bitmap = 1; - else - desired_tiler_bitmap = 0; - - if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY)) { - /* Unless XAFFINITY is supported, enable core 0 if tiler - * required, regardless of core availability */ - if (kbdev->tiler_needed_cnt > 0 || - kbdev->tiler_inuse_cnt > 0) - desired_bitmap |= 1; - } + desired_bitmap |= 1; } if (kbdev->pm.backend.desired_shader_state != desired_bitmap) @@ -486,8 +495,7 @@ void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev) (kbdev->pm.backend.desired_tiler_state & ~desired_tiler_bitmap); - if (kbdev->pm.poweroff_shader_ticks && - !kbdev->protected_mode_transition) + if (kbdev->pm.poweroff_shader_ticks) kbdev->pm.backend.shader_poweroff_pending_time = kbdev->pm.poweroff_shader_ticks; else @@ -509,8 +517,7 @@ void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev) kbdev->pm.backend.tiler_poweroff_pending |= (kbdev->pm.backend.desired_tiler_state & ~desired_tiler_bitmap); - if 
(kbdev->pm.poweroff_shader_ticks && - !kbdev->protected_mode_transition) + if (kbdev->pm.poweroff_shader_ticks) kbdev->pm.backend.shader_poweroff_pending_time = kbdev->pm.poweroff_shader_ticks; else @@ -556,11 +563,11 @@ void kbase_pm_update_cores_state(struct kbase_device *kbdev) { unsigned long flags; - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); kbase_pm_update_cores_state_nolock(kbdev); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); } int kbase_pm_list_policies(const struct kbase_pm_policy * const **list) @@ -605,10 +612,10 @@ void kbase_pm_set_policy(struct kbase_device *kbdev, mutex_lock(&kbdev->pm.lock); /* Remove the policy to prevent IRQ handlers from working on it */ - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); old_policy = kbdev->pm.backend.pm_current_policy; kbdev->pm.backend.pm_current_policy = NULL; - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); KBASE_TRACE_ADD(kbdev, PM_CURRENT_POLICY_TERM, NULL, NULL, 0u, old_policy->id); @@ -620,9 +627,9 @@ void kbase_pm_set_policy(struct kbase_device *kbdev, if (new_policy->init) new_policy->init(kbdev); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); kbdev->pm.backend.pm_current_policy = new_policy; - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); /* If any core power state changes were previously attempted, but * couldn't be made because the policy was changing (current_policy was @@ -657,13 +664,14 @@ kbase_pm_trace_check_and_finish_state_change(struct kbase_device *kbdev) void kbase_pm_request_cores(struct kbase_device *kbdev, bool tiler_required, u64 shader_cores) { + unsigned long flags; u64 cores; kbase_pm_change_state change_gpu_state = 0u; KBASE_DEBUG_ASSERT(kbdev != NULL); - lockdep_assert_held(&kbdev->hwaccess_lock); + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); cores = shader_cores; while (cores) { @@ -706,6 +714,8 @@ void kbase_pm_request_cores(struct kbase_device *kbdev, KBASE_PM_FUNC_ID_REQUEST_CORES_END, change_gpu_state); } + + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); } KBASE_EXPORT_TEST_API(kbase_pm_request_cores); @@ -713,11 +723,13 @@ KBASE_EXPORT_TEST_API(kbase_pm_request_cores); void kbase_pm_unrequest_cores(struct kbase_device *kbdev, bool tiler_required, u64 shader_cores) { + unsigned long flags; + kbase_pm_change_state change_gpu_state = 0u; KBASE_DEBUG_ASSERT(kbdev != NULL); - lockdep_assert_held(&kbdev->hwaccess_lock); + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); while (shader_cores) { int bitnum = fls64(shader_cores) - 1; @@ -758,6 +770,8 @@ void kbase_pm_unrequest_cores(struct kbase_device *kbdev, * - no-one will wait on the state change */ kbase_pm_trace_check_and_finish_state_change(kbdev); } + + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); } KBASE_EXPORT_TEST_API(kbase_pm_unrequest_cores); @@ -766,10 +780,11 @@ enum kbase_pm_cores_ready kbase_pm_register_inuse_cores(struct kbase_device *kbdev, bool tiler_required, u64 shader_cores) { + unsigned long flags; u64 prev_shader_needed; /* Just for tracing */ u64 prev_shader_inuse; /* Just for tracing */ - lockdep_assert_held(&kbdev->hwaccess_lock); + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); 
prev_shader_needed = kbdev->shader_needed_bitmap; prev_shader_inuse = kbdev->shader_inuse_bitmap; @@ -780,15 +795,16 @@ kbase_pm_register_inuse_cores(struct kbase_device *kbdev, * be chosen */ if ((kbdev->pm.backend.desired_shader_state & shader_cores) != shader_cores) { - return (kbdev->pm.backend.poweroff_wait_in_progress || - kbdev->pm.backend.pm_current_policy == NULL) ? - KBASE_CORES_NOT_READY : KBASE_NEW_AFFINITY; + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + + return KBASE_NEW_AFFINITY; } if ((kbdev->shader_available_bitmap & shader_cores) != shader_cores || (tiler_required && !kbdev->tiler_available_bitmap)) { /* Trace ongoing core transition */ kbase_timeline_pm_l2_transition_start(kbdev); + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); return KBASE_CORES_NOT_READY; } @@ -837,6 +853,8 @@ kbase_pm_register_inuse_cores(struct kbase_device *kbdev, KBASE_TRACE_ADD(kbdev, PM_REGISTER_CHANGE_SHADER_INUSE, NULL, NULL, 0u, (u32) kbdev->shader_inuse_bitmap); + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + return KBASE_CORES_READY; } @@ -845,11 +863,12 @@ KBASE_EXPORT_TEST_API(kbase_pm_register_inuse_cores); void kbase_pm_release_cores(struct kbase_device *kbdev, bool tiler_required, u64 shader_cores) { + unsigned long flags; kbase_pm_change_state change_gpu_state = 0u; KBASE_DEBUG_ASSERT(kbdev != NULL); - lockdep_assert_held(&kbdev->hwaccess_lock); + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); while (shader_cores) { int bitnum = fls64(shader_cores) - 1; @@ -894,6 +913,8 @@ void kbase_pm_release_cores(struct kbase_device *kbdev, /* Trace that any state change completed immediately */ kbase_pm_trace_check_and_finish_state_change(kbdev); } + + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); } KBASE_EXPORT_TEST_API(kbase_pm_release_cores); @@ -902,13 +923,7 @@ void kbase_pm_request_cores_sync(struct kbase_device *kbdev, bool tiler_required, u64 shader_cores) { - unsigned long flags; - - kbase_pm_wait_for_poweroff_complete(kbdev); - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbase_pm_request_cores(kbdev, tiler_required, shader_cores); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); kbase_pm_check_transitions_sync(kbdev); } @@ -920,7 +935,7 @@ void kbase_pm_request_l2_caches(struct kbase_device *kbdev) unsigned long flags; u32 prior_l2_users_count; - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); prior_l2_users_count = kbdev->l2_users_count++; @@ -932,7 +947,7 @@ void kbase_pm_request_l2_caches(struct kbase_device *kbdev) if (!prior_l2_users_count || !kbdev->l2_available_bitmap) kbase_pm_check_transitions_nolock(kbdev); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); wait_event(kbdev->pm.backend.l2_powered_wait, kbdev->pm.backend.l2_powered == 1); @@ -944,16 +959,22 @@ KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches); void kbase_pm_request_l2_caches_l2_is_on(struct kbase_device *kbdev) { - lockdep_assert_held(&kbdev->hwaccess_lock); + unsigned long flags; + + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); kbdev->l2_users_count++; + + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); } KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches_l2_is_on); void kbase_pm_release_l2_caches(struct kbase_device *kbdev) { - lockdep_assert_held(&kbdev->hwaccess_lock); + unsigned long flags; + + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); 
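After this revert the core-request API in mali_kbase_pm_policy.c takes pm.power_change_lock internally, so callers hold no PM spinlock of their own. A minimal caller sketch, using only the prototypes and enum values visible in the hunks above; it is illustrative and not part of the patch, and the error-code mapping is an assumption for the example:

#include <linux/errno.h>
#include "mali_kbase.h"

/* Request @cores (plus the tiler), try to mark them in use, and release them
 * afterwards. request/register/release lock pm.power_change_lock internally. */
static int use_shader_cores(struct kbase_device *kbdev, u64 cores)
{
	enum kbase_pm_cores_ready ready;

	kbase_pm_request_cores(kbdev, true, cores);

	ready = kbase_pm_register_inuse_cores(kbdev, true, cores);
	if (ready == KBASE_NEW_AFFINITY) {
		/* The desired core mask changed underneath us: give the
		 * request back and let the caller choose a new affinity. */
		kbase_pm_unrequest_cores(kbdev, true, cores);
		return -EAGAIN;
	}
	if (ready == KBASE_CORES_NOT_READY) {
		/* Power transition still in progress; the request stays
		 * pending, so the caller may retry register_inuse later. */
		return -EBUSY;
	}

	/* ... cores are powered and marked in use; run work here ... */

	kbase_pm_release_cores(kbdev, true, cores);
	return 0;
}

kbase_pm_request_cores_sync(), also shown above, is the blocking variant: it issues the request and then waits in kbase_pm_check_transitions_sync() instead of returning KBASE_CORES_NOT_READY to the caller.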
KBASE_DEBUG_ASSERT(kbdev->l2_users_count > 0); @@ -964,6 +985,8 @@ void kbase_pm_release_l2_caches(struct kbase_device *kbdev) /* Trace that any state change completed immediately */ kbase_pm_trace_check_and_finish_state_change(kbdev); } + + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); } KBASE_EXPORT_TEST_API(kbase_pm_release_l2_caches); diff --git a/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h b/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h index 8b07cbc5eae8..f7c0ff674906 100644 --- a/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h +++ b/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h @@ -50,7 +50,6 @@ enum base_hw_feature { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_MODE, BASE_HW_FEATURE_COHERENCY_REG, - BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_END }; @@ -193,31 +192,6 @@ static const enum base_hw_feature base_hw_features_tMIx[] = { BASE_HW_FEATURE_END }; -static const enum base_hw_feature base_hw_features_tHEx[] = { - BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, - BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, - BASE_HW_FEATURE_XAFFINITY, - BASE_HW_FEATURE_WARPING, - BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, - BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, - BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, - BASE_HW_FEATURE_BRNDOUT_CC, - BASE_HW_FEATURE_BRNDOUT_KILL, - BASE_HW_FEATURE_LD_ST_LEA_TEX, - BASE_HW_FEATURE_LD_ST_TILEBUFFER, - BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, - BASE_HW_FEATURE_MRT, - BASE_HW_FEATURE_MSAA_16X, - BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, - BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, - BASE_HW_FEATURE_T7XX_PAIRING_RULES, - BASE_HW_FEATURE_TEST4_DATUM_MODE, - BASE_HW_FEATURE_FLUSH_REDUCTION, - BASE_HW_FEATURE_PROTECTED_MODE, - BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, - BASE_HW_FEATURE_COHERENCY_REG, - BASE_HW_FEATURE_END -}; #endif /* _BASE_HWCONFIG_FEATURES_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h b/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h index 4d95b4f9f649..149f44cb8674 100644 --- a/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h +++ b/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h @@ -113,8 +113,6 @@ enum base_hw_issue { BASE_HW_ISSUE_TMIX_8138, BASE_HW_ISSUE_TMIX_8206, BASE_HW_ISSUE_TMIX_8343, - BASE_HW_ISSUE_TMIX_8463, - BASE_HW_ISSUE_TMIX_8456, GPUCORE_1619, BASE_HW_ISSUE_END }; @@ -946,8 +944,6 @@ static const enum base_hw_issue base_hw_issues_tMIx_r0p0_05dev0[] = { BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TMIX_8138, BASE_HW_ISSUE_TMIX_8343, - BASE_HW_ISSUE_TMIX_8463, - BASE_HW_ISSUE_TMIX_8456, BASE_HW_ISSUE_END }; @@ -965,8 +961,6 @@ static const enum base_hw_issue base_hw_issues_tMIx_r0p0[] = { BASE_HW_ISSUE_TMIX_8138, BASE_HW_ISSUE_TMIX_8206, BASE_HW_ISSUE_TMIX_8343, - BASE_HW_ISSUE_TMIX_8463, - BASE_HW_ISSUE_TMIX_8456, BASE_HW_ISSUE_END }; @@ -982,32 +976,11 @@ static const enum base_hw_issue base_hw_issues_model_tMIx[] = { BASE_HW_ISSUE_TMIX_8138, BASE_HW_ISSUE_TMIX_8206, BASE_HW_ISSUE_TMIX_8343, - BASE_HW_ISSUE_TMIX_8456, GPUCORE_1619, BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_tHEx_r0p0[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_10682, - BASE_HW_ISSUE_10821, - BASE_HW_ISSUE_T76X_3700, - BASE_HW_ISSUE_TMIX_7891, - BASE_HW_ISSUE_TMIX_8042, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_END -}; -static const enum base_hw_issue base_hw_issues_model_tHEx[] = { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_T76X_3700, - BASE_HW_ISSUE_TMIX_7891, - BASE_HW_ISSUE_TMIX_8042, - BASE_HW_ISSUE_TMIX_8133, - GPUCORE_1619, - BASE_HW_ISSUE_END 
-}; diff --git a/drivers/gpu/arm/midgard/mali_base_kernel.h b/drivers/gpu/arm/midgard/mali_base_kernel.h index bcb05e40f7fa..749dd9a1cc9e 100644 --- a/drivers/gpu/arm/midgard/mali_base_kernel.h +++ b/drivers/gpu/arm/midgard/mali_base_kernel.h @@ -45,9 +45,6 @@ /* Support UK10_2 IOCTLS */ #define BASE_LEGACY_UK10_2_SUPPORT 1 -/* Support UK10_4 IOCTLS */ -#define BASE_LEGACY_UK10_4_SUPPORT 1 - typedef struct base_mem_handle { struct { u64 handle; @@ -1810,10 +1807,4 @@ typedef struct base_profiling_controls { u32 profiling_controls[FBDUMP_CONTROL_MAX]; } base_profiling_controls; -/* Enable additional tracepoints for latency measurements (TL_ATOM_READY, - * TL_ATOM_DONE, TL_ATOM_PRIO_CHANGE, TL_ATOM_EVENT_POST) */ -#define BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS (1 << 0) - -#define BASE_TLSTREAM_FLAGS_MASK (BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) - #endif /* _BASE_KERNEL_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase.h b/drivers/gpu/arm/midgard/mali_kbase.h index 443d4b13a4fb..b6d28fea9987 100644 --- a/drivers/gpu/arm/midgard/mali_kbase.h +++ b/drivers/gpu/arm/midgard/mali_kbase.h @@ -45,15 +45,10 @@ #include #include -/* - * Include mali_kbase_defs.h first as this provides types needed by other local - * header files. - */ -#include "mali_kbase_defs.h" - -#include "mali_kbase_context.h" #include "mali_kbase_strings.h" +#include "mali_kbase_pm.h" #include "mali_kbase_mem_lowlevel.h" +#include "mali_kbase_defs.h" #include "mali_kbase_trace_timeline.h" #include "mali_kbase_js.h" #include "mali_kbase_mem.h" @@ -110,6 +105,7 @@ u32 kbase_get_profiling_control(struct kbase_device *kbdev, u32 control); struct kbase_context * kbase_create_context(struct kbase_device *kbdev, bool is_compat); void kbase_destroy_context(struct kbase_context *kctx); +int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags); int kbase_jd_init(struct kbase_context *kctx); void kbase_jd_exit(struct kbase_context *kctx); @@ -167,7 +163,7 @@ void kbase_gpu_cacheclean(struct kbase_device *kbdev, * than @katom will be soft stopped and put back in the queue, so that atoms * with higher priority can run. * - * The hwaccess_lock must be held when calling this function. + * The js_data.runpool_irq.lock must be held when calling this function. */ void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx, struct kbase_jd_atom *katom); @@ -550,58 +546,4 @@ void kbasep_trace_dump(struct kbase_device *kbdev); void kbase_set_driver_inactive(struct kbase_device *kbdev, bool inactive); #endif /* CONFIG_MALI_DEBUG */ - -#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI) - -/* kbase_io_history_init - initialize data struct for register access history - * - * @kbdev The register history to initialize - * @n The number of register accesses that the buffer could hold - * - * @return 0 if successfully initialized, failure otherwise - */ -int kbase_io_history_init(struct kbase_io_history *h, u16 n); - -/* kbase_io_history_term - uninit all resources for the register access history - * - * @h The register history to terminate - */ -void kbase_io_history_term(struct kbase_io_history *h); - -/* kbase_io_history_dump - print the register history to the kernel ring buffer - * - * @kbdev Pointer to kbase_device containing the register history to dump - */ -void kbase_io_history_dump(struct kbase_device *kbdev); - -/** - * kbase_io_history_resize - resize the register access history buffer. 
- * - * @h: Pointer to a valid register history to resize - * @new_size: Number of accesses the buffer could hold - * - * A successful resize will clear all recent register accesses. - * If resizing fails for any reason (e.g., could not allocate memory, invalid - * buffer size) then the original buffer will be kept intact. - * - * @return 0 if the buffer was resized, failure otherwise - */ -int kbase_io_history_resize(struct kbase_io_history *h, u16 new_size); - -#else /* CONFIG_DEBUG_FS */ - -#define kbase_io_history_init(...) ((int)0) - -#define kbase_io_history_term CSTD_NOP - -#define kbase_io_history_dump CSTD_NOP - -#define kbase_io_history_resize CSTD_NOP - -#endif /* CONFIG_DEBUG_FS */ - - #endif - - - diff --git a/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h b/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h index e674cc2ea183..9b00cce9b2b3 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h +++ b/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h @@ -201,13 +201,13 @@ enum { /* * Default minimum number of scheduling ticks before jobs are hard-stopped */ -#define DEFAULT_JS_HARD_STOP_TICKS_SS (100) /* 10s */ +#define DEFAULT_JS_HARD_STOP_TICKS_SS (50) /* 5s */ #define DEFAULT_JS_HARD_STOP_TICKS_SS_8408 (300) /* 30s */ /* * Default minimum number of scheduling ticks before CL jobs are hard-stopped. */ -#define DEFAULT_JS_HARD_STOP_TICKS_CL (100) /* 10s */ +#define DEFAULT_JS_HARD_STOP_TICKS_CL (50) /* 5s */ /* * Default minimum number of scheduling ticks before jobs are hard-stopped @@ -225,14 +225,14 @@ enum { * Default minimum number of scheduling ticks before the GPU is reset to clear a * "stuck" job */ -#define DEFAULT_JS_RESET_TICKS_SS (105) /* 10.5s */ +#define DEFAULT_JS_RESET_TICKS_SS (55) /* 5.5s */ #define DEFAULT_JS_RESET_TICKS_SS_8408 (450) /* 45s */ /* * Default minimum number of scheduling ticks before the GPU is reset to clear a * "stuck" CL job. 
*/ -#define DEFAULT_JS_RESET_TICKS_CL (105) /* 10.5s */ +#define DEFAULT_JS_RESET_TICKS_CL (55) /* 5.5s */ /* * Default minimum number of scheduling ticks before the GPU is reset to clear a diff --git a/drivers/gpu/arm/midgard/mali_kbase_context.c b/drivers/gpu/arm/midgard/mali_kbase_context.c index 55c5ef613c37..344a1f16de8a 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_context.c +++ b/drivers/gpu/arm/midgard/mali_kbase_context.c @@ -53,13 +53,13 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat) kctx->kbdev = kbdev; kctx->as_nr = KBASEP_AS_NR_INVALID; - if (is_compat) - kbase_ctx_flag_set(kctx, KCTX_COMPAT); + kctx->is_compat = is_compat; #ifdef CONFIG_MALI_TRACE_TIMELINE kctx->timeline.owner_tgid = task_tgid_nr(current); #endif atomic_set(&kctx->setup_complete, 0); atomic_set(&kctx->setup_in_progress, 0); + kctx->infinite_cache_active = 0; spin_lock_init(&kctx->mm_update_lock); kctx->process_mm = NULL; atomic_set(&kctx->nonmapped_pages, 0); @@ -108,15 +108,11 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat) if (err) goto term_dma_fence; - do { - err = kbase_mem_pool_grow(&kctx->mem_pool, - MIDGARD_MMU_BOTTOMLEVEL); - if (err) - goto pgd_no_mem; - kctx->pgd = kbase_mmu_alloc_pgd(kctx); - } while (!kctx->pgd); + kctx->pgd = kbase_mmu_alloc_pgd(kctx); + if (!kctx->pgd) + goto free_mmu; - kctx->aliasing_sink_page = kbase_mem_alloc_page(kctx->kbdev); + kctx->aliasing_sink_page = kbase_mem_pool_alloc(&kctx->mem_pool); if (!kctx->aliasing_sink_page) goto no_sink_page; @@ -166,7 +162,7 @@ no_sink_page: kbase_gpu_vm_lock(kctx); kbase_mmu_free_pgd(kctx); kbase_gpu_vm_unlock(kctx); -pgd_no_mem: +free_mmu: kbase_mmu_term(kctx); term_dma_fence: kbase_dma_fence_term(kctx); @@ -304,16 +300,17 @@ int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags) } mutex_lock(&js_kctx_info->ctx.jsctx_mutex); - spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, irq_flags); + spin_lock_irqsave(&kctx->kbdev->js_data.runpool_irq.lock, irq_flags); /* Translate the flags */ if ((flags & BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) == 0) - kbase_ctx_flag_clear(kctx, KCTX_SUBMIT_DISABLED); + js_kctx_info->ctx.flags &= ~((u32) KBASE_CTX_FLAG_SUBMIT_DISABLED); /* Latch the initial attributes into the Job Scheduler */ kbasep_js_ctx_attr_set_initial_attrs(kctx->kbdev, kctx); - spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, irq_flags); + spin_unlock_irqrestore(&kctx->kbdev->js_data.runpool_irq.lock, + irq_flags); mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); out: return err; diff --git a/drivers/gpu/arm/midgard/mali_kbase_context.h b/drivers/gpu/arm/midgard/mali_kbase_context.h deleted file mode 100644 index a3f5bb0ce0da..000000000000 --- a/drivers/gpu/arm/midgard/mali_kbase_context.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * - * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * A copy of the licence is included with the program, and can also be obtained - * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. 
- * - */ - - - -#ifndef _KBASE_CONTEXT_H_ -#define _KBASE_CONTEXT_H_ - -#include - - -int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags); - -/** - * kbase_ctx_flag - Check if @flag is set on @kctx - * @kctx: Pointer to kbase context to check - * @flag: Flag to check - * - * Return: true if @flag is set on @kctx, false if not. - */ -static inline bool kbase_ctx_flag(struct kbase_context *kctx, - enum kbase_context_flags flag) -{ - return atomic_read(&kctx->flags) & flag; -} - -/** - * kbase_ctx_flag_clear - Clear @flag on @kctx - * @kctx: Pointer to kbase context - * @flag: Flag to clear - * - * Clear the @flag on @kctx. This is done atomically, so other flags being - * cleared or set at the same time will be safe. - * - * Some flags have locking requirements, check the documentation for the - * respective flags. - */ -static inline void kbase_ctx_flag_clear(struct kbase_context *kctx, - enum kbase_context_flags flag) -{ -#if KERNEL_VERSION(4, 3, 0) > LINUX_VERSION_CODE - /* - * Earlier kernel versions doesn't have atomic_andnot() or - * atomic_and(). atomic_clear_mask() was only available on some - * architectures and removed on arm in v3.13 on arm and arm64. - * - * Use a compare-exchange loop to clear the flag on pre 4.3 kernels, - * when atomic_andnot() becomes available. - */ - int old, new; - - do { - old = atomic_read(&kctx->flags); - new = old & ~flag; - - } while (atomic_cmpxchg(&kctx->flags, old, new) != old); -#else - atomic_andnot(flag, &kctx->flags); -#endif -} - -/** - * kbase_ctx_flag_set - Set @flag on @kctx - * @kctx: Pointer to kbase context - * @flag: Flag to clear - * - * Set the @flag on @kctx. This is done atomically, so other flags being - * cleared or set at the same time will be safe. - * - * Some flags have locking requirements, check the documentation for the - * respective flags. 
- */ -static inline void kbase_ctx_flag_set(struct kbase_context *kctx, - enum kbase_context_flags flag) -{ - atomic_or(flag, &kctx->flags); -} -#endif /* _KBASE_CONTEXT_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_core_linux.c b/drivers/gpu/arm/midgard/mali_kbase_core_linux.c index da6b8e9ef4b1..499ef46a0fe2 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_core_linux.c +++ b/drivers/gpu/arm/midgard/mali_kbase_core_linux.c @@ -35,7 +35,6 @@ #if !MALI_CUSTOMER_RELEASE #include "mali_kbase_regs_dump_debugfs.h" #endif /* !MALI_CUSTOMER_RELEASE */ -#include "mali_kbase_regs_history_debugfs.h" #include #include #include @@ -118,6 +117,39 @@ static inline void __compile_time_asserts(void) CSTD_COMPILE_TIME_ASSERT(sizeof(KERNEL_SIDE_DDK_VERSION_STRING) <= KBASE_GET_VERSION_BUFFER_SIZE); } +#ifdef CONFIG_KDS + +struct kbasep_kds_resource_set_file_data { + struct kds_resource_set *lock; +}; + +static int kds_resource_release(struct inode *inode, struct file *file); + +static const struct file_operations kds_resource_fops = { + .release = kds_resource_release +}; + +struct kbase_kds_resource_list_data { + struct kds_resource **kds_resources; + unsigned long *kds_access_bitmap; + int num_elems; +}; + +static int kds_resource_release(struct inode *inode, struct file *file) +{ + struct kbasep_kds_resource_set_file_data *data; + + data = (struct kbasep_kds_resource_set_file_data *)file->private_data; + if (NULL != data) { + if (NULL != data->lock) + kds_resource_set_release(&data->lock); + + kfree(data); + } + return 0; +} +#endif /* CONFIG_KDS */ + static void kbase_create_timeline_objects(struct kbase_context *kctx) { struct kbase_device *kbdev = kctx->kbdev; @@ -259,7 +291,6 @@ enum { inited_debugfs = (1u << 15), inited_gpu_device = (1u << 16), inited_registers_map = (1u << 17), - inited_io_history = (1u << 18), inited_power_control = (1u << 19), inited_buslogger = (1u << 20) }; @@ -371,7 +402,7 @@ static int kbase_dispatch(struct kbase_context *kctx, void * const args, u32 arg goto bad_size; #if defined(CONFIG_64BIT) - if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) { + if (!kctx->is_compat) { /* force SAME_VA if a 64-bit client */ mem->flags |= BASE_MEM_SAME_VA; } @@ -392,7 +423,7 @@ static int kbase_dispatch(struct kbase_context *kctx, void * const args, u32 arg if (sizeof(*mem_import) != args_size) goto bad_size; #ifdef CONFIG_COMPAT - if (kbase_ctx_flag(kctx, KCTX_COMPAT)) + if (kctx->is_compat) phandle = compat_ptr(mem_import->phandle.compat_value); else #endif @@ -433,7 +464,7 @@ static int kbase_dispatch(struct kbase_context *kctx, void * const args, u32 arg } #ifdef CONFIG_COMPAT - if (kbase_ctx_flag(kctx, KCTX_COMPAT)) + if (kctx->is_compat) user_ai = compat_ptr(alias->ai.compat_value); else #endif @@ -871,14 +902,14 @@ copy_failed: } #ifdef CONFIG_COMPAT - if (kbase_ctx_flag(kctx, KCTX_COMPAT)) + if (kctx->is_compat) user_buf = compat_ptr(add_data->buf.compat_value); else #endif user_buf = add_data->buf.value; buf = kmalloc(add_data->len, GFP_KERNEL); - if (ZERO_OR_NULL_PTR(buf)) + if (!buf) goto out_bad; if (0 != copy_from_user(buf, user_buf, add_data->len)) { @@ -909,28 +940,7 @@ copy_failed: break; } #endif /* CONFIG_MALI_NO_MALI */ -#ifdef BASE_LEGACY_UK10_4_SUPPORT - case KBASE_FUNC_TLSTREAM_ACQUIRE_V10_4: - { - struct kbase_uk_tlstream_acquire_v10_4 *tlstream_acquire - = args; - if (sizeof(*tlstream_acquire) != args_size) - goto bad_size; - - if (0 != kbase_tlstream_acquire( - kctx, - &tlstream_acquire->fd, 0)) { - ukh->ret = MALI_ERROR_FUNCTION_FAILED; - } else if (0 <= 
tlstream_acquire->fd) { - /* Summary stream was cleared during acquire. - * Create static timeline objects that will be - * read by client. */ - kbase_create_timeline_objects(kctx); - } - break; - } -#endif /* BASE_LEGACY_UK10_4_SUPPORT */ case KBASE_FUNC_TLSTREAM_ACQUIRE: { struct kbase_uk_tlstream_acquire *tlstream_acquire = @@ -939,13 +949,9 @@ copy_failed: if (sizeof(*tlstream_acquire) != args_size) goto bad_size; - if (tlstream_acquire->flags & ~BASE_TLSTREAM_FLAGS_MASK) - goto out_bad; - if (0 != kbase_tlstream_acquire( kctx, - &tlstream_acquire->fd, - tlstream_acquire->flags)) { + &tlstream_acquire->fd)) { ukh->ret = MALI_ERROR_FUNCTION_FAILED; } else if (0 <= tlstream_acquire->fd) { /* Summary stream was cleared during acquire. @@ -1128,63 +1134,6 @@ void kbase_release_device(struct kbase_device *kbdev) } EXPORT_SYMBOL(kbase_release_device); -#if KERNEL_VERSION(4, 4, 0) > LINUX_VERSION_CODE -/* - * Older versions, before v4.6, of the kernel doesn't have - * kstrtobool_from_user(). - */ -static int kstrtobool_from_user(const char __user *s, size_t count, bool *res) -{ - char buf[32]; - - count = min(sizeof(buf), count); - - if (copy_from_user(buf, s, count)) - return -EFAULT; - buf[count] = '\0'; - - return strtobool(buf, res); -} -#endif - -static ssize_t write_ctx_infinite_cache(struct file *f, const char __user *ubuf, size_t size, loff_t *off) -{ - struct kbase_context *kctx = f->private_data; - int err; - bool value; - - err = kstrtobool_from_user(ubuf, size, &value); - if (err) - return err; - - if (value) - kbase_ctx_flag_set(kctx, KCTX_INFINITE_CACHE); - else - kbase_ctx_flag_clear(kctx, KCTX_INFINITE_CACHE); - - return size; -} - -static ssize_t read_ctx_infinite_cache(struct file *f, char __user *ubuf, size_t size, loff_t *off) -{ - struct kbase_context *kctx = f->private_data; - char buf[32]; - int count; - bool value; - - value = kbase_ctx_flag(kctx, KCTX_INFINITE_CACHE); - - count = scnprintf(buf, sizeof(buf), "%s\n", value ? "Y" : "N"); - - return simple_read_from_buffer(ubuf, size, off, buf, count); -} - -static const struct file_operations kbase_infinite_cache_fops = { - .open = simple_open, - .write = write_ctx_infinite_cache, - .read = read_ctx_infinite_cache, -}; - static int kbase_open(struct inode *inode, struct file *filp) { struct kbase_device *kbdev = NULL; @@ -1209,8 +1158,7 @@ static int kbase_open(struct inode *inode, struct file *filp) filp->private_data = kctx; kctx->filp = filp; - if (kbdev->infinite_cache_active_default) - kbase_ctx_flag_set(kctx, KCTX_INFINITE_CACHE); + kctx->infinite_cache_active = kbdev->infinite_cache_active_default; #ifdef CONFIG_DEBUG_FS snprintf(kctx_name, 64, "%d_%d", kctx->tgid, kctx->id); @@ -1228,20 +1176,20 @@ static int kbase_open(struct inode *inode, struct file *filp) * infinite cache control support from debugfs. 
*/ #else - debugfs_create_file("infinite_cache", 0644, kctx->kctx_dentry, - kctx, &kbase_infinite_cache_fops); + debugfs_create_bool("infinite_cache", 0644, kctx->kctx_dentry, + (bool*)&(kctx->infinite_cache_active)); #endif /* CONFIG_MALI_COH_USER */ mutex_init(&kctx->mem_profile_lock); - kbasep_jd_debugfs_ctx_init(kctx); + kbasep_jd_debugfs_ctx_add(kctx); kbase_debug_mem_view_init(filp); kbase_debug_job_fault_context_init(kctx); - kbase_mem_pool_debugfs_init(kctx->kctx_dentry, &kctx->mem_pool); + kbase_mem_pool_debugfs_add(kctx->kctx_dentry, &kctx->mem_pool); - kbase_jit_debugfs_init(kctx); + kbase_jit_debugfs_add(kctx); #endif /* CONFIG_DEBUG_FS */ dev_dbg(kbdev->dev, "created base context\n"); @@ -1538,7 +1486,7 @@ static unsigned long kbase_get_unmapped_area(struct file *filp, if (len > TASK_SIZE - SZ_2M) return -ENOMEM; - if (kbase_ctx_flag(kctx, KCTX_COMPAT)) + if (kctx->is_compat) return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags); @@ -1888,12 +1836,13 @@ static ssize_t set_core_mask(struct device *dev, struct device_attribute *attr, new_core_mask[2]) { unsigned long flags; - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); kbase_pm_set_debug_core_mask(kbdev, new_core_mask[0], new_core_mask[1], new_core_mask[2]); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, + flags); } return count; @@ -2043,7 +1992,7 @@ static ssize_t set_js_timeouts(struct device *dev, struct device_attribute *attr struct kbasep_js_device_data *js_data = &kbdev->js_data; unsigned long flags; - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags); #define UPDATE_TIMEOUT(ticks_name, ms_name, default) do {\ js_data->ticks_name = timeout_ms_to_ticks(kbdev, ms_name, \ @@ -2078,7 +2027,7 @@ static ssize_t set_js_timeouts(struct device *dev, struct device_attribute *attr kbase_js_set_timeouts(kbdev); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags); return count; } @@ -2227,7 +2176,7 @@ static ssize_t set_js_scheduling_period(struct device *dev, /* Update scheduling timeouts */ mutex_lock(&js_data->runpool_mutex); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock_irqsave(&js_data->runpool_irq.lock, flags); /* If no contexts have been scheduled since js_timeouts was last written * to, the new timeouts might not have been latched yet. 
So check if an @@ -2257,7 +2206,7 @@ static ssize_t set_js_scheduling_period(struct device *dev, kbase_js_set_timeouts(kbdev); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(&js_data->runpool_irq.lock, flags); mutex_unlock(&js_data->runpool_mutex); dev_dbg(kbdev->dev, "JS scheduling period: %dms\n", @@ -2591,8 +2540,6 @@ static ssize_t kbase_show_gpuinfo(struct device *dev, { .id = GPU_ID_PI_TFRX, .name = "Mali-T88x" }, { .id = GPU_ID2_PRODUCT_TMIX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, .name = "Mali-G71" }, - { .id = GPU_ID2_PRODUCT_THEX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, - .name = "Mali-THEx" }, }; const char *product_name = "(Unknown Mali GPU)"; struct kbase_device *kbdev; @@ -3085,8 +3032,7 @@ static int power_control_init(struct platform_device *pdev) #if defined(CONFIG_OF) && defined(CONFIG_PM_OPP) /* Register the OPPs if they are available in device tree */ -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) \ - || defined(LSK_OPPV2_BACKPORT) +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) err = dev_pm_opp_of_add_table(kbdev->dev); #elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)) err = of_init_opp_table(kbdev->dev); @@ -3178,48 +3124,6 @@ MAKE_QUIRK_ACCESSORS(mmu); #endif /* KBASE_GPU_RESET_EN */ -/** - * debugfs_protected_debug_mode_read - "protected_debug_mode" debugfs read - * @file: File object to read is for - * @buf: User buffer to populate with data - * @len: Length of user buffer - * @ppos: Offset within file object - * - * Retrieves the current status of protected debug mode - * (0 = disabled, 1 = enabled) - * - * Return: Number of bytes added to user buffer - */ -static ssize_t debugfs_protected_debug_mode_read(struct file *file, - char __user *buf, size_t len, loff_t *ppos) -{ - struct kbase_device *kbdev = (struct kbase_device *)file->private_data; - u32 gpu_status; - ssize_t ret_val; - - kbase_pm_context_active(kbdev); - gpu_status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS), NULL); - kbase_pm_context_idle(kbdev); - - if (gpu_status & GPU_DBGEN) - ret_val = simple_read_from_buffer(buf, len, ppos, "1\n", 2); - else - ret_val = simple_read_from_buffer(buf, len, ppos, "0\n", 2); - - return ret_val; -} - -/* - * struct fops_protected_debug_mode - "protected_debug_mode" debugfs fops - * - * Contains the file operations for the "protected_debug_mode" debugfs file - */ -static const struct file_operations fops_protected_debug_mode = { - .open = simple_open, - .read = debugfs_protected_debug_mode_read, - .llseek = default_llseek, -}; - static int kbase_device_debugfs_init(struct kbase_device *kbdev) { struct dentry *debugfs_ctx_defaults_directory; @@ -3250,9 +3154,8 @@ static int kbase_device_debugfs_init(struct kbase_device *kbdev) } #if !MALI_CUSTOMER_RELEASE - kbasep_regs_dump_debugfs_init(kbdev); + kbasep_regs_dump_debugfs_add(kbdev); #endif /* !MALI_CUSTOMER_RELEASE */ - kbasep_regs_history_debugfs_init(kbdev); kbase_debug_job_fault_debugfs_init(kbdev); kbasep_gpu_memory_debugfs_init(kbdev); @@ -3279,12 +3182,6 @@ static int kbase_device_debugfs_init(struct kbase_device *kbdev) debugfs_ctx_defaults_directory, &kbdev->mem_pool_max_size_default); - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) { - debugfs_create_file("protected_debug_mode", S_IRUGO, - kbdev->mali_debugfs_directory, kbdev, - &fops_protected_debug_mode); - } - #if KBASE_TRACE_ENABLE kbasep_trace_debugfs_init(kbdev); #endif /* KBASE_TRACE_ENABLE */ @@ -3507,11 +3404,6 @@ static int kbase_platform_device_remove(struct platform_device 
*pdev) kbdev->inited_subsys &= ~inited_backend_early; } - if (kbdev->inited_subsys & inited_io_history) { - kbase_io_history_term(&kbdev->io_history); - kbdev->inited_subsys &= ~inited_io_history; - } - if (kbdev->inited_subsys & inited_power_control) { power_control_term(kbdev); kbdev->inited_subsys &= ~inited_power_control; @@ -3545,10 +3437,6 @@ static void kbase_platform_device_shutdown(struct platform_device *pdev) kbase_platform_rk_shutdown(kbdev); } -/* Number of register accesses for the buffer that we allocate during - * initialization time. The buffer size can be changed later via debugfs. */ -#define KBASEP_DEFAULT_REGISTER_HISTORY_SIZE ((u16)512) - static int kbase_platform_device_probe(struct platform_device *pdev) { struct kbase_device *kbdev; @@ -3609,15 +3497,6 @@ static int kbase_platform_device_probe(struct platform_device *pdev) } kbdev->inited_subsys |= inited_power_control; - err = kbase_io_history_init(&kbdev->io_history, - KBASEP_DEFAULT_REGISTER_HISTORY_SIZE); - if (err) { - dev_err(&pdev->dev, "Register access history initialization failed\n"); - kbase_platform_device_remove(pdev); - return -ENOMEM; - } - kbdev->inited_subsys |= inited_io_history; - err = kbase_backend_early_init(kbdev); if (err) { dev_err(kbdev->dev, "Early backend initialization failed\n"); @@ -3797,9 +3676,6 @@ static int kbase_platform_device_probe(struct platform_device *pdev) return err; } -#undef KBASEP_DEFAULT_REGISTER_HISTORY_SIZE - - /** Suspend callback from the OS. * * This is called by Linux when the device should suspend. diff --git a/drivers/gpu/arm/midgard/mali_kbase_defs.h b/drivers/gpu/arm/midgard/mali_kbase_defs.h index 845b4713ea1a..f5775bcbb248 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_defs.h +++ b/drivers/gpu/arm/midgard/mali_kbase_defs.h @@ -34,7 +34,6 @@ #include #include #include -#include #include #include @@ -228,39 +227,6 @@ struct kbase_jd_atom_dependency { u8 dep_type; }; -/** - * struct kbase_io_access - holds information about 1 register access - * - * @addr: first bit indicates r/w (r=0, w=1) - * @value: value written or read - */ -struct kbase_io_access { - uintptr_t addr; - u32 value; -}; - -/** - * struct kbase_io_history - keeps track of all recent register accesses - * - * @enabled: true if register accesses are recorded, false otherwise - * @lock: spinlock protecting kbase_io_access array - * @count: number of registers read/written - * @size: number of elements in kbase_io_access array - * @buf: array of kbase_io_access - */ -struct kbase_io_history { -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) - bool enabled; -#else - u32 enabled; -#endif - - spinlock_t lock; - size_t count; - u16 size; - struct kbase_io_access *buf; -}; - /** * @brief The function retrieves a read-only reference to the atom field from * the kbase_jd_atom_dependency structure @@ -335,17 +301,15 @@ enum kbase_atom_gpu_rb_state { KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB, /* Atom is in slot ringbuffer but is blocked on a previous atom */ KBASE_ATOM_GPU_RB_WAITING_BLOCKED, - /* Atom is in slot ringbuffer but is waiting for a previous protected - * mode transition to complete */ - KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV, - /* Atom is in slot ringbuffer but is waiting for proected mode - * transition */ - KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION, + /* Atom is in slot ringbuffer but is waiting for proected mode exit */ + KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_EXIT, /* Atom is in slot ringbuffer but is waiting for cores to become * available */ 
KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE, /* Atom is in slot ringbuffer but is blocked on affinity */ KBASE_ATOM_GPU_RB_WAITING_AFFINITY, + /* Atom is in slot ringbuffer but is waiting for protected mode entry */ + KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_ENTRY, /* Atom is in slot ringbuffer and ready to run */ KBASE_ATOM_GPU_RB_READY, /* Atom is in slot ringbuffer and has been submitted to the GPU */ @@ -355,41 +319,20 @@ enum kbase_atom_gpu_rb_state { KBASE_ATOM_GPU_RB_RETURN_TO_JS }; -enum kbase_atom_enter_protected_state { - /* - * Starting state: - * Check if a transition into protected mode is required. - * - * NOTE: The integer value of this must - * match KBASE_ATOM_EXIT_PROTECTED_CHECK. - */ - KBASE_ATOM_ENTER_PROTECTED_CHECK = 0, - /* Wait for vinstr to suspend. */ - KBASE_ATOM_ENTER_PROTECTED_VINSTR, - /* Wait for the L2 to become idle in preparation for - * the coherency change. */ - KBASE_ATOM_ENTER_PROTECTED_IDLE_L2, - /* End state; - * Prepare coherency change. */ - KBASE_ATOM_ENTER_PROTECTED_FINISHED, -}; - enum kbase_atom_exit_protected_state { /* * Starting state: * Check if a transition out of protected mode is required. - * - * NOTE: The integer value of this must - * match KBASE_ATOM_ENTER_PROTECTED_CHECK. */ - KBASE_ATOM_EXIT_PROTECTED_CHECK = 0, - /* Wait for the L2 to become idle in preparation - * for the reset. */ + KBASE_ATOM_EXIT_PROTECTED_CHECK, + /* Wait for the L2 to become idle in preparation for the reset. */ KBASE_ATOM_EXIT_PROTECTED_IDLE_L2, /* Issue the protected reset. */ KBASE_ATOM_EXIT_PROTECTED_RESET, - /* End state; - * Wait for the reset to complete. */ + /* + * End state; + * Wait for the reset to complete. + */ KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT, }; @@ -554,13 +497,7 @@ struct kbase_jd_atom { * event_code when the atom is processed. */ enum base_jd_event_code will_fail_event_code; - /* Atoms will only ever be transitioning into, or out of - * protected mode so we do not need two separate fields. 
- */ - union { - enum kbase_atom_enter_protected_state enter; - enum kbase_atom_exit_protected_state exit; - } protected_state; + enum kbase_atom_exit_protected_state exit_protected_state; struct rb_node runnable_tree_node; @@ -663,19 +600,19 @@ struct kbase_as { struct work_struct work_pagefault; struct work_struct work_busfault; enum kbase_mmu_fault_type fault_type; - bool protected_mode; u32 fault_status; u64 fault_addr; u64 fault_extra_addr; + struct mutex transaction_mutex; struct kbase_mmu_setup current_setup; /* BASE_HW_ISSUE_8316 */ struct workqueue_struct *poke_wq; struct work_struct poke_work; - /** Protected by hwaccess_lock */ + /** Protected by kbasep_js_device_data::runpool_irq::lock */ int poke_refcount; - /** Protected by hwaccess_lock */ + /** Protected by kbasep_js_device_data::runpool_irq::lock */ kbase_as_poke_state poke_state; struct hrtimer poke_timer; }; @@ -796,7 +733,8 @@ struct kbase_trace_kbdev_timeline { * But it's kept as an example of how to add global timeline tracking * information * - * The caller must hold hwaccess_lock when accessing this */ + * The caller must hold kbasep_js_device_data::runpool_irq::lock when + * accessing this */ u8 slot_atoms_submitted[BASE_JM_MAX_NR_SLOTS]; /* Last UID for each PM event */ @@ -805,7 +743,7 @@ struct kbase_trace_kbdev_timeline { atomic_t pm_event_uid_counter; /* * L2 transition state - true indicates that the transition is ongoing - * Expected to be protected by hwaccess_lock */ + * Expected to be protected by pm.power_change_lock */ bool l2_transitioning; }; #endif /* CONFIG_MALI_TRACE_TIMELINE */ @@ -846,6 +784,19 @@ struct kbase_pm_device_data { u64 debug_core_mask[BASE_JM_MAX_NR_SLOTS]; u64 debug_core_mask_all; + /** + * Lock protecting the power state of the device. + * + * This lock must be held when accessing the shader_available_bitmap, + * tiler_available_bitmap, l2_available_bitmap, shader_inuse_bitmap and + * tiler_inuse_bitmap fields of kbase_device, and the ca_in_transition + * and shader_poweroff_pending fields of kbase_pm_device_data. It is + * also held when the hardware power registers are being written to, to + * ensure that two threads do not conflict over the power transitions + * that the hardware should make. + */ + spinlock_t power_change_lock; + /** * Callback for initializing the runtime power management. * @@ -1168,11 +1119,6 @@ struct kbase_device { /* Total number of created contexts */ atomic_t ctx_num; -#ifdef CONFIG_DEBUG_FS - /* Holds the most recent register accesses */ - struct kbase_io_history io_history; -#endif /* CONFIG_DEBUG_FS */ - struct kbase_hwaccess_data hwaccess; /* Count of page/bus faults waiting for workqueues to process */ @@ -1190,8 +1136,6 @@ struct kbase_device { #endif size_t mem_pool_max_size_default; - /* current gpu coherency mode */ - u32 current_gpu_coherency_mode; /* system coherency mode */ u32 system_coherency; /* Flag to track when cci snoops have been enabled on the interface */ @@ -1238,11 +1182,6 @@ struct kbase_device { /* list of inited sub systems. Used during terminate/error recovery */ u32 inited_subsys; - - spinlock_t hwaccess_lock; - - /* Protects access to MMU operations */ - struct mutex mmu_hw_mutex; }; /** @@ -1253,7 +1192,7 @@ struct kbase_device { * dependencies. Atoms on this list will be moved to the * runnable_tree when the blocking atom completes. * - * hwaccess_lock must be held when accessing this structure. + * runpool_irq.lock must be held when accessing this structure. 
*/ struct jsctx_queue { struct rb_root runnable_tree; @@ -1265,52 +1204,6 @@ struct jsctx_queue { (((minor) & 0xFFF) << 8) | \ ((0 & 0xFF) << 0)) -/** - * enum kbase_context_flags - Flags for kbase contexts - * - * @KCTX_COMPAT: Set when the context process is a compat process, 32-bit - * process on a 64-bit kernel. - * - * @KCTX_RUNNABLE_REF: Set when context is counted in - * kbdev->js_data.nr_contexts_runnable. Must hold queue_mutex when accessing. - * - * @KCTX_ACTIVE: Set when the context is active. - * - * @KCTX_PULLED: Set when last kick() caused atoms to be pulled from this - * context. - * - * @KCTX_MEM_PROFILE_INITIALIZED: Set when the context's memory profile has been - * initialized. - * - * @KCTX_INFINITE_CACHE: Set when infinite cache is to be enabled for new - * allocations. Existing allocations will not change. - * - * @KCTX_SUBMIT_DISABLED: Set to prevent context from submitting any jobs. - * - * @KCTX_PRIVILEGED:Set if the context uses an address space and should be kept - * scheduled in. - * - * @KCTX_SCHEDULED: Set when the context is scheduled on the Run Pool. - * This is only ever updated whilst the jsctx_mutex is held. - * - * @KCTX_DYING: Set when the context process is in the process of being evicted. - * - * All members need to be separate bits. This enum is intended for use in a - * bitmask where multiple values get OR-ed together. - */ -enum kbase_context_flags { - KCTX_COMPAT = 1U << 0, - KCTX_RUNNABLE_REF = 1U << 1, - KCTX_ACTIVE = 1U << 2, - KCTX_PULLED = 1U << 3, - KCTX_MEM_PROFILE_INITIALIZED = 1U << 4, - KCTX_INFINITE_CACHE = 1U << 5, - KCTX_SUBMIT_DISABLED = 1U << 6, - KCTX_PRIVILEGED = 1U << 7, - KCTX_SCHEDULED = 1U << 8, - KCTX_DYING = 1U << 9, -}; - struct kbase_context { struct file *filp; struct kbase_device *kbdev; @@ -1325,7 +1218,7 @@ struct kbase_context { atomic_t event_count; int event_coalesce_count; - atomic_t flags; + bool is_compat; atomic_t setup_complete; atomic_t setup_in_progress; @@ -1369,11 +1262,12 @@ struct kbase_context { /** This is effectively part of the Run Pool, because it only has a valid * setting (!=KBASEP_AS_NR_INVALID) whilst the context is scheduled in * - * The hwaccess_lock must be held whilst accessing this. + * The kbasep_js_device_data::runpool_irq::lock must be held whilst accessing + * this. * * If the context relating to this as_nr is required, you must use * kbasep_js_runpool_retain_ctx() to ensure that the context doesn't disappear - * whilst you're using it. Alternatively, just hold the hwaccess_lock + * whilst you're using it. Alternatively, just hold the kbasep_js_device_data::runpool_irq::lock * to ensure the context doesn't disappear (but this has restrictions on what other locks * you can take whilst doing this) */ int as_nr; @@ -1399,7 +1293,8 @@ struct kbase_context { size_t mem_profile_size; /* Mutex guarding memory profile state */ struct mutex mem_profile_lock; - /* Memory profile directory under debugfs */ + /* Memory profile file created */ + bool mem_profile_initialized; struct dentry *kctx_dentry; /* for job fault debug */ @@ -1419,6 +1314,15 @@ struct kbase_context { atomic_t atoms_pulled; /* Number of atoms currently pulled from this context, per slot */ atomic_t atoms_pulled_slot[BASE_JM_MAX_NR_SLOTS]; + /* true if last kick() caused atoms to be pulled from this context */ + bool pulled; + /* true if infinite cache is to be enabled for new allocations. Existing + * allocations will not change. 
bool stored as a u32 per Linux API */ +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) + bool infinite_cache_active; +#else + u32 infinite_cache_active; +#endif /* Bitmask of slots that can be pulled from */ u32 slots_pullable; @@ -1432,11 +1336,17 @@ struct kbase_context { struct kbase_vinstr_client *vinstr_cli; struct mutex vinstr_cli_lock; + /* Must hold queue_mutex when accessing */ + bool ctx_active; + /* List of completed jobs waiting for events to be posted */ struct list_head completed_jobs; /* Number of work items currently pending on job_done_wq */ atomic_t work_count; + /* true if context is counted in kbdev->js_data.nr_contexts_runnable */ + bool ctx_runnable_ref; + /* Waiting soft-jobs will fail when this timer expires */ struct timer_list soft_job_timeout; diff --git a/drivers/gpu/arm/midgard/mali_kbase_device.c b/drivers/gpu/arm/midgard/mali_kbase_device.c index 7484eec5108f..62ab0caf9858 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_device.c +++ b/drivers/gpu/arm/midgard/mali_kbase_device.c @@ -82,6 +82,7 @@ static int kbase_device_as_init(struct kbase_device *kbdev, int i) if (!kbdev->as[i].pf_wq) return -EINVAL; + mutex_init(&kbdev->as[i].transaction_mutex); INIT_WORK(&kbdev->as[i].work_pagefault, page_fault_worker); INIT_WORK(&kbdev->as[i].work_busfault, bus_fault_worker); @@ -150,7 +151,6 @@ int kbase_device_init(struct kbase_device * const kbdev) #endif /* CONFIG_ARM64 */ spin_lock_init(&kbdev->mmu_mask_change); - mutex_init(&kbdev->mmu_hw_mutex); #ifdef CONFIG_ARM64 kbdev->cci_snoop_enabled = false; np = kbdev->dev->of_node; diff --git a/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c b/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c index 97bb6c5db76c..4d3836a1d4ab 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c +++ b/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c @@ -38,8 +38,6 @@ /* Spin lock protecting all Mali fences as fence->lock. */ static DEFINE_SPINLOCK(kbase_dma_fence_lock); -static void -kbase_dma_fence_work(struct work_struct *pwork); static void kbase_dma_fence_waiters_add(struct kbase_jd_atom *katom) @@ -169,30 +167,9 @@ kbase_dma_fence_unlock_reservations(struct kbase_dma_fence_resv_info *info, ww_acquire_fini(ctx); } -/** - * kbase_dma_fence_queue_work() - Queue work to handle @katom - * @katom: Pointer to atom for which to queue work - * - * Queue kbase_dma_fence_work() for @katom to clean up the fence callbacks and - * submit the atom. - */ -static void -kbase_dma_fence_queue_work(struct kbase_jd_atom *katom) -{ - struct kbase_context *kctx = katom->kctx; - bool ret; - - INIT_WORK(&katom->work, kbase_dma_fence_work); - ret = queue_work(kctx->dma_fence.wq, &katom->work); - /* Warn if work was already queued, that should not happen. */ - WARN_ON(!ret); -} - /** * kbase_dma_fence_free_callbacks - Free dma-fence callbacks on a katom * @katom: Pointer to katom - * @queue_worker: Boolean indicating if fence worker is to be queued when - * dep_count reaches 0. * * This function will free all fence callbacks on the katom's list of * callbacks. Callbacks that have not yet been called, because their fence @@ -201,7 +178,7 @@ kbase_dma_fence_queue_work(struct kbase_jd_atom *katom) * Locking: katom->dma_fence.callbacks list assumes jctx.lock is held. 
*/ static void -kbase_dma_fence_free_callbacks(struct kbase_jd_atom *katom, bool queue_worker) +kbase_dma_fence_free_callbacks(struct kbase_jd_atom *katom) { struct kbase_dma_fence_cb *cb, *tmp; @@ -214,21 +191,10 @@ kbase_dma_fence_free_callbacks(struct kbase_jd_atom *katom, bool queue_worker) /* Cancel callbacks that hasn't been called yet. */ ret = fence_remove_callback(cb->fence, &cb->fence_cb); if (ret) { - int ret; - /* Fence had not signaled, clean up after * canceling. */ - ret = atomic_dec_return(&katom->dma_fence.dep_count); - - if (unlikely(queue_worker && ret == 0)) { - /* - * dep_count went to zero and queue_worker is - * true. Queue the worker to handle the - * completion of the katom. - */ - kbase_dma_fence_queue_work(katom); - } + atomic_dec(&katom->dma_fence.dep_count); } /* @@ -253,7 +219,7 @@ kbase_dma_fence_cancel_atom(struct kbase_jd_atom *katom) lockdep_assert_held(&katom->kctx->jctx.lock); /* Cancel callbacks and clean up. */ - kbase_dma_fence_free_callbacks(katom, false); + kbase_dma_fence_free_callbacks(katom); KBASE_DEBUG_ASSERT(atomic_read(&katom->dma_fence.dep_count) == 0); @@ -298,15 +264,9 @@ kbase_dma_fence_work(struct work_struct *pwork) /* Remove atom from list of dma-fence waiting atoms. */ kbase_dma_fence_waiters_remove(katom); /* Cleanup callbacks. */ - kbase_dma_fence_free_callbacks(katom, false); - /* - * Queue atom on GPU, unless it has already completed due to a failing - * dependency. Run jd_done_nolock() on the katom if it is completed. - */ - if (unlikely(katom->status == KBASE_JD_ATOM_STATE_COMPLETED)) - jd_done_nolock(katom, NULL); - else - kbase_jd_dep_clear_locked(katom); + kbase_dma_fence_free_callbacks(katom); + /* Queue atom on GPU. */ + kbase_jd_dep_clear_locked(katom); out: mutex_unlock(&ctx->lock); @@ -372,13 +332,20 @@ kbase_dma_fence_cb(struct fence *fence, struct fence_cb *cb) struct kbase_dma_fence_cb, fence_cb); struct kbase_jd_atom *katom = kcb->katom; + struct kbase_context *kctx = katom->kctx; /* If the atom is zapped dep_count will be forced to a negative number * preventing this callback from ever scheduling work. Which in turn * would reschedule the atom. */ - if (atomic_dec_and_test(&katom->dma_fence.dep_count)) - kbase_dma_fence_queue_work(katom); + if (atomic_dec_and_test(&katom->dma_fence.dep_count)) { + bool ret; + + INIT_WORK(&katom->work, kbase_dma_fence_work); + ret = queue_work(kctx->dma_fence.wq, &katom->work); + /* Warn if work was already queued, that should not happen. */ + WARN_ON(!ret); + } } static int @@ -439,7 +406,7 @@ out: * On error, cancel and clean up all callbacks that was set up * before the error. */ - kbase_dma_fence_free_callbacks(katom, false); + kbase_dma_fence_free_callbacks(katom); } return err; @@ -532,7 +499,7 @@ end: /* Test if the callbacks are already triggered */ if (atomic_dec_and_test(&katom->dma_fence.dep_count)) { atomic_set(&katom->dma_fence.dep_count, -1); - kbase_dma_fence_free_callbacks(katom, false); + kbase_dma_fence_free_callbacks(katom); } else { /* Add katom to the list of dma-buf fence waiting atoms * only if it is still waiting. @@ -545,7 +512,7 @@ end: * kill it for us), signal the fence, free callbacks and the * fence. 
*/ - kbase_dma_fence_free_callbacks(katom, false); + kbase_dma_fence_free_callbacks(katom); atomic_set(&katom->dma_fence.dep_count, -1); kbase_dma_fence_signal(katom); } @@ -555,12 +522,10 @@ end: void kbase_dma_fence_cancel_all_atoms(struct kbase_context *kctx) { - struct list_head *list = &kctx->dma_fence.waiting_resource; - - while (!list_empty(list)) { - struct kbase_jd_atom *katom; + struct kbase_jd_atom *katom, *katom_tmp; - katom = list_first_entry(list, struct kbase_jd_atom, queue); + list_for_each_entry_safe(katom, katom_tmp, + &kctx->dma_fence.waiting_resource, queue) { kbase_dma_fence_waiters_remove(katom); kbase_dma_fence_cancel_atom(katom); } @@ -569,7 +534,7 @@ void kbase_dma_fence_cancel_all_atoms(struct kbase_context *kctx) void kbase_dma_fence_cancel_callbacks(struct kbase_jd_atom *katom) { /* Cancel callbacks and clean up. */ - kbase_dma_fence_free_callbacks(katom, true); + kbase_dma_fence_free_callbacks(katom); } void kbase_dma_fence_signal(struct kbase_jd_atom *katom) @@ -584,7 +549,7 @@ void kbase_dma_fence_signal(struct kbase_jd_atom *katom) fence_put(katom->dma_fence.fence); katom->dma_fence.fence = NULL; - kbase_dma_fence_free_callbacks(katom, false); + kbase_dma_fence_free_callbacks(katom); } void kbase_dma_fence_term(struct kbase_context *kctx) diff --git a/drivers/gpu/arm/midgard/mali_kbase_event.c b/drivers/gpu/arm/midgard/mali_kbase_event.c index f07406c3d5ab..bf8c304610eb 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_event.c +++ b/drivers/gpu/arm/midgard/mali_kbase_event.c @@ -180,7 +180,7 @@ void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *atom) kbase_event_process_noreport(ctx, atom); return; } - kbase_tlstream_tl_attrib_atom_state(atom, TL_ATOM_STATE_POSTED); + if (atom->core_req & BASE_JD_REQ_EVENT_COALESCE) { /* Don't report the event until other event(s) have completed */ mutex_lock(&ctx->event_mutex); diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_api.c b/drivers/gpu/arm/midgard/mali_kbase_gator_api.c index 3292fa95fddd..4af3e4815e95 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_gator_api.c +++ b/drivers/gpu/arm/midgard/mali_kbase_gator_api.c @@ -41,6 +41,7 @@ const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_counters) { const char * const *hardware_counters; struct kbase_device *kbdev; + uint32_t gpu_id; uint32_t product_id; uint32_t count; @@ -52,27 +53,25 @@ const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_counters) if (!kbdev) return NULL; - product_id = kbdev->gpu_props.props.core_props.product_id; + gpu_id = kbdev->gpu_props.props.core_props.product_id; + product_id = gpu_id & GPU_ID_VERSION_PRODUCT_ID; + product_id >>= GPU_ID_VERSION_PRODUCT_ID_SHIFT; if (GPU_ID_IS_NEW_FORMAT(product_id)) { - switch (GPU_ID2_MODEL_MATCH_VALUE(product_id)) { + switch (gpu_id & GPU_ID2_PRODUCT_MODEL) { case GPU_ID2_PRODUCT_TMIX: hardware_counters = hardware_counters_mali_tMIx; count = ARRAY_SIZE(hardware_counters_mali_tMIx); break; - case GPU_ID2_PRODUCT_THEX: - hardware_counters = hardware_counters_mali_tHEx; - count = ARRAY_SIZE(hardware_counters_mali_tHEx); - break; default: hardware_counters = NULL; count = 0; - dev_err(kbdev->dev, "Unrecognized product ID: %u\n", - product_id); + dev_err(kbdev->dev, "Unrecognized gpu ID: %u\n", + gpu_id); break; } } else { - switch (product_id) { + switch (gpu_id) { /* If we are using a Mali-T60x device */ case GPU_ID_PI_T60X: hardware_counters = hardware_counters_mali_t60x; @@ -116,8 +115,8 @@ const char * const *kbase_gator_hwcnt_init_names(uint32_t 
*total_counters) default: hardware_counters = NULL; count = 0; - dev_err(kbdev->dev, "Unrecognized product ID: %u\n", - product_id); + dev_err(kbdev->dev, "Unrecognized gpu ID: %u\n", + gpu_id); break; } } diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h index 7ec05c1c7aa3..c247dd698e19 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h +++ b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h @@ -2158,7 +2158,6 @@ static const char * const hardware_counters_mali_t88x[] = { #include "mali_kbase_gator_hwcnt_names_tmix.h" -#include "mali_kbase_gator_hwcnt_names_thex.h" #endif diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_thex.h b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_thex.h deleted file mode 100644 index bcceef4fc9bc..000000000000 --- a/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_thex.h +++ /dev/null @@ -1,291 +0,0 @@ -/* - * - * (C) COPYRIGHT 2016 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * A copy of the licence is included with the program, and can also be obtained - * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. - * - */ - - - -/* - * This header was autogenerated, it should not be edited. - */ - -#ifndef _KBASE_GATOR_HWCNT_NAMES_THEX_H_ -#define _KBASE_GATOR_HWCNT_NAMES_THEX_H_ - -static const char * const hardware_counters_mali_tHEx[] = { - /* Performance counters for the Job Manager */ - "", - "", - "", - "", - "THEx_MESSAGES_SENT", - "THEx_MESSAGES_RECEIVED", - "THEx_GPU_ACTIVE", - "THEx_IRQ_ACTIVE", - "THEx_JS0_JOBS", - "THEx_JS0_TASKS", - "THEx_JS0_ACTIVE", - "", - "THEx_JS0_WAIT_READ", - "THEx_JS0_WAIT_ISSUE", - "THEx_JS0_WAIT_DEPEND", - "THEx_JS0_WAIT_FINISH", - "THEx_JS1_JOBS", - "THEx_JS1_TASKS", - "THEx_JS1_ACTIVE", - "", - "THEx_JS1_WAIT_READ", - "THEx_JS1_WAIT_ISSUE", - "THEx_JS1_WAIT_DEPEND", - "THEx_JS1_WAIT_FINISH", - "THEx_JS2_JOBS", - "THEx_JS2_TASKS", - "THEx_JS2_ACTIVE", - "", - "THEx_JS2_WAIT_READ", - "THEx_JS2_WAIT_ISSUE", - "THEx_JS2_WAIT_DEPEND", - "THEx_JS2_WAIT_FINISH", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - - /* Performance counters for the Tiler */ - "", - "", - "", - "", - "THEx_TILER_ACTIVE", - "THEx_JOBS_PROCESSED", - "THEx_TRIANGLES", - "THEx_LINES", - "THEx_POINTS", - "THEx_FRONT_FACING", - "THEx_BACK_FACING", - "THEx_PRIM_VISIBLE", - "THEx_PRIM_CULLED", - "THEx_PRIM_CLIPPED", - "THEx_PRIM_SAT_CULLED", - "", - "", - "THEx_BUS_READ", - "", - "THEx_BUS_WRITE", - "THEx_LOADING_DESC", - "THEx_IDVS_POS_SHAD_REQ", - "THEx_IDVS_POS_SHAD_WAIT", - "THEx_IDVS_POS_SHAD_STALL", - "THEx_IDVS_POS_FIFO_FULL", - "THEx_PREFETCH_STALL", - "THEx_VCACHE_HIT", - "THEx_VCACHE_MISS", - "THEx_VCACHE_LINE_WAIT", - "THEx_VFETCH_POS_READ_WAIT", - "THEx_VFETCH_VERTEX_WAIT", - "THEx_VFETCH_STALL", - "THEx_PRIMASSY_STALL", - "THEx_BBOX_GEN_STALL", - "THEx_IDVS_VBU_HIT", - "THEx_IDVS_VBU_MISS", - "THEx_IDVS_VBU_LINE_DEALLOCATE", - "THEx_IDVS_VAR_SHAD_REQ", - "THEx_IDVS_VAR_SHAD_STALL", - "THEx_BINNER_STALL", - "THEx_ITER_STALL", - "THEx_COMPRESS_MISS", - "THEx_COMPRESS_STALL", - 
"THEx_PCACHE_HIT", - "THEx_PCACHE_MISS", - "THEx_PCACHE_MISS_STALL", - "THEx_PCACHE_EVICT_STALL", - "THEx_PMGR_PTR_WR_STALL", - "THEx_PMGR_PTR_RD_STALL", - "THEx_PMGR_CMD_WR_STALL", - "THEx_WRBUF_ACTIVE", - "THEx_WRBUF_HIT", - "THEx_WRBUF_MISS", - "THEx_WRBUF_NO_FREE_LINE_STALL", - "THEx_WRBUF_NO_AXI_ID_STALL", - "THEx_WRBUF_AXI_STALL", - "", - "", - "", - "THEx_UTLB_TRANS", - "THEx_UTLB_TRANS_HIT", - "THEx_UTLB_TRANS_STALL", - "THEx_UTLB_TRANS_MISS_DELAY", - "THEx_UTLB_MMU_REQ", - - /* Performance counters for the Shader Core */ - "", - "", - "", - "", - "THEx_FRAG_ACTIVE", - "THEx_FRAG_PRIMITIVES", - "THEx_FRAG_PRIM_RAST", - "THEx_FRAG_FPK_ACTIVE", - "THEx_FRAG_STARVING", - "THEx_FRAG_WARPS", - "THEx_FRAG_PARTIAL_WARPS", - "THEx_FRAG_QUADS_RAST", - "THEx_FRAG_QUADS_EZS_TEST", - "THEx_FRAG_QUADS_EZS_UPDATE", - "THEx_FRAG_QUADS_EZS_KILL", - "THEx_FRAG_LZS_TEST", - "THEx_FRAG_LZS_KILL", - "", - "THEx_FRAG_PTILES", - "THEx_FRAG_TRANS_ELIM", - "THEx_QUAD_FPK_KILLER", - "", - "THEx_COMPUTE_ACTIVE", - "THEx_COMPUTE_TASKS", - "THEx_COMPUTE_WARPS", - "THEx_COMPUTE_STARVING", - "THEx_EXEC_CORE_ACTIVE", - "THEx_EXEC_ACTIVE", - "THEx_EXEC_INSTR_COUNT", - "THEx_EXEC_INSTR_DIVERGED", - "THEx_EXEC_INSTR_STARVING", - "THEx_ARITH_INSTR_SINGLE_FMA", - "THEx_ARITH_INSTR_DOUBLE", - "THEx_ARITH_INSTR_MSG", - "THEx_ARITH_INSTR_MSG_ONLY", - "THEx_TEX_INSTR", - "THEx_TEX_INSTR_MIPMAP", - "THEx_TEX_INSTR_COMPRESSED", - "THEx_TEX_INSTR_3D", - "THEx_TEX_INSTR_TRILINEAR", - "THEx_TEX_COORD_ISSUE", - "THEx_TEX_COORD_STALL", - "THEx_TEX_STARVE_CACHE", - "THEx_TEX_STARVE_FILTER", - "THEx_LS_MEM_READ_FULL", - "THEx_LS_MEM_READ_SHORT", - "THEx_LS_MEM_WRITE_FULL", - "THEx_LS_MEM_WRITE_SHORT", - "THEx_LS_MEM_ATOMIC", - "THEx_VARY_INSTR", - "THEx_VARY_SLOT_32", - "THEx_VARY_SLOT_16", - "THEx_ATTR_INSTR", - "THEx_ARITH_INSTR_FP_MUL", - "THEx_BEATS_RD_FTC", - "THEx_BEATS_RD_FTC_EXT", - "THEx_BEATS_RD_LSC", - "THEx_BEATS_RD_LSC_EXT", - "THEx_BEATS_RD_TEX", - "THEx_BEATS_RD_TEX_EXT", - "THEx_BEATS_RD_OTHER", - "THEx_BEATS_WR_LSC", - "THEx_BEATS_WR_TIB", - "", - - /* Performance counters for the Memory System */ - "", - "", - "", - "", - "THEx_MMU_REQUESTS", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "THEx_L2_RD_MSG_IN", - "THEx_L2_RD_MSG_IN_STALL", - "THEx_L2_WR_MSG_IN", - "THEx_L2_WR_MSG_IN_STALL", - "THEx_L2_SNP_MSG_IN", - "THEx_L2_SNP_MSG_IN_STALL", - "THEx_L2_RD_MSG_OUT", - "THEx_L2_RD_MSG_OUT_STALL", - "THEx_L2_WR_MSG_OUT", - "THEx_L2_ANY_LOOKUP", - "THEx_L2_READ_LOOKUP", - "THEx_L2_WRITE_LOOKUP", - "THEx_L2_EXT_SNOOP_LOOKUP", - "THEx_L2_EXT_READ", - "THEx_L2_EXT_READ_NOSNP", - "THEx_L2_EXT_READ_UNIQUE", - "THEx_L2_EXT_READ_BEATS", - "THEx_L2_EXT_AR_STALL", - "THEx_L2_EXT_AR_CNT_Q1", - "THEx_L2_EXT_AR_CNT_Q2", - "THEx_L2_EXT_AR_CNT_Q3", - "THEx_L2_EXT_RRESP_0_127", - "THEx_L2_EXT_RRESP_128_191", - "THEx_L2_EXT_RRESP_192_255", - "THEx_L2_EXT_RRESP_256_319", - "THEx_L2_EXT_RRESP_320_383", - "THEx_L2_EXT_WRITE", - "THEx_L2_EXT_WRITE_NOSNP_FULL", - "THEx_L2_EXT_WRITE_NOSNP_PTL", - "THEx_L2_EXT_WRITE_SNP_FULL", - "THEx_L2_EXT_WRITE_SNP_PTL", - "THEx_L2_EXT_WRITE_BEATS", - "THEx_L2_EXT_W_STALL", - "THEx_L2_EXT_AW_CNT_Q1", - "THEx_L2_EXT_AW_CNT_Q2", - "THEx_L2_EXT_AW_CNT_Q3", - "THEx_L2_EXT_SNOOP", - "THEx_L2_EXT_SNOOP_STALL", - "THEx_L2_EXT_SNOOP_RESP_CLEAN", - "THEx_L2_EXT_SNOOP_RESP_DATA", - "THEx_L2_EXT_SNOOP_INTERNAL", - "", - "", - "", - "", - "", - "", - "", -}; - -#endif /* _KBASE_GATOR_HWCNT_NAMES_THEX_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h 
b/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h index a3377b27d487..a962ecb3f9c6 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h +++ b/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h @@ -96,7 +96,6 @@ GPU_ID2_PRODUCT_MODEL) #define GPU_ID2_PRODUCT_TMIX GPU_ID2_MODEL_MAKE(6, 0) -#define GPU_ID2_PRODUCT_THEX GPU_ID2_MODEL_MAKE(6, 1) /* Values for GPU_ID_VERSION_STATUS field for PRODUCT_ID GPU_ID_PI_T60X */ #define GPU_ID_S_15DEV0 0x1 diff --git a/drivers/gpu/arm/midgard/mali_kbase_hw.c b/drivers/gpu/arm/midgard/mali_kbase_hw.c index 1d7e5e9b2c18..de2461fb8de4 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_hw.c +++ b/drivers/gpu/arm/midgard/mali_kbase_hw.c @@ -42,9 +42,6 @@ void kbase_hw_set_features_mask(struct kbase_device *kbdev) case GPU_ID2_PRODUCT_TMIX: features = base_hw_features_tMIx; break; - case GPU_ID2_PRODUCT_THEX: - features = base_hw_features_tHEx; - break; default: features = base_hw_features_generic; break; @@ -109,9 +106,6 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev) if ((gpu_id & GPU_ID2_PRODUCT_MODEL) == GPU_ID2_PRODUCT_TMIX) { issues = base_hw_issues_tMIx_r0p0; - } else if ((gpu_id & GPU_ID2_PRODUCT_MODEL) == - GPU_ID2_PRODUCT_THEX) { - issues = base_hw_issues_tHEx_r0p0; } else { dev_err(kbdev->dev, "Unknown GPU ID %x", gpu_id); @@ -221,9 +215,6 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev) case GPU_ID2_PRODUCT_TMIX: issues = base_hw_issues_model_tMIx; break; - case GPU_ID2_PRODUCT_THEX: - issues = base_hw_issues_model_tHEx; - break; default: dev_err(kbdev->dev, "Unknown GPU ID %x", gpu_id); diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h index 0acf297192fd..261453e8f1ac 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h +++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014, 2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -26,7 +26,8 @@ #include -/* The hwaccess_lock (a spinlock) must be held when accessing this structure */ +/* The kbasep_js_device_data::runpool_irq::lock (a spinlock) must be held when + * accessing this structure */ struct kbase_hwaccess_data { struct kbase_context *active_kctx; diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h index c2c3909caa7b..abe66078029f 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h +++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h @@ -33,17 +33,6 @@ void kbase_backend_run_atom(struct kbase_device *kbdev, struct kbase_jd_atom *katom); -/** - * kbase_backend_slot_update - Update state based on slot ringbuffers - * - * @kbdev: Device pointer - * - * Inspect the jobs in the slot ringbuffers and update state. - * - * This will cause jobs to be submitted to hardware if they are unblocked - */ -void kbase_backend_slot_update(struct kbase_device *kbdev); - /** * kbase_backend_find_free_address_space() - Find a free address space. * @kbdev: Device pointer @@ -99,7 +88,7 @@ bool kbase_backend_use_ctx(struct kbase_device *kbdev, * the context is not scheduled, then kbase_gpu_use_ctx() should be used * instead. 
* - * Caller must hold hwaccess_lock + * Caller must hold runpool_irq.lock * * Return: true if context is now active, false otherwise (ie if context does * not have an address space assigned) @@ -113,7 +102,7 @@ bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev, * @kbdev: Device pointer * @kctx: Context pointer * - * Caller must hold kbase_device->mmu_hw_mutex and hwaccess_lock + * Caller must hold as->transaction_mutex and runpool_irq.lock */ void kbase_backend_release_ctx_irq(struct kbase_device *kbdev, struct kbase_context *kctx); @@ -124,7 +113,7 @@ void kbase_backend_release_ctx_irq(struct kbase_device *kbdev, * @kbdev: Device pointer * @kctx: Context pointer * - * Caller must hold kbase_device->mmu_hw_mutex + * Caller must hold as->transaction_mutex * * This function must perform any operations that could not be performed in IRQ * context by kbase_backend_release_ctx_irq(). diff --git a/drivers/gpu/arm/midgard/mali_kbase_jd.c b/drivers/gpu/arm/midgard/mali_kbase_jd.c index 81952e2d146f..3e0a5892cc7a 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_jd.c +++ b/drivers/gpu/arm/midgard/mali_kbase_jd.c @@ -61,7 +61,7 @@ static void __user * get_compat_pointer(struct kbase_context *kctx, const union kbase_pointer *p) { #ifdef CONFIG_COMPAT - if (kbase_ctx_flag(kctx, KCTX_COMPAT)) + if (kctx->is_compat) return compat_ptr(p->compat_value); #endif return p->value; @@ -581,20 +581,24 @@ static inline void jd_resolve_dep(struct list_head *out_list, dep_count = atomic_read(&dep_atom->dma_fence.dep_count); if (likely(dep_count == -1)) { dep_satisfied = true; - } else { + } else if (dep_count == 0) { /* - * There are either still active callbacks, or - * all fences for this @dep_atom has signaled, - * but the worker that will queue the atom has - * not yet run. + * All fences for this atom has signaled, but + * the worker that will queue the atom has not + * yet run. * - * Wait for the fences to signal and the fence - * worker to run and handle @dep_atom. If - * @dep_atom was completed due to error on - * @katom, then the fence worker will pick up - * the complete status and error code set on - * @dep_atom above. + * Mark the atom as handled by setting + * dep_count to -1 so that the worker doesn't + * queue the atom again. + */ + atomic_set(&dep_atom->dma_fence.dep_count, -1); + /* + * Remove the atom from the list of dma-fence + * waiting atoms. 
*/ + kbase_dma_fence_waiters_remove(dep_atom); + dep_satisfied = true; + } else { dep_satisfied = false; } #endif /* CONFIG_MALI_DMA_FENCE */ @@ -661,40 +665,6 @@ static void jd_check_force_failure(struct kbase_jd_atom *katom) } #endif -/** - * is_dep_valid - Validate that a dependency is valid for early dependency - * submission - * @katom: Dependency atom to validate - * - * A dependency is valid if any of the following are true : - * - It does not exist (a non-existent dependency does not block submission) - * - It is in the job scheduler - * - It has completed, does not have a failure event code, and has not been - * marked to fail in the future - * - * Return: true if valid, false otherwise - */ -static bool is_dep_valid(struct kbase_jd_atom *katom) -{ - /* If there's no dependency then this is 'valid' from the perspective of - * early dependency submission */ - if (!katom) - return true; - - /* Dependency must have reached the job scheduler */ - if (katom->status < KBASE_JD_ATOM_STATE_IN_JS) - return false; - - /* If dependency has completed and has failed or will fail then it is - * not valid */ - if (katom->status >= KBASE_JD_ATOM_STATE_HW_COMPLETED && - (katom->event_code != BASE_JD_EVENT_DONE || - katom->will_fail_event_code)) - return false; - - return true; -} - static void jd_try_submitting_deps(struct list_head *out_list, struct kbase_jd_atom *node) { @@ -709,41 +679,14 @@ static void jd_try_submitting_deps(struct list_head *out_list, if (IS_GPU_ATOM(dep_atom) && !dep_atom->in_jd_list) { /*Check if atom deps look sane*/ - bool dep0_valid = is_dep_valid( - dep_atom->dep[0].atom); - bool dep1_valid = is_dep_valid( - dep_atom->dep[1].atom); - bool dep_satisfied = true; -#ifdef CONFIG_MALI_DMA_FENCE - int dep_count; - - dep_count = atomic_read( - &dep_atom->dma_fence.dep_count); - if (likely(dep_count == -1)) { - dep_satisfied = true; - } else { - /* - * There are either still active callbacks, or - * all fences for this @dep_atom has signaled, - * but the worker that will queue the atom has - * not yet run. - * - * Wait for the fences to signal and the fence - * worker to run and handle @dep_atom. If - * @dep_atom was completed due to error on - * @katom, then the fence worker will pick up - * the complete status and error code set on - * @dep_atom above. 
- */ - dep_satisfied = false; - } -#endif /* CONFIG_MALI_DMA_FENCE */ -#ifdef CONFIG_KDS - dep_satisfied = dep_satisfied && - dep_atom->kds_dep_satisfied; -#endif - - if (dep0_valid && dep1_valid && dep_satisfied) { + bool dep0_valid = !dep_atom->dep[0].atom || + (dep_atom->dep[0].atom->status + >= KBASE_JD_ATOM_STATE_IN_JS); + bool dep1_valid = !dep_atom->dep[1].atom || + (dep_atom->dep[1].atom->status + >= KBASE_JD_ATOM_STATE_IN_JS); + + if (dep0_valid && dep1_valid) { dep_atom->in_jd_list = true; list_add(&dep_atom->jd_item, out_list); } @@ -815,7 +758,7 @@ bool jd_done_nolock(struct kbase_jd_atom *katom, for (i = 0; i < 2; i++) jd_resolve_dep(&runnable_jobs, katom, i, - kbase_ctx_flag(kctx, KCTX_DYING)); + kctx->jctx.sched_info.ctx.is_dying); if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) kbase_jd_post_external_resources(katom); @@ -831,7 +774,7 @@ bool jd_done_nolock(struct kbase_jd_atom *katom, KBASE_DEBUG_ASSERT(node->status != KBASE_JD_ATOM_STATE_UNUSED); if (node->status != KBASE_JD_ATOM_STATE_COMPLETED && - !kbase_ctx_flag(kctx, KCTX_DYING)) { + !kctx->jctx.sched_info.ctx.is_dying) { need_to_try_schedule_context |= jd_run_atom(node); } else { node->event_code = katom->event_code; @@ -976,10 +919,7 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us katom->x_pre_dep = NULL; katom->x_post_dep = NULL; katom->will_fail_event_code = BASE_JD_EVENT_NOT_STARTED; - - /* Implicitly sets katom->protected_state.enter as well. */ - katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK; - + katom->exit_protected_state = KBASE_ATOM_EXIT_PROTECTED_CHECK; katom->age = kctx->age_count++; INIT_LIST_HEAD(&katom->jd_item); @@ -993,8 +933,6 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us atomic_set(&katom->dma_fence.dep_count, -1); #endif - kbase_tlstream_tl_attrib_atom_state(katom, TL_ATOM_STATE_IDLE); - /* Don't do anything if there is a mess up with dependencies. This is done in a separate cycle to check both the dependencies at ones, otherwise it will be extra complexity to deal with 1st dependency ( just added to the list ) @@ -1099,17 +1037,10 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us katom->status = KBASE_JD_ATOM_STATE_QUEUED; } - /* For invalid priority, be most lenient and choose the default */ - sched_prio = kbasep_js_atom_prio_to_sched_prio(user_atom->prio); - if (sched_prio == KBASE_JS_ATOM_SCHED_PRIO_INVALID) - sched_prio = KBASE_JS_ATOM_SCHED_PRIO_DEFAULT; - katom->sched_priority = sched_prio; - /* Create a new atom recording all dependencies it was set up with. 
*/ kbase_tlstream_tl_new_atom( katom, kbase_jd_atom_id(kctx, katom)); - kbase_tlstream_tl_attrib_atom_priority(katom, katom->sched_priority); kbase_tlstream_tl_ret_atom_ctx(katom, kctx); for (i = 0; i < 2; i++) if (BASE_JD_DEP_TYPE_INVALID != kbase_jd_katom_dep_type( @@ -1161,6 +1092,12 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us goto out; } + /* For invalid priority, be most lenient and choose the default */ + sched_prio = kbasep_js_atom_prio_to_sched_prio(user_atom->prio); + if (sched_prio == KBASE_JS_ATOM_SCHED_PRIO_INVALID) + sched_prio = KBASE_JS_ATOM_SCHED_PRIO_DEFAULT; + katom->sched_priority = sched_prio; + if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) { /* handle what we need to do to access the external resources */ if (kbase_jd_pre_external_resources(katom, user_atom) != 0) { @@ -1275,7 +1212,7 @@ int kbase_jd_submit(struct kbase_context *kctx, beenthere(kctx, "%s", "Enter"); - if (kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) { + if ((kctx->jctx.sched_info.ctx.flags & KBASE_CTX_FLAG_SUBMIT_DISABLED) != 0) { dev_err(kbdev->dev, "Attempt to submit to a context that has SUBMIT_DISABLED set on it"); return -EINVAL; } @@ -1437,6 +1374,7 @@ void kbase_jd_done_worker(struct work_struct *data) struct kbasep_js_device_data *js_devdata; u64 cache_jc = katom->jc; struct kbasep_js_atom_retained_state katom_retained_state; + bool schedule = false; bool context_idle; base_jd_core_req core_req = katom->core_req; u64 affinity = katom->affinity; @@ -1459,7 +1397,6 @@ void kbase_jd_done_worker(struct work_struct *data) * Begin transaction on JD context and JS context */ mutex_lock(&jctx->lock); - kbase_tlstream_tl_attrib_atom_state(katom, TL_ATOM_STATE_DONE); mutex_lock(&js_devdata->queue_mutex); mutex_lock(&js_kctx_info->ctx.jsctx_mutex); @@ -1467,7 +1404,7 @@ void kbase_jd_done_worker(struct work_struct *data) * because it only happens in response to an IRQ from a job that was * running. */ - KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled); if (katom->event_code == BASE_JD_EVENT_STOPPED) { /* Atom has been promoted to stopped */ @@ -1476,12 +1413,12 @@ void kbase_jd_done_worker(struct work_struct *data) mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); mutex_unlock(&js_devdata->queue_mutex); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); katom->status = KBASE_JD_ATOM_STATE_IN_JS; kbase_js_unpull(kctx, katom); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); mutex_unlock(&jctx->lock); return; @@ -1499,6 +1436,19 @@ void kbase_jd_done_worker(struct work_struct *data) /* Retain state before the katom disappears */ kbasep_js_atom_retained_state_copy(&katom_retained_state, katom); + if (!kbasep_js_has_atom_finished(&katom_retained_state)) { + mutex_lock(&js_devdata->runpool_mutex); + kbasep_js_clear_job_retry_submit(katom); + /* An atom that has been hard-stopped might have previously + * been soft-stopped and has just finished before the hard-stop + * occurred. 
For this reason, clear the hard-stopped flag */ + katom->atom_flags &= ~(KBASE_KATOM_FLAG_BEEN_HARD_STOPPED); + mutex_unlock(&js_devdata->runpool_mutex); + } + + if (kbasep_js_has_atom_finished(&katom_retained_state)) + schedule = true; + context_idle = kbase_js_complete_atom_wq(kctx, katom); KBASE_DEBUG_ASSERT(kbasep_js_has_atom_finished(&katom_retained_state)); @@ -1508,53 +1458,54 @@ void kbase_jd_done_worker(struct work_struct *data) mutex_unlock(&js_devdata->queue_mutex); katom->atom_flags &= ~KBASE_KATOM_FLAG_HOLDING_CTX_REF; /* jd_done_nolock() requires the jsctx_mutex lock to be dropped */ - jd_done_nolock(katom, &kctx->completed_jobs); + schedule |= jd_done_nolock(katom, &kctx->completed_jobs); /* katom may have been freed now, do not use! */ if (context_idle) { unsigned long flags; - context_idle = false; mutex_lock(&js_devdata->queue_mutex); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); /* If kbase_sched() has scheduled this context back in then - * KCTX_ACTIVE will have been set after we marked it as - * inactive, and another pm reference will have been taken, so - * drop our reference. But do not call kbase_jm_idle_ctx(), as - * the context is active and fast-starting is allowed. + * ctx_active will have been set after we marked it as inactive, + * and another pm reference will have been taken, so drop our + * reference. But do not call kbase_jm_idle_ctx(), as the + * context is active and fast-starting is allowed. * * If an atom has been fast-started then kctx->atoms_pulled will - * be non-zero but KCTX_ACTIVE will still be false (as the + * be non-zero but ctx_active will still be false (as the * previous pm reference has been inherited). Do NOT drop our * reference, as it has been re-used, and leave the context as * active. * - * If no new atoms have been started then KCTX_ACTIVE will still + * If no new atoms have been started then ctx_active will still * be false and atoms_pulled will be zero, so drop the reference * and call kbase_jm_idle_ctx(). * * As the checks are done under both the queue_mutex and - * hwaccess_lock is should be impossible for this to race + * runpool_irq.lock is should be impossible for this to race * with the scheduler code. */ - if (kbase_ctx_flag(kctx, KCTX_ACTIVE) || - !atomic_read(&kctx->atoms_pulled)) { + if (kctx->ctx_active || !atomic_read(&kctx->atoms_pulled)) { /* Calling kbase_jm_idle_ctx() here will ensure that * atoms are not fast-started when we drop the - * hwaccess_lock. This is not performed if - * KCTX_ACTIVE is set as in that case another pm - * reference has been taken and a fast-start would be - * valid. + * runpool_irq.lock. This is not performed if ctx_active + * is set as in that case another pm reference has been + * taken and a fast-start would be valid. 
*/ - if (!kbase_ctx_flag(kctx, KCTX_ACTIVE)) + if (!kctx->ctx_active) kbase_jm_idle_ctx(kbdev, kctx); - context_idle = true; + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, + flags); + + kbase_pm_context_idle(kbdev); } else { - kbase_ctx_flag_set(kctx, KCTX_ACTIVE); + kctx->ctx_active = true; + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, + flags); } - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); mutex_unlock(&js_devdata->queue_mutex); } @@ -1568,7 +1519,8 @@ void kbase_jd_done_worker(struct work_struct *data) kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx, &katom_retained_state); - kbase_js_sched_all(kbdev); + if (schedule) + kbase_js_sched_all(kbdev); if (!atomic_dec_return(&kctx->work_count)) { /* If worker now idle then post all events that jd_done_nolock() @@ -1588,9 +1540,6 @@ void kbase_jd_done_worker(struct work_struct *data) kbase_backend_complete_wq_post_sched(kbdev, core_req, affinity, coreref_state); - if (context_idle) - kbase_pm_context_idle(kbdev); - KBASE_TRACE_ADD(kbdev, JD_DONE_WORKER_END, kctx, NULL, cache_jc, 0); } @@ -1633,7 +1582,7 @@ static void jd_cancel_worker(struct work_struct *data) * any), nor must we try to schedule out the context (it's already * scheduled out). */ - KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + KBASE_DEBUG_ASSERT(!js_kctx_info->ctx.is_scheduled); /* Scheduler: Remove the job from the system */ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); @@ -1671,7 +1620,7 @@ static void jd_cancel_worker(struct work_struct *data) * * Context: * This can be called safely from atomic context. - * The caller must hold kbdev->hwaccess_lock + * The caller must hold kbasep_js_device_data.runpool_irq.lock */ void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr, ktime_t *end_timestamp, kbasep_js_atom_done_code done_code) @@ -1726,7 +1675,7 @@ void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom) KBASE_TRACE_ADD(kbdev, JD_CANCEL, kctx, katom, katom->jc, 0); /* This should only be done from a context that is not scheduled */ - KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + KBASE_DEBUG_ASSERT(!js_kctx_info->ctx.is_scheduled); WARN_ON(work_pending(&katom->work)); diff --git a/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c b/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c index 6437e4212a29..0cf75f59c282 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c +++ b/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -57,7 +57,7 @@ static int kbasep_jd_debugfs_atoms_show(struct seq_file *sfile, void *data) /* General atom states */ mutex_lock(&kctx->jctx.lock); /* JS-related states */ - spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, irq_flags); + spin_lock_irqsave(&kctx->kbdev->js_data.runpool_irq.lock, irq_flags); for (i = 0; i != BASE_JD_ATOM_COUNT; ++i) { struct kbase_jd_atom *atom = &atoms[i]; s64 start_timestamp = 0; @@ -84,7 +84,7 @@ static int kbasep_jd_debugfs_atoms_show(struct seq_file *sfile, void *data) atom->time_spent_us * 1000 : start_timestamp) ); } - spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, irq_flags); + spin_unlock_irqrestore(&kctx->kbdev->js_data.runpool_irq.lock, irq_flags); mutex_unlock(&kctx->jctx.lock); return 0; @@ -110,7 +110,7 @@ static const struct file_operations kbasep_jd_debugfs_atoms_fops = { .release = single_release, }; -void kbasep_jd_debugfs_ctx_init(struct kbase_context *kctx) +void kbasep_jd_debugfs_ctx_add(struct kbase_context *kctx) { KBASE_DEBUG_ASSERT(kctx != NULL); diff --git a/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h b/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h index 090f81651eb5..bc1878f60e8e 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h +++ b/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h @@ -30,10 +30,10 @@ #define MALI_JD_DEBUGFS_VERSION 1 /** - * kbasep_jd_debugfs_ctx_init() - Add debugfs entries for JD system + * kbasep_jd_debugfs_ctx_add() - Add debugfs entries for JD system * * @kctx Pointer to kbase_context */ -void kbasep_jd_debugfs_ctx_init(struct kbase_context *kctx); +void kbasep_jd_debugfs_ctx_add(struct kbase_context *kctx); #endif /*_KBASE_JD_DEBUGFS_H*/ diff --git a/drivers/gpu/arm/midgard/mali_kbase_jm.c b/drivers/gpu/arm/midgard/mali_kbase_jm.c index 0c5c6a6f78cb..63425322452b 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_jm.c +++ b/drivers/gpu/arm/midgard/mali_kbase_jm.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -60,7 +60,7 @@ u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask) { u32 ret_mask = 0; - lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); while (js_mask) { int js = ffs(js_mask) - 1; @@ -79,7 +79,7 @@ void kbase_jm_try_kick(struct kbase_device *kbdev, u32 js_mask) { struct kbasep_js_device_data *js_devdata = &kbdev->js_data; - lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&js_devdata->runpool_irq.lock); if (!down_trylock(&js_devdata->schedule_sem)) { kbase_jm_kick(kbdev, js_mask); @@ -91,7 +91,7 @@ void kbase_jm_try_kick_all(struct kbase_device *kbdev) { struct kbasep_js_device_data *js_devdata = &kbdev->js_data; - lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&js_devdata->runpool_irq.lock); if (!down_trylock(&js_devdata->schedule_sem)) { kbase_jm_kick_all(kbdev); @@ -101,31 +101,30 @@ void kbase_jm_try_kick_all(struct kbase_device *kbdev) void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx) { - lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); if (kbdev->hwaccess.active_kctx == kctx) kbdev->hwaccess.active_kctx = NULL; } -struct kbase_jd_atom *kbase_jm_return_atom_to_js(struct kbase_device *kbdev, +void kbase_jm_return_atom_to_js(struct kbase_device *kbdev, struct kbase_jd_atom *katom) { - lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); if (katom->event_code != BASE_JD_EVENT_STOPPED && katom->event_code != BASE_JD_EVENT_REMOVED_FROM_NEXT) { - return kbase_js_complete_atom(katom, NULL); + kbase_js_complete_atom(katom, NULL); } else { kbase_js_unpull(katom->kctx, katom); - return NULL; } } -struct kbase_jd_atom *kbase_jm_complete(struct kbase_device *kbdev, - struct kbase_jd_atom *katom, ktime_t *end_timestamp) +void kbase_jm_complete(struct kbase_device *kbdev, struct kbase_jd_atom *katom, + ktime_t *end_timestamp) { - lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); - return kbase_js_complete_atom(katom, end_timestamp); + kbase_js_complete_atom(katom, end_timestamp); } diff --git a/drivers/gpu/arm/midgard/mali_kbase_jm.h b/drivers/gpu/arm/midgard/mali_kbase_jm.h index a74ee24c8058..27aca3a699f4 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_jm.h +++ b/drivers/gpu/arm/midgard/mali_kbase_jm.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014, 2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -28,7 +28,7 @@ * @kbdev: Device pointer * @js_mask: Mask of the job slots that can be pulled from. * - * Caller must hold the hwaccess_lock and schedule_sem semaphore + * Caller must hold the runpool_irq lock and schedule_sem semaphore * * Return: Mask of the job slots that can still be submitted to. */ @@ -39,7 +39,7 @@ u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask); * slots. * @kbdev: Device pointer * - * Caller must hold the hwaccess_lock and schedule_sem semaphore + * Caller must hold the runpool_irq lock and schedule_sem semaphore * * Return: Mask of the job slots that can still be submitted to. 
*/ @@ -52,7 +52,7 @@ static inline u32 kbase_jm_kick_all(struct kbase_device *kbdev) * kbase_jm_try_kick - Attempt to call kbase_jm_kick * @kbdev: Device pointer * @js_mask: Mask of the job slots that can be pulled from - * Context: Caller must hold hwaccess_lock + * Context: Caller must hold runpool_irq lock * * If schedule_sem can be immediately obtained then this function will call * kbase_jm_kick() otherwise it will do nothing. @@ -62,7 +62,7 @@ void kbase_jm_try_kick(struct kbase_device *kbdev, u32 js_mask); /** * kbase_jm_try_kick_all() - Attempt to call kbase_jm_kick_all * @kbdev: Device pointer - * Context: Caller must hold hwaccess_lock + * Context: Caller must hold runpool_irq lock * * If schedule_sem can be immediately obtained then this function will call * kbase_jm_kick_all() otherwise it will do nothing. @@ -80,7 +80,7 @@ void kbase_jm_try_kick_all(struct kbase_device *kbdev); * The context should have no atoms currently pulled from it * (kctx->atoms_pulled == 0). * - * Caller must hold the hwaccess_lock + * Caller must hold the runpool_irq lock */ void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx); @@ -90,21 +90,17 @@ void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx); * dependency * @kbdev: Device pointer * @katom: Atom that has been stopped or will be failed - * - * Return: Atom that has now been unblocked and can now be run, or NULL if none */ -struct kbase_jd_atom *kbase_jm_return_atom_to_js(struct kbase_device *kbdev, - struct kbase_jd_atom *katom); +void kbase_jm_return_atom_to_js(struct kbase_device *kbdev, + struct kbase_jd_atom *katom); /** * kbase_jm_complete() - Complete an atom * @kbdev: Device pointer * @katom: Atom that has completed * @end_timestamp: Timestamp of atom completion - * - * Return: Atom that has now been unblocked and can now be run, or NULL if none */ -struct kbase_jd_atom *kbase_jm_complete(struct kbase_device *kbdev, - struct kbase_jd_atom *katom, ktime_t *end_timestamp); +void kbase_jm_complete(struct kbase_device *kbdev, struct kbase_jd_atom *katom, + ktime_t *end_timestamp); #endif /* _KBASE_JM_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_js.c b/drivers/gpu/arm/midgard/mali_kbase_js.c index 60a7373601c0..c591ebbcd861 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_js.c +++ b/drivers/gpu/arm/midgard/mali_kbase_js.c @@ -89,7 +89,7 @@ static int kbasep_js_trace_get_refcnt(struct kbase_device *kbdev, js_devdata = &kbdev->js_data; - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); as_nr = kctx->as_nr; if (as_nr != KBASEP_AS_NR_INVALID) { struct kbasep_js_per_as_data *js_per_as_data; @@ -98,7 +98,7 @@ static int kbasep_js_trace_get_refcnt(struct kbase_device *kbdev, refcnt = js_per_as_data->as_busy_refcount; } - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); return refcnt; } @@ -198,7 +198,7 @@ static void kbase_js_sync_timers(struct kbase_device *kbdev) mutex_unlock(&kbdev->js_data.runpool_mutex); } -/* Hold the hwaccess_lock for this */ +/* Hold the kbasep_js_device_data::runpool_irq::lock for this */ bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev, struct kbase_context *kctx) { @@ -248,7 +248,7 @@ jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, int js, int prio) { struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; - lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + 
lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); return RB_EMPTY_ROOT(&rb->runnable_tree); } @@ -259,7 +259,7 @@ jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, int js, int prio) * @kctx: Pointer to kbase context with ring buffer. * @js: Job slot id to check. * - * Caller must hold hwaccess_lock + * Caller must hold runpool_irq.lock * * Return: true if the ring buffers for all priorities have no pullable atoms, * false otherwise. @@ -269,7 +269,7 @@ jsctx_rb_none_to_pull(struct kbase_context *kctx, int js) { int prio; - lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { if (!jsctx_rb_none_to_pull_prio(kctx, js, prio)) @@ -294,7 +294,8 @@ jsctx_rb_none_to_pull(struct kbase_context *kctx, int js) * enumerated when this function returns jsctx->lock must be held when calling * this function. * - * The HW access lock must always be held when calling this function. + * The HW access lock, js_data.runpool_irq.lock, must always be held when + * calling this function. */ static void jsctx_queue_foreach_prio(struct kbase_context *kctx, int js, int prio, @@ -302,7 +303,7 @@ jsctx_queue_foreach_prio(struct kbase_context *kctx, int js, int prio, { struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js]; - lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); while (!RB_EMPTY_ROOT(&queue->runnable_tree)) { struct rb_node *node = rb_first(&queue->runnable_tree); @@ -360,7 +361,7 @@ jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio) struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; struct rb_node *node; - lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); node = rb_first(&rb->runnable_tree); if (!node) @@ -378,7 +379,7 @@ jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio) * KBASE_JS_ATOM_SCHED_PRIO_HIGH, for the specified @js and @prio and return a * pointer to the next atom, unless all the priority's ring buffers are empty. * - * Caller must hold the hwaccess_lock. + * Caller must hold the runpool_irq.lock. * * Return: Pointer to next atom in buffer, or NULL if there is no atom. */ @@ -387,7 +388,7 @@ jsctx_rb_peek(struct kbase_context *kctx, int js) { int prio; - lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { struct kbase_jd_atom *katom; @@ -416,7 +417,7 @@ jsctx_rb_pull(struct kbase_context *kctx, struct kbase_jd_atom *katom) int js = katom->slot_nr; struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; - lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); /* Atoms must be pulled in the correct order. 
*/ WARN_ON(katom != jsctx_rb_peek_prio(kctx, js, prio)); @@ -434,7 +435,7 @@ jsctx_tree_add(struct kbase_context *kctx, struct kbase_jd_atom *katom) struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js]; struct rb_node **new = &(queue->runnable_tree.rb_node), *parent = NULL; - lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); while (*new) { struct kbase_jd_atom *entry = container_of(*new, @@ -465,7 +466,7 @@ jsctx_tree_add(struct kbase_context *kctx, struct kbase_jd_atom *katom) static inline void jsctx_rb_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom) { - lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); jsctx_tree_add(kctx, katom); } @@ -629,7 +630,7 @@ int kbasep_js_devdata_init(struct kbase_device * const kbdev) mutex_init(&jsdd->runpool_mutex); mutex_init(&jsdd->queue_mutex); - spin_lock_init(&kbdev->hwaccess_lock); + spin_lock_init(&jsdd->runpool_irq.lock); sema_init(&jsdd->schedule_sem, 1); err = kbasep_js_policy_init(kbdev); @@ -698,14 +699,14 @@ int kbasep_js_kctx_init(struct kbase_context * const kctx) KBASE_DEBUG_ASSERT(js_kctx_info->init_status == JS_KCTX_INIT_NONE); js_kctx_info->ctx.nr_jobs = 0; - kbase_ctx_flag_clear(kctx, KCTX_SCHEDULED); - kbase_ctx_flag_clear(kctx, KCTX_DYING); + js_kctx_info->ctx.is_scheduled = false; + js_kctx_info->ctx.is_dying = false; memset(js_kctx_info->ctx.ctx_attr_ref_count, 0, sizeof(js_kctx_info->ctx.ctx_attr_ref_count)); /* Initially, the context is disabled from submission until the create * flags are set */ - kbase_ctx_flag_set(kctx, KCTX_SUBMIT_DISABLED); + js_kctx_info->ctx.flags = KBASE_CTX_FLAG_SUBMIT_DISABLED; js_kctx_info->init_status |= JS_KCTX_INIT_CONSTANTS; @@ -752,7 +753,7 @@ void kbasep_js_kctx_term(struct kbase_context *kctx) if ((js_kctx_info->init_status & JS_KCTX_INIT_CONSTANTS)) { /* The caller must de-register all jobs before calling this */ - KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + KBASE_DEBUG_ASSERT(!js_kctx_info->ctx.is_scheduled); KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs == 0); } @@ -762,11 +763,11 @@ void kbasep_js_kctx_term(struct kbase_context *kctx) for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); - if (kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)) { + if (kctx->ctx_runnable_ref) { WARN_ON(atomic_read(&kbdev->js_data.nr_contexts_runnable) <= 0); atomic_dec(&kbdev->js_data.nr_contexts_runnable); update_ctx_count = true; - kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF); + kctx->ctx_runnable_ref = false; } mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex); @@ -788,12 +789,12 @@ void kbasep_js_kctx_term(struct kbase_context *kctx) * kbase_js_ctx_list_add_pullable_nolock - Variant of * kbase_jd_ctx_list_add_pullable() * where the caller must hold - * hwaccess_lock + * runpool_irq.lock * @kbdev: Device pointer * @kctx: Context to add to queue * @js: Job slot to use * - * Caller must hold hwaccess_lock + * Caller must hold runpool_irq.lock * * Return: true if caller should call kbase_backend_ctx_count_changed() */ @@ -803,7 +804,7 @@ static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev, { bool ret = false; - lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js])) list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); @@ -815,8 +816,8 @@ static bool 
kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev, kbdev->js_data.nr_contexts_pullable++; ret = true; if (!atomic_read(&kctx->atoms_pulled)) { - WARN_ON(kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); - kbase_ctx_flag_set(kctx, KCTX_RUNNABLE_REF); + WARN_ON(kctx->ctx_runnable_ref); + kctx->ctx_runnable_ref = true; atomic_inc(&kbdev->js_data.nr_contexts_runnable); } } @@ -829,12 +830,12 @@ static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev, * kbase_js_ctx_list_add_pullable_head_nolock - Variant of * kbase_js_ctx_list_add_pullable_head() * where the caller must hold - * hwaccess_lock + * runpool_irq.lock * @kbdev: Device pointer * @kctx: Context to add to queue * @js: Job slot to use * - * Caller must hold hwaccess_lock + * Caller must hold runpool_irq.lock * * Return: true if caller should call kbase_backend_ctx_count_changed() */ @@ -843,7 +844,7 @@ static bool kbase_js_ctx_list_add_pullable_head_nolock( { bool ret = false; - lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js])) list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); @@ -855,8 +856,8 @@ static bool kbase_js_ctx_list_add_pullable_head_nolock( kbdev->js_data.nr_contexts_pullable++; ret = true; if (!atomic_read(&kctx->atoms_pulled)) { - WARN_ON(kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); - kbase_ctx_flag_set(kctx, KCTX_RUNNABLE_REF); + WARN_ON(kctx->ctx_runnable_ref); + kctx->ctx_runnable_ref = true; atomic_inc(&kbdev->js_data.nr_contexts_runnable); } } @@ -887,9 +888,9 @@ static bool kbase_js_ctx_list_add_pullable_head(struct kbase_device *kbdev, bool ret; unsigned long flags; - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags); ret = kbase_js_ctx_list_add_pullable_head_nolock(kbdev, kctx, js); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags); return ret; } @@ -907,7 +908,7 @@ static bool kbase_js_ctx_list_add_pullable_head(struct kbase_device *kbdev, * This function should be used when a context has been pulled from, and there * are no jobs remaining on the specified slot. * - * Caller must hold hwaccess_lock + * Caller must hold runpool_irq.lock * * Return: true if caller should call kbase_backend_ctx_count_changed() */ @@ -917,7 +918,7 @@ static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev, { bool ret = false; - lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js], &kbdev->js_data.ctx_list_unpullable[js]); @@ -926,8 +927,8 @@ static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev, kbdev->js_data.nr_contexts_pullable--; ret = true; if (!atomic_read(&kctx->atoms_pulled)) { - WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); - kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF); + WARN_ON(!kctx->ctx_runnable_ref); + kctx->ctx_runnable_ref = false; atomic_dec(&kbdev->js_data.nr_contexts_runnable); } } @@ -948,7 +949,7 @@ static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev, * This function should be used when a context has no jobs on the GPU, and no * jobs remaining for the specified slot. 
* - * Caller must hold hwaccess_lock + * Caller must hold runpool_irq.lock * * Return: true if caller should call kbase_backend_ctx_count_changed() */ @@ -958,7 +959,7 @@ static bool kbase_js_ctx_list_remove_nolock(struct kbase_device *kbdev, { bool ret = false; - lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); WARN_ON(list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js])); @@ -968,8 +969,8 @@ static bool kbase_js_ctx_list_remove_nolock(struct kbase_device *kbdev, kbdev->js_data.nr_contexts_pullable--; ret = true; if (!atomic_read(&kctx->atoms_pulled)) { - WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); - kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF); + WARN_ON(!kctx->ctx_runnable_ref); + kctx->ctx_runnable_ref = false; atomic_dec(&kbdev->js_data.nr_contexts_runnable); } } @@ -981,11 +982,11 @@ static bool kbase_js_ctx_list_remove_nolock(struct kbase_device *kbdev, /** * kbase_js_ctx_list_pop_head_nolock - Variant of kbase_js_ctx_list_pop_head() * where the caller must hold - * hwaccess_lock + * runpool_irq.lock * @kbdev: Device pointer * @js: Job slot to use * - * Caller must hold hwaccess_lock + * Caller must hold runpool_irq.lock * * Return: Context to use for specified slot. * NULL if no contexts present for specified slot @@ -996,7 +997,7 @@ static struct kbase_context *kbase_js_ctx_list_pop_head_nolock( { struct kbase_context *kctx; - lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); if (list_empty(&kbdev->js_data.ctx_list_pullable[js])) return NULL; @@ -1025,9 +1026,9 @@ static struct kbase_context *kbase_js_ctx_list_pop_head( struct kbase_context *kctx; unsigned long flags; - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags); kctx = kbase_js_ctx_list_pop_head_nolock(kbdev, js); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags); return kctx; } @@ -1039,7 +1040,7 @@ static struct kbase_context *kbase_js_ctx_list_pop_head( * @js: Job slot to use * @is_scheduled: true if the context is currently scheduled * - * Caller must hold hwaccess_lock + * Caller must hold runpool_irq.lock * * Return: true if context can be pulled from on specified slot * false otherwise @@ -1050,7 +1051,7 @@ static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js, struct kbasep_js_device_data *js_devdata; struct kbase_jd_atom *katom; - lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); js_devdata = &kctx->kbdev->js_data; @@ -1243,7 +1244,7 @@ bool kbasep_js_add_job(struct kbase_context *kctx, kbasep_js_clear_job_retry_submit(atom); /* Lock for state available during IRQ */ - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); if (!kbase_js_dep_validate(kctx, atom)) { /* Dependencies could not be represented */ @@ -1253,13 +1254,12 @@ bool kbasep_js_add_job(struct kbase_context *kctx, * dependencies */ atom->status = KBASE_JD_ATOM_STATE_QUEUED; - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); mutex_unlock(&js_devdata->runpool_mutex); goto out_unlock; } - kbase_tlstream_tl_attrib_atom_state(atom, TL_ATOM_STATE_READY); KBASE_TIMELINE_ATOM_READY(kctx, kbase_jd_atom_id(kctx, atom)); enqueue_required = kbase_js_dep_resolved_submit(kctx, atom); @@ -1283,14 +1283,14 @@ bool 
kbasep_js_add_job(struct kbase_context *kctx, if (enqueue_required && kctx == kbdev->hwaccess.active_kctx) kbase_jm_try_kick(kbdev, 1 << atom->slot_nr); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); if (timer_sync) kbase_backend_ctx_count_changed(kbdev); mutex_unlock(&js_devdata->runpool_mutex); /* End runpool transaction */ - if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED)) { - if (kbase_ctx_flag(kctx, KCTX_DYING)) { + if (!js_kctx_info->ctx.is_scheduled) { + if (js_kctx_info->ctx.is_dying) { /* A job got added while/after kbase_job_zap_context() * was called on a non-scheduled context (e.g. KDS * dependency resolved). Kill that job by killing the @@ -1300,7 +1300,7 @@ bool kbasep_js_add_job(struct kbase_context *kctx, } else if (js_kctx_info->ctx.nr_jobs == 1) { /* Handle Refcount going from 0 to 1: schedule the * context on the Policy Queue */ - KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + KBASE_DEBUG_ASSERT(!js_kctx_info->ctx.is_scheduled); dev_dbg(kbdev->dev, "JS: Enqueue Context %p", kctx); /* Policy Queue was updated - caller must try to @@ -1356,7 +1356,7 @@ bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev, kbasep_js_atom_retained_state_copy(&katom_retained_state, katom); kbasep_js_remove_job(kbdev, kctx, katom); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); /* The atom has 'finished' (will not be re-run), so no need to call * kbasep_js_has_atom_finished(). @@ -1366,7 +1366,8 @@ bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev, * whether it was soft-stopped or not */ attr_state_changed = kbasep_js_ctx_attr_ctx_release_atom(kbdev, kctx, &katom_retained_state); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); return attr_state_changed; } @@ -1383,9 +1384,9 @@ bool kbasep_js_runpool_retain_ctx(struct kbase_device *kbdev, /* KBASE_TRACE_ADD_REFCOUNT( kbdev, JS_RETAIN_CTX, kctx, NULL, 0, kbasep_js_trace_get_refcnt(kbdev, kctx)); */ - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); result = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); return result; } @@ -1403,14 +1404,14 @@ struct kbase_context *kbasep_js_runpool_lookup_ctx(struct kbase_device *kbdev, js_devdata = &kbdev->js_data; js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr]; - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); found_kctx = js_per_as_data->kctx; if (found_kctx != NULL) ++(js_per_as_data->as_busy_refcount); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); return found_kctx; } @@ -1425,7 +1426,7 @@ struct kbase_context *kbasep_js_runpool_lookup_ctx_nolock( KBASE_DEBUG_ASSERT(kbdev != NULL); KBASE_DEBUG_ASSERT(0 <= as_nr && as_nr < BASE_MAX_NR_AS); - lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); js_devdata = &kbdev->js_data; js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr]; @@ -1447,7 +1448,8 @@ struct kbase_context *kbasep_js_runpool_lookup_ctx_nolock( * @katom_retained_state: Retained state from the atom * @runpool_ctx_attr_change: True if the runpool context attributes have changed * - * 
This collates a set of actions that must happen whilst hwaccess_lock is held. + * This collates a set of actions that must happen whilst + * kbasep_js_device_data.runpool_irq.lock is held. * * This includes running more jobs when: * - The previously released kctx caused a ctx attribute change, @@ -1474,7 +1476,7 @@ static kbasep_js_release_result kbasep_js_run_jobs_after_ctx_and_atom_release( lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex); lockdep_assert_held(&js_devdata->runpool_mutex); - lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&js_devdata->runpool_irq.lock); if (js_devdata->nr_user_contexts_running != 0) { bool retry_submit = false; @@ -1539,7 +1541,7 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( js_policy = &kbdev->js_data.policy; /* Ensure context really is scheduled in */ - KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled); /* kctx->as_nr and js_per_as_data are only read from here. The caller's * js_ctx_mutex provides a barrier that ensures they are up-to-date. @@ -1559,9 +1561,8 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( */ current_as = &kbdev->as[kctx_as_nr]; mutex_lock(&kbdev->pm.lock); - mutex_lock(&kbdev->mmu_hw_mutex); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - + mutex_lock(¤t_as->transaction_mutex); + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); KBASE_DEBUG_ASSERT(kctx_as_nr == kctx->as_nr); KBASE_DEBUG_ASSERT(js_per_as_data->as_busy_refcount > 0); @@ -1576,7 +1577,8 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_RELEASE_CTX, kctx, NULL, 0u, new_ref_count); - if (new_ref_count == 1 && kbase_ctx_flag(kctx, KCTX_PRIVILEGED) && + if (new_ref_count == 1 && kctx->jctx.sched_info.ctx.flags & + KBASE_CTX_FLAG_PRIVILEGED && !kbase_pm_is_suspending(kbdev)) { /* Context is kept scheduled into an address space even when * there are no jobs, in this case we have to handle the @@ -1615,9 +1617,8 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( /* Ctx Attribute handling * * Releasing atoms attributes must either happen before this, or - * after the KCTX_SHEDULED flag is changed, otherwise we - * double-decount the attributes - */ + * after 'is_scheduled' is changed, otherwise we double-decount + * the attributes */ runpool_ctx_attr_change |= kbasep_js_ctx_attr_runpool_release_ctx(kbdev, kctx); @@ -1647,11 +1648,11 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( kctx, slot); } - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); kbase_backend_release_ctx_noirq(kbdev, kctx); - mutex_unlock(&kbdev->mmu_hw_mutex); + mutex_unlock(¤t_as->transaction_mutex); mutex_unlock(&kbdev->pm.lock); /* Note: Don't reuse kctx_as_nr now */ @@ -1660,7 +1661,7 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( kbase_backend_ctx_count_changed(kbdev); /* update book-keeping info */ - kbase_ctx_flag_clear(kctx, KCTX_SCHEDULED); + js_kctx_info->ctx.is_scheduled = false; /* Signal any waiter that the context is not scheduled, so is * safe for termination - once the jsctx_mutex is also dropped, * and jobs have finished. 
*/ @@ -1673,8 +1674,8 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( kbasep_js_run_jobs_after_ctx_and_atom_release(kbdev, kctx, katom_retained_state, runpool_ctx_attr_change); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - mutex_unlock(&kbdev->mmu_hw_mutex); + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + mutex_unlock(¤t_as->transaction_mutex); mutex_unlock(&kbdev->pm.lock); } @@ -1709,9 +1710,9 @@ void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev, /* This is called if and only if you've you've detached the context from * the Runpool or the Policy Queue, and not added it back to the Runpool */ - KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + KBASE_DEBUG_ASSERT(!js_kctx_info->ctx.is_scheduled); - if (kbase_ctx_flag(kctx, KCTX_DYING)) { + if (js_kctx_info->ctx.is_dying) { /* Dying: don't requeue, but kill all jobs on the context. This * happens asynchronously */ dev_dbg(kbdev->dev, @@ -1805,7 +1806,7 @@ static void kbasep_js_runpool_release_ctx_no_schedule( void kbase_js_set_timeouts(struct kbase_device *kbdev) { - lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); kbase_backend_timeouts_changed(kbdev); } @@ -1840,7 +1841,7 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, mutex_lock(&js_devdata->runpool_mutex); /* Check to see if context is dying due to kbase_job_zap_context() */ - if (kbase_ctx_flag(kctx, KCTX_DYING)) { + if (js_kctx_info->ctx.is_dying) { /* Roll back the transaction so far and return */ kbase_backend_release_free_address_space(kbdev, as_nr); @@ -1854,17 +1855,17 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, 0u, kbasep_js_trace_get_refcnt(kbdev, kctx)); - kbase_ctx_flag_set(kctx, KCTX_SCHEDULED); + js_kctx_info->ctx.is_scheduled = true; - mutex_lock(&kbdev->mmu_hw_mutex); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + mutex_lock(&new_address_space->transaction_mutex); + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); /* Assign context to previously chosen address space */ if (!kbase_backend_use_ctx(kbdev, kctx, as_nr)) { - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - mutex_unlock(&kbdev->mmu_hw_mutex); + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + mutex_unlock(&new_address_space->transaction_mutex); /* Roll back the transaction so far and return */ - kbase_ctx_flag_clear(kctx, KCTX_SCHEDULED); + js_kctx_info->ctx.is_scheduled = false; kbase_backend_release_free_address_space(kbdev, as_nr); @@ -1891,8 +1892,8 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, * we just want it out immediately. * * The DMB required to read the suspend flag was issued recently as part - * of the hwaccess_lock locking. If a suspend occurs *after* that lock - * was taken (i.e. this condition doesn't execute), then the + * of the runpool_irq locking. If a suspend occurs *after* that lock was + * taken (i.e. 
this condition doesn't execute), then the * kbasep_js_suspend() code will cleanup this context instead (by virtue * of it being called strictly after the suspend flag is set, and will * wait for this lock to drop) */ @@ -1908,8 +1909,8 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, } /* Transaction complete */ - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - mutex_unlock(&kbdev->mmu_hw_mutex); + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + mutex_unlock(&new_address_space->transaction_mutex); /* Synchronize with any policy timers */ kbase_backend_ctx_count_changed(kbdev); @@ -1933,18 +1934,18 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, static bool kbase_js_use_ctx(struct kbase_device *kbdev, struct kbase_context *kctx) { + struct kbasep_js_device_data *js_devdata = &kbdev->js_data; unsigned long flags; - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); if (kbase_backend_use_ctx_sched(kbdev, kctx)) { /* Context already has ASID - mark as active */ kbdev->hwaccess.active_kctx = kctx; - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); return true; /* Context already scheduled */ } - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); return kbasep_js_schedule_ctx(kbdev, kctx); } @@ -1970,9 +1971,9 @@ void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, mutex_lock(&js_kctx_info->ctx.jsctx_mutex); /* Mark the context as privileged */ - kbase_ctx_flag_set(kctx, KCTX_PRIVILEGED); + js_kctx_info->ctx.flags |= KBASE_CTX_FLAG_PRIVILEGED; - is_scheduled = kbase_ctx_flag(kctx, KCTX_SCHEDULED); + is_scheduled = js_kctx_info->ctx.is_scheduled; if (!is_scheduled) { /* Add the context to the pullable list */ if (kbase_js_ctx_list_add_pullable_head(kbdev, kctx, 0)) @@ -1988,7 +1989,7 @@ void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, /* Wait for the context to be scheduled in */ wait_event(kctx->jctx.sched_info.ctx.is_scheduled_wait, - kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + kctx->jctx.sched_info.ctx.is_scheduled); } else { /* Already scheduled in - We need to retain it to keep the * corresponding address space */ @@ -2009,7 +2010,7 @@ void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev, /* We don't need to use the address space anymore */ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); - kbase_ctx_flag_clear(kctx, KCTX_PRIVILEGED); + js_kctx_info->ctx.flags &= (~KBASE_CTX_FLAG_PRIVILEGED); mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); /* Release the context - it will be scheduled out */ @@ -2031,7 +2032,7 @@ void kbasep_js_suspend(struct kbase_device *kbdev) KBASE_DEBUG_ASSERT(kbase_pm_is_suspending(kbdev)); js_devdata = &kbdev->js_data; - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); /* Prevent all contexts from submitting */ js_devdata->runpool_irq.submit_allowed = 0; @@ -2051,15 +2052,15 @@ void kbasep_js_suspend(struct kbase_device *kbdev) /* We can only cope with up to 1 privileged context - * the instrumented context. 
It'll be suspended by * disabling instrumentation */ - if (kbase_ctx_flag(kctx, KCTX_PRIVILEGED)) { + if (kctx->jctx.sched_info.ctx.flags & + KBASE_CTX_FLAG_PRIVILEGED) { ++nr_privileged_ctx; WARN_ON(nr_privileged_ctx != 1); } } } CSTD_UNUSED(nr_privileged_ctx); - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); /* De-ref the previous retain to ensure each context gets pulled out * sometime later. */ @@ -2102,14 +2103,16 @@ void kbasep_js_resume(struct kbase_device *kbdev) mutex_lock(&js_kctx_info->ctx.jsctx_mutex); mutex_lock(&js_devdata->runpool_mutex); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); - if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED) && + if (!js_kctx_info->ctx.is_scheduled && kbase_js_ctx_pullable(kctx, js, false)) timer_sync = kbase_js_ctx_list_add_pullable_nolock( kbdev, kctx, js); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, + flags); if (timer_sync) kbase_backend_ctx_count_changed(kbdev); mutex_unlock(&js_devdata->runpool_mutex); @@ -2164,7 +2167,7 @@ bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, katom->slot_nr = kbase_js_get_slot(kctx->kbdev, katom); - lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); lockdep_assert_held(&kctx->jctx.lock); /* If slot will transition from unpullable to pullable then add to @@ -2174,6 +2177,9 @@ bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, } else { enqueue_required = false; } + /* Check if there are lower priority jobs to soft stop */ + kbase_job_slot_ctx_priority_check_locked(kctx, katom); + if ((katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) || (katom->pre_dep && (katom->pre_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST))) { @@ -2185,9 +2191,6 @@ bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST; enqueue_required = false; } else { - /* Check if there are lower priority jobs to soft stop */ - kbase_job_slot_ctx_priority_check_locked(kctx, katom); - /* Add atom to ring buffer. 
*/ jsctx_tree_add(kctx, katom); katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_TREE; @@ -2207,7 +2210,7 @@ bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, */ static void kbase_js_move_to_tree(struct kbase_jd_atom *katom) { - lockdep_assert_held(&katom->kctx->kbdev->hwaccess_lock); + lockdep_assert_held(&katom->kctx->kbdev->js_data.runpool_irq.lock); while (katom) { WARN_ON(!(katom->atom_flags & @@ -2247,7 +2250,7 @@ static void kbase_js_evict_deps(struct kbase_context *kctx, struct kbase_jd_atom *x_dep = katom->x_post_dep; struct kbase_jd_atom *next_katom = katom->post_dep; - lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); if (next_katom) { KBASE_DEBUG_ASSERT(next_katom->status != @@ -2280,7 +2283,7 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js) KBASE_DEBUG_ASSERT(kctx); js_devdata = &kctx->kbdev->js_data; - lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + lockdep_assert_held(&js_devdata->runpool_irq.lock); if (!kbasep_js_is_submit_allowed(js_devdata, kctx)) return NULL; @@ -2315,12 +2318,11 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js) return NULL; } - kbase_ctx_flag_set(kctx, KCTX_PULLED); - + kctx->pulled = true; pulled = atomic_inc_return(&kctx->atoms_pulled); if (pulled == 1 && !kctx->slots_pullable) { - WARN_ON(kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); - kbase_ctx_flag_set(kctx, KCTX_RUNNABLE_REF); + WARN_ON(kctx->ctx_runnable_ref); + kctx->ctx_runnable_ref = true; atomic_inc(&kctx->kbdev->js_data.nr_contexts_runnable); } atomic_inc(&kctx->atoms_pulled_slot[katom->slot_nr]); @@ -2352,7 +2354,7 @@ static void js_return_worker(struct work_struct *data) u64 affinity = katom->affinity; enum kbase_atom_coreref_state coreref_state = katom->coreref_state; - kbase_tlstream_tl_event_atom_softstop_ex(katom); + kbase_tlstream_aux_job_softstop_ex(katom); kbase_backend_complete_wq(kbdev, katom); @@ -2369,7 +2371,7 @@ static void js_return_worker(struct work_struct *data) atomic_dec(&katom->blocked); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); if (!atomic_read(&kctx->atoms_pulled_slot[js]) && jsctx_rb_none_to_pull(kctx, js)) @@ -2377,14 +2379,14 @@ static void js_return_worker(struct work_struct *data) if (!atomic_read(&kctx->atoms_pulled)) { if (!kctx->slots_pullable) { - WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); - kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF); + WARN_ON(!kctx->ctx_runnable_ref); + kctx->ctx_runnable_ref = false; atomic_dec(&kbdev->js_data.nr_contexts_runnable); timer_sync = true; } if (kctx->as_nr != KBASEP_AS_NR_INVALID && - !kbase_ctx_flag(kctx, KCTX_DYING)) { + !js_kctx_info->ctx.is_dying) { int num_slots = kbdev->gpu_props.num_job_slots; int slot; @@ -2404,11 +2406,11 @@ static void js_return_worker(struct work_struct *data) context_idle = true; } - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); if (context_idle) { - WARN_ON(!kbase_ctx_flag(kctx, KCTX_ACTIVE)); - kbase_ctx_flag_clear(kctx, KCTX_ACTIVE); + WARN_ON(!kctx->ctx_active); + kctx->ctx_active = false; kbase_pm_context_idle(kbdev); } @@ -2430,7 +2432,7 @@ static void js_return_worker(struct work_struct *data) void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom) { - lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); jsctx_rb_unpull(kctx, katom); @@ 
-2466,7 +2468,7 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); mutex_lock(&js_devdata->runpool_mutex); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); if (katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE) { context_idle = !atomic_dec_return(&kctx->atoms_pulled); @@ -2474,8 +2476,8 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, if (!atomic_read(&kctx->atoms_pulled) && !kctx->slots_pullable) { - WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); - kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF); + WARN_ON(!kctx->ctx_runnable_ref); + kctx->ctx_runnable_ref = false; atomic_dec(&kbdev->js_data.nr_contexts_runnable); timer_sync = true; } @@ -2497,7 +2499,7 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, */ if (!kbasep_js_is_submit_allowed(js_devdata, kctx) && !atomic_read(&kctx->atoms_pulled) && - !kbase_ctx_flag(kctx, KCTX_DYING)) { + !js_kctx_info->ctx.is_dying) { int js; kbasep_js_set_submit_allowed(js_devdata, kctx); @@ -2524,9 +2526,9 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, * jd_done_worker(). */ if (context_idle) - kbase_ctx_flag_clear(kctx, KCTX_ACTIVE); + kctx->ctx_active = false; - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); if (timer_sync) kbase_backend_ctx_count_changed(kbdev); mutex_unlock(&js_devdata->runpool_mutex); @@ -2534,8 +2536,7 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, return context_idle; } -struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom, - ktime_t *end_timestamp) +void kbase_js_complete_atom(struct kbase_jd_atom *katom, ktime_t *end_timestamp) { u64 microseconds_spent = 0; struct kbase_device *kbdev; @@ -2547,7 +2548,7 @@ struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom, js_policy = &kbdev->js_data.policy; - lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); if (katom->will_fail_event_code) katom->event_code = katom->will_fail_event_code; @@ -2598,12 +2599,7 @@ struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom, false)) kbase_js_ctx_list_add_pullable_nolock(kbdev, kctx, x_dep->slot_nr); - - if (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE) - return x_dep; } - - return NULL; } void kbase_js_sched(struct kbase_device *kbdev, int js_mask) @@ -2633,7 +2629,7 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) break; /* No contexts on pullable list */ } - if (!kbase_ctx_flag(kctx, KCTX_ACTIVE)) { + if (!kctx->ctx_active) { context_idle = true; if (kbase_pm_context_active_handle_suspend( @@ -2652,16 +2648,18 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) up(&js_devdata->schedule_sem); return; } - kbase_ctx_flag_set(kctx, KCTX_ACTIVE); + kctx->ctx_active = true; } if (!kbase_js_use_ctx(kbdev, kctx)) { mutex_lock( &kctx->jctx.sched_info.ctx.jsctx_mutex); /* Context can not be used at this time */ - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock_irqsave(&js_devdata->runpool_irq.lock, + flags); if (kbase_js_ctx_pullable(kctx, js, false) - || kbase_ctx_flag(kctx, KCTX_PRIVILEGED)) + || (kctx->jctx.sched_info.ctx.flags & + KBASE_CTX_FLAG_PRIVILEGED)) timer_sync |= kbase_js_ctx_list_add_pullable_head_nolock( kctx->kbdev, kctx, js); @@ -2669,13 +2667,13 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) timer_sync |= 
kbase_js_ctx_list_add_unpullable_nolock( kctx->kbdev, kctx, js); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, - flags); + spin_unlock_irqrestore( + &js_devdata->runpool_irq.lock, flags); mutex_unlock( &kctx->jctx.sched_info.ctx.jsctx_mutex); if (context_idle) { - WARN_ON(!kbase_ctx_flag(kctx, KCTX_ACTIVE)); - kbase_ctx_flag_clear(kctx, KCTX_ACTIVE); + WARN_ON(!kctx->ctx_active); + kctx->ctx_active = false; kbase_pm_context_idle(kbdev); } @@ -2684,15 +2682,15 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) break; } mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); - kbase_ctx_flag_clear(kctx, KCTX_PULLED); + kctx->pulled = false; if (!kbase_jm_kick(kbdev, 1 << js)) /* No more jobs can be submitted on this slot */ js_mask &= ~(1 << js); - if (!kbase_ctx_flag(kctx, KCTX_PULLED)) { + if (!kctx->pulled) { /* Failed to pull jobs - push to head of list */ if (kbase_js_ctx_pullable(kctx, js, true)) timer_sync |= @@ -2708,15 +2706,15 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) if (context_idle) { kbase_jm_idle_ctx(kbdev, kctx); spin_unlock_irqrestore( - &kbdev->hwaccess_lock, - flags); - WARN_ON(!kbase_ctx_flag(kctx, KCTX_ACTIVE)); - kbase_ctx_flag_clear(kctx, KCTX_ACTIVE); + &js_devdata->runpool_irq.lock, + flags); + WARN_ON(!kctx->ctx_active); + kctx->ctx_active = false; kbase_pm_context_idle(kbdev); } else { spin_unlock_irqrestore( - &kbdev->hwaccess_lock, - flags); + &js_devdata->runpool_irq.lock, + flags); } mutex_unlock( &kctx->jctx.sched_info.ctx.jsctx_mutex); @@ -2734,8 +2732,8 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) timer_sync |= kbase_js_ctx_list_add_unpullable_nolock( kctx->kbdev, kctx, js); - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, + flags); mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex); } } @@ -2766,7 +2764,7 @@ void kbase_js_zap_context(struct kbase_context *kctx) mutex_lock(&kctx->jctx.lock); mutex_lock(&js_devdata->queue_mutex); mutex_lock(&js_kctx_info->ctx.jsctx_mutex); - kbase_ctx_flag_set(kctx, KCTX_DYING); + js_kctx_info->ctx.is_dying = true; dev_dbg(kbdev->dev, "Zap: Try Evict Ctx %p", kctx); @@ -2806,7 +2804,7 @@ void kbase_js_zap_context(struct kbase_context *kctx) * jobs and releasing the Power manager active reference will be * handled when it leaves the runpool. 
*/ - if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED)) { + if (!js_kctx_info->ctx.is_scheduled) { for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { if (!list_empty( &kctx->jctx.sched_info.ctx.ctx_list_entry[js])) @@ -2826,7 +2824,7 @@ void kbase_js_zap_context(struct kbase_context *kctx) */ KBASE_TRACE_ADD(kbdev, JM_ZAP_NON_SCHEDULED, kctx, NULL, 0u, - kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + js_kctx_info->ctx.is_scheduled); dev_dbg(kbdev->dev, "Zap: Ctx %p scheduled=0", kctx); @@ -2847,11 +2845,11 @@ void kbase_js_zap_context(struct kbase_context *kctx) /* Case c: didn't evict, but it is scheduled - it's in the Run * Pool */ KBASE_TRACE_ADD(kbdev, JM_ZAP_SCHEDULED, kctx, NULL, 0u, - kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + js_kctx_info->ctx.is_scheduled); dev_dbg(kbdev->dev, "Zap: Ctx %p is in RunPool", kctx); /* Disable the ctx from submitting any more jobs */ - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); kbasep_js_clear_submit_allowed(js_devdata, kctx); @@ -2872,7 +2870,7 @@ void kbase_js_zap_context(struct kbase_context *kctx) for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) kbase_job_slot_hardstop(kctx, js, NULL); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); mutex_unlock(&js_devdata->queue_mutex); mutex_unlock(&kctx->jctx.lock); @@ -2944,7 +2942,7 @@ static void kbase_js_foreach_ctx_job(struct kbase_context *kctx, kbdev = kctx->kbdev; js_devdata = &kbdev->js_data; - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_POLICY_FOREACH_CTX_JOBS, kctx, NULL, 0u, trace_get_refcnt(kbdev, kctx)); @@ -2953,5 +2951,5 @@ static void kbase_js_foreach_ctx_job(struct kbase_context *kctx, for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) jsctx_queue_foreach(kctx, js, callback); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); } diff --git a/drivers/gpu/arm/midgard/mali_kbase_js.h b/drivers/gpu/arm/midgard/mali_kbase_js.h index 8969222c3389..66b213293016 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_js.h +++ b/drivers/gpu/arm/midgard/mali_kbase_js.h @@ -27,7 +27,6 @@ #include "mali_kbase_js_defs.h" #include "mali_kbase_js_policy.h" -#include "mali_kbase_context.h" #include "mali_kbase_defs.h" #include "mali_kbase_debug.h" @@ -152,7 +151,8 @@ void kbasep_js_kctx_term(struct kbase_context *kctx); * * The following locking conditions are made on the caller: * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex. - * - it must \em not hold hwaccess_lock (as this will be obtained internally) + * - it must \em not hold kbasep_js_device_data::runpool_irq::lock (as this will be + * obtained internally) * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be * obtained internally) * - it must \em not hold kbasep_jd_device_data::queue_mutex (again, it's used internally). @@ -213,8 +213,8 @@ void kbasep_js_remove_job(struct kbase_device *kbdev, struct kbase_context *kctx * * The following locking conditions are made on the caller: * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex. 
- * - it must \em not hold the hwaccess_lock, (as this will be obtained - * internally) + * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, (as this will be + * obtained internally) * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this could be * obtained internally) * @@ -233,7 +233,8 @@ bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev, * @note This function can safely be called from IRQ context. * * The following locking conditions are made on the caller: - * - it must \em not hold the hwaccess_lock, because it will be used internally. + * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, because + * it will be used internally. * * @return value != false if the retain succeeded, and the context will not be scheduled out. * @return false if the retain failed (because the context is being/has been scheduled out). @@ -247,7 +248,7 @@ bool kbasep_js_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_conte * @note This function can safely be called from IRQ context. * * The following locks must be held by the caller: - * - hwaccess_lock + * - kbasep_js_device_data::runpool_irq::lock * * @return value != false if the retain succeeded, and the context will not be scheduled out. * @return false if the retain failed (because the context is being/has been scheduled out). @@ -265,9 +266,9 @@ bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev, struct kbas * @note This function can safely be called from IRQ context. * * The following locking conditions are made on the caller: - * - it must \em not hold the hwaccess_lock, because it will be used internally. - * If the hwaccess_lock is already held, then the caller should use - * kbasep_js_runpool_lookup_ctx_nolock() instead. + * - it must \em not hold the kbasep_js_device_data::runpoool_irq::lock, because + * it will be used internally. If the runpool_irq::lock is already held, then + * the caller should use kbasep_js_runpool_lookup_ctx_nolock() instead. * * @return a valid struct kbase_context on success, which has been refcounted as being busy. * @return NULL on failure, indicating that no context was found in \a as_nr @@ -287,7 +288,7 @@ struct kbase_context *kbasep_js_runpool_lookup_ctx(struct kbase_device *kbdev, i * Note: This function can safely be called from IRQ context. * * The following locking conditions are made on the caller: - * - it must the hold the hwaccess_lock + * - it must the kbasep_js_device_data::runpoool_irq::lock. * * Return: a valid struct kbase_context on success, which has been refcounted as * being busy. @@ -361,12 +362,12 @@ void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev, struct kb * scheduled, or that already has a zero refcount. * * The following locking conditions are made on the caller: - * - it must \em not hold the hwaccess_lock, because it will be used internally. + * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, because + * it will be used internally. * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex. 
* - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be * obtained internally) - * - it must \em not hold the kbase_device::mmu_hw_mutex (as this will be - * obtained internally) + * - it must \em not hold the kbase_device::as[n].transaction_mutex (as this will be obtained internally) * - it must \em not hold kbasep_jd_device_data::queue_mutex (as this will be * obtained internally) * @@ -410,11 +411,11 @@ void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev, * kbasep_js_release_privileged_ctx is called). * * The following locking conditions are made on the caller: - * - it must \em not hold the hwaccess_lock, because it will be used internally. + * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, because + * it will be used internally. * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be * obtained internally) - * - it must \em not hold the kbase_device::mmu_hw_mutex (as this will be - * obtained internally) + * - it must \em not hold the kbase_device::as[n].transaction_mutex (as this will be obtained internally) * - it must \em not hold kbasep_jd_device_data::queue_mutex (again, it's used internally). * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex, because it will * be used internally. @@ -428,12 +429,12 @@ void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, struct kbase_ * See kbasep_js_runpool_release_ctx for potential side effects. * * The following locking conditions are made on the caller: - * - it must \em not hold the hwaccess_lock, because it will be used internally. + * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, because + * it will be used internally. * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex. * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be * obtained internally) - * - it must \em not hold the kbase_device::mmu_hw_mutex (as this will be - * obtained internally) + * - it must \em not hold the kbase_device::as[n].transaction_mutex (as this will be obtained internally) * */ void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev, struct kbase_context *kctx); @@ -443,7 +444,7 @@ void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev, struct kbase_c * * The following locks may be used: * - kbasep_js_device_data::runpool_mutex - * - hwaccess_lock + * - kbasep_js_device_data::runpool_irq::lock */ void kbase_js_try_run_jobs(struct kbase_device *kbdev); @@ -563,10 +564,8 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, * * @param[in] katom Pointer to the atom to complete * @param[in] end_timestamp The time that the atom completed (may be NULL) - * - * Return: Atom that has now been unblocked and can now be run, or NULL if none */ -struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom, +void kbase_js_complete_atom(struct kbase_jd_atom *katom, ktime_t *end_timestamp); /** @@ -631,7 +630,7 @@ void kbase_js_set_timeouts(struct kbase_device *kbdev); * * As with any bool, never test the return value with true. * - * The caller must hold hwaccess_lock. + * The caller must hold kbasep_js_device_data::runpool_irq::lock. 
*/ static inline bool kbasep_js_is_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx) { @@ -639,7 +638,7 @@ static inline bool kbasep_js_is_submit_allowed(struct kbasep_js_device_data *js_ /* Ensure context really is scheduled in */ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); - KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + KBASE_DEBUG_ASSERT(kctx->jctx.sched_info.ctx.is_scheduled); test_bit = (u16) (1u << kctx->as_nr); @@ -652,7 +651,7 @@ static inline bool kbasep_js_is_submit_allowed(struct kbasep_js_device_data *js_ * The purpose of this abstraction is to hide the underlying data size, and wrap up * the long repeated line of code. * - * The caller must hold hwaccess_lock. + * The caller must hold kbasep_js_device_data::runpool_irq::lock. */ static inline void kbasep_js_set_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx) { @@ -660,7 +659,7 @@ static inline void kbasep_js_set_submit_allowed(struct kbasep_js_device_data *js /* Ensure context really is scheduled in */ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); - KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + KBASE_DEBUG_ASSERT(kctx->jctx.sched_info.ctx.is_scheduled); set_bit = (u16) (1u << kctx->as_nr); @@ -675,7 +674,7 @@ static inline void kbasep_js_set_submit_allowed(struct kbasep_js_device_data *js * The purpose of this abstraction is to hide the underlying data size, and wrap up * the long repeated line of code. * - * The caller must hold hwaccess_lock. + * The caller must hold kbasep_js_device_data::runpool_irq::lock. */ static inline void kbasep_js_clear_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx) { @@ -684,7 +683,7 @@ static inline void kbasep_js_clear_submit_allowed(struct kbasep_js_device_data * /* Ensure context really is scheduled in */ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); - KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + KBASE_DEBUG_ASSERT(kctx->jctx.sched_info.ctx.is_scheduled); clear_bit = (u16) (1u << kctx->as_nr); clear_mask = ~clear_bit; @@ -793,7 +792,7 @@ static inline bool kbasep_js_get_atom_retry_submit_slot(const struct kbasep_js_a /** * Debug Check the refcount of a context. Only use within ASSERTs * - * Obtains hwaccess_lock + * Obtains kbasep_js_device_data::runpool_irq::lock * * @return negative value if the context is not scheduled in * @return current refcount of the context if it is scheduled in. The refcount @@ -810,12 +809,12 @@ static inline int kbasep_js_debug_check_ctx_refcount(struct kbase_device *kbdev, KBASE_DEBUG_ASSERT(kctx != NULL); js_devdata = &kbdev->js_data; - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); as_nr = kctx->as_nr; if (as_nr != KBASEP_AS_NR_INVALID) result = js_devdata->runpool_irq.per_as_data[as_nr].as_busy_refcount; - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); return result; } @@ -830,7 +829,8 @@ static inline int kbasep_js_debug_check_ctx_refcount(struct kbase_device *kbdev, * when there is no ctx in \a as_nr (NULL returned). * * The following locking conditions are made on the caller: - * - it must \em not hold the hwaccess_lock, because it will be used internally. + * - it must \em not hold the kbasep_js_device_data::runpoool_irq::lock, because + * it will be used internally. 
* * @return a valid struct kbase_context on success, with a refcount that is guarenteed * to be non-zero and unmodified by this function. @@ -848,12 +848,12 @@ static inline struct kbase_context *kbasep_js_runpool_lookup_ctx_noretain(struct js_devdata = &kbdev->js_data; js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr]; - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); found_kctx = js_per_as_data->kctx; KBASE_DEBUG_ASSERT(found_kctx == NULL || js_per_as_data->as_busy_refcount > 0); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); return found_kctx; } @@ -948,7 +948,7 @@ static inline void kbase_js_runpool_inc_context_count( KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running < S8_MAX); ++(js_devdata->nr_all_contexts_running); - if (!kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) { + if ((js_kctx_info->ctx.flags & KBASE_CTX_FLAG_SUBMIT_DISABLED) == 0) { /* Track contexts that can submit jobs */ KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running < S8_MAX); @@ -981,7 +981,7 @@ static inline void kbase_js_runpool_dec_context_count( --(js_devdata->nr_all_contexts_running); KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running >= 0); - if (!kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) { + if ((js_kctx_info->ctx.flags & KBASE_CTX_FLAG_SUBMIT_DISABLED) == 0) { /* Track contexts that can submit jobs */ --(js_devdata->nr_user_contexts_running); KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running >= 0); diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c b/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c index 455b661a1176..e6e611b9f415 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c +++ b/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -51,9 +51,9 @@ static bool kbasep_js_ctx_attr_runpool_retain_attr(struct kbase_device *kbdev, s js_kctx_info = &kctx->jctx.sched_info; lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); - lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); - KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled != false); if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, attribute) != false) { KBASE_DEBUG_ASSERT(js_devdata->runpool_irq.ctx_attr_ref_count[attribute] < S8_MAX); @@ -97,8 +97,8 @@ static bool kbasep_js_ctx_attr_runpool_release_attr(struct kbase_device *kbdev, js_kctx_info = &kctx->jctx.sched_info; lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); - lockdep_assert_held(&kbdev->hwaccess_lock); - KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled != false); if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, attribute) != false) { KBASE_DEBUG_ASSERT(js_devdata->runpool_irq.ctx_attr_ref_count[attribute] > 0); @@ -136,13 +136,13 @@ static bool kbasep_js_ctx_attr_ctx_retain_attr(struct kbase_device *kbdev, struc KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT); js_kctx_info = &kctx->jctx.sched_info; - lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); KBASE_DEBUG_ASSERT(js_kctx_info->ctx.ctx_attr_ref_count[attribute] < U32_MAX); ++(js_kctx_info->ctx.ctx_attr_ref_count[attribute]); - if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) && js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) { + if (js_kctx_info->ctx.is_scheduled != false && js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) { /* Only ref-count the attribute on the runpool for the first time this contexts sees this attribute */ KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_ON_CTX, kctx, NULL, 0u, attribute); runpool_state_changed = kbasep_js_ctx_attr_runpool_retain_attr(kbdev, kctx, attribute); @@ -176,8 +176,8 @@ static bool kbasep_js_ctx_attr_ctx_release_attr(struct kbase_device *kbdev, stru lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); KBASE_DEBUG_ASSERT(js_kctx_info->ctx.ctx_attr_ref_count[attribute] > 0); - if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) && js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) { - lockdep_assert_held(&kbdev->hwaccess_lock); + if (js_kctx_info->ctx.is_scheduled != false && js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) { + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); /* Only de-ref-count the attribute on the runpool when this is the last ctx-reference to it */ runpool_state_changed = kbasep_js_ctx_attr_runpool_release_attr(kbdev, kctx, attribute); KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_OFF_CTX, kctx, NULL, 0u, attribute); @@ -202,7 +202,7 @@ void kbasep_js_ctx_attr_set_initial_attrs(struct kbase_device *kbdev, struct kba KBASE_DEBUG_ASSERT(kctx != NULL); js_kctx_info = &kctx->jctx.sched_info; - if (kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) { + if ((js_kctx_info->ctx.flags & KBASE_CTX_FLAG_SUBMIT_DISABLED) != false) { /* This context never submits, so don't track any scheduling attributes */ return; } diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_defs.h b/drivers/gpu/arm/midgard/mali_kbase_js_defs.h index 
e6a9d41b6a08..e1342045b394 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_js_defs.h +++ b/drivers/gpu/arm/midgard/mali_kbase_js_defs.h @@ -44,6 +44,17 @@ struct kbase_device; struct kbase_jd_atom; +/* Types used by the policies must go here */ +enum { + /** Context will not submit any jobs */ + KBASE_CTX_FLAG_SUBMIT_DISABLED = (1u << 0), + + /** Set if the context uses an address space and should be kept scheduled in */ + KBASE_CTX_FLAG_PRIVILEGED = (1u << 1) + + /* NOTE: Add flags for other things, such as 'is scheduled', and 'is dying' */ +}; + typedef u32 kbase_context_flags; struct kbasep_atom_req { @@ -174,8 +185,9 @@ typedef u32 kbasep_js_atom_done_code; /** * Data used by the scheduler that is unique for each Address Space. * - * This is used in IRQ context and hwaccess_lock must be held whilst accessing - * this data (inculding reads and atomic decisions based on the read). + * This is used in IRQ context and kbasep_js_device_data::runpoool_irq::lock + * must be held whilst accessing this data (inculding reads and atomic + * decisions based on the read). */ struct kbasep_js_per_as_data { /** @@ -206,9 +218,27 @@ struct kbasep_js_per_as_data { * to remove masking). */ struct kbasep_js_device_data { - /* Sub-structure to collect together Job Scheduling data used in IRQ - * context. The hwaccess_lock must be held when accessing. */ + /** Sub-structure to collect together Job Scheduling data used in IRQ context */ struct runpool_irq { + /** + * Lock for accessing Job Scheduling data used in IRQ context + * + * This lock must be held whenever this data is accessed (read, or + * write). Even for read-only access, memory barriers would be needed. + * In any case, it is likely that decisions based on only reading must + * also be atomic with respect to data held here and elsewhere in the + * Job Scheduler. + * + * This lock must also be held for accessing: + * - kbase_context::as_nr + * - kbase_device::jm_slots + * - Parts of the kbasep_js_policy, dependent on the policy (refer to + * the policy in question for more information) + * - Parts of kbasep_js_policy_ctx_info, dependent on the policy (refer to + * the policy in question for more information) + */ + spinlock_t lock; + /** Bitvector indicating whether a currently scheduled context is allowed to submit jobs. * When bit 'N' is set in this, it indicates whether the context bound to address space * 'N' (per_as_data[N].kctx) is allowed to submit jobs. @@ -353,8 +383,8 @@ struct kbasep_js_kctx_info { * Runpool substructure. This must only be accessed whilst the Run Pool * mutex ( kbasep_js_device_data::runpool_mutex ) is held. * - * In addition, the hwaccess_lock may need to be held for certain - * sub-members. + * In addition, the kbasep_js_device_data::runpool_irq::lock may need to be + * held for certain sub-members. * * @note some of the members could be moved into struct kbasep_js_device_data for * improved d-cache/tlb efficiency. @@ -388,11 +418,21 @@ struct kbasep_js_kctx_info { * the context. **/ u32 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT]; + kbase_context_flags flags; + /* NOTE: Unify the following flags into kbase_context_flags */ /** - * Wait queue to wait for KCTX_SHEDULED flag state changes. + * Is the context scheduled on the Run Pool? + * + * This is only ever updated whilst the jsctx_mutex is held. + */ + bool is_scheduled; + /** + * Wait queue to wait for is_scheduled state changes. * */ wait_queue_head_t is_scheduled_wait; + bool is_dying; /**< Is the context in the process of being evicted? 
*/ + /** Link implementing JS queues. Context can be present on one * list per job slot */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_policy.h b/drivers/gpu/arm/midgard/mali_kbase_js_policy.h index d1f3a0abea1f..debd0117d45a 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_js_policy.h +++ b/drivers/gpu/arm/midgard/mali_kbase_js_policy.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -531,7 +531,7 @@ void kbasep_js_policy_foreach_ctx_job(union kbasep_js_policy *js_policy, struct * The locking conditions on the caller are as follows: * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex. * - it will be holding kbasep_js_device_data::runpool_mutex. - * - it will be holding hwaccess_lock (a spinlock) + * - it will be holding kbasep_js_device_data::runpool_irq::lock (a spinlock) * * Due to a spinlock being held, this function must not call any APIs that sleep. */ @@ -548,7 +548,7 @@ void kbasep_js_policy_runpool_add_ctx(union kbasep_js_policy *js_policy, struct * The locking conditions on the caller are as follows: * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex. * - it will be holding kbasep_js_device_data::runpool_mutex. - * - it will be holding hwaccess_lock (a spinlock) + * - it will be holding kbasep_js_device_data::runpool_irq::lock (a spinlock) * * Due to a spinlock being held, this function must not call any APIs that sleep. */ @@ -558,7 +558,7 @@ void kbasep_js_policy_runpool_remove_ctx(union kbasep_js_policy *js_policy, stru * @brief Indicate whether a context should be removed from the Run Pool * (should be scheduled out). * - * The hwaccess_lock will be held by the caller. + * The kbasep_js_device_data::runpool_irq::lock will be held by the caller. * * @note This API is called from IRQ context. */ @@ -715,7 +715,7 @@ bool kbasep_js_policy_dequeue_job(struct kbase_device *kbdev, int job_slot_idx, * the policy that the job should be run again at some point later. * * The caller has the following conditions on locking: - * - hwaccess_lock (a spinlock) will be held. + * - kbasep_js_device_data::runpool_irq::lock (a spinlock) will be held. * - kbasep_js_device_data::runpool_mutex will be held. * - kbasep_js_kctx_info::ctx::jsctx_mutex will be held. */ @@ -746,7 +746,7 @@ void kbasep_js_policy_enqueue_job(union kbasep_js_policy *js_policy, struct kbas * @note This API is called from IRQ context. * * The caller has the following conditions on locking: - * - hwaccess_lock will be held. + * - kbasep_js_device_data::runpool_irq::lock will be held. 
* * @param js_policy job scheduler policy * @param katom job dispatch atom diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.c b/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.c index 1ac05693b62c..90c13458ec7c 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.c +++ b/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.c @@ -149,9 +149,9 @@ static inline int kbasep_js_policy_trace_get_refcnt(struct kbase_device *kbdev, js_devdata = &kbdev->js_data; - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); refcnt = kbasep_js_policy_trace_get_refcnt_nolock(kbdev, kctx); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); return refcnt; } @@ -223,7 +223,7 @@ int kbasep_js_policy_init_ctx(struct kbase_device *kbdev, struct kbase_context * * head_runtime_us occur strictly after this context is initialized */ mutex_lock(&js_devdata->queue_mutex); - /* No need to hold the the hwaccess_lock here, because we're initializing + /* No need to hold the the runpool_irq.lock here, because we're initializing * the value, and the context is definitely not being updated in the * runpool at this point. The queue_mutex ensures the memory barrier. */ ctx_info->runtime_us = policy_info->head_runtime_us + priority_weight(ctx_info, (u64) js_devdata->cfs_ctx_runtime_init_slices * (u64) (js_devdata->ctx_timeslice_ns / 1000u)); diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.h b/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.h index 0a8454c033d8..b457d8215abe 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.h +++ b/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -54,10 +54,10 @@ struct kbasep_js_policy_cfs { * after ctx init * @process_priority: calling process NICE priority, in the range -20..19 * - * hwaccess_lock must be held when updating @runtime_us. Initializing will occur - * on context init and context enqueue (which can only occur in one thread at a - * time), but multi-thread access only occurs while the context is in the - * runpool. + * &kbasep_js_device_data.runpool_irq.lock must be held when updating + * @runtime_us. Initializing will occur on context init and context enqueue + * (which can only occur in one thread at a time), but multi-thread access only + * occurs while the context is in the runpool. * * Reads are possible without the spinlock, but an older value might be read if * no memory barries are issued beforehand. @@ -72,7 +72,7 @@ struct kbasep_js_policy_cfs_ctx { * struct kbasep_js_policy_cfs_job - per job information for CFS * @ticks: number of ticks that this job has been executing for * - * hwaccess_lock must be held when accessing @ticks. + * &kbasep_js_device_data.runpool_irq.lock must be held when accessing @ticks. 
*/ struct kbasep_js_policy_cfs_job { u32 ticks; diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem.c b/drivers/gpu/arm/midgard/mali_kbase_mem.c index 4824b31673d3..c1851caa95a0 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_mem.c +++ b/drivers/gpu/arm/midgard/mali_kbase_mem.c @@ -485,7 +485,7 @@ int kbase_region_tracker_init(struct kbase_context *kctx) #endif #ifdef CONFIG_64BIT - if (kbase_ctx_flag(kctx, KCTX_COMPAT)) + if (kctx->is_compat) same_va_bits = 32; else if (kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA)) same_va_bits = 33; @@ -509,7 +509,7 @@ int kbase_region_tracker_init(struct kbase_context *kctx) #ifdef CONFIG_64BIT /* 32-bit clients have exec and custom VA zones */ - if (kbase_ctx_flag(kctx, KCTX_COMPAT)) { + if (kctx->is_compat) { #endif if (gpu_va_limit <= KBASE_REG_ZONE_CUSTOM_VA_BASE) { err = -EINVAL; @@ -573,7 +573,7 @@ int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages) * Nothing to do for 32-bit clients, JIT uses the existing * custom VA zone. */ - if (kbase_ctx_flag(kctx, KCTX_COMPAT)) + if (kctx->is_compat) return 0; #if defined(CONFIG_ARM64) @@ -1730,7 +1730,7 @@ static int kbase_jit_debugfs_phys_get(struct kbase_jit_debugfs_data *data) KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_phys_fops, kbase_jit_debugfs_phys_get); -void kbase_jit_debugfs_init(struct kbase_context *kctx) +void kbase_jit_debugfs_add(struct kbase_context *kctx) { /* Debugfs entry for getting the number of JIT allocations. */ debugfs_create_file("mem_jit_count", S_IRUGO, kctx->kctx_dentry, diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem.h b/drivers/gpu/arm/midgard/mali_kbase_mem.h index 8953c852e765..7b2433e868bd 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_mem.h +++ b/drivers/gpu/arm/midgard/mali_kbase_mem.h @@ -400,8 +400,7 @@ static inline int kbase_reg_prepare_native(struct kbase_va_region *reg, return -ENOMEM; reg->cpu_alloc->imported.kctx = kctx; INIT_LIST_HEAD(®->cpu_alloc->evict_node); - if (kbase_ctx_flag(kctx, KCTX_INFINITE_CACHE) - && (reg->flags & KBASE_REG_CPU_CACHED)) { + if (kctx->infinite_cache_active && (reg->flags & KBASE_REG_CPU_CACHED)) { reg->gpu_alloc = kbase_alloc_create(reg->nr_pages, KBASE_MEM_TYPE_NATIVE); reg->gpu_alloc->imported.kctx = kctx; @@ -488,7 +487,7 @@ void kbase_mem_pool_term(struct kbase_mem_pool *pool); * 1. If there are free pages in the pool, allocate a page from @pool. * 2. Otherwise, if @next_pool is not NULL and has free pages, allocate a page * from @next_pool. - * 3. Return NULL if no memory in the pool + * 3. Finally, allocate a page from the kernel. * * Return: Pointer to allocated page, or NULL if allocation failed. */ @@ -573,39 +572,19 @@ static inline size_t kbase_mem_pool_max_size(struct kbase_mem_pool *pool) */ void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size); -/** - * kbase_mem_pool_grow - Grow the pool - * @pool: Memory pool to grow - * @nr_to_grow: Number of pages to add to the pool - * - * Adds @nr_to_grow pages to the pool. Note that this may cause the pool to - * become larger than the maximum size specified. - * - * Returns: 0 on success, -ENOMEM if unable to allocate sufficent pages - */ -int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow); - /** * kbase_mem_pool_trim - Grow or shrink the pool to a new size * @pool: Memory pool to trim * @new_size: New number of pages in the pool * * If @new_size > @cur_size, fill the pool with new pages from the kernel, but - * not above the max_size for the pool. + * not above @max_size. 
* If @new_size < @cur_size, shrink the pool by freeing pages to the kernel. - */ -void kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size); - -/* - * kbase_mem_alloc_page - Allocate a new page for a device - * @kbdev: The kbase device * - * Most uses should use kbase_mem_pool_alloc to allocate a page. However that - * function can fail in the event the pool is empty. - * - * Return: A new page or NULL if no memory + * Return: The new size of the pool */ -struct page *kbase_mem_alloc_page(struct kbase_device *kbdev); +size_t kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size); + int kbase_region_tracker_init(struct kbase_context *kctx); int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages); @@ -668,8 +647,8 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg); /** * The caller has the following locking conditions: - * - It must hold kbase_device->mmu_hw_mutex - * - It must hold the hwaccess_lock + * - It must hold kbase_as::transaction_mutex on kctx's address space + * - It must hold the kbasep_js_device_data::runpool_irq::lock */ void kbase_mmu_update(struct kbase_context *kctx); @@ -681,8 +660,8 @@ void kbase_mmu_update(struct kbase_context *kctx); * data from provided kbase context from the GPU caches. * * The caller has the following locking conditions: - * - It must hold kbase_device->mmu_hw_mutex - * - It must hold the hwaccess_lock + * - It must hold kbase_as::transaction_mutex on kctx's address space + * - It must hold the kbasep_js_device_data::runpool_irq::lock */ void kbase_mmu_disable(struct kbase_context *kctx); @@ -695,7 +674,7 @@ void kbase_mmu_disable(struct kbase_context *kctx); * This function must only be called during reset/power-up and it used to * ensure the registers are in a known state. * - * The caller must hold kbdev->mmu_hw_mutex. + * The caller must hold kbdev->as[as_nr].transaction_mutex. */ void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr); @@ -915,10 +894,10 @@ void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle, #ifdef CONFIG_DEBUG_FS /** - * kbase_jit_debugfs_init - Add per context debugfs entry for JIT. + * kbase_jit_debugfs_add - Add per context debugfs entry for JIT. 
* @kctx: kbase context */ -void kbase_jit_debugfs_init(struct kbase_context *kctx); +void kbase_jit_debugfs_add(struct kbase_context *kctx); #endif /* CONFIG_DEBUG_FS */ /** diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c index b6dac5586a7a..f91d3c916355 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c +++ b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c @@ -30,15 +30,13 @@ #include #include #include -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) && \ - (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0)) -#include -#endif /* LINUX_VERSION_CODE >= 3.5.0 && < 4.8.0 */ +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) + #include +#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0) */ #ifdef CONFIG_DMA_SHARED_BUFFER #include #endif /* defined(CONFIG_DMA_SHARED_BUFFER) */ #include -#include #include #include @@ -114,7 +112,7 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages goto bad_size; #if defined(CONFIG_64BIT) - if (kbase_ctx_flag(kctx, KCTX_COMPAT)) + if (kctx->is_compat) cpu_va_bits = 32; #endif @@ -210,19 +208,18 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages } /* - * 10.1-10.4 UKU userland relies on the kernel to call mmap. - * For all other versions we can just return the cookie + * Pre-10.1 UKU userland calls mmap for us so return the + * unaligned address and skip the map. */ - if (kctx->api_version < KBASE_API_VERSION(10, 1) || - kctx->api_version > KBASE_API_VERSION(10, 4)) { + if (kctx->api_version < KBASE_API_VERSION(10, 1)) { *gpu_va = (u64) cookie; return reg; } /* - * To achieve alignment and avoid allocating on large alignment - * (to work around a GPU hardware issue) we must allocate 3 - * times the required size. + * GPUCORE-2190: + * + * We still need to return alignment for old userspace. */ if (*va_alignment) va_map += 3 * (1UL << *va_alignment); @@ -236,10 +233,8 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages MAP_SHARED, cookie); if (IS_ERR_VALUE(cpu_addr)) { - kbase_gpu_vm_lock(kctx); kctx->pending_regions[cookie_nr] = NULL; kctx->cookies |= (1UL << cookie_nr); - kbase_gpu_vm_unlock(kctx); goto no_mmap; } @@ -1041,7 +1036,7 @@ static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, in shared_zone = true; #ifdef CONFIG_64BIT - if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) { + if (!kctx->is_compat) { /* * 64-bit tasks require us to reserve VA on the CPU that we use * on the GPU. @@ -1138,7 +1133,7 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( shared_zone = true; #ifdef CONFIG_64BIT - if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) { + if (!kctx->is_compat) { /* * 64-bit tasks require us to reserve VA on the CPU that we use * on the GPU. 
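/*
 * Illustration of the pattern the hunks in this file keep toggling: r14p0
 * reads per-context state through kbase_ctx_flag(kctx, KCTX_COMPAT) and
 * friends, while the restored r13p0 code uses one boolean per property
 * (kctx->is_compat, is_scheduled, is_dying). The sketch below shows both
 * styles side by side; the sketch_* names and bit assignments are
 * hypothetical, not the driver's actual definitions.
 */
#include <linux/bitops.h>
#include <linux/types.h>

/* Hypothetical bit numbers standing in for KCTX_COMPAT, KCTX_DYING, ... */
enum {
	SKETCH_CTX_COMPAT,
	SKETCH_CTX_DYING,
};

struct sketch_context {
	/* r14p0 style: independent properties packed into one flags word */
	unsigned long flags;
	/* r13p0 style (restored by this revert): one field per property */
	bool is_compat;
	bool is_dying;
};

/* r14p0-style accessors; test_bit()/set_bit()/clear_bit() act atomically
 * on a single bit, so no extra lock is needed just to query a flag. */
static inline bool sketch_ctx_flag(struct sketch_context *kctx, int flag)
{
	return test_bit(flag, &kctx->flags);
}

static inline void sketch_ctx_flag_set(struct sketch_context *kctx, int flag)
{
	set_bit(flag, &kctx->flags);
}

static inline void sketch_ctx_flag_clear(struct sketch_context *kctx, int flag)
{
	clear_bit(flag, &kctx->flags);
}

/*
 * The r13p0 booleans are instead guarded by the locks documented next to them
 * in mali_kbase_js_defs.h (for example, is_scheduled is only updated while the
 * jsctx_mutex is held), and the same revert moves the scheduler's IRQ-context
 * data back under js_data.runpool_irq.lock in place of the consolidated
 * hwaccess_lock.
 */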
@@ -1271,7 +1266,7 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, *num_pages = nents * stride; #ifdef CONFIG_64BIT - if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) { + if (!kctx->is_compat) { /* 64-bit tasks must MMAP anyway, but not expose this address to * clients */ *flags |= BASE_MEM_NEED_MMAP; @@ -1363,7 +1358,7 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, } #ifdef CONFIG_64BIT - if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) { + if (!kctx->is_compat) { /* Bind to a cookie */ if (!kctx->cookies) { dev_err(kctx->kbdev->dev, "No cookies available for allocation!"); @@ -1416,32 +1411,6 @@ bad_flags: return 0; } -static u32 kbase_get_cache_line_alignment(struct kbase_context *kctx) -{ - u32 cpu_cache_line_size = cache_line_size(); - u32 gpu_cache_line_size = - (1UL << kctx->kbdev->gpu_props.props.l2_props.log2_line_size); - - return ((cpu_cache_line_size > gpu_cache_line_size) ? - cpu_cache_line_size : - gpu_cache_line_size); -} - -static int kbase_check_buffer_size(struct kbase_context *kctx, u64 size) -{ - u32 cache_line_align = kbase_get_cache_line_alignment(kctx); - - return (size & (cache_line_align - 1)) == 0 ? 0 : -EINVAL; -} - -static int kbase_check_buffer_cache_alignment(struct kbase_context *kctx, - void __user *ptr) -{ - u32 cache_line_align = kbase_get_cache_line_alignment(kctx); - - return ((uintptr_t)ptr & (cache_line_align - 1)) == 0 ? 0 : -EINVAL; -} - int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, void __user *phandle, u64 *gpu_va, u64 *va_pages, u64 *flags) @@ -1454,7 +1423,7 @@ int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, KBASE_DEBUG_ASSERT(flags); #ifdef CONFIG_64BIT - if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) + if (!kctx->is_compat) *flags |= BASE_MEM_SAME_VA; #endif @@ -1497,26 +1466,12 @@ int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, reg = NULL; } else { #ifdef CONFIG_COMPAT - if (kbase_ctx_flag(kctx, KCTX_COMPAT)) + if (kctx->is_compat) uptr = compat_ptr(user_buffer.ptr.compat_value); else #endif uptr = user_buffer.ptr.value; - if (0 != kbase_check_buffer_cache_alignment(kctx, - uptr)) { - dev_warn(kctx->kbdev->dev, - "User buffer is not cache line aligned!\n"); - goto no_reg; - } - - if (0 != kbase_check_buffer_size(kctx, - user_buffer.length)) { - dev_warn(kctx->kbdev->dev, - "User buffer size is not multiple of cache line size!\n"); - goto no_reg; - } - reg = kbase_mem_from_user_buffer(kctx, (unsigned long)uptr, user_buffer.length, va_pages, flags); @@ -2191,6 +2146,44 @@ void kbase_os_mem_map_unlock(struct kbase_context *kctx) up_read(&mm->mmap_sem); } +#if defined(CONFIG_DMA_SHARED_BUFFER) && defined(CONFIG_MALI_TRACE_TIMELINE) +/* This section is required only for instrumentation. */ + +static void kbase_dma_buf_vm_open(struct vm_area_struct *vma) +{ + struct kbase_cpu_mapping *map = vma->vm_private_data; + + KBASE_DEBUG_ASSERT(map); + KBASE_DEBUG_ASSERT(map->count > 0); + /* Non-atomic as we're under Linux's mm lock. */ + map->count++; +} + +static void kbase_dma_buf_vm_close(struct vm_area_struct *vma) +{ + struct kbase_cpu_mapping *map = vma->vm_private_data; + + KBASE_DEBUG_ASSERT(map); + KBASE_DEBUG_ASSERT(map->count > 0); + + /* Non-atomic as we're under Linux's mm lock. 
*/ + if (--map->count) + return; + + KBASE_DEBUG_ASSERT(map->kctx); + + kbase_gpu_vm_lock(map->kctx); + list_del(&map->mappings_list); + kbase_gpu_vm_unlock(map->kctx); + kfree(map); +} + +static const struct vm_operations_struct kbase_dma_mmap_ops = { + .open = kbase_dma_buf_vm_open, + .close = kbase_dma_buf_vm_close, +}; +#endif /* CONFIG_DMA_SHARED_BUFFER && CONFIG_MALI_TRACE_TIMELINE */ + int kbase_mmap(struct file *file, struct vm_area_struct *vma) { struct kbase_context *kctx = file->private_data; @@ -2409,6 +2402,35 @@ map: #ifdef CONFIG_DMA_SHARED_BUFFER dma_map: err = dma_buf_mmap(reg->cpu_alloc->imported.umm.dma_buf, vma, vma->vm_pgoff - reg->start_pfn); +#if defined(CONFIG_MALI_TRACE_TIMELINE) + /* This section is required only for instrumentation. */ + /* Add created mapping to imported region mapping list. + * It is important to make it visible to dumping infrastructure. + * Add mapping only if vm_ops structure is not used by memory owner. */ + WARN_ON(vma->vm_ops); + WARN_ON(vma->vm_private_data); + if (!err && !vma->vm_ops && !vma->vm_private_data) { + struct kbase_cpu_mapping *map = kzalloc( + sizeof(*map), + GFP_KERNEL); + + if (map) { + map->kctx = reg->kctx; + map->region = NULL; + map->page_off = vma->vm_pgoff; + map->vm_start = vma->vm_start; + map->vm_end = vma->vm_end; + map->count = 1; /* start with one ref */ + + vma->vm_ops = &kbase_dma_mmap_ops; + vma->vm_private_data = map; + + list_add( + &map->mappings_list, + ®->cpu_alloc->mappings); + } + } +#endif /* CONFIG_MALI_TRACE_TIMELINE */ #endif /* CONFIG_DMA_SHARED_BUFFER */ out_unlock: kbase_gpu_vm_unlock(kctx); @@ -2697,9 +2719,7 @@ void *kbase_va_alloc(struct kbase_context *kctx, u32 size, struct kbase_hwc_dma_ dma_addr_t dma_pa; struct kbase_va_region *reg; phys_addr_t *page_array; -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0)) - unsigned long attrs = DMA_ATTR_WRITE_COMBINE; -#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) DEFINE_DMA_ATTRS(attrs); #endif @@ -2715,13 +2735,9 @@ void *kbase_va_alloc(struct kbase_context *kctx, u32 size, struct kbase_hwc_dma_ goto err; /* All the alloc calls return zeroed memory */ -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0)) - va = dma_alloc_attrs(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL, - attrs); -#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); - va = dma_alloc_attrs(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL, - &attrs); + va = dma_alloc_attrs(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL, &attrs); #else va = dma_alloc_writecombine(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL); #endif @@ -2768,9 +2784,7 @@ no_mmap: no_alloc: kfree(reg); no_reg: -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0)) - dma_free_attrs(kctx->kbdev->dev, size, va, dma_pa, attrs); -#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) dma_free_attrs(kctx->kbdev->dev, size, va, dma_pa, &attrs); #else dma_free_writecombine(kctx->kbdev->dev, size, va, dma_pa); @@ -2784,8 +2798,7 @@ void kbase_va_free(struct kbase_context *kctx, struct kbase_hwc_dma_mapping *han { struct kbase_va_region *reg; int err; -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) && \ - (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0)) +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) DEFINE_DMA_ATTRS(attrs); #endif @@ -2803,10 +2816,7 @@ void kbase_va_free(struct kbase_context *kctx, struct 
kbase_hwc_dma_mapping *han kbase_mem_phy_alloc_put(reg->gpu_alloc); kfree(reg); -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0)) - dma_free_attrs(kctx->kbdev->dev, handle->size, - handle->cpu_va, handle->dma_pa, DMA_ATTR_WRITE_COMBINE); -#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); dma_free_attrs(kctx->kbdev->dev, handle->size, handle->cpu_va, handle->dma_pa, &attrs); diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c b/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c index 9a3f9b571d96..957061893b00 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c +++ b/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c @@ -175,11 +175,11 @@ static void kbase_mem_pool_spill(struct kbase_mem_pool *next_pool, kbase_mem_pool_add(next_pool, p); } -struct page *kbase_mem_alloc_page(struct kbase_device *kbdev) +static struct page *kbase_mem_pool_alloc_page(struct kbase_mem_pool *pool) { struct page *p; gfp_t gfp; - struct device *dev = kbdev->dev; + struct device *dev = pool->kbdev->dev; dma_addr_t dma_addr; #if defined(CONFIG_ARM) && !defined(CONFIG_HAVE_DMA_ATTRS) && \ @@ -210,6 +210,8 @@ struct page *kbase_mem_alloc_page(struct kbase_device *kbdev) kbase_set_dma_addr(p, dma_addr); + pool_dbg(pool, "alloced page from kernel\n"); + return p; } @@ -254,35 +256,36 @@ static size_t kbase_mem_pool_shrink(struct kbase_mem_pool *pool, return nr_freed; } -int kbase_mem_pool_grow(struct kbase_mem_pool *pool, +static size_t kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow) { struct page *p; size_t i; - for (i = 0; i < nr_to_grow; i++) { - p = kbase_mem_alloc_page(pool->kbdev); + for (i = 0; i < nr_to_grow && !kbase_mem_pool_is_full(pool); i++) { + p = kbase_mem_pool_alloc_page(pool); if (!p) - return -ENOMEM; + break; kbase_mem_pool_add(pool, p); } - return 0; + return i; } -void kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size) +size_t kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size) { size_t cur_size; cur_size = kbase_mem_pool_size(pool); - if (new_size > pool->max_size) - new_size = pool->max_size; - if (new_size < cur_size) kbase_mem_pool_shrink(pool, cur_size - new_size); else if (new_size > cur_size) kbase_mem_pool_grow(pool, new_size - cur_size); + + cur_size = kbase_mem_pool_size(pool); + + return cur_size; } void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size) @@ -425,17 +428,21 @@ struct page *kbase_mem_pool_alloc(struct kbase_mem_pool *pool) { struct page *p; - do { - pool_dbg(pool, "alloc()\n"); - p = kbase_mem_pool_remove(pool); + pool_dbg(pool, "alloc()\n"); + + p = kbase_mem_pool_remove(pool); - if (p) - return p; + if (!p && pool->next_pool) { + /* Allocate via next pool */ + return kbase_mem_pool_alloc(pool->next_pool); + } - pool = pool->next_pool; - } while (pool); + if (!p) { + /* Get page from kernel */ + p = kbase_mem_pool_alloc_page(pool); + } - return NULL; + return p; } void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *p, @@ -492,7 +499,7 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_pages, /* Get any remaining pages from kernel */ for (; i < nr_pages; i++) { - p = kbase_mem_alloc_page(pool->kbdev); + p = kbase_mem_pool_alloc_page(pool); if (!p) goto err_rollback; pages[i] = page_to_phys(p); diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c b/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c index 585fba036c9e..493665b7e607 100644 --- 
a/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c +++ b/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -68,7 +68,7 @@ DEFINE_SIMPLE_ATTRIBUTE(kbase_mem_pool_debugfs_max_size_fops, kbase_mem_pool_debugfs_max_size_set, "%llu\n"); -void kbase_mem_pool_debugfs_init(struct dentry *parent, +void kbase_mem_pool_debugfs_add(struct dentry *parent, struct kbase_mem_pool *pool) { debugfs_create_file("mem_pool_size", S_IRUGO | S_IWUSR, parent, diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h b/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h index 1442854e8956..458f3f09e697 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h +++ b/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -21,7 +21,7 @@ #include /** - * kbase_mem_pool_debugfs_init - add debugfs knobs for @pool + * kbase_mem_pool_debugfs_add - add debugfs knobs for @pool * @parent: Parent debugfs dentry * @pool: Memory pool to control * @@ -29,7 +29,7 @@ * - mem_pool_size: get/set the current size of @pool * - mem_pool_max_size: get/set the max size of @pool */ -void kbase_mem_pool_debugfs_init(struct dentry *parent, +void kbase_mem_pool_debugfs_add(struct dentry *parent, struct kbase_mem_pool *pool); #endif /*_KBASE_MEM_POOL_DEBUGFS_H*/ diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c index 092da9a96ea5..03594102f7ef 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c +++ b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c @@ -67,27 +67,26 @@ int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data, mutex_lock(&kctx->mem_profile_lock); dev_dbg(kctx->kbdev->dev, "initialised: %d", - kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)); + kctx->mem_profile_initialized); - if (!kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)) { + if (!kctx->mem_profile_initialized) { if (!debugfs_create_file("mem_profile", S_IRUGO, kctx->kctx_dentry, kctx, &kbasep_mem_profile_debugfs_fops)) { err = -EAGAIN; } else { - kbase_ctx_flag_set(kctx, - KCTX_MEM_PROFILE_INITIALIZED); + kctx->mem_profile_initialized = true; } } - if (kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)) { + if (kctx->mem_profile_initialized) { kfree(kctx->mem_profile_data); kctx->mem_profile_data = data; kctx->mem_profile_size = size; } dev_dbg(kctx->kbdev->dev, "returning: %d, initialised: %d", - err, kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)); + err, kctx->mem_profile_initialized); mutex_unlock(&kctx->mem_profile_lock); @@ -99,7 +98,7 @@ void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx) mutex_lock(&kctx->mem_profile_lock); dev_dbg(kctx->kbdev->dev, "initialised: %d", - kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)); + kctx->mem_profile_initialized); kfree(kctx->mem_profile_data); kctx->mem_profile_data = NULL; diff --git a/drivers/gpu/arm/midgard/mali_kbase_mmu.c 
b/drivers/gpu/arm/midgard/mali_kbase_mmu.c index 5c1b9c3b84c4..48d53723a9b4 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_mmu.c +++ b/drivers/gpu/arm/midgard/mali_kbase_mmu.c @@ -41,7 +41,6 @@ #include #include #include -#include #define KBASE_MMU_PAGE_ENTRIES 512 @@ -140,16 +139,6 @@ void page_fault_worker(struct work_struct *data) KBASE_DEBUG_ASSERT(kctx->kbdev == kbdev); - if (unlikely(faulting_as->protected_mode)) - { - kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Protected mode fault"); - kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx, - KBASE_MMU_FAULT_TYPE_PAGE); - - goto fault_done; - } - fault_status = faulting_as->fault_status; switch (fault_status & AS_FAULTSTATUS_EXCEPTION_CODE_MASK) { @@ -164,7 +153,7 @@ void page_fault_worker(struct work_struct *data) case AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT: kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Translation table bus fault"); + "Tranlation table bus fault"); goto fault_done; case AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG: @@ -230,8 +219,6 @@ void page_fault_worker(struct work_struct *data) region->start_pfn + kbase_reg_current_backed_size(region)); - mutex_lock(&kbdev->mmu_hw_mutex); - kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx, KBASE_MMU_FAULT_TYPE_PAGE); /* [1] in case another page fault occurred while we were @@ -244,9 +231,6 @@ void page_fault_worker(struct work_struct *data) */ kbase_mmu_hw_do_operation(kbdev, faulting_as, NULL, 0, 0, AS_COMMAND_UNLOCK, 1); - - mutex_unlock(&kbdev->mmu_hw_mutex); - kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx, KBASE_MMU_FAULT_TYPE_PAGE); kbase_gpu_vm_unlock(kctx); @@ -265,17 +249,12 @@ void page_fault_worker(struct work_struct *data) kbase_reg_current_backed_size(region); if (0 == new_pages) { - mutex_lock(&kbdev->mmu_hw_mutex); - /* Duplicate of a fault we've already handled, nothing to do */ kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx, KBASE_MMU_FAULT_TYPE_PAGE); /* See comment [1] about UNLOCK usage */ kbase_mmu_hw_do_operation(kbdev, faulting_as, NULL, 0, 0, AS_COMMAND_UNLOCK, 1); - - mutex_unlock(&kbdev->mmu_hw_mutex); - kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx, KBASE_MMU_FAULT_TYPE_PAGE); kbase_gpu_vm_unlock(kctx); @@ -335,7 +314,7 @@ void page_fault_worker(struct work_struct *data) kbase_tlstream_aux_pagefault(kctx->id, (u64)new_pages); /* AS transaction begin */ - mutex_lock(&kbdev->mmu_hw_mutex); + mutex_lock(&faulting_as->transaction_mutex); /* flush L2 and unlock the VA (resumes the MMU) */ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6367)) @@ -359,7 +338,7 @@ void page_fault_worker(struct work_struct *data) new_pages, op, 1); - mutex_unlock(&kbdev->mmu_hw_mutex); + mutex_unlock(&faulting_as->transaction_mutex); /* AS transaction end */ /* reenable this in the mask */ @@ -427,17 +406,14 @@ sub_pages: KBASE_EXPORT_TEST_API(kbase_mmu_alloc_pgd); -/* Given PGD PFN for level N, return PGD PFN for level N+1, allocating the - * new table from the pool if needed and possible - */ -static int mmu_get_next_pgd(struct kbase_context *kctx, - phys_addr_t *pgd, u64 vpfn, int level) +/* Given PGD PFN for level N, return PGD PFN for level N+1 */ +static phys_addr_t mmu_get_next_pgd(struct kbase_context *kctx, phys_addr_t pgd, u64 vpfn, int level) { u64 *page; phys_addr_t target_pgd; struct page *p; - KBASE_DEBUG_ASSERT(*pgd); + KBASE_DEBUG_ASSERT(pgd); KBASE_DEBUG_ASSERT(NULL != kctx); lockdep_assert_held(&kctx->mmu_lock); @@ -449,11 +425,11 @@ static int mmu_get_next_pgd(struct kbase_context *kctx, vpfn >>= (3 - level) * 9; vpfn &= 
0x1FF; - p = pfn_to_page(PFN_DOWN(*pgd)); + p = pfn_to_page(PFN_DOWN(pgd)); page = kmap(p); if (NULL == page) { dev_warn(kctx->kbdev->dev, "mmu_get_next_pgd: kmap failure\n"); - return -EINVAL; + return 0; } target_pgd = kctx->kbdev->mmu_mode->pte_to_phy_addr(page[vpfn]); @@ -461,9 +437,9 @@ static int mmu_get_next_pgd(struct kbase_context *kctx, if (!target_pgd) { target_pgd = kbase_mmu_alloc_pgd(kctx); if (!target_pgd) { - dev_dbg(kctx->kbdev->dev, "mmu_get_next_pgd: kbase_mmu_alloc_pgd failure\n"); + dev_warn(kctx->kbdev->dev, "mmu_get_next_pgd: kbase_mmu_alloc_pgd failure\n"); kunmap(p); - return -ENOMEM; + return 0; } kctx->kbdev->mmu_mode->entry_set_pte(&page[vpfn], target_pgd); @@ -473,13 +449,10 @@ static int mmu_get_next_pgd(struct kbase_context *kctx, } kunmap(p); - *pgd = target_pgd; - - return 0; + return target_pgd; } -static int mmu_get_bottom_pgd(struct kbase_context *kctx, - u64 vpfn, phys_addr_t *out_pgd) +static phys_addr_t mmu_get_bottom_pgd(struct kbase_context *kctx, u64 vpfn) { phys_addr_t pgd; int l; @@ -488,17 +461,15 @@ static int mmu_get_bottom_pgd(struct kbase_context *kctx, pgd = kctx->pgd; for (l = MIDGARD_MMU_TOPLEVEL; l < MIDGARD_MMU_BOTTOMLEVEL; l++) { - int err = mmu_get_next_pgd(kctx, &pgd, vpfn, l); + pgd = mmu_get_next_pgd(kctx, pgd, vpfn, l); /* Handle failure condition */ - if (err) { - dev_dbg(kctx->kbdev->dev, "mmu_get_bottom_pgd: mmu_get_next_pgd failure\n"); - return err; + if (!pgd) { + dev_warn(kctx->kbdev->dev, "mmu_get_bottom_pgd: mmu_get_next_pgd failure\n"); + return 0; } } - *out_pgd = pgd; - - return 0; + return pgd; } static phys_addr_t mmu_insert_pages_recover_get_next_pgd(struct kbase_context *kctx, phys_addr_t pgd, u64 vpfn, int level) @@ -637,19 +608,8 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, * On the other hand, it's only a gain when we map more than * 256 pages at once (on average). Do we really care? */ - do { - err = mmu_get_bottom_pgd(kctx, vpfn, &pgd); - if (err != -ENOMEM) - break; - /* Fill the memory pool with enough pages for - * the page walk to succeed - */ - mutex_unlock(&kctx->mmu_lock); - err = kbase_mem_pool_grow(&kctx->mem_pool, - MIDGARD_MMU_BOTTOMLEVEL); - mutex_lock(&kctx->mmu_lock); - } while (!err); - if (err) { + pgd = mmu_get_bottom_pgd(kctx, vpfn); + if (!pgd) { dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: mmu_get_bottom_pgd failure\n"); if (recover_required) { /* Invalidate the pages we have partially @@ -658,6 +618,7 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, recover_vpfn, recover_count); } + err = -EINVAL; goto fail_unlock; } @@ -749,19 +710,8 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_context *kctx, u64 vpfn, * On the other hand, it's only a gain when we map more than * 256 pages at once (on average). Do we really care? 
*/ - do { - err = mmu_get_bottom_pgd(kctx, vpfn, &pgd); - if (err != -ENOMEM) - break; - /* Fill the memory pool with enough pages for - * the page walk to succeed - */ - mutex_unlock(&kctx->mmu_lock); - err = kbase_mem_pool_grow(&kctx->mem_pool, - MIDGARD_MMU_BOTTOMLEVEL); - mutex_lock(&kctx->mmu_lock); - } while (!err); - if (err) { + pgd = mmu_get_bottom_pgd(kctx, vpfn); + if (!pgd) { dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: mmu_get_bottom_pgd failure\n"); if (recover_required) { /* Invalidate the pages we have partially @@ -770,6 +720,7 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_context *kctx, u64 vpfn, recover_vpfn, recover_count); } + err = -EINVAL; goto fail_unlock; } @@ -918,7 +869,8 @@ static void kbase_mmu_flush_invalidate(struct kbase_context *kctx, u32 op; /* AS transaction begin */ - mutex_lock(&kbdev->mmu_hw_mutex); + mutex_lock(&kbdev->as[ + kctx->as_nr].transaction_mutex); if (sync) op = AS_COMMAND_FLUSH_MEM; @@ -941,7 +893,8 @@ static void kbase_mmu_flush_invalidate(struct kbase_context *kctx, } #endif /* KBASE_GPU_RESET_EN */ - mutex_unlock(&kbdev->mmu_hw_mutex); + mutex_unlock(&kbdev->as[ + kctx->as_nr].transaction_mutex); /* AS transaction end */ #ifndef CONFIG_MALI_NO_MALI @@ -968,13 +921,13 @@ static void kbase_mmu_flush_invalidate(struct kbase_context *kctx, void kbase_mmu_update(struct kbase_context *kctx) { - lockdep_assert_held(&kctx->kbdev->hwaccess_lock); - lockdep_assert_held(&kctx->kbdev->mmu_hw_mutex); + lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); /* ASSERT that the context has a valid as_nr, which is only the case * when it's scheduled in. * - * as_nr won't change because the caller has the hwaccess_lock */ + * as_nr won't change because the caller has the runpool_irq lock */ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); + lockdep_assert_held(&kctx->kbdev->as[kctx->as_nr].transaction_mutex); kctx->kbdev->mmu_mode->update(kctx); } @@ -982,8 +935,8 @@ KBASE_EXPORT_TEST_API(kbase_mmu_update); void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr) { - lockdep_assert_held(&kbdev->hwaccess_lock); - lockdep_assert_held(&kbdev->mmu_hw_mutex); + lockdep_assert_held(&kbdev->as[as_nr].transaction_mutex); + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); kbdev->mmu_mode->disable_as(kbdev, as_nr); } @@ -993,10 +946,11 @@ void kbase_mmu_disable(struct kbase_context *kctx) /* ASSERT that the context has a valid as_nr, which is only the case * when it's scheduled in. 
* - * as_nr won't change because the caller has the hwaccess_lock */ + * as_nr won't change because the caller has the runpool_irq lock */ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); - lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + lockdep_assert_held(&kctx->kbdev->as[kctx->as_nr].transaction_mutex); + lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); /* * The address space is being disabled, drain all knowledge of it out @@ -1054,8 +1008,8 @@ int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr) if (count > nr) count = nr; - err = mmu_get_bottom_pgd(kctx, vpfn, &pgd); - if (err) { + pgd = mmu_get_bottom_pgd(kctx, vpfn); + if (!pgd) { dev_warn(kbdev->dev, "kbase_mmu_teardown_pages: mmu_get_bottom_pgd failure\n"); err = -EINVAL; goto fail_unlock; @@ -1138,20 +1092,10 @@ int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, phys_addr_t *ph if (count > nr) count = nr; - do { - err = mmu_get_bottom_pgd(kctx, vpfn, &pgd); - if (err != -ENOMEM) - break; - /* Fill the memory pool with enough pages for - * the page walk to succeed - */ - mutex_unlock(&kctx->mmu_lock); - err = kbase_mem_pool_grow(&kctx->mem_pool, - MIDGARD_MMU_BOTTOMLEVEL); - mutex_lock(&kctx->mmu_lock); - } while (!err); - if (err) { + pgd = mmu_get_bottom_pgd(kctx, vpfn); + if (!pgd) { dev_warn(kctx->kbdev->dev, "mmu_get_bottom_pgd failure\n"); + err = -EINVAL; goto fail_unlock; } @@ -1372,13 +1316,13 @@ void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages) return NULL; } + mutex_lock(&kctx->mmu_lock); + size_left = nr_pages * PAGE_SIZE; KBASE_DEBUG_ASSERT(0 != size_left); kaddr = vmalloc_user(size_left); - mutex_lock(&kctx->mmu_lock); - if (kaddr) { u64 end_marker = 0xFFULL; char *buffer; @@ -1463,18 +1407,6 @@ void bus_fault_worker(struct work_struct *data) return; } - if (unlikely(faulting_as->protected_mode)) - { - kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Permission failure"); - kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx, - KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); - kbasep_js_runpool_release_ctx(kbdev, kctx); - atomic_dec(&kbdev->faults_pending); - return; - - } - #if KBASE_GPU_RESET_EN if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) { /* Due to H/W issue 8245 we need to reset the GPU after using UNMAPPED mode. 
@@ -1491,14 +1423,15 @@ void bus_fault_worker(struct work_struct *data) /* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */ /* AS transaction begin */ - mutex_lock(&kbdev->mmu_hw_mutex); + mutex_lock(&kbdev->as[as_no].transaction_mutex); /* Set the MMU into unmapped mode */ - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags); kbase_mmu_disable(kctx); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, + flags); - mutex_unlock(&kbdev->mmu_hw_mutex); + mutex_unlock(&kbdev->as[as_no].transaction_mutex); /* AS transaction end */ kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx, @@ -1733,7 +1666,7 @@ static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, dev_err(kbdev->dev, "Unhandled Page fault in AS%d at VA 0x%016llX\n" "Reason: %s\n" - "raw fault status: 0x%X\n" + "raw fault status 0x%X\n" "decoded fault status: %s\n" "exception type 0x%X: %s\n" "access type 0x%X: %s\n" @@ -1762,15 +1695,15 @@ static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, /* Stop the kctx from submitting more jobs and cause it to be scheduled * out/rescheduled - this will occur on releasing the context's refcount */ - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); kbasep_js_clear_submit_allowed(js_devdata, kctx); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); /* Kill any running jobs from the context. Submit is disallowed, so no more jobs from this * context can appear in the job slots from this point on */ kbase_backend_jm_kill_jobs_from_kctx(kctx); /* AS transaction begin */ - mutex_lock(&kbdev->mmu_hw_mutex); + mutex_lock(&as->transaction_mutex); #if KBASE_GPU_RESET_EN if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) { /* Due to H/W issue 8245 we need to reset the GPU after using UNMAPPED mode. 
@@ -1782,11 +1715,11 @@ static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, } #endif /* KBASE_GPU_RESET_EN */ /* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */ - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); kbase_mmu_disable(kctx); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); - mutex_unlock(&kbdev->mmu_hw_mutex); + mutex_unlock(&as->transaction_mutex); /* AS transaction end */ /* Clear down the fault */ kbase_mmu_hw_clear_fault(kbdev, as, kctx, @@ -1823,20 +1756,20 @@ void kbasep_as_do_poke(struct work_struct *work) kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as->number); /* AS transaction begin */ - mutex_lock(&kbdev->mmu_hw_mutex); + mutex_lock(&as->transaction_mutex); /* Force a uTLB invalidate */ kbase_mmu_hw_do_operation(kbdev, as, kctx, 0, 0, AS_COMMAND_UNLOCK, 0); - mutex_unlock(&kbdev->mmu_hw_mutex); + mutex_unlock(&as->transaction_mutex); /* AS transaction end */ - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags); if (as->poke_refcount && !(as->poke_state & KBASE_AS_POKE_STATE_KILLING_POKE)) { /* Only queue up the timer if we need it, and we're not trying to kill it */ hrtimer_start(&as->poke_timer, HR_TIMER_DELAY_MSEC(5), HRTIMER_MODE_REL); } - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags); } enum hrtimer_restart kbasep_as_poke_timer_callback(struct hrtimer *timer) @@ -1860,7 +1793,7 @@ enum hrtimer_restart kbasep_as_poke_timer_callback(struct hrtimer *timer) * This must only be called on a context that's scheduled in, and an atom * that's running on the GPU. 
* - * The caller must hold hwaccess_lock + * The caller must hold kbasep_js_device_data::runpool_irq::lock * * This can be called safely from atomic context */ @@ -1872,7 +1805,7 @@ void kbase_as_poking_timer_retain_atom(struct kbase_device *kbdev, struct kbase_ KBASE_DEBUG_ASSERT(kctx); KBASE_DEBUG_ASSERT(katom); KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); - lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); if (katom->poking) return; @@ -1880,7 +1813,7 @@ void kbase_as_poking_timer_retain_atom(struct kbase_device *kbdev, struct kbase_ katom->poking = 1; /* It's safe to work on the as/as_nr without an explicit reference, - * because the caller holds the hwaccess_lock, and the atom itself + * because the caller holds the runpool_irq lock, and the atom itself * was also running and had already taken a reference */ as = &kbdev->as[kctx->as_nr]; @@ -1917,18 +1850,18 @@ void kbase_as_poking_timer_release_atom(struct kbase_device *kbdev, struct kbase as = &kbdev->as[kctx->as_nr]; - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags); KBASE_DEBUG_ASSERT(as->poke_refcount > 0); KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT); if (--(as->poke_refcount) == 0) { as->poke_state |= KBASE_AS_POKE_STATE_KILLING_POKE; - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags); hrtimer_cancel(&as->poke_timer); flush_workqueue(as->poke_wq); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags); /* Re-check whether it's still needed */ if (as->poke_refcount) { @@ -1953,7 +1886,7 @@ void kbase_as_poking_timer_release_atom(struct kbase_device *kbdev, struct kbase * the context until new atoms are run */ } } - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags); katom->poking = 0; } @@ -1962,7 +1895,7 @@ void kbase_mmu_interrupt_process(struct kbase_device *kbdev, struct kbase_contex { struct kbasep_js_device_data *js_devdata = &kbdev->js_data; - lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); if (!kctx) { dev_warn(kbdev->dev, "%s in AS%d at 0x%016llx with no context present! Suprious IRQ or SW Design Error?\n", diff --git a/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.c b/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.c deleted file mode 100644 index c970650069cd..000000000000 --- a/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.c +++ /dev/null @@ -1,130 +0,0 @@ -/* - * - * (C) COPYRIGHT 2016 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * A copy of the licence is included with the program, and can also be obtained - * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. 
- * - */ - - - -#include "mali_kbase.h" - -#include "mali_kbase_regs_history_debugfs.h" - -#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI) - -#include - - -static int regs_history_size_get(void *data, u64 *val) -{ - struct kbase_io_history *const h = data; - - *val = h->size; - - return 0; -} - -static int regs_history_size_set(void *data, u64 val) -{ - struct kbase_io_history *const h = data; - - return kbase_io_history_resize(h, (u16)val); -} - - -DEFINE_SIMPLE_ATTRIBUTE(regs_history_size_fops, - regs_history_size_get, - regs_history_size_set, - "%llu\n"); - - -/** - * regs_history_show - show callback for the register access history file. - * - * @sfile: The debugfs entry - * @data: Data associated with the entry - * - * This function is called to dump all recent accesses to the GPU registers. - * - * @return 0 if successfully prints data in debugfs entry file, failure - * otherwise - */ -static int regs_history_show(struct seq_file *sfile, void *data) -{ - struct kbase_io_history *const h = sfile->private; - u16 i; - size_t iters; - unsigned long flags; - - if (!h->enabled) { - seq_puts(sfile, "The register access history is disabled\n"); - goto out; - } - - spin_lock_irqsave(&h->lock, flags); - - iters = (h->size > h->count) ? h->count : h->size; - seq_printf(sfile, "Last %zu register accesses of %zu total:\n", iters, - h->count); - for (i = 0; i < iters; ++i) { - struct kbase_io_access *io = - &h->buf[(h->count - iters + i) % h->size]; - char const access = (io->addr & 1) ? 'w' : 'r'; - - seq_printf(sfile, "%6i: %c: reg 0x%p val %08x\n", i, access, - (void *)(io->addr & ~0x1), io->value); - } - - spin_unlock_irqrestore(&h->lock, flags); - -out: - return 0; -} - - -/** - * regs_history_open - open operation for regs_history debugfs file - * - * @in: &struct inode pointer - * @file: &struct file pointer - * - * @return file descriptor - */ -static int regs_history_open(struct inode *in, struct file *file) -{ - return single_open(file, ®s_history_show, in->i_private); -} - - -static const struct file_operations regs_history_fops = { - .open = ®s_history_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - - -void kbasep_regs_history_debugfs_init(struct kbase_device *kbdev) -{ - debugfs_create_bool("regs_history_enabled", S_IRUGO | S_IWUSR, - kbdev->mali_debugfs_directory, - &kbdev->io_history.enabled); - debugfs_create_file("regs_history_size", S_IRUGO | S_IWUSR, - kbdev->mali_debugfs_directory, - &kbdev->io_history, ®s_history_size_fops); - debugfs_create_file("regs_history", S_IRUGO, - kbdev->mali_debugfs_directory, &kbdev->io_history, - ®s_history_fops); -} - - -#endif /* CONFIG_DEBUG_FS */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.h b/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.h deleted file mode 100644 index f10837002330..000000000000 --- a/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * - * (C) COPYRIGHT 2016 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * A copy of the licence is included with the program, and can also be obtained - * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. 
- * - */ - - - -/** - * Header file for register access history support via debugfs - * - * This interface is made available via /sys/kernel/debug/mali#/regs_history*. - * - * Usage: - * - regs_history_enabled: whether recording of register accesses is enabled. - * Write 'y' to enable, 'n' to disable. - * - regs_history_size: size of the register history buffer, must be > 0 - * - regs_history: return the information about last accesses to the registers. - */ - -#ifndef _KBASE_REGS_HISTORY_DEBUGFS_H -#define _KBASE_REGS_HISTORY_DEBUGFS_H - -struct kbase_device; - -#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI) - -/** - * kbasep_regs_history_debugfs_init - add debugfs entries for register history - * - * @kbdev: Pointer to kbase_device containing the register history - */ -void kbasep_regs_history_debugfs_init(struct kbase_device *kbdev); - -#else /* CONFIG_DEBUG_FS */ - -#define kbasep_regs_history_debugfs_init CSTD_NOP - -#endif /* CONFIG_DEBUG_FS */ - -#endif /*_KBASE_REGS_HISTORY_DEBUGFS_H*/ diff --git a/drivers/gpu/arm/midgard/mali_kbase_replay.c b/drivers/gpu/arm/midgard/mali_kbase_replay.c index 84aa3316e435..d3a3dbfa5241 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_replay.c +++ b/drivers/gpu/arm/midgard/mali_kbase_replay.c @@ -1098,6 +1098,7 @@ static bool kbase_replay_fault_check(struct kbase_jd_atom *katom) bool kbase_replay_process(struct kbase_jd_atom *katom) { struct kbase_context *kctx = katom->kctx; + struct kbase_jd_context *jctx = &kctx->jctx; struct kbase_device *kbdev = kctx->kbdev; /* Don't replay this atom if these issues are not present in the @@ -1121,7 +1122,7 @@ bool kbase_replay_process(struct kbase_jd_atom *katom) return false; } - if (kbase_ctx_flag(kctx, KCTX_DYING)) { + if (jctx->sched_info.ctx.is_dying) { dev_dbg(kbdev->dev, "Not replaying; context is dying\n"); if (katom->retry_count) diff --git a/drivers/gpu/arm/midgard/mali_kbase_softjobs.c b/drivers/gpu/arm/midgard/mali_kbase_softjobs.c index 88b91a2efba2..07b862546f80 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_softjobs.c +++ b/drivers/gpu/arm/midgard/mali_kbase_softjobs.c @@ -639,7 +639,8 @@ struct kbase_debug_copy_buffer { struct page **pages; int nr_pages; size_t offset; - struct kbase_mem_phy_alloc *gpu_alloc; + /*To find memory region*/ + u64 gpu_addr; struct page **extres_pages; int nr_extres_pages; @@ -676,7 +677,10 @@ static void kbase_debug_copy_finish(struct kbase_jd_atom *katom) kbase_gpu_vm_lock(katom->kctx); for (i = 0; i < nr; i++) { int p; - struct kbase_mem_phy_alloc *gpu_alloc = buffers[i].gpu_alloc; + struct kbase_va_region *reg; + + reg = kbase_region_tracker_find_region_enclosing_address( + katom->kctx, buffers[i].gpu_addr); if (!buffers[i].pages) break; @@ -687,8 +691,8 @@ static void kbase_debug_copy_finish(struct kbase_jd_atom *katom) put_page(pg); } kfree(buffers[i].pages); - if (gpu_alloc) { - switch (gpu_alloc->type) { + if (reg && reg->gpu_alloc) { + switch (reg->gpu_alloc->type) { case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { free_user_buffer(&buffers[i]); @@ -698,7 +702,7 @@ static void kbase_debug_copy_finish(struct kbase_jd_atom *katom) /* Nothing to be done. 
*/ break; } - kbase_mem_phy_alloc_put(gpu_alloc); + kbase_mem_phy_alloc_put(reg->gpu_alloc); } } kbase_gpu_vm_unlock(katom->kctx); @@ -786,20 +790,20 @@ static int kbase_debug_copy_prepare(struct kbase_jd_atom *katom) goto out_cleanup; } + buffers[i].gpu_addr = user_extres.ext_resource & + ~BASE_EXT_RES_ACCESS_EXCLUSIVE; kbase_gpu_vm_lock(katom->kctx); reg = kbase_region_tracker_find_region_enclosing_address( - katom->kctx, user_extres.ext_resource & - ~BASE_EXT_RES_ACCESS_EXCLUSIVE); + katom->kctx, buffers[i].gpu_addr); - if (NULL == reg || NULL == reg->gpu_alloc || + if (NULL == reg || NULL == reg->cpu_alloc || (reg->flags & KBASE_REG_FREE)) { ret = -EINVAL; goto out_unlock; } + kbase_mem_phy_alloc_get(reg->gpu_alloc); - buffers[i].gpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); buffers[i].nr_extres_pages = reg->nr_pages; - if (reg->nr_pages*PAGE_SIZE != buffers[i].size) dev_warn(katom->kctx->kbdev->dev, "Copy buffer is not of same size as the external resource to copy.\n"); @@ -905,22 +909,25 @@ static int kbase_mem_copy_from_extres(struct kbase_context *kctx, { unsigned int i; unsigned int target_page_nr = 0; + struct kbase_va_region *reg; struct page **pages = buf_data->pages; u64 offset = buf_data->offset; size_t extres_size = buf_data->nr_extres_pages*PAGE_SIZE; size_t to_copy = min(extres_size, buf_data->size); - struct kbase_mem_phy_alloc *gpu_alloc = buf_data->gpu_alloc; int ret = 0; KBASE_DEBUG_ASSERT(pages != NULL); kbase_gpu_vm_lock(kctx); - if (!gpu_alloc) { + reg = kbase_region_tracker_find_region_enclosing_address( + kctx, buf_data->gpu_addr); + + if (!reg) { ret = -EINVAL; goto out_unlock; } - switch (gpu_alloc->type) { + switch (reg->gpu_alloc->type) { case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { for (i = 0; i < buf_data->nr_extres_pages; i++) { @@ -943,14 +950,14 @@ static int kbase_mem_copy_from_extres(struct kbase_context *kctx, break; #ifdef CONFIG_DMA_SHARED_BUFFER case KBASE_MEM_TYPE_IMPORTED_UMM: { - struct dma_buf *dma_buf = gpu_alloc->imported.umm.dma_buf; + struct dma_buf *dma_buf = reg->gpu_alloc->imported.umm.dma_buf; KBASE_DEBUG_ASSERT(dma_buf != NULL); KBASE_DEBUG_ASSERT(dma_buf->size == buf_data->nr_extres_pages * PAGE_SIZE); ret = dma_buf_begin_cpu_access(dma_buf, -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) && !defined(CONFIG_CHROMEOS) +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) 0, buf_data->nr_extres_pages*PAGE_SIZE, #endif DMA_FROM_DEVICE); @@ -973,7 +980,7 @@ static int kbase_mem_copy_from_extres(struct kbase_context *kctx, break; } dma_buf_end_cpu_access(dma_buf, -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) && !defined(CONFIG_CHROMEOS) +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) 0, buf_data->nr_extres_pages*PAGE_SIZE, #endif DMA_FROM_DEVICE); @@ -1322,11 +1329,9 @@ int kbase_process_soft_job(struct kbase_jd_atom *katom) break; } case BASE_JD_REQ_SOFT_JIT_ALLOC: - return -EINVAL; /* Temporarily disabled */ kbase_jit_allocate_process(katom); break; case BASE_JD_REQ_SOFT_JIT_FREE: - return -EINVAL; /* Temporarily disabled */ kbase_jit_free_process(katom); break; case BASE_JD_REQ_SOFT_EXT_RES_MAP: diff --git a/drivers/gpu/arm/midgard/mali_kbase_tlstream.c b/drivers/gpu/arm/midgard/mali_kbase_tlstream.c index 4c1535fc0139..ad88b7b69018 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_tlstream.c +++ b/drivers/gpu/arm/midgard/mali_kbase_tlstream.c @@ -142,13 +142,7 @@ enum tl_msg_id_obj { KBASE_TL_NDEP_ATOM_ATOM, KBASE_TL_RDEP_ATOM_ATOM, KBASE_TL_ATTRIB_ATOM_CONFIG, - KBASE_TL_ATTRIB_ATOM_PRIORITY, - KBASE_TL_ATTRIB_ATOM_STATE, - 
KBASE_TL_ATTRIB_ATOM_PRIORITY_CHANGE, KBASE_TL_ATTRIB_AS_CONFIG, - KBASE_TL_EVENT_LPU_SOFTSTOP, - KBASE_TL_EVENT_ATOM_SOFTSTOP_EX, - KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE, /* Job dump specific events. */ KBASE_JD_GPU_SOFT_RESET @@ -157,9 +151,11 @@ enum tl_msg_id_obj { /* Message ids of trace events that are recorded in the auxiliary stream. */ enum tl_msg_id_aux { KBASE_AUX_PM_STATE, + KBASE_AUX_ISSUE_JOB_SOFTSTOP, + KBASE_AUX_JOB_SOFTSTOP, + KBASE_AUX_JOB_SOFTSTOP_EX, KBASE_AUX_PAGEFAULT, - KBASE_AUX_PAGESALLOC, - KBASE_AUX_DEVFREQ_TARGET + KBASE_AUX_PAGESALLOC }; /*****************************************************************************/ @@ -433,27 +429,6 @@ static const struct tp_desc tp_desc_obj[] = { "@pLLI", "atom,descriptor,affinity,config" }, - { - KBASE_TL_ATTRIB_ATOM_PRIORITY, - __stringify(KBASE_TL_ATTRIB_ATOM_PRIORITY), - "atom priority", - "@pI", - "atom,prio" - }, - { - KBASE_TL_ATTRIB_ATOM_STATE, - __stringify(KBASE_TL_ATTRIB_ATOM_STATE), - "atom state", - "@pI", - "atom,state" - }, - { - KBASE_TL_ATTRIB_ATOM_PRIORITY_CHANGE, - __stringify(KBASE_TL_ATTRIB_ATOM_PRIORITY_CHANGE), - "atom caused priority change", - "@p", - "atom" - }, { KBASE_TL_ATTRIB_AS_CONFIG, __stringify(KBASE_TL_ATTRIB_AS_CONFIG), @@ -461,27 +436,6 @@ static const struct tp_desc tp_desc_obj[] = { "@pLLL", "address_space,transtab,memattr,transcfg" }, - { - KBASE_TL_EVENT_LPU_SOFTSTOP, - __stringify(KBASE_TL_EVENT_LPU_SOFTSTOP), - "softstop event on given lpu", - "@p", - "lpu" - }, - { - KBASE_TL_EVENT_ATOM_SOFTSTOP_EX, - __stringify(KBASE_TL_EVENT_ATOM_SOFTSTOP_EX), - "atom softstopped", - "@p", - "atom" - }, - { - KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE, - __stringify(KBASE_TL_EVENT_SOFTSTOP_ISSUE), - "atom softstop issued", - "@p", - "atom" - }, { KBASE_JD_GPU_SOFT_RESET, __stringify(KBASE_JD_GPU_SOFT_RESET), @@ -500,6 +454,27 @@ static const struct tp_desc tp_desc_aux[] = { "@IL", "core_type,core_state_bitset" }, + { + KBASE_AUX_ISSUE_JOB_SOFTSTOP, + __stringify(KBASE_AUX_ISSUE_JOB_SOFTSTOP), + "Issuing job soft stop", + "@p", + "atom" + }, + { + KBASE_AUX_JOB_SOFTSTOP, + __stringify(KBASE_AUX_JOB_SOFTSTOP), + "Job soft stop", + "@I", + "tag_id" + }, + { + KBASE_AUX_JOB_SOFTSTOP_EX, + __stringify(KBASE_AUX_JOB_SOFTSTOP_EX), + "Job soft stop, more details", + "@pI", + "atom,job_type" + }, { KBASE_AUX_PAGEFAULT, __stringify(KBASE_AUX_PAGEFAULT), @@ -513,13 +488,6 @@ static const struct tp_desc tp_desc_aux[] = { "Total alloc pages change", "@IL", "ctx_nr,page_cnt" - }, - { - KBASE_AUX_DEVFREQ_TARGET, - __stringify(KBASE_AUX_DEVFREQ_TARGET), - "New device frequency target", - "@L", - "target_freq" } }; @@ -1117,10 +1085,9 @@ static ssize_t kbasep_tlstream_read( ssize_t copy_len = 0; KBASE_DEBUG_ASSERT(filp); + KBASE_DEBUG_ASSERT(buffer); KBASE_DEBUG_ASSERT(f_pos); - - if (!buffer) - return -EINVAL; + CSTD_UNUSED(filp); if ((0 > *f_pos) || (PACKET_SIZE > size)) return -EINVAL; @@ -1348,11 +1315,9 @@ void kbase_tlstream_term(void) } } -int kbase_tlstream_acquire(struct kbase_context *kctx, int *fd, u32 flags) +int kbase_tlstream_acquire(struct kbase_context *kctx, int *fd) { - u32 tlstream_enabled = TLSTREAM_ENABLED | flags; - - if (0 == atomic_cmpxchg(&kbase_tlstream_enabled, 0, tlstream_enabled)) { + if (0 == atomic_cmpxchg(&kbase_tlstream_enabled, 0, 1)) { int rcode; *fd = anon_inode_getfd( @@ -1616,8 +1581,8 @@ void __kbase_tlstream_tl_new_ctx(void *context, u32 nr, u32 tgid) void __kbase_tlstream_tl_new_atom(void *atom, u32 nr) { const u32 msg_id = KBASE_TL_NEW_ATOM; - const size_t msg_size = sizeof(msg_id) 
+ sizeof(u64) + sizeof(atom) + - sizeof(nr); + const size_t msg_size = + sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(nr); unsigned long flags; char *buffer; size_t pos = 0; @@ -2046,79 +2011,6 @@ void __kbase_tlstream_tl_attrib_atom_config( kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void __kbase_tlstream_tl_attrib_atom_priority(void *atom, u32 prio) -{ - const u32 msg_id = KBASE_TL_ATTRIB_ATOM_PRIORITY; - const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(prio); - unsigned long flags; - char *buffer; - size_t pos = 0; - - buffer = kbasep_tlstream_msgbuf_acquire( - TL_STREAM_TYPE_OBJ, - msg_size, &flags); - KBASE_DEBUG_ASSERT(buffer); - - pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &atom, sizeof(atom)); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &prio, sizeof(prio)); - KBASE_DEBUG_ASSERT(msg_size == pos); - - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); -} - -void __kbase_tlstream_tl_attrib_atom_state(void *atom, u32 state) -{ - const u32 msg_id = KBASE_TL_ATTRIB_ATOM_STATE; - const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(state); - unsigned long flags; - char *buffer; - size_t pos = 0; - - buffer = kbasep_tlstream_msgbuf_acquire( - TL_STREAM_TYPE_OBJ, - msg_size, &flags); - KBASE_DEBUG_ASSERT(buffer); - - pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &atom, sizeof(atom)); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &state, sizeof(state)); - KBASE_DEBUG_ASSERT(msg_size == pos); - - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); -} - -void __kbase_tlstream_tl_attrib_atom_priority_change(void *atom) -{ - const u32 msg_id = KBASE_TL_ATTRIB_ATOM_PRIORITY_CHANGE; - const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(atom); - unsigned long flags; - char *buffer; - size_t pos = 0; - - buffer = kbasep_tlstream_msgbuf_acquire( - TL_STREAM_TYPE_OBJ, - msg_size, &flags); - KBASE_DEBUG_ASSERT(buffer); - - pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &atom, sizeof(atom)); - KBASE_DEBUG_ASSERT(msg_size == pos); - - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); -} - void __kbase_tlstream_tl_attrib_as_config( void *as, u64 transtab, u64 memattr, u64 transcfg) { @@ -2150,11 +2042,11 @@ void __kbase_tlstream_tl_attrib_as_config( kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void __kbase_tlstream_tl_event_lpu_softstop(void *lpu) +void __kbase_tlstream_jd_gpu_soft_reset(void *gpu) { - const u32 msg_id = KBASE_TL_EVENT_LPU_SOFTSTOP; + const u32 msg_id = KBASE_JD_GPU_SOFT_RESET; const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(lpu); + sizeof(msg_id) + sizeof(u64) + sizeof(gpu); unsigned long flags; char *buffer; size_t pos = 0; @@ -2167,108 +2059,141 @@ void __kbase_tlstream_tl_event_lpu_softstop(void *lpu) pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); pos = kbasep_tlstream_write_timestamp(buffer, pos); pos = kbasep_tlstream_write_bytes( - buffer, pos, &lpu, sizeof(lpu)); + buffer, pos, &gpu, sizeof(gpu)); KBASE_DEBUG_ASSERT(msg_size == pos); kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void 
__kbase_tlstream_tl_event_atom_softstop_ex(void *atom) +/*****************************************************************************/ + +void __kbase_tlstream_aux_pm_state(u32 core_type, u64 state) { - const u32 msg_id = KBASE_TL_EVENT_ATOM_SOFTSTOP_EX; + const u32 msg_id = KBASE_AUX_PM_STATE; const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(atom); + sizeof(msg_id) + sizeof(u64) + sizeof(core_type) + + sizeof(state); unsigned long flags; char *buffer; size_t pos = 0; buffer = kbasep_tlstream_msgbuf_acquire( - TL_STREAM_TYPE_OBJ, + TL_STREAM_TYPE_AUX, msg_size, &flags); KBASE_DEBUG_ASSERT(buffer); pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); pos = kbasep_tlstream_write_timestamp(buffer, pos); pos = kbasep_tlstream_write_bytes( - buffer, pos, &atom, sizeof(atom)); + buffer, pos, &core_type, sizeof(core_type)); + pos = kbasep_tlstream_write_bytes(buffer, pos, &state, sizeof(state)); KBASE_DEBUG_ASSERT(msg_size == pos); - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); + kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); } -void __kbase_tlstream_tl_event_atom_softstop_issue(void *atom) +void __kbase_tlstream_aux_issue_job_softstop(void *katom) { - const u32 msg_id = KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE; + const u32 msg_id = KBASE_AUX_ISSUE_JOB_SOFTSTOP; const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(atom); + sizeof(msg_id) + sizeof(u64) + sizeof(katom); unsigned long flags; char *buffer; size_t pos = 0; buffer = kbasep_tlstream_msgbuf_acquire( - TL_STREAM_TYPE_OBJ, - msg_size, &flags); + TL_STREAM_TYPE_AUX, msg_size, &flags); KBASE_DEBUG_ASSERT(buffer); pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &atom, sizeof(atom)); + pos = kbasep_tlstream_write_bytes(buffer, pos, &katom, sizeof(katom)); KBASE_DEBUG_ASSERT(msg_size == pos); - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); + kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); } -void __kbase_tlstream_jd_gpu_soft_reset(void *gpu) +void __kbase_tlstream_aux_job_softstop(u32 js_id) { - const u32 msg_id = KBASE_JD_GPU_SOFT_RESET; + const u32 msg_id = KBASE_AUX_JOB_SOFTSTOP; const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(gpu); + sizeof(msg_id) + sizeof(u64) + sizeof(js_id); unsigned long flags; char *buffer; size_t pos = 0; buffer = kbasep_tlstream_msgbuf_acquire( - TL_STREAM_TYPE_OBJ, + TL_STREAM_TYPE_AUX, msg_size, &flags); KBASE_DEBUG_ASSERT(buffer); pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &gpu, sizeof(gpu)); + pos = kbasep_tlstream_write_bytes(buffer, pos, &js_id, sizeof(js_id)); KBASE_DEBUG_ASSERT(msg_size == pos); - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); + kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); } -/*****************************************************************************/ - -void __kbase_tlstream_aux_pm_state(u32 core_type, u64 state) +/** + * __kbase_tlstream_aux_job_softstop_ex_record - record the trace point + * @katom: the atom that has been soft-stopped + * @job_type: the job type + */ +static void __kbase_tlstream_aux_job_softstop_ex_record( + void *katom, u32 job_type) { - const u32 msg_id = KBASE_AUX_PM_STATE; + const u32 msg_id = KBASE_AUX_JOB_SOFTSTOP_EX; const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + 
sizeof(core_type) + - sizeof(state); + sizeof(msg_id) + sizeof(u64) + sizeof(katom) + sizeof(job_type); unsigned long flags; char *buffer; size_t pos = 0; buffer = kbasep_tlstream_msgbuf_acquire( - TL_STREAM_TYPE_AUX, - msg_size, &flags); + TL_STREAM_TYPE_AUX, msg_size, &flags); KBASE_DEBUG_ASSERT(buffer); pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); pos = kbasep_tlstream_write_timestamp(buffer, pos); + pos = kbasep_tlstream_write_bytes(buffer, pos, &katom, sizeof(katom)); pos = kbasep_tlstream_write_bytes( - buffer, pos, &core_type, sizeof(core_type)); - pos = kbasep_tlstream_write_bytes(buffer, pos, &state, sizeof(state)); + buffer, pos, &job_type, sizeof(job_type)); KBASE_DEBUG_ASSERT(msg_size == pos); kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); } +void __kbase_tlstream_aux_job_softstop_ex(struct kbase_jd_atom *katom) +{ + struct kbase_context *kctx = katom->kctx; + u64 jd = katom->jc; + + while (jd != 0) { + struct job_descriptor_header *job; + struct kbase_vmap_struct map; + + job = kbase_vmap(kctx, jd, sizeof(*job), &map); + if (!job) { + dev_err(kctx->kbdev->dev, + "__kbase_tlstream_aux_job_softstop_ex: failed to map job descriptor 0x%llx for atom 0x%p\n", + jd, (void *)katom); + break; + } + if (job->exception_status != BASE_JD_EVENT_STOPPED) { + kbase_vunmap(kctx, &map); + break; + } + + __kbase_tlstream_aux_job_softstop_ex_record( + katom, job->job_type); + + jd = job->job_descriptor_size ? + job->next_job._64 : job->next_job._32; + kbase_vunmap(kctx, &map); + } +} + void __kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change) { const u32 msg_id = KBASE_AUX_PAGEFAULT; @@ -2318,25 +2243,3 @@ void __kbase_tlstream_aux_pagesalloc(u32 ctx_nr, u64 page_count) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); } -void __kbase_tlstream_aux_devfreq_target(u64 target_freq) -{ - const u32 msg_id = KBASE_AUX_DEVFREQ_TARGET; - const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(target_freq); - unsigned long flags; - char *buffer; - size_t pos = 0; - - buffer = kbasep_tlstream_msgbuf_acquire( - TL_STREAM_TYPE_AUX, msg_size, &flags); - KBASE_DEBUG_ASSERT(buffer); - - pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &target_freq, sizeof(target_freq)); - KBASE_DEBUG_ASSERT(msg_size == pos); - - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); -} - diff --git a/drivers/gpu/arm/midgard/mali_kbase_tlstream.h b/drivers/gpu/arm/midgard/mali_kbase_tlstream.h index e29be71c142f..22a0d96f9a72 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_tlstream.h +++ b/drivers/gpu/arm/midgard/mali_kbase_tlstream.h @@ -37,9 +37,8 @@ void kbase_tlstream_term(void); /** * kbase_tlstream_acquire - acquire timeline stream file descriptor - * @kctx: kernel common context - * @fd: timeline stream file descriptor - * @flags: timeline stream flags + * @kctx: kernel common context + * @fd: timeline stream file descriptor * * This descriptor is meant to be used by userspace timeline to gain access to * kernel timeline stream. 
This stream is later broadcasted by user space to the @@ -51,7 +50,7 @@ void kbase_tlstream_term(void); * Return: zero on success (this does not necessarily mean that stream * descriptor could be returned), negative number on error */ -int kbase_tlstream_acquire(struct kbase_context *kctx, int *fd, u32 flags); +int kbase_tlstream_acquire(struct kbase_context *kctx, int *fd); /** * kbase_tlstream_flush_streams - flush timeline streams. @@ -103,11 +102,6 @@ void kbase_tlstream_stats(u32 *bytes_collected, u32 *bytes_generated); /*****************************************************************************/ -#define TL_ATOM_STATE_IDLE 0 -#define TL_ATOM_STATE_READY 1 -#define TL_ATOM_STATE_DONE 2 -#define TL_ATOM_STATE_POSTED 3 - void __kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr, u32 tgid); void __kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count); void __kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn); @@ -134,38 +128,25 @@ void __kbase_tlstream_tl_ndep_atom_atom(void *atom1, void *atom2); void __kbase_tlstream_tl_rdep_atom_atom(void *atom1, void *atom2); void __kbase_tlstream_tl_attrib_atom_config( void *atom, u64 jd, u64 affinity, u32 config); -void __kbase_tlstream_tl_attrib_atom_priority(void *atom, u32 prio); -void __kbase_tlstream_tl_attrib_atom_state(void *atom, u32 state); -void __kbase_tlstream_tl_attrib_atom_priority_change(void *atom); void __kbase_tlstream_tl_attrib_as_config( void *as, u64 transtab, u64 memattr, u64 transcfg); -void __kbase_tlstream_tl_event_atom_softstop_ex(void *atom); -void __kbase_tlstream_tl_event_lpu_softstop(void *lpu); -void __kbase_tlstream_tl_event_atom_softstop_issue(void *atom); void __kbase_tlstream_jd_gpu_soft_reset(void *gpu); void __kbase_tlstream_aux_pm_state(u32 core_type, u64 state); +void __kbase_tlstream_aux_issue_job_softstop(void *katom); +void __kbase_tlstream_aux_job_softstop(u32 js_id); +void __kbase_tlstream_aux_job_softstop_ex(struct kbase_jd_atom *katom); void __kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change); void __kbase_tlstream_aux_pagesalloc(u32 ctx_nr, u64 page_count); -void __kbase_tlstream_aux_devfreq_target(u64 target_freq); - -#define TLSTREAM_ENABLED (1 << 31) extern atomic_t kbase_tlstream_enabled; #define __TRACE_IF_ENABLED(trace_name, ...) \ do { \ int enabled = atomic_read(&kbase_tlstream_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ + if (enabled) \ __kbase_tlstream_##trace_name(__VA_ARGS__); \ } while (0) -#define __TRACE_IF_ENABLED_LATENCY(trace_name, ...) \ - do { \ - int enabled = atomic_read(&kbase_tlstream_enabled); \ - if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \ - __kbase_tlstream_##trace_name(__VA_ARGS__); \ - } while (0) - /*****************************************************************************/ /** @@ -448,35 +429,6 @@ extern atomic_t kbase_tlstream_enabled; #define kbase_tlstream_tl_attrib_atom_config(atom, jd, affinity, config) \ __TRACE_IF_ENABLED(tl_attrib_atom_config, atom, jd, affinity, config) -/** - * kbase_tlstream_tl_attrib_atom_priority - atom priority - * @atom: name of the atom object - * @prio: atom priority - * - * Function emits a timeline message containing atom priority. - */ -#define kbase_tlstream_tl_attrib_atom_priority(atom, prio) \ - __TRACE_IF_ENABLED_LATENCY(tl_attrib_atom_priority, atom, prio) - -/** - * kbase_tlstream_tl_attrib_atom_state - atom state - * @atom: name of the atom object - * @state: atom state - * - * Function emits a timeline message containing atom state. 
- */ -#define kbase_tlstream_tl_attrib_atom_state(atom, state) \ - __TRACE_IF_ENABLED_LATENCY(tl_attrib_atom_state, atom, state) - -/** - * kbase_tlstream_tl_attrib_atom_priority_change - atom caused priority change - * @atom: name of the atom object - * - * Function emits a timeline message signalling priority change - */ -#define kbase_tlstream_tl_attrib_atom_priority_change(atom) \ - __TRACE_IF_ENABLED_LATENCY(tl_attrib_atom_priority_change, atom) - /** * kbase_tlstream_tl_attrib_as_config - address space attributes * @as: assigned address space @@ -489,27 +441,6 @@ extern atomic_t kbase_tlstream_enabled; #define kbase_tlstream_tl_attrib_as_config(as, transtab, memattr, transcfg) \ __TRACE_IF_ENABLED(tl_attrib_as_config, as, transtab, memattr, transcfg) -/** - * kbase_tlstream_tl_event_atom_softstop_ex - * @atom: atom identifier - */ -#define kbase_tlstream_tl_event_atom_softstop_ex(atom) \ - __TRACE_IF_ENABLED(tl_event_atom_softstop_ex, atom) - -/** - * kbase_tlstream_tl_event_lpu_softstop - * @lpu: name of the LPU object - */ -#define kbase_tlstream_tl_event_lpu_softstop(lpu) \ - __TRACE_IF_ENABLED(tl_event_lpu_softstop, lpu) - -/** - * kbase_tlstream_tl_event_atom_softstop_issue - * @atom: atom identifier - */ -#define kbase_tlstream_tl_event_atom_softstop_issue(atom) \ - __TRACE_IF_ENABLED(tl_event_atom_softstop_issue, atom) - /** * kbase_tlstream_jd_gpu_soft_reset - The GPU is being soft reset * @gpu: name of the GPU object @@ -528,6 +459,34 @@ extern atomic_t kbase_tlstream_enabled; #define kbase_tlstream_aux_pm_state(core_type, state) \ __TRACE_IF_ENABLED(aux_pm_state, core_type, state) +/** + * kbase_tlstream_aux_issue_job_softstop - a soft-stop command is being issued + * @katom: the atom that is being soft-stopped + */ +#define kbase_tlstream_aux_issue_job_softstop(katom) \ + __TRACE_IF_ENABLED(aux_issue_job_softstop, katom) + +/** + * kbase_tlstream_aux_job_softstop - soft job stop occurred + * @js_id: job slot id + */ +#define kbase_tlstream_aux_job_softstop(js_id) \ + __TRACE_IF_ENABLED(aux_job_softstop, js_id) + +/** + * kbase_tlstream_aux_job_softstop_ex - extra info about soft-stopped atom + * @katom: the atom that has been soft-stopped + * + * This trace point adds more details about the soft-stopped atom. These details + * can't be safety collected inside the interrupt handler so we're doing it + * inside a worker. + * + * Note: this is not the same information that is recorded in the trace point, + * refer to __kbase_tlstream_aux_job_softstop_ex() for more details. 
+ */ +#define kbase_tlstream_aux_job_softstop_ex(katom) \ + __TRACE_IF_ENABLED(aux_job_softstop_ex, katom) + /** * kbase_tlstream_aux_pagefault - timeline message: MMU page fault event * resulting in new pages being mapped @@ -546,13 +505,5 @@ extern atomic_t kbase_tlstream_enabled; #define kbase_tlstream_aux_pagesalloc(ctx_nr, page_count) \ __TRACE_IF_ENABLED(aux_pagesalloc, ctx_nr, page_count) -/** - * kbase_tlstream_aux_devfreq_target - timeline message: new target DVFS - * frequency - * @target_freq: new target frequency - */ -#define kbase_tlstream_aux_devfreq_target(target_freq) \ - __TRACE_IF_ENABLED(aux_devfreq_target, target_freq) - #endif /* _KBASE_TLSTREAM_H */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c index 5830e87f0818..a606ae810656 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c +++ b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c @@ -105,8 +105,6 @@ static const struct file_operations kbasep_trace_timeline_debugfs_fops = { .release = seq_release, }; -#ifdef CONFIG_DEBUG_FS - void kbasep_trace_timeline_debugfs_init(struct kbase_device *kbdev) { debugfs_create_file("mali_timeline_defs", @@ -114,12 +112,10 @@ void kbasep_trace_timeline_debugfs_init(struct kbase_device *kbdev) &kbasep_trace_timeline_debugfs_fops); } -#endif /* CONFIG_DEBUG_FS */ - void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom, int js) { - lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); if (kbdev->timeline.slot_atoms_submitted[js] > 0) { KBASE_TIMELINE_JOB_START_NEXT(kctx, js, 1); @@ -138,7 +134,7 @@ void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_conte struct kbase_jd_atom *katom, int js, kbasep_js_atom_done_code done_code) { - lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); if (done_code & KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT) { KBASE_TIMELINE_JOB_START_NEXT(kctx, js, 0); @@ -217,7 +213,7 @@ void kbase_timeline_pm_handle_event(struct kbase_device *kbdev, enum kbase_timel void kbase_timeline_pm_l2_transition_start(struct kbase_device *kbdev) { - lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->pm.power_change_lock); /* Simply log the start of the transition */ kbdev->timeline.l2_transitioning = true; KBASE_TIMELINE_POWERING_L2(kbdev); @@ -225,7 +221,7 @@ void kbase_timeline_pm_l2_transition_start(struct kbase_device *kbdev) void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev) { - lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->pm.power_change_lock); /* Simply log the end of the transition */ if (kbdev->timeline.l2_transitioning) { kbdev->timeline.l2_transitioning = false; diff --git a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h index 619072f3215c..22a36494e72e 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h +++ b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h @@ -28,17 +28,9 @@ enum kbase_trace_timeline_code { #undef KBASE_TIMELINE_TRACE_CODE }; -#ifdef CONFIG_DEBUG_FS - /** Initialize Timeline DebugFS entries */ void kbasep_trace_timeline_debugfs_init(struct kbase_device *kbdev); -#else /* CONFIG_DEBUG_FS */ - -#define kbasep_trace_timeline_debugfs_init CSTD_NOP - -#endif /* CONFIG_DEBUG_FS */ - /* mali_timeline.h defines kernel tracepoints used by the 
KBASE_TIMELINE * functions. * Output is timestamped by either sched_clock() (default), local_clock(), or @@ -240,12 +232,13 @@ void kbasep_trace_timeline_debugfs_init(struct kbase_device *kbdev); count); \ } while (0) + /* NOTE: kbase_timeline_pm_cores_func() is in mali_kbase_pm_policy.c */ /** * Trace that an atom is starting on a job slot * - * The caller must be holding hwaccess_lock + * The caller must be holding kbasep_js_device_data::runpool_irq::lock */ void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom, int js); @@ -264,7 +257,7 @@ void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_con * - kbasep_jm_dequeue_submit_slot() * - kbasep_jm_dequeue_tail_submit_slot() * - * The caller must be holding hwaccess_lock + * The caller must be holding kbasep_js_device_data::runpool_irq::lock */ void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom, int js, @@ -328,14 +321,14 @@ void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev); static inline void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom, int js) { - lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); } static inline void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom, int js, kbasep_js_atom_done_code done_code) { - lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); } static inline void kbase_timeline_pm_send_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event_sent) diff --git a/drivers/gpu/arm/midgard/mali_kbase_uku.h b/drivers/gpu/arm/midgard/mali_kbase_uku.h index 711b09138db3..e880d9663d0e 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_uku.h +++ b/drivers/gpu/arm/midgard/mali_kbase_uku.h @@ -56,15 +56,9 @@ * * 10.4: * - Removed KBASE_FUNC_EXT_BUFFER_LOCK used only in internal tests - * - * 10.5: - * - Reverted to performing mmap in user space so that tools like valgrind work. - * - * 10.6: - * - Add flags input variable to KBASE_FUNC_TLSTREAM_ACQUIRE */ #define BASE_UK_VERSION_MAJOR 10 -#define BASE_UK_VERSION_MINOR 6 +#define BASE_UK_VERSION_MINOR 4 struct kbase_uk_mem_alloc { union uk_header header; @@ -325,7 +319,6 @@ struct kbase_uk_profiling_controls { struct kbase_uk_debugfs_mem_profile_add { union uk_header header; u32 len; - u32 padding; union kbase_pointer buf; }; @@ -340,30 +333,12 @@ struct kbase_uk_context_id { /** * struct kbase_uk_tlstream_acquire - User/Kernel space data exchange structure * @header: UK structure header - * @flags: timeline stream flags * @fd: timeline stream file descriptor * - * This structure is used when performing a call to acquire kernel side timeline - * stream file descriptor. + * This structure is used used when performing a call to acquire kernel side + * timeline stream file descriptor. */ struct kbase_uk_tlstream_acquire { - union uk_header header; - /* IN */ - u32 flags; - /* OUT */ - s32 fd; -}; - -/** - * struct kbase_uk_tlstream_acquire_v10_4 - User/Kernel space data exchange - * structure - * @header: UK structure header - * @fd: timeline stream file descriptor - * - * This structure is used when performing a call to acquire kernel side timeline - * stream file descriptor. 
- */ -struct kbase_uk_tlstream_acquire_v10_4 { union uk_header header; /* IN */ /* OUT */ @@ -522,7 +497,7 @@ enum kbase_uk_function_id { #if (defined(MALI_MIPE_ENABLED) && MALI_MIPE_ENABLED) || \ !defined(MALI_MIPE_ENABLED) - KBASE_FUNC_TLSTREAM_ACQUIRE_V10_4 = (UK_FUNC_ID + 32), + KBASE_FUNC_TLSTREAM_ACQUIRE = (UK_FUNC_ID + 32), #if MALI_UNIT_TEST KBASE_FUNC_TLSTREAM_TEST = (UK_FUNC_ID + 33), KBASE_FUNC_TLSTREAM_STATS = (UK_FUNC_ID + 34), @@ -540,11 +515,6 @@ enum kbase_uk_function_id { KBASE_FUNC_MEM_JIT_INIT = (UK_FUNC_ID + 39), -#if (defined(MALI_MIPE_ENABLED) && MALI_MIPE_ENABLED) || \ - !defined(MALI_MIPE_ENABLED) - KBASE_FUNC_TLSTREAM_ACQUIRE = (UK_FUNC_ID + 40), -#endif /* MALI_MIPE_ENABLED */ - KBASE_FUNC_MAX }; diff --git a/drivers/gpu/arm/midgard/mali_kbase_vinstr.c b/drivers/gpu/arm/midgard/mali_kbase_vinstr.c index 3adb06ddb266..bd6095f77480 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_vinstr.c +++ b/drivers/gpu/arm/midgard/mali_kbase_vinstr.c @@ -29,7 +29,6 @@ #include #include -#include #include #include #include @@ -1637,9 +1636,11 @@ static int kbasep_vinstr_hwcnt_reader_mmap(struct file *filp, if (vma->vm_pgoff > (size >> PAGE_SHIFT)) return -EINVAL; + if (vm_size > size) + return -EINVAL; offset = vma->vm_pgoff << PAGE_SHIFT; - if (vm_size > size - offset) + if ((vm_size + offset) > size) return -EINVAL; addr = __pa((unsigned long)cli->dump_buffers + offset); @@ -1686,9 +1687,9 @@ static void kbasep_vinstr_kick_scheduler(struct kbase_device *kbdev) unsigned long flags; down(&js_devdata->schedule_sem); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbase_backend_slot_update(kbdev); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + kbase_jm_kick_all(kbdev); + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); up(&js_devdata->schedule_sem); } diff --git a/drivers/gpu/arm/midgard/mali_midg_regmap.h b/drivers/gpu/arm/midgard/mali_midg_regmap.h index de3053b88da8..de6c206f3c71 100644 --- a/drivers/gpu/arm/midgard/mali_midg_regmap.h +++ b/drivers/gpu/arm/midgard/mali_midg_regmap.h @@ -61,7 +61,6 @@ #define LATEST_FLUSH 0x038 /* (RO) */ #define GROUPS_L2_COHERENT (1 << 0) /* Cores groups are l2 coherent */ -#define GPU_DBGEN (1 << 8) /* DBGEN wire status */ #define GPU_FAULTSTATUS 0x03C /* (RO) GPU exception type and fault status */ #define GPU_FAULTADDRESS_LO 0x040 /* (RO) GPU exception fault address, low word */ diff --git a/drivers/gpu/arm/midgard/sconscript b/drivers/gpu/arm/midgard/sconscript index 7b7ec772ecf3..c36e92dd0d7a 100644 --- a/drivers/gpu/arm/midgard/sconscript +++ b/drivers/gpu/arm/midgard/sconscript @@ -13,9 +13,13 @@ # + +import os +import re import sys Import('env') + if Glob('tests/sconscript'): SConscript( 'tests/sconscript' ) @@ -29,12 +33,18 @@ else: fake_platform_device = 1 # Source files required for kbase. 
-kbase_src = [ - Glob('*.c'), - Glob('backend/*/*.c'), - Glob('internal/*/*.c'), - Glob('platform/%s/*.c' % env['platform_config']), -] +kbase_src = [Glob('#kernel/drivers/gpu/arm/midgard/*.c'), + Glob('#kernel/drivers/gpu/arm/midgard/*.c'), + Glob('#kernel/drivers/gpu/arm/midgard/backend/*/*.c'), + Glob('#kernel/drivers/gpu/arm/midgard/backend/*/*.h'), + Glob('#kernel/drivers/gpu/arm/midgard/platform/%s/*.c' % (env['platform_config'])), + Glob('#kernel/drivers/gpu/arm/midgard/*.h'), + Glob('#kernel/drivers/gpu/arm/midgard/*.h'), + Glob('#kernel/drivers/gpu/arm/midgard/Makefile', + Glob('#kernel/drivers/gpu/arm/midgard/K*')) + ] + +kbase_src += [Glob('#kernel/drivers/gpu/arm/midgard/internal/*/*.c')] if Glob('#kernel/drivers/gpu/arm/midgard/tests/internal/src/mock') and env['unit'] == '1': kbase_src += [Glob('#kernel/drivers/gpu/arm/midgard/tests/internal/src/mock/*.c')] @@ -43,43 +53,75 @@ if Glob('#kernel/drivers/gpu/arm/midgard/tests/internal/src/mock') and env['unit # we need platform config for GPL version using fake platform if fake_platform_device==1: # Check if we are compiling for PBX - if env.KernelConfigEnabled("CONFIG_MACH_REALVIEW_PBX") and \ - env["platform_config"] in {"vexpress", "vexpress_6xvirtex7_10mhz"}: + linux_config_file = os.path.normpath(os.environ['KDIR']) + '/.config' + search_term = '^[\ ]*CONFIG_MACH_REALVIEW_PBX[\ ]*=[\ ]*y' + REALVIEW_PBX = 0 + for line in open(linux_config_file, 'r'): + if re.search(search_term, line): + REALVIEW_PBX = 1 + break + if REALVIEW_PBX == 1 and (env['platform_config'] == 'vexpress' or env['platform_config'] == 'vexpress_6xvirtex7_10mhz'): sys.stderr.write("WARNING: Building for a PBX kernel but with platform_config=vexpress*\n") # if the file platform config file is in the tpip directory then use that, otherwise use the default config directory if Glob('#kernel/drivers/gpu/arm/midgard/config/tpip/*%s.c' % (env['platform_config'])): kbase_src += Glob('#kernel/drivers/gpu/arm/midgard/config/tpip/*%s.c' % (env['platform_config'])) else: kbase_src += Glob('#kernel/drivers/gpu/arm/midgard/config/*%s.c' % (env['platform_config'])) + +# Note: cleaning via the Linux kernel build system does not yet work +if env.GetOption('clean') : + env.Execute(Action("make clean", '[clean] kbase')) + cmd = env.Command(['$STATIC_LIB_PATH/mali_kbase.ko', '$STATIC_LIB_PATH/mali_platform_fake.ko'], kbase_src, []) +else: + if env['os'] == 'android': + env['android'] = 1 + else: + env['android'] = 0 + + if env['unit'] == '1': + env['kernel_test'] = 1 + else: + env['kernel_test'] = 0 + + #Extract environment options, note the trailing spaces are important + env_options = \ + "PLATFORM=${platform} " +\ + "MALI_ERROR_INJECT_ON=${error_inject} " +\ + "MALI_ANDROID=${android} " +\ + "MALI_KERNEL_TEST_API=${kernel_test} " +\ + "MALI_UNIT_TEST=${unit} " +\ + "MALI_RELEASE_NAME=\"${mali_release_name}\" "+\ + "MALI_MOCK_TEST=%s " % mock_test +\ + "MALI_CUSTOMER_RELEASE=${release} " +\ + "MALI_INSTRUMENTATION_LEVEL=${instr} " +\ + "MALI_COVERAGE=${coverage} " +\ + "MALI_BUS_LOG=${buslog} " + + make_action_start = "cd ${SOURCE.dir} && make -j%d " % GetOption('num_jobs') + make_action_end = "%s && cp mali_kbase.ko $STATIC_LIB_PATH/mali_kbase.ko" % env.kernel_get_config_defines(fake_platform_device) + make_action = make_action_start + env_options + make_action_end -make_args = env.kernel_get_config_defines(ret_list = True, - fake = fake_platform_device) + [ - 'PLATFORM=%s' % env['platform'], - 'MALI_ERROR_INJECT_ON=%s' % env['error_inject'], - 'MALI_KERNEL_TEST_API=%s' % 
env['unit'], - 'MALI_UNIT_TEST=%s' % env['unit'], - 'MALI_RELEASE_NAME=%s' % env['mali_release_name'], - 'MALI_MOCK_TEST=%s' % mock_test, - 'MALI_CUSTOMER_RELEASE=%s' % env['release'], - 'MALI_INSTRUMENTATION_LEVEL=%s' % env['instr'], - 'MALI_COVERAGE=%s' % env['coverage'], - 'MALI_BUS_LOG=%s' % env['buslog'], -] - -kbase = env.BuildKernelModule('$STATIC_LIB_PATH/mali_kbase.ko', kbase_src, - make_args = make_args) + makeAction=Action(make_action, '$MAKECOMSTR') + cmd = env.Command('$STATIC_LIB_PATH/mali_kbase.ko', kbase_src, [makeAction]) # Add a dependency on kds.ko. # Only necessary when KDS is not built into the kernel. # if env['os'] != 'android': - if not env.KernelConfigEnabled("CONFIG_KDS"): - env.Depends(kbase, '$STATIC_LIB_PATH/kds.ko') + linux_config_file = os.path.normpath(os.environ['KDIR']) + '/.config' + search_term = '^[\ ]*CONFIG_KDS[\ ]*=[\ ]*y' + kds_in_kernel = 0 + for line in open(linux_config_file, 'r'): + if re.search(search_term, line): + # KDS in kernel. + kds_in_kernel = 1 + if not kds_in_kernel: + env.Depends('$STATIC_LIB_PATH/mali_kbase.ko', '$STATIC_LIB_PATH/kds.ko') # need Module.symvers from ump.ko build if int(env['ump']) == 1: - env.Depends(kbase, '$STATIC_LIB_PATH/ump.ko') + env.Depends('$STATIC_LIB_PATH/mali_kbase.ko', '$STATIC_LIB_PATH/ump.ko') -env.KernelObjTarget('kbase', kbase) +env.KernelObjTarget('kbase', cmd) env.AppendUnique(BASE=['cutils_linked_list']) -- 2.34.1