From b096c7187a357f977364e5e41fd0e3ddfc4320a0 Mon Sep 17 00:00:00 2001 From: chenzhen Date: Mon, 9 Nov 2015 16:36:48 +0800 Subject: [PATCH] MALI: rockchip: upgrade DDK to r8p0-02rel0. Change-Id: I85a3bedf89a3fc27971b1d26e7bfa9a8bee32d06 Signed-off-by: chenzhen --- drivers/gpu/arm/midgard/Kbuild | 24 +- drivers/gpu/arm/midgard/Kconfig | 24 +- drivers/gpu/arm/midgard/backend/gpu/Kbuild | 4 + .../midgard/backend/gpu/mali_kbase_devfreq.c | 36 +- .../backend/gpu/mali_kbase_gpuprops_backend.c | 8 + .../midgard/backend/gpu/mali_kbase_jm_hw.c | 81 ++-- .../midgard/backend/gpu/mali_kbase_jm_rb.c | 26 +- .../backend/gpu/mali_kbase_js_affinity.c | 12 +- .../backend/gpu/mali_kbase_mmu_hw_direct.c | 26 +- .../backend/gpu/mali_kbase_pm_backend.c | 23 +- .../midgard/backend/gpu/mali_kbase_pm_ca.c | 6 +- .../midgard/backend/gpu/mali_kbase_pm_defs.h | 11 +- .../backend/gpu/mali_kbase_pm_driver.c | 25 +- .../backend/gpu/mali_kbase_pm_internal.h | 11 - .../backend/gpu/mali_kbase_pm_policy.c | 125 +++--- .../gpu/mali_kbase_power_model_simple.c | 160 +++++++ .../gpu/mali_kbase_power_model_simple.h | 43 ++ .../arm/midgard/mali_base_hwconfig_features.h | 2 - .../arm/midgard/mali_base_hwconfig_issues.h | 36 ++ drivers/gpu/arm/midgard/mali_base_kernel.h | 70 ++- drivers/gpu/arm/midgard/mali_kbase.h | 7 +- drivers/gpu/arm/midgard/mali_kbase_config.h | 26 ++ drivers/gpu/arm/midgard/mali_kbase_context.c | 3 - .../gpu/arm/midgard/mali_kbase_core_linux.c | 411 ++++++++---------- .../arm/midgard/mali_kbase_debug_job_fault.c | 47 +- .../arm/midgard/mali_kbase_debug_job_fault.h | 25 +- .../arm/midgard/mali_kbase_debug_mem_view.c | 4 +- drivers/gpu/arm/midgard/mali_kbase_defs.h | 49 ++- drivers/gpu/arm/midgard/mali_kbase_device.c | 4 +- drivers/gpu/arm/midgard/mali_kbase_event.c | 18 +- drivers/gpu/arm/midgard/mali_kbase_gpu_id.h | 110 +++++ drivers/gpu/arm/midgard/mali_kbase_gpuprops.c | 19 +- drivers/gpu/arm/midgard/mali_kbase_gpuprops.h | 10 + .../arm/midgard/mali_kbase_gpuprops_types.h | 1 + 
drivers/gpu/arm/midgard/mali_kbase_hw.c | 312 ++++++------- .../midgard/mali_kbase_hwaccess_gpuprops.h | 14 +- .../gpu/arm/midgard/mali_kbase_hwaccess_pm.h | 7 +- drivers/gpu/arm/midgard/mali_kbase_instr.c | 8 - drivers/gpu/arm/midgard/mali_kbase_ipa.c | 251 +++++++++-- drivers/gpu/arm/midgard/mali_kbase_ipa.h | 11 +- .../gpu/arm/midgard/mali_kbase_ipa_tables.h | 104 +++++ drivers/gpu/arm/midgard/mali_kbase_jd.c | 204 ++++++++- drivers/gpu/arm/midgard/mali_kbase_js.c | 6 +- .../gpu/arm/midgard/mali_kbase_js_ctx_attr.c | 7 - drivers/gpu/arm/midgard/mali_kbase_js_defs.h | 12 +- drivers/gpu/arm/midgard/mali_kbase_mem.c | 3 + drivers/gpu/arm/midgard/mali_kbase_mem.h | 9 + .../gpu/arm/midgard/mali_kbase_mem_linux.c | 264 ++++++++++- .../gpu/arm/midgard/mali_kbase_mem_linux.h | 4 +- drivers/gpu/arm/midgard/mali_kbase_mem_pool.c | 18 +- .../midgard/mali_kbase_mem_profile_debugfs.c | 83 ++-- .../midgard/mali_kbase_mem_profile_debugfs.h | 32 +- drivers/gpu/arm/midgard/mali_kbase_mmu.c | 2 +- drivers/gpu/arm/midgard/mali_kbase_security.c | 76 ---- drivers/gpu/arm/midgard/mali_kbase_security.h | 52 --- drivers/gpu/arm/midgard/mali_kbase_smc.c | 13 +- drivers/gpu/arm/midgard/mali_kbase_tlstream.c | 143 +++--- drivers/gpu/arm/midgard/mali_kbase_tlstream.h | 42 +- drivers/gpu/arm/midgard/mali_kbase_uku.h | 29 +- drivers/gpu/arm/midgard/mali_kbase_vinstr.c | 128 ++++-- drivers/gpu/arm/midgard/mali_kbase_vinstr.h | 30 ++ drivers/gpu/arm/midgard/mali_midg_coherency.h | 26 ++ drivers/gpu/arm/midgard/mali_midg_regmap.h | 45 +- .../arm/midgard/platform/devicetree/Kbuild | 8 +- .../devicetree/mali_kbase_config_devicetree.c | 8 + .../devicetree/mali_kbase_config_platform.h | 6 - .../devicetree/mali_kbase_runtime_pm.c | 9 +- .../juno_soc/mali_kbase_config_juno_soc.c | 71 --- .../juno_soc/mali_kbase_config_platform.h | 10 - .../vexpress/mali_kbase_config_platform.h | 6 - .../mali_kbase_config_platform.h | 6 - .../mali_kbase_config_platform.h | 6 - 72 files changed, 2358 insertions(+), 
1194 deletions(-) create mode 100644 drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.c create mode 100644 drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.h create mode 100644 drivers/gpu/arm/midgard/mali_kbase_gpu_id.h create mode 100644 drivers/gpu/arm/midgard/mali_kbase_ipa_tables.h delete mode 100644 drivers/gpu/arm/midgard/mali_kbase_security.c delete mode 100644 drivers/gpu/arm/midgard/mali_kbase_security.h create mode 100644 drivers/gpu/arm/midgard/mali_midg_coherency.h diff --git a/drivers/gpu/arm/midgard/Kbuild b/drivers/gpu/arm/midgard/Kbuild index 015521cd7d43..4cb88fe5c003 100644 --- a/drivers/gpu/arm/midgard/Kbuild +++ b/drivers/gpu/arm/midgard/Kbuild @@ -15,7 +15,7 @@ # Driver version string which is returned to userspace via an ioctl -MALI_RELEASE_NAME ?= "r7p0-02rel0" +MALI_RELEASE_NAME ?= "r8p0-02rel0" # Paths required for build KBASE_PATH = $(src) @@ -88,7 +88,6 @@ SRC := \ mali_kbase_context.c \ mali_kbase_pm.c \ mali_kbase_config.c \ - mali_kbase_security.c \ mali_kbase_instr.c \ mali_kbase_vinstr.c \ mali_kbase_softjobs.c \ @@ -120,6 +119,10 @@ ifeq ($(CONFIG_MALI_MIPE_ENABLED),y) endif endif +ifeq ($(MALI_CUSTOMER_RELEASE),0) + SRC += mali_kbase_regs_dump_debugfs.c +endif + # Job Scheduler Policy: Completely Fair Scheduler SRC += mali_kbase_js_policy_cfs.c @@ -191,12 +194,6 @@ obj-$(CONFIG_MALI_MIDGARD) += platform/ endif endif -ifeq ($(CONFIG_MALI_PLATFORM_DEVICETREE),y) - SRC += platform/devicetree/mali_kbase_runtime_pm.c - SRC += platform/devicetree/mali_kbase_config_devicetree.c - ccflags-y += -I$(src)/platform/devicetree -endif - # Tell the Linux build system from which .o file to create the kernel module obj-$(CONFIG_MALI_MIDGARD) += mali_kbase.o @@ -227,3 +224,14 @@ mali_kbase-y += $(BACKEND:.c=.o) ccflags-y += -I$(src)/$(MALI_BACKEND_PATH)/$(CONFIG_MALI_BACKEND_REAL) subdir-ccflags-y += -I$(src)/$(MALI_BACKEND_PATH)/$(CONFIG_MALI_BACKEND_REAL) + +# Default to devicetree platform if neither a fake 
platform or a thirdparty +# platform is configured. +ifeq ($(CONFIG_MALI_PLATFORM_THIRDPARTY)$(CONFIG_MALI_PLATFORM_FAKE),) +CONFIG_MALI_PLATFORM_DEVICETREE := y +endif + +mali_kbase-$(CONFIG_MALI_PLATFORM_DEVICETREE) += \ + platform/devicetree/mali_kbase_runtime_pm.o \ + platform/devicetree/mali_kbase_config_devicetree.o +ccflags-$(CONFIG_MALI_PLATFORM_DEVICETREE) += -I$(src)/platform/devicetree diff --git a/drivers/gpu/arm/midgard/Kconfig b/drivers/gpu/arm/midgard/Kconfig index 154304380041..a6780dc83bee 100644 --- a/drivers/gpu/arm/midgard/Kconfig +++ b/drivers/gpu/arm/midgard/Kconfig @@ -57,17 +57,17 @@ config MALI_MIPE_ENABLED endchoice config MALI_MIDGARD_DVFS - bool "Enable DVFS" - depends on MALI_MIDGARD + bool "Enable legacy DVFS" + depends on MALI_MIDGARD && !MALI_DEVFREQ && !MALI_PLATFORM_DEVICETREE default n help - Choose this option to enable DVFS in the Mali Midgard DDK. + Choose this option to enable legacy DVFS in the Mali Midgard DDK. config MALI_MIDGARD_RT_PM bool "Enable Runtime power management" depends on MALI_MIDGARD depends on PM_RUNTIME - default n + default y help Choose this option to enable runtime power management in the Mali Midgard DDK. @@ -128,10 +128,24 @@ config MALI_PLATFORM_FAKE choice prompt "Platform configuration" depends on MALI_MIDGARD && MALI_EXPERT - default MALI_PLATFORM_VEXPRESS + default MALI_PLATFORM_DEVICETREE help Select the SOC platform that contains a Mali Midgard GPU +config MALI_PLATFORM_DEVICETREE + bool "Device Tree platform" + depends on OF + help + Select this option to use Device Tree with the Mali driver. + + When using this option the Mali driver will get the details of the + GPU hardware from the Device Tree. This means that the same driver + binary can run on multiple platforms as long as all the GPU hardware + details are described in the device tree. + + Device Tree is the recommended method for the Mali driver platform + integration. 
+ config MALI_PLATFORM_VEXPRESS depends on ARCH_VEXPRESS && (ARCH_VEXPRESS_CA9X4 || ARCH_VEXPRESS_CA15X4) bool "Versatile Express" diff --git a/drivers/gpu/arm/midgard/backend/gpu/Kbuild b/drivers/gpu/arm/midgard/backend/gpu/Kbuild index df4e796dc0d8..a39df411ae1a 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/Kbuild +++ b/drivers/gpu/arm/midgard/backend/gpu/Kbuild @@ -56,3 +56,7 @@ ifeq ($(CONFIG_MALI_NO_MALI),y) # HW error simulation BACKEND += backend/gpu/mali_kbase_model_error_generator.c endif + +ifeq ($(CONFIG_DEVFREQ_THERMAL),y) + BACKEND += backend/gpu/mali_kbase_power_model_simple.c +endif diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c index 3791457f605d..531c4328ccfd 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -18,6 +18,9 @@ #include #include #include +#ifdef CONFIG_DEVFREQ_THERMAL +#include +#endif #include #include @@ -130,7 +133,9 @@ kbase_devfreq_status(struct device *dev, struct devfreq_dev_status *stat) stat->private_data = NULL; #ifdef CONFIG_DEVFREQ_THERMAL - memcpy(&kbdev->devfreq_cooling->last_status, stat, sizeof(*stat)); + if (kbdev->devfreq_cooling) + memcpy(&kbdev->devfreq_cooling->last_status, stat, + sizeof(*stat)); #endif return 0; @@ -192,14 +197,9 @@ static void kbase_devfreq_exit(struct device *dev) int kbase_devfreq_init(struct kbase_device *kbdev) { -#ifdef CONFIG_DEVFREQ_THERMAL - struct devfreq_cooling_ops *callbacks = POWER_MODEL_CALLBACKS; -#endif struct devfreq_dev_profile *dp; int err; - dev_dbg(kbdev->dev, "Init Mali devfreq\n"); - if (!kbdev->clock) return -ENODEV; @@ -232,12 +232,20 @@ int kbase_devfreq_init(struct kbase_device *kbdev) } #ifdef CONFIG_DEVFREQ_THERMAL - if (callbacks) { - + err = kbase_power_model_simple_init(kbdev); + if (err && err != -ENODEV && err != -EPROBE_DEFER) { + dev_err(kbdev->dev, + "Failed to initialize simple power model (%d)\n", + err); + goto cooling_failed; + } + if (err == -EPROBE_DEFER) + goto cooling_failed; + if (err != -ENODEV) { kbdev->devfreq_cooling = of_devfreq_cooling_register_power( kbdev->dev->of_node, kbdev->devfreq, - callbacks); + &power_model_simple_ops); if (IS_ERR_OR_NULL(kbdev->devfreq_cooling)) { err = PTR_ERR(kbdev->devfreq_cooling); dev_err(kbdev->dev, @@ -245,6 +253,8 @@ int kbase_devfreq_init(struct kbase_device *kbdev) err); goto cooling_failed; } + } else { + err = 0; } #endif @@ -255,8 +265,7 @@ cooling_failed: devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq); #endif /* CONFIG_DEVFREQ_THERMAL */ opp_notifier_failed: - err = devfreq_remove_device(kbdev->devfreq); - if (err) + if (devfreq_remove_device(kbdev->devfreq)) 
dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err); else kbdev->devfreq = NULL; @@ -271,7 +280,8 @@ void kbase_devfreq_term(struct kbase_device *kbdev) dev_dbg(kbdev->dev, "Term Mali devfreq\n"); #ifdef CONFIG_DEVFREQ_THERMAL - devfreq_cooling_unregister(kbdev->devfreq_cooling); + if (kbdev->devfreq_cooling) + devfreq_cooling_unregister(kbdev->devfreq_cooling); #endif devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq); diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c index 591c013c5349..705b1ebfa87f 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c @@ -83,3 +83,11 @@ void kbase_backend_gpuprops_get(struct kbase_device *kbdev, GPU_CONTROL_REG(L2_PRESENT_HI), NULL); } +void kbase_backend_gpuprops_get_features(struct kbase_device *kbdev, + struct kbase_gpuprops_regdump *regdump) +{ + regdump->coherency_features = + COHERENCY_FEATURE_BIT(COHERENCY_NONE) | + COHERENCY_FEATURE_BIT(COHERENCY_ACE_LITE); +} + diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c index a4b0c26cddec..936be574ce8a 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c @@ -41,12 +41,6 @@ #define beenthere(kctx, f, a...) 
\ dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a) -#ifdef CONFIG_MALI_DEBUG_SHADER_SPLIT_FS -u64 mali_js0_affinity_mask = 0xFFFFFFFFFFFFFFFFULL; -u64 mali_js1_affinity_mask = 0xFFFFFFFFFFFFFFFFULL; -u64 mali_js2_affinity_mask = 0xFFFFFFFFFFFFFFFFULL; -#endif - #if KBASE_GPU_RESET_EN static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev); static void kbasep_reset_timeout_worker(struct work_struct *data); @@ -84,33 +78,10 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI), jc_head >> 32, kctx); -#ifdef CONFIG_MALI_DEBUG_SHADER_SPLIT_FS - { - u64 mask; - u32 value; - - if (0 == js) - mask = mali_js0_affinity_mask; - else if (1 == js) - mask = mali_js1_affinity_mask; - else - mask = mali_js2_affinity_mask; - - value = katom->affinity & (mask & 0xFFFFFFFF); - - kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_LO), - value, kctx); - - value = (katom->affinity >> 32) & ((mask>>32) & 0xFFFFFFFF); - kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_HI), - value, kctx); - } -#else kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_LO), katom->affinity & 0xFFFFFFFF, kctx); kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_HI), katom->affinity >> 32, kctx); -#endif /* start MMU, medium priority, cache clean/flush on end, clean/flush on * start */ @@ -162,10 +133,15 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, #if defined(CONFIG_MALI_MIPE_ENABLED) kbase_tlstream_tl_attrib_atom_config(katom, jc_head, katom->affinity, cfg); + kbase_tlstream_tl_ret_ctx_lpu( + kctx, + &kbdev->gpu_props.props.raw_props.js_features[ + katom->slot_nr]); kbase_tlstream_tl_ret_atom_as(katom, &kbdev->as[kctx->as_nr]); kbase_tlstream_tl_ret_atom_lpu( katom, - &kbdev->gpu_props.props.raw_props.js_features[js]); + &kbdev->gpu_props.props.raw_props.js_features[js], + "ctx_nr,atom_nr"); #endif #ifdef CONFIG_GPU_TRACEPOINTS if (kbase_backend_nr_atoms_submitted(kbdev, js) == 1) { @@ -1145,7 +1121,7 
@@ static void kbasep_save_hwcnt_setup(struct kbase_device *kbdev, static void kbasep_reset_timeout_worker(struct work_struct *data) { - unsigned long flags; + unsigned long flags, mmu_flags; struct kbase_device *kbdev; int i; ktime_t end_timestamp = ktime_get(); @@ -1155,8 +1131,6 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) bool try_schedule = false; bool restore_hwc = false; - u32 mmu_irq_mask; - KBASE_DEBUG_ASSERT(data); kbdev = container_of(data, struct kbase_device, @@ -1183,6 +1157,30 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) return; } + KBASE_DEBUG_ASSERT(kbdev->irq_reset_flush == false); + + spin_lock_irqsave(&kbdev->mmu_mask_change, mmu_flags); + /* We're about to flush out the IRQs and their bottom half's */ + kbdev->irq_reset_flush = true; + + /* Disable IRQ to avoid IRQ handlers to kick in after releasing the + * spinlock; this also clears any outstanding interrupts */ + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + kbase_pm_disable_interrupts(kbdev); + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + + spin_unlock_irqrestore(&kbdev->mmu_mask_change, mmu_flags); + + /* Ensure that any IRQ handlers have finished + * Must be done without any locks IRQ handlers will take */ + kbase_synchronize_irqs(kbdev); + + /* Flush out any in-flight work items */ + kbase_flush_mmu_wqs(kbdev); + + /* The flush has completed so reset the active indicator */ + kbdev->irq_reset_flush = false; + mutex_lock(&kbdev->pm.lock); /* We hold the pm lock, so there ought to be a current policy */ KBASE_DEBUG_ASSERT(kbdev->pm.backend.pm_current_policy); @@ -1224,22 +1222,11 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_RESETTING; kbdev->hwcnt.backend.triggered = 0; - mmu_irq_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL); - /* Disable IRQ to avoid IRQ handlers to kick in after releasing the - * spinlock; this also clears any outstanding interrupts 
*/ - kbase_pm_disable_interrupts(kbdev); spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); - /* Ensure that any IRQ handlers have finished - * Must be done without any locks IRQ handlers will take */ - kbase_synchronize_irqs(kbdev); - /* Reset the GPU */ kbase_pm_init_hw(kbdev, 0); - /* Re-enabled IRQs */ - kbase_pm_enable_interrupts_mmu_mask(kbdev, mmu_irq_mask); - /* Complete any jobs that were still on the GPU */ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); kbase_backend_reset(kbdev, &end_timestamp); @@ -1267,6 +1254,8 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) mutex_unlock(&as->transaction_mutex); } + kbase_pm_enable_interrupts(kbdev); + atomic_set(&kbdev->hwaccess.backend.reset_gpu, KBASE_RESET_GPU_NOT_PENDING); @@ -1362,6 +1351,10 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) break; } spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + + /* Resume the vinstr core */ + kbase_vinstr_hwc_resume(kbdev->vinstr_ctx); + /* Note: counter dumping may now resume */ mutex_lock(&kbdev->pm.lock); diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c index 860171810667..ad56afb0b3ee 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c @@ -1100,24 +1100,10 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp) js, 0); if (katom) { - enum kbase_atom_gpu_rb_state gpu_rb_state = - katom->gpu_rb_state; - kbase_gpu_release_atom(kbdev, katom, NULL); kbase_gpu_dequeue_atom(kbdev, js, NULL); - - if (gpu_rb_state == - KBASE_ATOM_GPU_RB_SUBMITTED) { - katom->event_code = - BASE_JD_EVENT_JOB_CANCELLED; - kbase_jm_complete(kbdev, katom, - end_timestamp); - } else { - katom->event_code = - BASE_JD_EVENT_STOPPED; - kbase_jm_return_atom_to_js(kbdev, - katom); - } + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + kbase_jm_complete(kbdev, katom, 
end_timestamp); } } } @@ -1167,6 +1153,12 @@ static int should_stop_x_dep_slot(struct kbase_jd_atom *katom) return -1; } +static void kbase_job_evicted(struct kbase_jd_atom *katom) +{ + kbase_timeline_job_slot_done(katom->kctx->kbdev, katom->kctx, katom, + katom->slot_nr, KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT); +} + bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, struct kbase_context *kctx, int js, @@ -1265,6 +1257,7 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, != 0) { /* idx1 removed successfully, * will be handled in IRQ */ + kbase_job_evicted(katom_idx1); kbase_gpu_remove_atom(kbdev, katom_idx1, action, true); @@ -1336,6 +1329,7 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, JS_HEAD_NEXT_HI), NULL) != 0) { /* idx1 removed successfully, will be * handled in IRQ once idx0 completes */ + kbase_job_evicted(katom_idx1); kbase_gpu_remove_atom(kbdev, katom_idx1, action, false); diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c index 89b8085e28b2..6a49669af630 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c @@ -122,7 +122,8 @@ bool kbase_js_choose_affinity(u64 * const affinity, if (1 == kbdev->gpu_props.num_cores) { /* trivial case only one core, nothing to do */ - *affinity = core_availability_mask; + *affinity = core_availability_mask & + kbdev->pm.debug_core_mask[js]; } else { if ((core_req & (BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP))) { @@ -131,7 +132,8 @@ bool kbase_js_choose_affinity(u64 * const affinity, * the first core group */ *affinity = kbdev->gpu_props.props.coherency_info.group[0].core_mask - & core_availability_mask; + & core_availability_mask & + kbdev->pm.debug_core_mask[js]; } else { /* js[1], js[2] use core groups 0, 1 for * dual-core-group systems */ @@ -141,7 +143,8 @@ bool 
kbase_js_choose_affinity(u64 * const affinity, num_core_groups); *affinity = kbdev->gpu_props.props.coherency_info.group[core_group_idx].core_mask - & core_availability_mask; + & core_availability_mask & + kbdev->pm.debug_core_mask[js]; /* If the job is specifically targeting core * group 1 and the core availability policy is @@ -155,7 +158,8 @@ bool kbase_js_choose_affinity(u64 * const affinity, } else { /* All cores are available when no core split is * required */ - *affinity = core_availability_mask; + *affinity = core_availability_mask & + kbdev->pm.debug_core_mask[js]; } } diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c index 1b613a1967c9..4fd13e2de63e 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c @@ -203,10 +203,7 @@ void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as, struct kbase_context *kctx) { struct kbase_mmu_setup *current_setup = &as->current_setup; -#if defined(CONFIG_MALI_MIPE_ENABLED) || \ - (defined(MALI_INCLUDE_TMIX) && \ - defined(CONFIG_MALI_COH_PAGES) && \ - defined(CONFIG_MALI_GPU_MMU_AARCH64)) +#ifdef CONFIG_MALI_MIPE_ENABLED u32 transcfg = 0; #endif @@ -282,8 +279,18 @@ int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as, void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as, struct kbase_context *kctx, enum kbase_mmu_fault_type type) { + unsigned long flags; u32 pf_bf_mask; + spin_lock_irqsave(&kbdev->mmu_mask_change, flags); + + /* + * A reset is in-flight and we're flushing the IRQ + bottom half + * so don't update anything as it could race with the reset code. 
+ */ + if (kbdev->irq_reset_flush) + goto unlock; + /* Clear the page (and bus fault IRQ as well in case one occurred) */ pf_bf_mask = MMU_PAGE_FAULT(as->number); if (type == KBASE_MMU_FAULT_TYPE_BUS || @@ -291,6 +298,9 @@ void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as, pf_bf_mask |= MMU_BUS_ERROR(as->number); kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), pf_bf_mask, kctx); + +unlock: + spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); } void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as, @@ -303,6 +313,13 @@ void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as, * occurred) */ spin_lock_irqsave(&kbdev->mmu_mask_change, flags); + /* + * A reset is in-flight and we're flushing the IRQ + bottom half + * so don't update anything as it could race with the reset code. + */ + if (kbdev->irq_reset_flush) + goto unlock; + irq_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), kctx) | MMU_PAGE_FAULT(as->number); @@ -312,5 +329,6 @@ void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as, kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), irq_mask, kctx); +unlock: spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); } diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c index 9ff7baadec7a..57ef9843657e 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c @@ -35,9 +35,6 @@ void kbase_pm_register_access_enable(struct kbase_device *kbdev) { struct kbase_pm_callback_conf *callbacks; -#ifdef CONFIG_MALI_PLATFORM_DEVICETREE - pm_runtime_enable(kbdev->dev); -#endif /* CONFIG_MALI_PLATFORM_DEVICETREE */ callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS; if (callbacks) @@ -56,9 +53,6 @@ void kbase_pm_register_access_disable(struct kbase_device *kbdev) 
callbacks->power_off_callback(kbdev); kbdev->pm.backend.gpu_powered = false; -#ifdef CONFIG_MALI_PLATFORM_DEVICETREE - pm_runtime_disable(kbdev->dev); -#endif } int kbase_hwaccess_pm_init(struct kbase_device *kbdev) @@ -96,6 +90,8 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev) callbacks->power_runtime_on_callback; kbdev->pm.backend.callback_power_runtime_off = callbacks->power_runtime_off_callback; + kbdev->pm.backend.callback_power_runtime_idle = + callbacks->power_runtime_idle_callback; } else { kbdev->pm.backend.callback_power_on = NULL; kbdev->pm.backend.callback_power_off = NULL; @@ -105,6 +101,7 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev) kbdev->pm.callback_power_runtime_term = NULL; kbdev->pm.backend.callback_power_runtime_on = NULL; kbdev->pm.backend.callback_power_runtime_off = NULL; + kbdev->pm.backend.callback_power_runtime_idle = NULL; } /* Initialise the metrics subsystem */ @@ -227,7 +224,9 @@ int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev, kbasep_pm_read_present_cores(kbdev); - kbdev->pm.debug_core_mask = + kbdev->pm.debug_core_mask_all = kbdev->pm.debug_core_mask[0] = + kbdev->pm.debug_core_mask[1] = + kbdev->pm.debug_core_mask[2] = kbdev->gpu_props.props.raw_props.shader_present; /* Pretend the GPU is active to prevent a power policy turning the GPU @@ -321,9 +320,15 @@ void kbase_pm_power_changed(struct kbase_device *kbdev) } } -void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, u64 new_core_mask) +void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, + u64 new_core_mask_js0, u64 new_core_mask_js1, + u64 new_core_mask_js2) { - kbdev->pm.debug_core_mask = new_core_mask; + kbdev->pm.debug_core_mask[0] = new_core_mask_js0; + kbdev->pm.debug_core_mask[1] = new_core_mask_js1; + kbdev->pm.debug_core_mask[2] = new_core_mask_js2; + kbdev->pm.debug_core_mask_all = new_core_mask_js0 | new_core_mask_js1 | + new_core_mask_js2; kbase_pm_update_cores_state_nolock(kbdev); } diff --git 
a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c index 0d9ee5879142..4b903cca020b 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c @@ -136,14 +136,14 @@ u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev) /* All cores must be enabled when instrumentation is in use */ if (kbdev->pm.backend.instr_enabled) return kbdev->gpu_props.props.raw_props.shader_present & - kbdev->pm.debug_core_mask; + kbdev->pm.debug_core_mask_all; if (kbdev->pm.backend.ca_current_policy == NULL) return kbdev->gpu_props.props.raw_props.shader_present & - kbdev->pm.debug_core_mask; + kbdev->pm.debug_core_mask_all; return kbdev->pm.backend.ca_current_policy->get_core_mask(kbdev) & - kbdev->pm.debug_core_mask; + kbdev->pm.debug_core_mask_all; } KBASE_EXPORT_TEST_API(kbase_pm_ca_get_core_mask); diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h index 19e23d8aa69b..60e40915869c 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h @@ -198,8 +198,13 @@ union kbase_pm_ca_policy_data { * @gpu_poweroff_work: Workitem used on @gpu_poweroff_wq * @shader_poweroff_pending: Bit mask of shaders to be powered off on next * timer callback - * @poweroff_timer_needed: true if the poweroff timer is currently running, + * @poweroff_timer_needed: true if the poweroff timer is currently required, * false otherwise + * @poweroff_timer_running: true if the poweroff timer is currently running, + * false otherwise + * power_change_lock should be held when accessing, + * unless there is no way the timer can be running (eg + * hrtimer_cancel() was called immediately before) * @callback_power_on: Callback when the GPU needs to be turned on. 
See * &struct kbase_pm_callback_conf * @callback_power_off: Callback when the GPU may be turned off. See @@ -212,6 +217,8 @@ union kbase_pm_ca_policy_data { * &struct kbase_pm_callback_conf * @callback_power_runtime_off: Callback when the GPU may be turned off. See * &struct kbase_pm_callback_conf + * @callback_power_runtime_idle: Optional callback when the GPU may be idle. See + * &struct kbase_pm_callback_conf * @callback_cci_snoop_ctrl: Callback when the GPU L2 power may transition. * If enable is set then snoops should be enabled * otherwise snoops should be disabled @@ -272,6 +279,7 @@ struct kbase_pm_backend_data { u64 shader_poweroff_pending; bool poweroff_timer_needed; + bool poweroff_timer_running; int (*callback_power_on)(struct kbase_device *kbdev); void (*callback_power_off)(struct kbase_device *kbdev); @@ -279,6 +287,7 @@ struct kbase_pm_backend_data { void (*callback_power_resume)(struct kbase_device *kbdev); int (*callback_power_runtime_on)(struct kbase_device *kbdev); void (*callback_power_runtime_off)(struct kbase_device *kbdev); + int (*callback_power_runtime_idle)(struct kbase_device *kbdev); }; diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c index d325c0a7e646..6fe73d433062 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c @@ -840,30 +840,6 @@ void kbase_pm_enable_interrupts(struct kbase_device *kbdev) KBASE_EXPORT_TEST_API(kbase_pm_enable_interrupts); -void kbase_pm_enable_interrupts_mmu_mask(struct kbase_device *kbdev, u32 mask) -{ - unsigned long flags; - - KBASE_DEBUG_ASSERT(NULL != kbdev); - /* - * Clear all interrupts, - * and unmask them all. 
- */ - spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL, - NULL); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), GPU_IRQ_REG_ALL, - NULL); - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); - - kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF, - NULL); - kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0xFFFFFFFF, NULL); - - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF, NULL); - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), mask, NULL); -} - void kbase_pm_disable_interrupts(struct kbase_device *kbdev) { unsigned long flags; @@ -921,6 +897,7 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume) if (is_resume && kbdev->pm.backend.callback_power_resume) { kbdev->pm.backend.callback_power_resume(kbdev); + return; } else if (kbdev->pm.backend.callback_power_on) { kbdev->pm.backend.callback_power_on(kbdev); /* If your platform properly keeps the GPU state you may use the diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h index bcca37d05402..943eda567cb5 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h @@ -154,17 +154,6 @@ bool kbase_pm_clock_off(struct kbase_device *kbdev, bool is_suspend); */ void kbase_pm_enable_interrupts(struct kbase_device *kbdev); -/** - * kbase_pm_enable_interrupts_mmu_mask - Enable interrupts on the device, using - * the provided mask to set MMU_IRQ_MASK. - * - * Interrupts are also enabled after a call to kbase_pm_clock_on(). - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) - * @mask: The mask to use for MMU_IRQ_MASK - */ -void kbase_pm_enable_interrupts_mmu_mask(struct kbase_device *kbdev, u32 mask); - /** * kbase_pm_disable_interrupts - Disable interrupts on the device. 
* diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c index e3c4829019ae..343436fc353d 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c @@ -183,10 +183,13 @@ static enum hrtimer_restart kbasep_pm_do_gpu_poweroff_callback(struct hrtimer *timer) { struct kbase_device *kbdev; + unsigned long flags; kbdev = container_of(timer, struct kbase_device, pm.backend.gpu_poweroff_timer); + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + /* It is safe for this call to do nothing if the work item is already * queued. The worker function will read the must up-to-date state of * kbdev->pm.backend.gpu_poweroff_pending under lock. @@ -200,30 +203,27 @@ kbasep_pm_do_gpu_poweroff_callback(struct hrtimer *timer) &kbdev->pm.backend.gpu_poweroff_work); if (kbdev->pm.backend.shader_poweroff_pending) { - unsigned long flags; - - spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); - - if (kbdev->pm.backend.shader_poweroff_pending) { - kbdev->pm.backend.shader_poweroff_pending_time--; + kbdev->pm.backend.shader_poweroff_pending_time--; - KBASE_DEBUG_ASSERT( + KBASE_DEBUG_ASSERT( kbdev->pm.backend.shader_poweroff_pending_time >= 0); - if (!kbdev->pm.backend.shader_poweroff_pending_time) - kbasep_pm_do_poweroff_cores(kbdev); - } - - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + if (!kbdev->pm.backend.shader_poweroff_pending_time) + kbasep_pm_do_poweroff_cores(kbdev); } if (kbdev->pm.backend.poweroff_timer_needed) { + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + hrtimer_add_expires(timer, kbdev->pm.gpu_poweroff_time); return HRTIMER_RESTART; } + kbdev->pm.backend.poweroff_timer_running = false; + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + return HRTIMER_NORESTART; } @@ -263,10 +263,13 @@ static void kbasep_pm_do_gpu_poweroff_wq(struct work_struct *data) if 
(do_poweroff) { kbdev->pm.backend.poweroff_timer_needed = false; hrtimer_cancel(&kbdev->pm.backend.gpu_poweroff_timer); + kbdev->pm.backend.poweroff_timer_running = false; + /* Power off the GPU */ if (!kbase_pm_do_poweroff(kbdev, false)) { /* GPU can not be powered off at present */ kbdev->pm.backend.poweroff_timer_needed = true; + kbdev->pm.backend.poweroff_timer_running = true; hrtimer_start(&kbdev->pm.backend.gpu_poweroff_timer, kbdev->pm.gpu_poweroff_time, HRTIMER_MODE_REL); @@ -316,13 +319,13 @@ void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev) kbdev->pm.backend.poweroff_timer_needed = false; hrtimer_cancel(&kbdev->pm.backend.gpu_poweroff_timer); + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + kbdev->pm.backend.poweroff_timer_running = false; /* If wq is already running but is held off by pm.lock, make sure it has * no effect */ kbdev->pm.backend.gpu_poweroff_pending = 0; - spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); - kbdev->pm.backend.shader_poweroff_pending = 0; kbdev->pm.backend.shader_poweroff_pending_time = 0; @@ -331,87 +334,106 @@ void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev) void kbase_pm_update_active(struct kbase_device *kbdev) { + struct kbase_pm_device_data *pm = &kbdev->pm; + struct kbase_pm_backend_data *backend = &pm->backend; unsigned long flags; bool active; - lockdep_assert_held(&kbdev->pm.lock); + lockdep_assert_held(&pm->lock); /* pm_current_policy will never be NULL while pm.lock is held */ - KBASE_DEBUG_ASSERT(kbdev->pm.backend.pm_current_policy); + KBASE_DEBUG_ASSERT(backend->pm_current_policy); - spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + spin_lock_irqsave(&pm->power_change_lock, flags); - active = kbdev->pm.backend.pm_current_policy->get_core_active(kbdev); + active = backend->pm_current_policy->get_core_active(kbdev); if (active) { - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); - - if (kbdev->pm.backend.gpu_poweroff_pending) { + if 
(backend->gpu_poweroff_pending) { /* Cancel any pending power off request */ - kbdev->pm.backend.gpu_poweroff_pending = 0; + backend->gpu_poweroff_pending = 0; /* If a request was pending then the GPU was still * powered, so no need to continue */ - if (!kbdev->poweroff_pending) + if (!kbdev->poweroff_pending) { + spin_unlock_irqrestore(&pm->power_change_lock, + flags); return; + } } - if (!kbdev->pm.backend.poweroff_timer_needed && - !kbdev->pm.backend.gpu_powered && - (kbdev->pm.poweroff_gpu_ticks || - kbdev->pm.poweroff_shader_ticks)) { - kbdev->pm.backend.poweroff_timer_needed = true; - hrtimer_start(&kbdev->pm.backend.gpu_poweroff_timer, - kbdev->pm.gpu_poweroff_time, + if (!backend->poweroff_timer_running && !backend->gpu_powered && + (pm->poweroff_gpu_ticks || + pm->poweroff_shader_ticks)) { + backend->poweroff_timer_needed = true; + backend->poweroff_timer_running = true; + hrtimer_start(&backend->gpu_poweroff_timer, + pm->gpu_poweroff_time, HRTIMER_MODE_REL); } + spin_unlock_irqrestore(&pm->power_change_lock, flags); + /* Power on the GPU and any cores requested by the policy */ kbase_pm_do_poweron(kbdev, false); } else { /* It is an error for the power policy to power off the GPU * when there are contexts active */ - KBASE_DEBUG_ASSERT(kbdev->pm.active_count == 0); + KBASE_DEBUG_ASSERT(pm->active_count == 0); - if (kbdev->pm.backend.shader_poweroff_pending) { - kbdev->pm.backend.shader_poweroff_pending = 0; - kbdev->pm.backend.shader_poweroff_pending_time = 0; + if (backend->shader_poweroff_pending) { + backend->shader_poweroff_pending = 0; + backend->shader_poweroff_pending_time = 0; } - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); - - /* Request power off */ - if (kbdev->pm.backend.gpu_powered) { - if (kbdev->pm.poweroff_gpu_ticks) { - kbdev->pm.backend.gpu_poweroff_pending = - kbdev->pm.poweroff_gpu_ticks; - if (!kbdev->pm.backend.poweroff_timer_needed) { + if (pm->backend.gpu_powered) { + if (pm->poweroff_gpu_ticks) { + 
backend->gpu_poweroff_pending = + pm->poweroff_gpu_ticks; + backend->poweroff_timer_needed = true; + if (!backend->poweroff_timer_running) { /* Start timer if not running (eg if * power policy has been changed from * always_on to something else). This * will ensure the GPU is actually * powered off */ - kbdev->pm.backend.poweroff_timer_needed + backend->poweroff_timer_running = true; hrtimer_start( - &kbdev->pm.backend.gpu_poweroff_timer, - kbdev->pm.gpu_poweroff_time, + &backend->gpu_poweroff_timer, + pm->gpu_poweroff_time, HRTIMER_MODE_REL); } + spin_unlock_irqrestore(&pm->power_change_lock, + flags); } else { + spin_unlock_irqrestore(&pm->power_change_lock, + flags); + /* Power off the GPU immediately */ if (!kbase_pm_do_poweroff(kbdev, false)) { /* GPU can not be powered off at present */ - kbdev->pm.backend.poweroff_timer_needed - = true; - hrtimer_start( - &kbdev->pm.backend.gpu_poweroff_timer, - kbdev->pm.gpu_poweroff_time, - HRTIMER_MODE_REL); + spin_lock_irqsave( + &pm->power_change_lock, + flags); + backend->poweroff_timer_needed = true; + if (!backend->poweroff_timer_running) { + backend->poweroff_timer_running + = true; + hrtimer_start( + &backend->gpu_poweroff_timer, + pm->gpu_poweroff_time, + HRTIMER_MODE_REL); + } + spin_unlock_irqrestore( + &pm->power_change_lock, + flags); } } + } else { + spin_unlock_irqrestore(&pm->power_change_lock, flags); } } } @@ -478,7 +500,6 @@ void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev) * Only reset poweroff_timer_needed if we're not in the middle * of the power off callback */ kbdev->pm.backend.poweroff_timer_needed = false; - hrtimer_try_to_cancel(&kbdev->pm.backend.gpu_poweroff_timer); } /* Ensure timer does not power off wanted cores and make sure to power diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.c new file mode 100644 index 000000000000..cd4f0a2bb24f --- /dev/null +++ 
b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.c @@ -0,0 +1,160 @@ +/* + * + * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +#include +#include +#include +#include +#include +#include + +/* + * This model is primarily designed for the Juno platform. It may not be + * suitable for other platforms. + */ + +#define FALLBACK_STATIC_TEMPERATURE 55000 + +static u32 dynamic_coefficient; +static u32 static_coefficient; +static s32 ts[4]; +static struct thermal_zone_device *gpu_tz; + +static unsigned long model_static_power(unsigned long voltage) +{ + unsigned long temperature, temp; + unsigned long temp_squared, temp_cubed, temp_scaling_factor; + const unsigned long voltage_cubed = (voltage * voltage * voltage) >> 10; + + if (gpu_tz) { + int ret; + + ret = gpu_tz->ops->get_temp(gpu_tz, &temperature); + if (ret) { + pr_warn_ratelimited("Error reading temperature for gpu thermal zone: %d\n", + ret); + temperature = FALLBACK_STATIC_TEMPERATURE; + } + } else { + temperature = FALLBACK_STATIC_TEMPERATURE; + } + + /* Calculate the temperature scaling factor. To be applied to the + * voltage scaled power. 
+ */ + temp = temperature / 1000; + temp_squared = temp * temp; + temp_cubed = temp_squared * temp; + temp_scaling_factor = + (ts[3] * temp_cubed) + + (ts[2] * temp_squared) + + (ts[1] * temp) + + ts[0]; + + return (((static_coefficient * voltage_cubed) >> 20) + * temp_scaling_factor) + / 1000000; +} + +static unsigned long model_dynamic_power(unsigned long freq, + unsigned long voltage) +{ + /* The inputs: freq (f) is in Hz, and voltage (v) in mV. + * The coefficient (c) is in mW/(MHz mV mV). + * + * This function calculates the dynamic power after this formula: + * Pdyn (mW) = c (mW/(MHz*mV*mV)) * v (mV) * v (mV) * f (MHz) + */ + const unsigned long v2 = (voltage * voltage) / 1000; /* m*(V*V) */ + const unsigned long f_mhz = freq / 1000000; /* MHz */ + + return (dynamic_coefficient * v2 * f_mhz) / 1000000; /* mW */ +} + +struct devfreq_cooling_ops power_model_simple_ops = { + .get_static_power = model_static_power, + .get_dynamic_power = model_dynamic_power, +}; + +int kbase_power_model_simple_init(struct kbase_device *kbdev) +{ + struct device_node *power_model_node; + const char *tz_name; + u32 static_power, dynamic_power; + u32 voltage, voltage_squared, voltage_cubed, frequency; + + power_model_node = of_get_child_by_name(kbdev->dev->of_node, + "power_model"); + if (!power_model_node) { + dev_err(kbdev->dev, "could not find power_model node\n"); + return -ENODEV; + } + if (!of_device_is_compatible(power_model_node, + "arm,mali-simple-power-model")) { + dev_err(kbdev->dev, "power_model incompatible with simple power model\n"); + return -ENODEV; + } + + if (of_property_read_string(power_model_node, "thermal-zone", + &tz_name)) { + dev_err(kbdev->dev, "ts in power_model not available\n"); + return -EINVAL; + } + + gpu_tz = thermal_zone_get_zone_by_name(tz_name); + if (IS_ERR(gpu_tz)) { + pr_warn_ratelimited("Error getting gpu thermal zone (%ld), not yet ready?\n", + PTR_ERR(gpu_tz)); + gpu_tz = NULL; + + return -EPROBE_DEFER; + } + + if 
(of_property_read_u32(power_model_node, "static-power", + &static_power)) { + dev_err(kbdev->dev, "static-power in power_model not available\n"); + return -EINVAL; + } + if (of_property_read_u32(power_model_node, "dynamic-power", + &dynamic_power)) { + dev_err(kbdev->dev, "dynamic-power in power_model not available\n"); + return -EINVAL; + } + if (of_property_read_u32(power_model_node, "voltage", + &voltage)) { + dev_err(kbdev->dev, "voltage in power_model not available\n"); + return -EINVAL; + } + if (of_property_read_u32(power_model_node, "frequency", + &frequency)) { + dev_err(kbdev->dev, "frequency in power_model not available\n"); + return -EINVAL; + } + voltage_squared = (voltage * voltage) / 1000; + voltage_cubed = voltage * voltage * voltage; + static_coefficient = (static_power << 20) / (voltage_cubed >> 10); + dynamic_coefficient = (((dynamic_power * 1000) / voltage_squared) + * 1000) / frequency; + + if (of_property_read_u32_array(power_model_node, "ts", ts, 4)) { + dev_err(kbdev->dev, "ts in power_model not available\n"); + return -EINVAL; + } + + return 0; +} + diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.h new file mode 100644 index 000000000000..d20de1ef6170 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.h @@ -0,0 +1,43 @@ +/* + * + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + + + +#ifndef _BASE_POWER_MODEL_SIMPLE_H_ +#define _BASE_POWER_MODEL_SIMPLE_H_ + +/** + * kbase_power_model_simple_init - Initialise the simple power model + * @kbdev: Device pointer + * + * The simple power model estimates power based on current voltage, temperature, + * and coefficients read from device tree. It does not take utilization into + * account. + * + * The power model requires coefficients from the power_model node in device + * tree. The absence of this node will prevent the model from functioning, but + * should not prevent the rest of the driver from running. + * + * Return: 0 on success + * -ENODEV if the power_model node is not present in device tree + * -EPROBE_DEFER if the thermal zone specified in device tree is not + * currently available + * Any other negative value on failure + */ +int kbase_power_model_simple_init(struct kbase_device *kbdev); + +extern struct devfreq_cooling_ops power_model_simple_ops; + +#endif /* _BASE_POWER_MODEL_SIMPLE_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h b/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h index 5a1523034c15..2102f43348cb 100644 --- a/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h +++ b/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h @@ -45,9 +45,7 @@ enum base_hw_feature { BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, BASE_HW_FEATURE_BRNDOUT_KILL, BASE_HW_FEATURE_WARPING, - BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_V4, - BASE_HW_FEATURE_PROTECTED_MODE, BASE_HW_FEATURE_END }; diff --git a/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h b/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h index 9fae0f6c806a..1afd23809ad3 100644 --- a/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h +++ b/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h @@ -90,6 +90,7 @@ enum base_hw_issue { BASE_HW_ISSUE_11024, BASE_HW_ISSUE_11035, BASE_HW_ISSUE_11042, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_26, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, 
@@ -165,6 +166,7 @@ static const enum base_hw_issue base_hw_issues_t60x_r0p0_15dev0[] = { BASE_HW_ISSUE_11012, BASE_HW_ISSUE_11020, BASE_HW_ISSUE_11035, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_END }; @@ -201,6 +203,7 @@ static const enum base_hw_issue base_hw_issues_t60x_r0p0_eac[] = { BASE_HW_ISSUE_11012, BASE_HW_ISSUE_11020, BASE_HW_ISSUE_11035, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_END }; @@ -234,6 +237,7 @@ static const enum base_hw_issue base_hw_issues_t60x_r0p1[] = { BASE_HW_ISSUE_11012, BASE_HW_ISSUE_11020, BASE_HW_ISSUE_11035, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_END @@ -264,6 +268,7 @@ static const enum base_hw_issue base_hw_issues_t62x_r0p1[] = { BASE_HW_ISSUE_11024, BASE_HW_ISSUE_11035, BASE_HW_ISSUE_11042, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_END @@ -284,6 +289,7 @@ static const enum base_hw_issue base_hw_issues_t62x_r1p0[] = { BASE_HW_ISSUE_11020, BASE_HW_ISSUE_11024, BASE_HW_ISSUE_11042, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_END @@ -302,6 +308,7 @@ static const enum base_hw_issue base_hw_issues_t62x_r1p1[] = { BASE_HW_ISSUE_10959, BASE_HW_ISSUE_11012, BASE_HW_ISSUE_11042, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_END @@ -315,6 +322,7 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p0[] = { BASE_HW_ISSUE_11020, BASE_HW_ISSUE_11024, BASE_HW_ISSUE_11042, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_26, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, @@ -337,6 +345,7 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p1[] = { BASE_HW_ISSUE_11020, BASE_HW_ISSUE_11024, BASE_HW_ISSUE_11042, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_26, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, @@ -357,6 +366,7 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p1_50rel0[] = { BASE_HW_ISSUE_10883, 
BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11042, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_26, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, @@ -379,6 +389,7 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p2[] = { BASE_HW_ISSUE_11020, BASE_HW_ISSUE_11024, BASE_HW_ISSUE_11042, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_26, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, @@ -399,6 +410,7 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p3[] = { BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11042, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_26, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, @@ -419,6 +431,7 @@ static const enum base_hw_issue base_hw_issues_t76x_r1p0[] = { BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11042, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, @@ -440,6 +453,7 @@ static const enum base_hw_issue base_hw_issues_t72x_r0p0[] = { BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11042, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_END @@ -455,6 +469,7 @@ static const enum base_hw_issue base_hw_issues_t72x_r1p0[] = { BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11042, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_END @@ -470,6 +485,7 @@ static const enum base_hw_issue base_hw_issues_t72x_r1p1[] = { BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11042, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_END @@ -483,6 +499,7 @@ static const enum base_hw_issue base_hw_issues_model_t72x[] = { BASE_HW_ISSUE_10471, BASE_HW_ISSUE_10797, BASE_HW_ISSUE_11042, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_END @@ -495,6 +512,7 @@ static const enum base_hw_issue base_hw_issues_model_t76x[] = { BASE_HW_ISSUE_11020, BASE_HW_ISSUE_11024, BASE_HW_ISSUE_11042, + BASE_HW_ISSUE_11051, 
BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, @@ -514,6 +532,7 @@ static const enum base_hw_issue base_hw_issues_model_t60x[] = { BASE_HW_ISSUE_11012, BASE_HW_ISSUE_11020, BASE_HW_ISSUE_11024, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_END @@ -529,6 +548,7 @@ static const enum base_hw_issue base_hw_issues_model_t62x[] = { BASE_HW_ISSUE_11020, BASE_HW_ISSUE_11024, BASE_HW_ISSUE_11042, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_END @@ -539,6 +559,7 @@ static const enum base_hw_issue base_hw_issues_tFRx_r0p1[] = { BASE_HW_ISSUE_10821, BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, @@ -555,6 +576,7 @@ static const enum base_hw_issue base_hw_issues_tFRx_r0p2[] = { BASE_HW_ISSUE_10821, BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, @@ -570,6 +592,7 @@ static const enum base_hw_issue base_hw_issues_tFRx_r1p0[] = { BASE_HW_ISSUE_10821, BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, BASE_HW_ISSUE_T76X_3700, @@ -584,6 +607,7 @@ static const enum base_hw_issue base_hw_issues_tFRx_r2p0[] = { BASE_HW_ISSUE_10821, BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, BASE_HW_ISSUE_T76X_3700, @@ -597,6 +621,7 @@ static const enum base_hw_issue base_hw_issues_model_tFRx[] = { BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9275, BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, BASE_HW_ISSUE_T76X_3700, @@ -609,6 +634,7 @@ static const enum base_hw_issue base_hw_issues_t86x_r0p2[] = { BASE_HW_ISSUE_10821, BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, 
BASE_HW_ISSUE_T76X_3086, @@ -624,6 +650,7 @@ static const enum base_hw_issue base_hw_issues_t86x_r1p0[] = { BASE_HW_ISSUE_10821, BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, BASE_HW_ISSUE_T76X_3700, @@ -638,6 +665,7 @@ static const enum base_hw_issue base_hw_issues_t86x_r2p0[] = { BASE_HW_ISSUE_10821, BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, BASE_HW_ISSUE_T76X_3700, @@ -651,6 +679,7 @@ static const enum base_hw_issue base_hw_issues_model_t86x[] = { BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9275, BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, BASE_HW_ISSUE_T76X_3700, @@ -663,6 +692,7 @@ static const enum base_hw_issue base_hw_issues_t83x_r0p1[] = { BASE_HW_ISSUE_10821, BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, @@ -678,6 +708,7 @@ static const enum base_hw_issue base_hw_issues_t83x_r1p0[] = { BASE_HW_ISSUE_10821, BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, BASE_HW_ISSUE_T76X_3700, @@ -691,6 +722,7 @@ static const enum base_hw_issue base_hw_issues_model_t83x[] = { BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9275, BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, @@ -704,6 +736,7 @@ static const enum base_hw_issue base_hw_issues_t82x_r0p0[] = { BASE_HW_ISSUE_10821, BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, @@ -719,6 +752,7 @@ static const enum base_hw_issue base_hw_issues_t82x_r0p1[] = { BASE_HW_ISSUE_10821, BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, @@ -734,6 +768,7 @@ static 
const enum base_hw_issue base_hw_issues_t82x_r1p0[] = { BASE_HW_ISSUE_10821, BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, BASE_HW_ISSUE_T76X_3700, @@ -747,6 +782,7 @@ static const enum base_hw_issue base_hw_issues_model_t82x[] = { BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9275, BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, diff --git a/drivers/gpu/arm/midgard/mali_base_kernel.h b/drivers/gpu/arm/midgard/mali_base_kernel.h index 56621309c5ad..e7f741690cce 100644 --- a/drivers/gpu/arm/midgard/mali_base_kernel.h +++ b/drivers/gpu/arm/midgard/mali_base_kernel.h @@ -46,6 +46,8 @@ typedef u64 base_mem_handle; #include "mali_base_mem_priv.h" #include "mali_kbase_profiling_gator_api.h" +#include "mali_midg_coherency.h" +#include "mali_kbase_gpu_id.h" /* * Dependency stuff, keep it private for now. May want to expose it if @@ -184,7 +186,13 @@ enum { /** - * @brief Memory types supported by @a base_mem_import + * enum base_mem_import_type - Memory types supported by @a base_mem_import + * + * @BASE_MEM_IMPORT_TYPE_INVALID: Invalid type + * @BASE_MEM_IMPORT_TYPE_UMP: UMP import. Handle type is ump_secure_id. + * @BASE_MEM_IMPORT_TYPE_UMM: UMM import. Handle type is a file descriptor (int) + * @BASE_MEM_IMPORT_TYPE_USER_BUFFER: User buffer import. Handle is a + * base_mem_import_user_buffer * * Each type defines what the supported handle type is. * @@ -196,12 +204,25 @@ enum { */ typedef enum base_mem_import_type { BASE_MEM_IMPORT_TYPE_INVALID = 0, - /** UMP import. Handle type is ump_secure_id. */ BASE_MEM_IMPORT_TYPE_UMP = 1, - /** UMM import. 
Handle type is a file descriptor (int) */ - BASE_MEM_IMPORT_TYPE_UMM = 2 + BASE_MEM_IMPORT_TYPE_UMM = 2, + BASE_MEM_IMPORT_TYPE_USER_BUFFER = 3 } base_mem_import_type; +/** + * struct base_mem_import_user_buffer - Handle of an imported user buffer + * + * @ptr: kbase_pointer to imported user buffer + * @length: length of imported user buffer in bytes + * + * This structure is used to represent a handle of an imported user buffer. + */ + +struct base_mem_import_user_buffer { + kbase_pointer ptr; + u64 length; +}; + /** * @brief Invalid memory handle type. * Return value from functions returning @a base_mem_handle on error. @@ -463,9 +484,6 @@ typedef u16 base_jd_core_req; * * In contrast to @ref BASE_JD_REQ_CS, this does \b not indicate that the Job * Chain contains 'Geometry Shader' or 'Vertex Shader' jobs. - * - * @note This is a more flexible variant of the @ref BASE_CONTEXT_HINT_ONLY_COMPUTE flag, - * allowing specific jobs to be marked as 'Only Compute' instead of the entire context */ #define BASE_JD_REQ_ONLY_COMPUTE (1U << 10) @@ -1365,8 +1383,7 @@ struct gpu_raw_gpu_props { u64 shader_present; u64 tiler_present; u64 l2_present; - u32 coherency_enabled; - u32 unused_1; /* keep for backward compatibility */ + u64 unused_1; /* keep for backward compatibility */ u32 l2_features; u32 suspend_size; /* API 8.2+ */ @@ -1387,7 +1404,11 @@ struct gpu_raw_gpu_props { u32 thread_max_barrier_size; u32 thread_features; - u32 coherency_features; + /* + * Note: This is the _selected_ coherency mode rather than the + * available modes as exposed in the coherency_features register. + */ + u32 coherency_mode; }; /** @@ -1441,28 +1462,7 @@ enum base_context_create_flags { /** Base context is a 'System Monitor' context for Hardware counters. * * One important side effect of this is that job submission is disabled. */ - BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED = (1u << 1), - - /** Base context flag indicating a 'hint' that this context uses Compute - * Jobs only. 
- * - * Specifially, this means that it only sends atoms that do not - * contain the following @ref base_jd_core_req : - * - BASE_JD_REQ_FS - * - BASE_JD_REQ_T - * - * Violation of these requirements will cause the Job-Chains to be rejected. - * - * In addition, it is inadvisable for the atom's Job-Chains to contain Jobs - * of the following @ref gpu_job_type (whilst it may work now, it may not - * work in future) : - * - @ref GPU_JOB_VERTEX - * - @ref GPU_JOB_GEOMETRY - * - * @note An alternative to using this is to specify the BASE_JD_REQ_ONLY_COMPUTE - * requirement in atoms. - */ - BASE_CONTEXT_HINT_ONLY_COMPUTE = (1u << 2) + BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED = (1u << 1) }; /** @@ -1470,15 +1470,13 @@ enum base_context_create_flags { */ #define BASE_CONTEXT_CREATE_ALLOWED_FLAGS \ (((u32)BASE_CONTEXT_CCTX_EMBEDDED) | \ - ((u32)BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) | \ - ((u32)BASE_CONTEXT_HINT_ONLY_COMPUTE)) + ((u32)BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED)) /** * Bitpattern describing the ::base_context_create_flags that can be passed to the kernel */ #define BASE_CONTEXT_CREATE_KERNEL_FLAGS \ - (((u32)BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) | \ - ((u32)BASE_CONTEXT_HINT_ONLY_COMPUTE)) + ((u32)BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) /** * Private flags used on the base context diff --git a/drivers/gpu/arm/midgard/mali_kbase.h b/drivers/gpu/arm/midgard/mali_kbase.h index 3e294da28ea0..b30bd92f009d 100644 --- a/drivers/gpu/arm/midgard/mali_kbase.h +++ b/drivers/gpu/arm/midgard/mali_kbase.h @@ -51,7 +51,6 @@ #include "mali_kbase_trace_timeline.h" #include "mali_kbase_js.h" #include "mali_kbase_mem.h" -#include "mali_kbase_security.h" #include "mali_kbase_utility.h" #include "mali_kbase_gpu_memory_debugfs.h" #include "mali_kbase_mem_profile_debugfs.h" @@ -82,7 +81,7 @@ struct kbase_device *kbase_device_alloc(void); */ /* -* API to acquire device list semaphone and return pointer +* API to acquire device list semaphore and return 
pointer * to the device list head */ const struct list_head *kbase_dev_list_get(void); @@ -346,6 +345,10 @@ void kbase_disjoint_state_down(struct kbase_device *kbdev); */ #define KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD 2 +#if !defined(UINT64_MAX) + #define UINT64_MAX ((uint64_t)0xFFFFFFFFFFFFFFFFULL) +#endif + #if KBASE_TRACE_ENABLE void kbasep_trace_debugfs_init(struct kbase_device *kbdev); diff --git a/drivers/gpu/arm/midgard/mali_kbase_config.h b/drivers/gpu/arm/midgard/mali_kbase_config.h index f282b9aeb9de..41c885691a65 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_config.h +++ b/drivers/gpu/arm/midgard/mali_kbase_config.h @@ -189,6 +189,32 @@ struct kbase_pm_callback_conf { * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature. */ int (*power_runtime_on_callback)(struct kbase_device *kbdev); + + /* + * Optional callback for checking if GPU can be suspended when idle + * + * This callback will be called by the runtime power management core + * when the reference count goes to 0 to provide notification that the + * GPU now seems idle. + * + * If this callback finds that the GPU can't be powered off, or handles + * suspend by powering off directly or queueing up a power off, a + * non-zero value must be returned to prevent the runtime PM core from + * also triggering a suspend. + * + * Returning 0 will cause the runtime PM core to conduct a regular + * autosuspend. + * + * This callback is optional and if not provided regular autosuspend + * will be triggered. + * + * Note: The Linux kernel must have CONFIG_PM_RUNTIME enabled to use + * this feature. 
+ * + * Return 0 if GPU can be suspended, positive value if it can not be + * suspended by runtime PM, else OS error code + */ + int (*power_runtime_idle_callback)(struct kbase_device *kbdev); }; /** diff --git a/drivers/gpu/arm/midgard/mali_kbase_context.c b/drivers/gpu/arm/midgard/mali_kbase_context.c index 3a6e9f867206..798979963937 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_context.c +++ b/drivers/gpu/arm/midgard/mali_kbase_context.c @@ -262,9 +262,6 @@ int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags) if ((flags & BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) == 0) js_kctx_info->ctx.flags &= ~((u32) KBASE_CTX_FLAG_SUBMIT_DISABLED); - if ((flags & BASE_CONTEXT_HINT_ONLY_COMPUTE) != 0) - js_kctx_info->ctx.flags |= (u32) KBASE_CTX_FLAG_HINT_ONLY_COMPUTE; - /* Latch the initial attributes into the Job Scheduler */ kbasep_js_ctx_attr_set_initial_attrs(kctx->kbdev, kctx); diff --git a/drivers/gpu/arm/midgard/mali_kbase_core_linux.c b/drivers/gpu/arm/midgard/mali_kbase_core_linux.c index 85389eb1c27c..09688219d5f3 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_core_linux.c +++ b/drivers/gpu/arm/midgard/mali_kbase_core_linux.c @@ -35,6 +35,9 @@ #include "mali_kbase_debug_mem_view.h" #include "mali_kbase_mem.h" #include "mali_kbase_mem_pool_debugfs.h" +#if !MALI_CUSTOMER_RELEASE +#include "mali_kbase_regs_dump_debugfs.h" +#endif /* !MALI_CUSTOMER_RELEASE */ #include #include #include @@ -317,8 +320,10 @@ static int kbase_external_buffer_lock(struct kbase_context *kctx, resource_list_data.kds_resources, KDS_WAIT_BLOCKING); - if (IS_ERR_OR_NULL(lock)) { + if (!lock) { ret = -EINVAL; + } else if (IS_ERR(lock)) { + ret = PTR_ERR(lock); } else { ret = 0; fdata->lock = lock; @@ -487,6 +492,7 @@ static int kbase_dispatch(struct kbase_context *kctx, void * const args, u32 arg struct kbase_device *kbdev; union uk_header *ukh = args; u32 id; + int ret = 0; KBASE_DEBUG_ASSERT(ukh != NULL); @@ -567,11 +573,9 @@ static int kbase_dispatch(struct 
kbase_context *kctx, void * const args, u32 arg ukh->ret = MALI_ERROR_FUNCTION_FAILED; break; } - case KBASE_FUNC_MEM_IMPORT: - { + case KBASE_FUNC_MEM_IMPORT: { struct kbase_uk_mem_import *mem_import = args; - int __user *phandle; - int handle; + void __user *phandle; if (sizeof(*mem_import) != args_size) goto bad_size; @@ -582,26 +586,20 @@ static int kbase_dispatch(struct kbase_context *kctx, void * const args, u32 arg #endif phandle = mem_import->phandle.value; - switch (mem_import->type) { - case BASE_MEM_IMPORT_TYPE_UMP: - get_user(handle, phandle); - break; - case BASE_MEM_IMPORT_TYPE_UMM: - get_user(handle, phandle); - break; - default: - mem_import->type = BASE_MEM_IMPORT_TYPE_INVALID; + if (mem_import->type == BASE_MEM_IMPORT_TYPE_INVALID) { + ukh->ret = MALI_ERROR_FUNCTION_FAILED; break; } - if (mem_import->type == BASE_MEM_IMPORT_TYPE_INVALID || - kbase_mem_import(kctx, mem_import->type, - handle, &mem_import->gpu_va, - &mem_import->va_pages, - &mem_import->flags)) + if (kbase_mem_import(kctx, mem_import->type, phandle, + &mem_import->gpu_va, + &mem_import->va_pages, + &mem_import->flags)) { + mem_import->type = BASE_MEM_IMPORT_TYPE_INVALID; ukh->ret = MALI_ERROR_FUNCTION_FAILED; + } break; - } + } case KBASE_FUNC_MEM_ALIAS: { struct kbase_uk_mem_alias *alias = args; struct base_mem_aliasing_info __user *user_ai; @@ -811,18 +809,10 @@ copy_failed: case KBASE_FUNC_HWCNT_SETUP: { struct kbase_uk_hwcnt_setup *setup = args; - bool access_allowed; if (sizeof(*setup) != args_size) goto bad_size; - access_allowed = kbase_security_has_capability( - kctx, - KBASE_SEC_INSTR_HW_COUNTERS_COLLECT, - KBASE_SEC_FLAG_NOAUDIT); - if (!access_allowed) - goto out_bad; - mutex_lock(&kctx->vinstr_cli_lock); if (kbase_vinstr_legacy_hwc_setup(kbdev->vinstr_ctx, &kctx->vinstr_cli, setup) != 0) @@ -855,18 +845,10 @@ copy_failed: case KBASE_FUNC_HWCNT_READER_SETUP: { struct kbase_uk_hwcnt_reader_setup *setup = args; - bool access_allowed; if (sizeof(*setup) != args_size) goto 
bad_size; - access_allowed = kbase_security_has_capability( - kctx, - KBASE_SEC_INSTR_HW_COUNTERS_COLLECT, - KBASE_SEC_FLAG_NOAUDIT); - if (!access_allowed) - goto out_bad; - mutex_lock(&kctx->vinstr_cli_lock); if (kbase_vinstr_hwcnt_reader_setup(kbdev->vinstr_ctx, setup) != 0) @@ -971,9 +953,10 @@ copy_failed: case KBASE_FUNC_EXT_BUFFER_LOCK: { #ifdef CONFIG_KDS - switch (kbase_external_buffer_lock(kctx, + ret = kbase_external_buffer_lock(kctx, (struct kbase_uk_ext_buff_kds_data *)args, - args_size)) { + args_size); + switch (ret) { case 0: ukh->ret = MALI_ERROR_NONE; break; @@ -1108,11 +1091,30 @@ copy_failed: kfree(buf); goto out_bad; } - kbasep_mem_profile_debugfs_insert(kctx, buf, - add_data->len); + + if (kbasep_mem_profile_debugfs_insert(kctx, buf, + add_data->len)) { + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + kfree(buf); + goto out_bad; + } break; } + +#ifdef CONFIG_MALI_NO_MALI + case KBASE_FUNC_SET_PRFCNT_VALUES: + { + + struct kbase_uk_prfcnt_values *params = + ((struct kbase_uk_prfcnt_values *)args); + gpu_model_set_dummy_prfcnt_sample(params->data, + params->size); + + break; + } +#endif /* CONFIG_MALI_NO_MALI */ + #ifdef CONFIG_MALI_MIPE_ENABLED case KBASE_FUNC_TLSTREAM_ACQUIRE: { @@ -1188,7 +1190,7 @@ copy_failed: goto out_bad; } - return 0; + return ret; bad_size: dev_err(kbdev->dev, "Wrong syscall size (%d) for %08x\n", args_size, id); @@ -1270,6 +1272,7 @@ static int kbase_open(struct inode *inode, struct file *filp) init_waitqueue_head(&kctx->event_queue); filp->private_data = kctx; + kctx->filp = filp; kctx->infinite_cache_active = kbdev->infinite_cache_active_default; @@ -1292,7 +1295,8 @@ static int kbase_open(struct inode *inode, struct file *filp) debugfs_create_bool("infinite_cache", 0644, kctx->kctx_dentry, &kctx->infinite_cache_active); #endif /* CONFIG_MALI_COH_USER */ - kbasep_mem_profile_debugfs_add(kctx); + + mutex_init(&kctx->mem_profile_lock); kbasep_jd_debugfs_ctx_add(kctx); kbase_debug_mem_view_init(filp); @@ -1345,7 +1349,7 @@ 
static int kbase_release(struct inode *inode, struct file *filp) #ifdef CONFIG_DEBUG_FS debugfs_remove_recursive(kctx->kctx_dentry); kbasep_mem_profile_debugfs_remove(kctx); - kbase_debug_job_fault_context_exit(kctx); + kbase_debug_job_fault_context_term(kctx); #endif mutex_lock(&kbdev->kctx_list_lock); @@ -1822,7 +1826,15 @@ static ssize_t show_core_mask(struct device *dev, struct device_attribute *attr, if (!kbdev) return -ENODEV; - ret += scnprintf(buf + ret, PAGE_SIZE - ret, "Current core mask : 0x%llX\n", kbdev->pm.debug_core_mask); + ret += scnprintf(buf + ret, PAGE_SIZE - ret, + "Current core mask (JS0) : 0x%llX\n", + kbdev->pm.debug_core_mask[0]); + ret += scnprintf(buf + ret, PAGE_SIZE - ret, + "Current core mask (JS1) : 0x%llX\n", + kbdev->pm.debug_core_mask[1]); + ret += scnprintf(buf + ret, PAGE_SIZE - ret, + "Current core mask (JS2) : 0x%llX\n", + kbdev->pm.debug_core_mask[2]); ret += scnprintf(buf + ret, PAGE_SIZE - ret, "Available core mask : 0x%llX\n", kbdev->gpu_props.props.raw_props.shader_present); @@ -1844,36 +1856,63 @@ static ssize_t show_core_mask(struct device *dev, struct device_attribute *attr, static ssize_t set_core_mask(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct kbase_device *kbdev; - u64 new_core_mask; - int rc; + u64 new_core_mask[3]; + int items; kbdev = to_kbase_device(dev); if (!kbdev) return -ENODEV; - rc = kstrtoull(buf, 16, &new_core_mask); - if (rc) - return rc; + items = sscanf(buf, "%llx %llx %llx", + &new_core_mask[0], &new_core_mask[1], + &new_core_mask[2]); + + if (items == 1) + new_core_mask[1] = new_core_mask[2] = new_core_mask[0]; + + if (items == 1 || items == 3) { + u64 shader_present = + kbdev->gpu_props.props.raw_props.shader_present; + u64 group0_core_mask = + kbdev->gpu_props.props.coherency_info.group[0]. 
+ core_mask; + + if ((new_core_mask[0] & shader_present) != new_core_mask[0] || + !(new_core_mask[0] & group0_core_mask) || + (new_core_mask[1] & shader_present) != + new_core_mask[1] || + !(new_core_mask[1] & group0_core_mask) || + (new_core_mask[2] & shader_present) != + new_core_mask[2] || + !(new_core_mask[2] & group0_core_mask)) { + dev_err(dev, "power_policy: invalid core specification\n"); + return -EINVAL; + } - if ((new_core_mask & kbdev->gpu_props.props.raw_props.shader_present) - != new_core_mask || - !(new_core_mask & kbdev->gpu_props.props.coherency_info.group[0].core_mask)) { - dev_err(dev, "power_policy: invalid core specification\n"); - return -EINVAL; - } + if (kbdev->pm.debug_core_mask[0] != new_core_mask[0] || + kbdev->pm.debug_core_mask[1] != + new_core_mask[1] || + kbdev->pm.debug_core_mask[2] != + new_core_mask[2]) { + unsigned long flags; - if (kbdev->pm.debug_core_mask != new_core_mask) { - unsigned long flags; + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); - spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + kbase_pm_set_debug_core_mask(kbdev, new_core_mask[0], + new_core_mask[1], new_core_mask[2]); - kbase_pm_set_debug_core_mask(kbdev, new_core_mask); + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, + flags); + } - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + return count; } - return count; + dev_err(kbdev->dev, "Couldn't process set_core_mask write operation.\n" + "Use format \n" + "or \n"); + return -EINVAL; } /** The sysfs file @c core_mask. 
@@ -1884,122 +1923,6 @@ static ssize_t set_core_mask(struct device *dev, struct device_attribute *attr, */ static DEVICE_ATTR(core_mask, S_IRUGO | S_IWUSR, show_core_mask, set_core_mask); -#ifdef CONFIG_MALI_DEBUG_SHADER_SPLIT_FS -/** - * struct sc_split_config - * @tag: Short name - * @human_readable: Long name - * @js0_mask: Mask for job slot 0 - * @js1_mask: Mask for job slot 1 - * @js2_mask: Mask for job slot 2 - * - * Structure containing a single shader affinity split configuration. - */ -struct sc_split_config { - char const *tag; - char const *human_readable; - u64 js0_mask; - u64 js1_mask; - u64 js2_mask; -}; - -/* - * Array of available shader affinity split configurations. - */ -static struct sc_split_config const sc_split_configs[] = { - /* All must be the first config (default). */ - { - "all", "All cores", - 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL - }, - { - "mp1", "MP1 shader core", - 0x1, 0x1, 0x1 - }, - { - "mp2", "MP2 shader core", - 0x3, 0x3, 0x3 - }, - { - "mp4", "MP4 shader core", - 0xF, 0xF, 0xF - }, - { - "mp1_vf", "MP1 vertex + MP1 fragment shader core", - 0x2, 0x1, 0xFFFFFFFFFFFFFFFFULL - }, - { - "mp2_vf", "MP2 vertex + MP2 fragment shader core", - 0xA, 0x5, 0xFFFFFFFFFFFFFFFFULL - }, - /* This must be the last config. */ - { - NULL, NULL, - 0x0, 0x0, 0x0 - }, -}; - -/* Pointer to the currently active shader split configuration. */ -static struct sc_split_config const *current_sc_split_config = &sc_split_configs[0]; - -/** Show callback for the @c sc_split sysfs file - * - * Returns the current shader core affinity policy. - */ -static ssize_t show_split(struct device *dev, struct device_attribute *attr, char * const buf) -{ - ssize_t ret; - /* We know we are given a buffer which is PAGE_SIZE long. Our strings are all guaranteed - * to be shorter than that at this time so no length check needed. 
*/ - ret = scnprintf(buf, PAGE_SIZE, "Current sc_split: '%s'\n", current_sc_split_config->tag); - return ret; -} - -/** Store callback for the @c sc_split sysfs file. - * - * This function is called when the @c sc_split sysfs file is written to - * It modifies the system shader core affinity configuration to allow - * system profiling with different hardware configurations. - * - * @param dev The device with sysfs file is for - * @param attr The attributes of the sysfs file - * @param buf The value written to the sysfs file - * @param count The number of bytes written to the sysfs file - * - * @return @c count if the function succeeded. An error code on failure. - */ -static ssize_t set_split(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) -{ - struct sc_split_config const *config = &sc_split_configs[0]; - - /* Try to match: loop until we hit the last "NULL" entry */ - while (config->tag) { - if (sysfs_streq(config->tag, buf)) { - current_sc_split_config = config; - mali_js0_affinity_mask = config->js0_mask; - mali_js1_affinity_mask = config->js1_mask; - mali_js2_affinity_mask = config->js2_mask; - dev_dbg(dev, "Setting sc_split: '%s'\n", config->tag); - return count; - } - config++; - } - - /* No match found in config list */ - dev_err(dev, "sc_split: invalid value\n"); - dev_err(dev, " Possible settings: mp[1|2|4], mp[1|2]_vf\n"); - return -ENOENT; -} - -/** The sysfs file @c sc_split - * - * This is used for configuring/querying the current shader core work affinity - * configuration. - */ -static DEVICE_ATTR(sc_split, S_IRUGO|S_IWUSR, show_split, set_split); -#endif /* CONFIG_MALI_DEBUG_SHADER_SPLIT_FS */ - - /** Store callback for the @c js_timeouts sysfs file. 
* * This function is called to get the contents of the @c js_timeouts sysfs @@ -2735,8 +2658,9 @@ static ssize_t kbase_show_gpuinfo(struct device *dev, const char *product_name = "(Unknown Mali GPU)"; struct kbase_device *kbdev; u32 gpu_id; - unsigned product_id; + unsigned product_id, product_id_mask; unsigned i; + bool is_new_format; kbdev = to_kbase_device(dev); if (!kbdev) @@ -2744,10 +2668,20 @@ static ssize_t kbase_show_gpuinfo(struct device *dev, gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; product_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT; + is_new_format = GPU_ID_IS_NEW_FORMAT(product_id); + product_id_mask = + (is_new_format ? + GPU_ID2_PRODUCT_MODEL : + GPU_ID_VERSION_PRODUCT_ID) >> + GPU_ID_VERSION_PRODUCT_ID_SHIFT; for (i = 0; i < ARRAY_SIZE(gpu_product_id_names); ++i) { - if (gpu_product_id_names[i].id == product_id) { - product_name = gpu_product_id_names[i].name; + const struct gpu_product_id_name *p = &gpu_product_id_names[i]; + + if ((GPU_ID_IS_NEW_FORMAT(p->id) == is_new_format) && + (p->id & product_id_mask) == + (product_id & product_id_mask)) { + product_name = p->name; break; } } @@ -3202,7 +3136,12 @@ static int kbase_device_debugfs_init(struct kbase_device *kbdev) err = -ENOMEM; goto out; } - kbase_debug_job_fault_dev_init(kbdev); + +#if !MALI_CUSTOMER_RELEASE + kbasep_regs_dump_debugfs_add(kbdev); +#endif /* !MALI_CUSTOMER_RELEASE */ + + kbase_debug_job_fault_debugfs_init(kbdev); kbasep_gpu_memory_debugfs_init(kbdev); #if KBASE_GPU_RESET_EN debugfs_create_file("quirks_sc", 0644, @@ -3261,21 +3200,19 @@ static inline void kbase_device_debugfs_term(struct kbase_device *kbdev) { } static void kbase_device_coherency_init(struct kbase_device *kbdev, u32 gpu_id) { - u32 selected_coherency = COHERENCY_NONE; - /* COHERENCY_NONE is always supported */ - u32 supported_coherency_bitmap = COHERENCY_FEATURE_BIT(COHERENCY_NONE); - #ifdef CONFIG_OF + u32 supported_coherency_bitmap = + kbdev->gpu_props.props.raw_props.coherency_mode; const void 
*coherency_override_dts; u32 override_coherency; #endif /* CONFIG_OF */ - kbdev->system_coherency = selected_coherency; + kbdev->system_coherency = COHERENCY_NONE; /* device tree may override the coherency */ #ifdef CONFIG_OF coherency_override_dts = of_get_property(kbdev->dev->of_node, - "override-coherency", + "system-coherency", NULL); if (coherency_override_dts) { @@ -3288,17 +3225,17 @@ static void kbase_device_coherency_init(struct kbase_device *kbdev, u32 gpu_id) kbdev->system_coherency = override_coherency; dev_info(kbdev->dev, - "Using coherency override, mode %u set from dtb", + "Using coherency mode %u set from dtb", override_coherency); } else dev_warn(kbdev->dev, - "Ignoring invalid coherency override, mode %u set from dtb", + "Ignoring unsupported coherency mode %u set from dtb", override_coherency); } #endif /* CONFIG_OF */ - kbdev->gpu_props.props.raw_props.coherency_features = + kbdev->gpu_props.props.raw_props.coherency_mode = kbdev->system_coherency; } @@ -3337,7 +3274,8 @@ static int kbase_common_device_init(struct kbase_device *kbdev) inited_backend_late = (1u << 12), inited_device = (1u << 13), inited_vinstr = (1u << 19), - inited_ipa = (1u << 20) + inited_ipa = (1u << 20), + inited_job_fault = (1u << 21) }; int inited = 0; @@ -3354,7 +3292,7 @@ static int kbase_common_device_init(struct kbase_device *kbdev) inited |= inited_backend_early; scnprintf(kbdev->devname, DEVNAME_SIZE, "%s%d", kbase_drv_name, - kbase_dev_nr++); + kbase_dev_nr); kbase_disjoint_init(kbdev); @@ -3405,22 +3343,6 @@ static int kbase_common_device_init(struct kbase_device *kbdev) inited |= inited_device; - kbdev->vinstr_ctx = kbase_vinstr_init(kbdev); - if (!kbdev->vinstr_ctx) { - dev_err(kbdev->dev, "Can't initialize virtual instrumentation core\n"); - goto out_partial; - } - - inited |= inited_vinstr; - - kbdev->ipa_ctx = kbase_ipa_init(kbdev); - if (!kbdev->ipa_ctx) { - dev_err(kbdev->dev, "Can't initialize IPA\n"); - goto out_partial; - } - - inited |= inited_ipa; - if 
(kbdev->pm.callback_power_runtime_init) { err = kbdev->pm.callback_power_runtime_init(kbdev); if (err) @@ -3474,6 +3396,28 @@ static int kbase_common_device_init(struct kbase_device *kbdev) inited |= inited_devfreq; #endif /* CONFIG_MALI_DEVFREQ */ + kbdev->vinstr_ctx = kbase_vinstr_init(kbdev); + if (!kbdev->vinstr_ctx) { + dev_err(kbdev->dev, "Can't initialize virtual instrumentation core\n"); + goto out_partial; + } + + inited |= inited_vinstr; + + kbdev->ipa_ctx = kbase_ipa_init(kbdev); + if (!kbdev->ipa_ctx) { + dev_err(kbdev->dev, "Can't initialize IPA\n"); + goto out_partial; + } + + inited |= inited_ipa; + + err = kbase_debug_job_fault_dev_init(kbdev); + if (err) + goto out_partial; + + inited |= inited_job_fault; + err = kbase_device_debugfs_init(kbdev); if (err) goto out_partial; @@ -3502,12 +3446,16 @@ static int kbase_common_device_init(struct kbase_device *kbdev) dev_info(kbdev->dev, "Probed as %s\n", dev_name(kbdev->mdev.this_device)); + kbase_dev_nr++; + return 0; out_misc: put_device(kbdev->dev); kbase_device_debugfs_term(kbdev); out_partial: + if (inited & inited_job_fault) + kbase_debug_job_fault_dev_term(kbdev); if (inited & inited_ipa) kbase_ipa_term(kbdev->ipa_ctx); if (inited & inited_vinstr) @@ -3551,9 +3499,6 @@ out_partial: static struct attribute *kbase_attrs[] = { -#ifdef CONFIG_MALI_DEBUG_SHADER_SPLIT_FS - &dev_attr_sc_split.attr, -#endif #ifdef CONFIG_MALI_DEBUG &dev_attr_debug_command.attr, &dev_attr_js_softstop_always.attr, @@ -3668,8 +3613,12 @@ static int kbase_platform_device_probe(struct platform_device *pdev) && defined(CONFIG_REGULATOR) kbdev->regulator = regulator_get_optional(kbdev->dev, "mali"); if (IS_ERR_OR_NULL(kbdev->regulator)) { - dev_info(kbdev->dev, "Continuing without Mali regulator control\n"); + err = PTR_ERR(kbdev->regulator); + kbdev->regulator = NULL; + if (err == -EPROBE_DEFER) + goto out_regulator; + dev_info(kbdev->dev, "Continuing without Mali regulator control\n"); /* Allow probe to continue without 
regulator */ } #endif /* LINUX_VERSION_CODE >= 3, 12, 0 */ @@ -3677,10 +3626,15 @@ static int kbase_platform_device_probe(struct platform_device *pdev) #ifdef CONFIG_MALI_PLATFORM_DEVICETREE pm_runtime_enable(kbdev->dev); #endif + kbdev->clock = clk_get(kbdev->dev, "clk_mali"); if (IS_ERR_OR_NULL(kbdev->clock)) { - dev_info(kbdev->dev, "Continuing without Mali clock control\n"); + err = PTR_ERR(kbdev->clock); + kbdev->clock = NULL; + if (err == -EPROBE_DEFER) + goto out_clock_prepare; + dev_info(kbdev->dev, "Continuing without Mali clock control\n"); /* Allow probe to continue without clock. */ } else { err = clk_prepare_enable(kbdev->clock); @@ -3744,6 +3698,7 @@ out_clock_prepare: #endif #if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \ && defined(CONFIG_REGULATOR) +out_regulator: regulator_put(kbdev->regulator); #endif /* LINUX_VERSION_CODE >= 3, 12, 0 */ kbase_common_reg_unmap(kbdev); @@ -3764,6 +3719,7 @@ out: static int kbase_common_device_remove(struct kbase_device *kbdev) { + kbase_debug_job_fault_dev_term(kbdev); kbase_ipa_term(kbdev->ipa_ctx); kbase_vinstr_term(kbdev->vinstr_ctx); sysfs_remove_group(&kbdev->dev->kobj, &kbase_attr_group); @@ -3946,21 +3902,30 @@ int kbase_device_runtime_resume(struct device *dev) } #endif /* KBASE_PM_RUNTIME */ -/** Runtime idle callback from the OS. - * - * This is called by Linux when the device appears to be inactive and it might be - * placed into a low power state + +#ifdef KBASE_PM_RUNTIME +/** + * kbase_device_runtime_idle - Runtime idle callback from the OS. + * @dev: The device to suspend * - * @param dev The device to suspend + * This is called by Linux when the device appears to be inactive and it might + * be placed into a low power state. 
* - * @return A standard Linux error code + * Return: 0 if device can be suspended, non-zero to avoid runtime autosuspend, + * otherwise a standard Linux error code */ - -#ifdef KBASE_PM_RUNTIME static int kbase_device_runtime_idle(struct device *dev) { - /* Avoid pm_runtime_suspend being called */ - return 1; + struct kbase_device *kbdev = to_kbase_device(dev); + + if (!kbdev) + return -ENODEV; + + /* Use platform specific implementation if it exists. */ + if (kbdev->pm.backend.callback_power_runtime_idle) + return kbdev->pm.backend.callback_power_runtime_idle(kbdev); + + return 0; } #endif /* KBASE_PM_RUNTIME */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c b/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c index 41ce05130d8f..a6ddaa0af7c6 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c +++ b/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c @@ -386,7 +386,18 @@ static const struct file_operations kbasep_debug_job_fault_fops = { .release = debug_job_fault_release, }; -static int kbase_job_fault_event_init(struct kbase_device *kbdev) +/* + * Initialize debugfs entry for job fault dump + */ +void kbase_debug_job_fault_debugfs_init(struct kbase_device *kbdev) +{ + debugfs_create_file("job_fault", S_IRUGO, + kbdev->mali_debugfs_directory, kbdev, + &kbasep_debug_job_fault_fops); +} + + +int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev) { INIT_LIST_HEAD(&kbdev->job_fault_event_list); @@ -396,24 +407,23 @@ static int kbase_job_fault_event_init(struct kbase_device *kbdev) kbdev->job_fault_resume_workq = alloc_workqueue( "kbase_job_fault_resume_work_queue", WQ_MEM_RECLAIM, 1); + if (!kbdev->job_fault_resume_workq) + return -ENOMEM; + + kbdev->job_fault_debug = false; return 0; } /* - * Initialize debugfs entry for job fault dump + * Release the relevant resource per device */ -void kbase_debug_job_fault_dev_init(struct kbase_device *kbdev) +void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev) { - 
debugfs_create_file("job_fault", S_IRUGO, - kbdev->mali_debugfs_directory, kbdev, - &kbasep_debug_job_fault_fops); - - kbase_job_fault_event_init(kbdev); - kbdev->job_fault_debug = false; - + destroy_workqueue(kbdev->job_fault_resume_workq); } + /* * Initialize the relevant data structure per context */ @@ -439,9 +449,22 @@ void kbase_debug_job_fault_context_init(struct kbase_context *kctx) /* * release the relevant resource per context */ -void kbase_debug_job_fault_context_exit(struct kbase_context *kctx) +void kbase_debug_job_fault_context_term(struct kbase_context *kctx) { kfree(kctx->reg_dump); } -#endif +#else /* CONFIG_DEBUG_FS */ + +int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev) +{ + kbdev->job_fault_debug = false; + + return 0; +} + +void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev) +{ +} + +#endif /* CONFIG_DEBUG_FS */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h b/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h index 3734046f3fd9..0930f905e4ef 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h +++ b/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h @@ -25,11 +25,26 @@ #define REGISTER_DUMP_TERMINATION_FLAG 0xFFFFFFFF /** - * kbase_debug_job_fault_dev_init - Initialize job fault debug sysfs - * and create the fault event wait queue per device + * kbase_debug_job_fault_dev_init - Create the fault event wait queue + * per device and initialize the required lists. + * @kbdev: Device pointer + * + * Return: Zero on success or a negative error code. + */ +int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev); + +/** + * kbase_debug_job_fault_debugfs_init - Initialize job fault debug sysfs + * @kbdev: Device pointer + */ +void kbase_debug_job_fault_debugfs_init(struct kbase_device *kbdev); + +/** + * kbase_debug_job_fault_dev_term - Clean up resources created in + * kbase_debug_job_fault_dev_init. 
* @kbdev: Device pointer */ -void kbase_debug_job_fault_dev_init(struct kbase_device *kbdev); +void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev); /** * kbase_debug_job_fault_context_init - Initialize the relevant @@ -39,11 +54,11 @@ void kbase_debug_job_fault_dev_init(struct kbase_device *kbdev); void kbase_debug_job_fault_context_init(struct kbase_context *kctx); /** - * kbase_debug_job_fault_context_exit - Release the relevant + * kbase_debug_job_fault_context_term - Release the relevant * resource per context * @kctx: KBase context pointer */ -void kbase_debug_job_fault_context_exit(struct kbase_context *kctx); +void kbase_debug_job_fault_context_term(struct kbase_context *kctx); /** * kbase_debug_job_fault_process - Process the failed job. diff --git a/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c b/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c index 1a3198e5b535..42d1d832c0a3 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c +++ b/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2013-2014 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -25,7 +25,7 @@ #include #include -#if CONFIG_DEBUG_FS +#ifdef CONFIG_DEBUG_FS struct debug_mem_mapping { struct list_head node; diff --git a/drivers/gpu/arm/midgard/mali_kbase_defs.h b/drivers/gpu/arm/midgard/mali_kbase_defs.h index fe5f1046fd73..b4e5809a3d54 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_defs.h +++ b/drivers/gpu/arm/midgard/mali_kbase_defs.h @@ -38,6 +38,7 @@ #include #include #include +#include #ifdef CONFIG_MALI_FPGA_BUS_LOGGER #include @@ -664,10 +665,11 @@ struct kbase_pm_device_data { wait_queue_head_t zero_active_count_wait; /** - * A bit mask identifying the available shader cores that are specified - * via sysfs + * Bit masks identifying the available shader cores that are specified + * via sysfs. One mask per job slot. */ - u64 debug_core_mask; + u64 debug_core_mask[BASE_JM_MAX_NR_SLOTS]; + u64 debug_core_mask_all; /** * Lock protecting the power state of the device. @@ -953,6 +955,12 @@ struct kbase_device { struct kbase_trace_kbdev_timeline timeline; #endif + /* + * Control for enabling job dump on failure, set when control debugfs + * is opened. 
+ */ + bool job_fault_debug; + #ifdef CONFIG_DEBUG_FS /* directory for debugfs entries */ struct dentry *mali_debugfs_directory; @@ -960,13 +968,19 @@ struct kbase_device { struct dentry *debugfs_ctx_directory; /* failed job dump, used for separate debug process */ - bool job_fault_debug; wait_queue_head_t job_fault_wq; wait_queue_head_t job_fault_resume_wq; struct workqueue_struct *job_fault_resume_workq; struct list_head job_fault_event_list; struct kbase_context *kctx_fault; +#if !MALI_CUSTOMER_RELEASE + /* Per-device data for register dumping interface */ + struct { + u16 reg_offset; /* Offset of a GPU_CONTROL register to be + dumped upon request */ + } regs_dump_debugfs_data; +#endif /* !MALI_CUSTOMER_RELEASE */ #endif /* CONFIG_DEBUG_FS */ /* fbdump profiling controls set by gator */ @@ -1034,6 +1048,8 @@ struct kbase_device { */ struct bus_logger_client *buslogger; #endif + /* Boolean indicating if an IRQ flush during reset is in progress. */ + bool irq_reset_flush; }; /* JSCTX ringbuffer size must always be a power of 2 */ @@ -1080,14 +1096,16 @@ struct jsctx_rb { ((0 & 0xFF) << 0)) struct kbase_context { + struct file *filp; struct kbase_device *kbdev; int id; /* System wide unique id */ unsigned long api_version; phys_addr_t pgd; struct list_head event_list; struct mutex event_mutex; - bool event_closed; + atomic_t event_closed; struct workqueue_struct *event_workq; + atomic_t event_count; bool is_compat; @@ -1148,8 +1166,10 @@ struct kbase_context { char *mem_profile_data; /* Size of @c mem_profile_data */ size_t mem_profile_size; - /* Spinlock guarding data */ - spinlock_t mem_profile_lock; + /* Mutex guarding memory profile state */ + struct mutex mem_profile_lock; + /* Memory profile file created */ + bool mem_profile_initialized; struct dentry *kctx_dentry; /* for job fault debug */ @@ -1210,6 +1230,21 @@ enum kbase_share_attr_bits { SHARE_INNER_BITS = (3ULL << 8) /* inner shareable coherency */ }; +/** + * kbase_device_is_cpu_coherent - Returns if 
the device is CPU coherent. + * @kbdev: kbase device + * + * Return: true if the device access are coherent, false if not. + */ +static inline bool kbase_device_is_cpu_coherent(struct kbase_device *kbdev) +{ + if ((kbdev->system_coherency == COHERENCY_ACE_LITE) || + (kbdev->system_coherency == COHERENCY_ACE)) + return true; + + return false; +} + /* Conversion helpers for setting up high resolution timers */ #define HR_TIMER_DELAY_MSEC(x) (ns_to_ktime((x)*1000000U)) #define HR_TIMER_DELAY_NSEC(x) (ns_to_ktime(x)) diff --git a/drivers/gpu/arm/midgard/mali_kbase_device.c b/drivers/gpu/arm/midgard/mali_kbase_device.c index 6b8a2854a7a7..f0fffe611989 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_device.c +++ b/drivers/gpu/arm/midgard/mali_kbase_device.c @@ -159,6 +159,8 @@ int kbase_device_init(struct kbase_device * const kbdev) */ kbase_hw_set_features_mask(kbdev); + kbase_gpuprops_set_features(kbdev); + /* On Linux 4.0+, dma coherency is determined from device tree */ #if defined(CONFIG_ARM64) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0) set_dma_ops(kbdev->dev, &noncoherent_swiotlb_dma_ops); @@ -206,7 +208,7 @@ int kbase_device_init(struct kbase_device * const kbdev) for (i = 0; i < FBDUMP_CONTROL_MAX; i++) kbdev->kbase_profiling_controls[i] = 0; - kbase_debug_assert_register_hook(&kbasep_trace_hook_wrapper, kbdev); + kbase_debug_assert_register_hook(&kbasep_trace_hook_wrapper, kbdev); atomic_set(&kbdev->ctx_num, 0); diff --git a/drivers/gpu/arm/midgard/mali_kbase_event.c b/drivers/gpu/arm/midgard/mali_kbase_event.c index 25b30f0b976e..0d0c5258aaa4 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_event.c +++ b/drivers/gpu/arm/midgard/mali_kbase_event.c @@ -52,15 +52,10 @@ static struct base_jd_udata kbase_event_process(struct kbase_context *kctx, stru int kbase_event_pending(struct kbase_context *ctx) { - int ret; - KBASE_DEBUG_ASSERT(ctx); - mutex_lock(&ctx->event_mutex); - ret = (!list_empty(&ctx->event_list)) || (true == ctx->event_closed); - 
mutex_unlock(&ctx->event_mutex); - - return ret; + return (atomic_read(&ctx->event_count) != 0) || + (atomic_read(&ctx->event_closed) != 0); } KBASE_EXPORT_TEST_API(kbase_event_pending); @@ -74,7 +69,7 @@ int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *ueve mutex_lock(&ctx->event_mutex); if (list_empty(&ctx->event_list)) { - if (!ctx->event_closed) { + if (!atomic_read(&ctx->event_closed)) { mutex_unlock(&ctx->event_mutex); return -1; } @@ -90,6 +85,7 @@ int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *ueve } /* normal event processing */ + atomic_dec(&ctx->event_count); atom = list_entry(ctx->event_list.next, struct kbase_jd_atom, dep_item[0]); list_del(ctx->event_list.next); @@ -168,6 +164,7 @@ void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *atom) } mutex_lock(&ctx->event_mutex); + atomic_inc(&ctx->event_count); list_add_tail(&atom->dep_item[0], &ctx->event_list); mutex_unlock(&ctx->event_mutex); @@ -178,7 +175,7 @@ KBASE_EXPORT_TEST_API(kbase_event_post); void kbase_event_close(struct kbase_context *kctx) { mutex_lock(&kctx->event_mutex); - kctx->event_closed = true; + atomic_set(&kctx->event_closed, true); mutex_unlock(&kctx->event_mutex); kbase_event_wakeup(kctx); } @@ -189,7 +186,8 @@ int kbase_event_init(struct kbase_context *kctx) INIT_LIST_HEAD(&kctx->event_list); mutex_init(&kctx->event_mutex); - kctx->event_closed = false; + atomic_set(&kctx->event_count, 0); + atomic_set(&kctx->event_closed, false); kctx->event_workq = alloc_workqueue("kbase_event", WQ_MEM_RECLAIM, 1); if (NULL == kctx->event_workq) diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h b/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h new file mode 100644 index 000000000000..dc8af2d6e794 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h @@ -0,0 +1,110 @@ +/* + * + * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + +#ifndef _KBASE_GPU_ID_H_ +#define _KBASE_GPU_ID_H_ + +/* GPU_ID register */ +#define GPU_ID_VERSION_STATUS_SHIFT 0 +#define GPU_ID_VERSION_MINOR_SHIFT 4 +#define GPU_ID_VERSION_MAJOR_SHIFT 12 +#define GPU_ID_VERSION_PRODUCT_ID_SHIFT 16 +#define GPU_ID_VERSION_STATUS (0xF << GPU_ID_VERSION_STATUS_SHIFT) +#define GPU_ID_VERSION_MINOR (0xFF << GPU_ID_VERSION_MINOR_SHIFT) +#define GPU_ID_VERSION_MAJOR (0xF << GPU_ID_VERSION_MAJOR_SHIFT) +#define GPU_ID_VERSION_PRODUCT_ID (0xFFFF << GPU_ID_VERSION_PRODUCT_ID_SHIFT) + +/* Values for GPU_ID_VERSION_PRODUCT_ID bitfield */ +#define GPU_ID_PI_T60X 0x6956 +#define GPU_ID_PI_T62X 0x0620 +#define GPU_ID_PI_T76X 0x0750 +#define GPU_ID_PI_T72X 0x0720 +#define GPU_ID_PI_TFRX 0x0880 +#define GPU_ID_PI_T86X 0x0860 +#define GPU_ID_PI_T82X 0x0820 +#define GPU_ID_PI_T83X 0x0830 + +/* New GPU ID format when PRODUCT_ID is >= 0x1000 (and not 0x6956) */ +#define GPU_ID_PI_NEW_FORMAT_START 0x1000 +#define GPU_ID_IS_NEW_FORMAT(product_id) ((product_id) != GPU_ID_PI_T60X && \ + (product_id) >= \ + GPU_ID_PI_NEW_FORMAT_START) + +#define GPU_ID2_VERSION_STATUS_SHIFT 0 +#define GPU_ID2_VERSION_MINOR_SHIFT 4 +#define GPU_ID2_VERSION_MAJOR_SHIFT 12 +#define GPU_ID2_PRODUCT_MAJOR_SHIFT 16 +#define GPU_ID2_ARCH_REV_SHIFT 20 +#define GPU_ID2_ARCH_MINOR_SHIFT 24 +#define GPU_ID2_ARCH_MAJOR_SHIFT 28 +#define GPU_ID2_VERSION_STATUS (0xF << GPU_ID2_VERSION_STATUS_SHIFT) +#define GPU_ID2_VERSION_MINOR (0xFF << GPU_ID2_VERSION_MINOR_SHIFT) +#define GPU_ID2_VERSION_MAJOR (0xF << 
GPU_ID2_VERSION_MAJOR_SHIFT) +#define GPU_ID2_PRODUCT_MAJOR (0xF << GPU_ID2_PRODUCT_MAJOR_SHIFT) +#define GPU_ID2_ARCH_REV (0xF << GPU_ID2_ARCH_REV_SHIFT) +#define GPU_ID2_ARCH_MINOR (0xF << GPU_ID2_ARCH_MINOR_SHIFT) +#define GPU_ID2_ARCH_MAJOR (0xF << GPU_ID2_ARCH_MAJOR_SHIFT) +#define GPU_ID2_PRODUCT_MODEL (GPU_ID2_ARCH_MAJOR | GPU_ID2_PRODUCT_MAJOR) + +/* Helper macro to create a partial GPU_ID (new format) that defines + a product ignoring its version. */ +#define GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, product_major) \ + (((arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT) | \ + ((arch_minor) << GPU_ID2_ARCH_MINOR_SHIFT) | \ + ((arch_rev) << GPU_ID2_ARCH_REV_SHIFT) | \ + ((product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT)) + +/* Helper macro to create a partial GPU_ID (new format) that specifies the + revision (major, minor, status) of a product */ +#define GPU_ID2_VERSION_MAKE(version_major, version_minor, version_status) \ + (((version_major) << GPU_ID2_VERSION_MAJOR_SHIFT) | \ + ((version_minor) << GPU_ID2_VERSION_MINOR_SHIFT) | \ + ((version_status) << GPU_ID2_VERSION_STATUS_SHIFT)) + +/* Helper macro to create a complete GPU_ID (new format) */ +#define GPU_ID2_MAKE(arch_major, arch_minor, arch_rev, product_major, \ + version_major, version_minor, version_status) \ + (GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev) | \ + GPU_ID2_VERSION_MAKE(version_major, version_minor, \ + version_status)) + +/* Helper macro to create a partial GPU_ID (new format) that identifies + a particular GPU model by its arch_major and product_major. */ +#define GPU_ID2_MODEL_MAKE(arch_major, product_major) \ + (((arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT) | \ + ((product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT)) + +/* Strip off the non-relevant bits from a product_id value and make it suitable + for comparison against the GPU_ID2_PRODUCT_xxx values which identify a GPU + model. 
*/ +#define GPU_ID2_MODEL_MATCH_VALUE(product_id) \ + (((product_id) << GPU_ID2_PRODUCT_MAJOR_SHIFT) & \ + GPU_ID2_PRODUCT_MODEL) + + +/* Values for GPU_ID_VERSION_STATUS field for PRODUCT_ID GPU_ID_PI_T60X */ +#define GPU_ID_S_15DEV0 0x1 +#define GPU_ID_S_EAC 0x2 + +/* Helper macro to create a GPU_ID assuming valid values for id, major, + minor, status */ +#define GPU_ID_MAKE(id, major, minor, status) \ + (((id) << GPU_ID_VERSION_PRODUCT_ID_SHIFT) | \ + ((major) << GPU_ID_VERSION_MAJOR_SHIFT) | \ + ((minor) << GPU_ID_VERSION_MINOR_SHIFT) | \ + ((status) << GPU_ID_VERSION_STATUS_SHIFT)) + +#endif /* _KBASE_GPU_ID_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c b/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c index d632a0bbb1bc..7f77dba347d0 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c +++ b/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c @@ -87,7 +87,6 @@ int kbase_gpuprops_uk_get_props(struct kbase_context *kctx, struct kbase_uk_gpup if (kctx->api_version < KBASE_API_VERSION(8, 2)) kbase_props->props.raw_props.suspend_size = 0; - return 0; } @@ -200,7 +199,6 @@ static void kbase_gpuprops_get_props(base_gpu_props * const gpu_props, struct kb gpu_props->raw_props.thread_max_threads = regdump.thread_max_threads; gpu_props->raw_props.thread_max_workgroup_size = regdump.thread_max_workgroup_size; gpu_props->raw_props.thread_features = regdump.thread_features; - } /** @@ -297,3 +295,20 @@ void kbase_gpuprops_set(struct kbase_device *kbdev) gpu_props->num_address_spaces = hweight32(raw->as_present); gpu_props->num_job_slots = hweight32(raw->js_present); } + +void kbase_gpuprops_set_features(struct kbase_device *kbdev) +{ + base_gpu_props *gpu_props; + struct kbase_gpuprops_regdump regdump; + + gpu_props = &kbdev->gpu_props.props; + + /* Dump relevant registers */ + kbase_backend_gpuprops_get_features(kbdev, ®dump); + + /* + * Copy the raw value from the register, later this will get turned + * into the selected coherency mode. 
+ */ + gpu_props->raw_props.coherency_mode = regdump.coherency_features; +} diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h b/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h index af97d97bf945..f3c95cc1849c 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h +++ b/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h @@ -39,6 +39,16 @@ struct kbase_device; */ void kbase_gpuprops_set(struct kbase_device *kbdev); +/** + * kbase_gpuprops_set_features - Set up Kbase GPU properties + * @kbdev: Device pointer + * + * This function sets up GPU properties that are dependent on the hardware + * features bitmask. This function must be preceeded by a call to + * kbase_hw_set_features_mask(). + */ +void kbase_gpuprops_set_features(struct kbase_device *kbdev); + /** * @brief Provide GPU properties to userside through UKU call. * diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h b/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h index 463fead4b05d..781375a9a97f 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h +++ b/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h @@ -51,6 +51,7 @@ struct kbase_gpuprops_regdump { u32 tiler_present_hi; u32 l2_present_lo; u32 l2_present_hi; + u32 coherency_features; }; struct kbase_gpu_cache_props { diff --git a/drivers/gpu/arm/midgard/mali_kbase_hw.c b/drivers/gpu/arm/midgard/mali_kbase_hw.c index fac65d4f2286..f2f93de9d2e8 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_hw.c +++ b/drivers/gpu/arm/midgard/mali_kbase_hw.c @@ -31,39 +31,41 @@ void kbase_hw_set_features_mask(struct kbase_device *kbdev) { const enum base_hw_feature *features; u32 gpu_id; + u32 product_id; gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; - gpu_id &= GPU_ID_VERSION_PRODUCT_ID; - gpu_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT; - - switch (gpu_id) { - case GPU_ID_PI_TFRX: - /* FALLTHROUGH */ - case GPU_ID_PI_T86X: - features = base_hw_features_tFxx; - break; - case GPU_ID_PI_T83X: - features = base_hw_features_t83x; - 
break; - case GPU_ID_PI_T82X: - features = base_hw_features_t82x; - break; - case GPU_ID_PI_T76X: - features = base_hw_features_t76x; - break; - case GPU_ID_PI_T72X: - features = base_hw_features_t72x; - break; - case GPU_ID_PI_T62X: - features = base_hw_features_t62x; - break; - case GPU_ID_PI_T60X: - features = base_hw_features_t60x; - break; - default: - features = base_hw_features_generic; - break; - } + product_id = gpu_id & GPU_ID_VERSION_PRODUCT_ID; + product_id >>= GPU_ID_VERSION_PRODUCT_ID_SHIFT; + + switch (product_id) { + case GPU_ID_PI_TFRX: + /* FALLTHROUGH */ + case GPU_ID_PI_T86X: + features = base_hw_features_tFxx; + break; + case GPU_ID_PI_T83X: + features = base_hw_features_t83x; + break; + case GPU_ID_PI_T82X: + features = base_hw_features_t82x; + break; + case GPU_ID_PI_T76X: + features = base_hw_features_t76x; + break; + case GPU_ID_PI_T72X: + features = base_hw_features_t72x; + break; + case GPU_ID_PI_T62X: + features = base_hw_features_t62x; + break; + case GPU_ID_PI_T60X: + features = base_hw_features_t60x; + break; + default: + features = base_hw_features_generic; + break; + } + for (; *features != BASE_HW_FEATURE_END; features++) set_bit(*features, &kbdev->hw_features_mask[0]); @@ -73,136 +75,140 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev) { const enum base_hw_issue *issues; u32 gpu_id; + u32 product_id; u32 impl_tech; gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; + product_id = gpu_id & GPU_ID_VERSION_PRODUCT_ID; + product_id >>= GPU_ID_VERSION_PRODUCT_ID_SHIFT; impl_tech = kbdev->gpu_props.props.thread_props.impl_tech; if (impl_tech != IMPLEMENTATION_MODEL) { - switch (gpu_id) { - case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 0, GPU_ID_S_15DEV0): - issues = base_hw_issues_t60x_r0p0_15dev0; - break; - case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 0, GPU_ID_S_EAC): - issues = base_hw_issues_t60x_r0p0_eac; - break; - case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 1, 0): - issues = base_hw_issues_t60x_r0p1; - break; - case GPU_ID_MAKE(GPU_ID_PI_T62X, 
0, 1, 0): - issues = base_hw_issues_t62x_r0p1; - break; - case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 0, 0): - case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 0, 1): - issues = base_hw_issues_t62x_r1p0; - break; - case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 1, 0): - issues = base_hw_issues_t62x_r1p1; - break; - case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 0, 1): - issues = base_hw_issues_t76x_r0p0; - break; - case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 1, 1): - issues = base_hw_issues_t76x_r0p1; - break; - case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 1, 9): - issues = base_hw_issues_t76x_r0p1_50rel0; - break; - case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 2, 1): - issues = base_hw_issues_t76x_r0p2; - break; - case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 3, 1): - issues = base_hw_issues_t76x_r0p3; - break; - case GPU_ID_MAKE(GPU_ID_PI_T76X, 1, 0, 0): - issues = base_hw_issues_t76x_r1p0; - break; - case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 0): - case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 1): - case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 2): - issues = base_hw_issues_t72x_r0p0; - break; - case GPU_ID_MAKE(GPU_ID_PI_T72X, 1, 0, 0): - issues = base_hw_issues_t72x_r1p0; - break; - case GPU_ID_MAKE(GPU_ID_PI_T72X, 1, 1, 0): - issues = base_hw_issues_t72x_r1p1; - break; - case GPU_ID_MAKE(GPU_ID_PI_TFRX, 0, 1, 2): - issues = base_hw_issues_tFRx_r0p1; - break; - case GPU_ID_MAKE(GPU_ID_PI_TFRX, 0, 2, 0): - issues = base_hw_issues_tFRx_r0p2; - break; - case GPU_ID_MAKE(GPU_ID_PI_TFRX, 1, 0, 0): - case GPU_ID_MAKE(GPU_ID_PI_TFRX, 1, 0, 8): - issues = base_hw_issues_tFRx_r1p0; - break; - case GPU_ID_MAKE(GPU_ID_PI_TFRX, 2, 0, 0): - issues = base_hw_issues_tFRx_r2p0; - break; - case GPU_ID_MAKE(GPU_ID_PI_T86X, 0, 2, 0): - issues = base_hw_issues_t86x_r0p2; - break; - case GPU_ID_MAKE(GPU_ID_PI_T86X, 1, 0, 0): - case GPU_ID_MAKE(GPU_ID_PI_T86X, 1, 0, 8): - issues = base_hw_issues_t86x_r1p0; - break; - case GPU_ID_MAKE(GPU_ID_PI_T86X, 2, 0, 0): - issues = base_hw_issues_t86x_r2p0; - break; - case GPU_ID_MAKE(GPU_ID_PI_T83X, 0, 1, 0): - issues = 
base_hw_issues_t83x_r0p1; - break; - case GPU_ID_MAKE(GPU_ID_PI_T83X, 1, 0, 0): - case GPU_ID_MAKE(GPU_ID_PI_T83X, 1, 0, 8): - issues = base_hw_issues_t83x_r1p0; - break; - case GPU_ID_MAKE(GPU_ID_PI_T82X, 0, 0, 0): - issues = base_hw_issues_t82x_r0p0; - break; - case GPU_ID_MAKE(GPU_ID_PI_T82X, 0, 1, 0): - issues = base_hw_issues_t82x_r0p1; - break; - case GPU_ID_MAKE(GPU_ID_PI_T82X, 1, 0, 0): - case GPU_ID_MAKE(GPU_ID_PI_T82X, 1, 0, 8): - issues = base_hw_issues_t82x_r1p0; - break; - default: - dev_err(kbdev->dev, "Unknown GPU ID %x", gpu_id); - return -EINVAL; - } + switch (gpu_id) { + case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 0, GPU_ID_S_15DEV0): + issues = base_hw_issues_t60x_r0p0_15dev0; + break; + case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 0, GPU_ID_S_EAC): + issues = base_hw_issues_t60x_r0p0_eac; + break; + case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 1, 0): + issues = base_hw_issues_t60x_r0p1; + break; + case GPU_ID_MAKE(GPU_ID_PI_T62X, 0, 1, 0): + issues = base_hw_issues_t62x_r0p1; + break; + case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 0, 0): + case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 0, 1): + issues = base_hw_issues_t62x_r1p0; + break; + case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 1, 0): + issues = base_hw_issues_t62x_r1p1; + break; + case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 0, 1): + issues = base_hw_issues_t76x_r0p0; + break; + case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 1, 1): + issues = base_hw_issues_t76x_r0p1; + break; + case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 1, 9): + issues = base_hw_issues_t76x_r0p1_50rel0; + break; + case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 2, 1): + issues = base_hw_issues_t76x_r0p2; + break; + case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 3, 1): + issues = base_hw_issues_t76x_r0p3; + break; + case GPU_ID_MAKE(GPU_ID_PI_T76X, 1, 0, 0): + issues = base_hw_issues_t76x_r1p0; + break; + case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 0): + case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 1): + case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 2): + issues = base_hw_issues_t72x_r0p0; + break; + case GPU_ID_MAKE(GPU_ID_PI_T72X, 1, 0, 0): 
+ issues = base_hw_issues_t72x_r1p0; + break; + case GPU_ID_MAKE(GPU_ID_PI_T72X, 1, 1, 0): + issues = base_hw_issues_t72x_r1p1; + break; + case GPU_ID_MAKE(GPU_ID_PI_TFRX, 0, 1, 2): + issues = base_hw_issues_tFRx_r0p1; + break; + case GPU_ID_MAKE(GPU_ID_PI_TFRX, 0, 2, 0): + issues = base_hw_issues_tFRx_r0p2; + break; + case GPU_ID_MAKE(GPU_ID_PI_TFRX, 1, 0, 0): + case GPU_ID_MAKE(GPU_ID_PI_TFRX, 1, 0, 8): + issues = base_hw_issues_tFRx_r1p0; + break; + case GPU_ID_MAKE(GPU_ID_PI_TFRX, 2, 0, 0): + issues = base_hw_issues_tFRx_r2p0; + break; + case GPU_ID_MAKE(GPU_ID_PI_T86X, 0, 2, 0): + issues = base_hw_issues_t86x_r0p2; + break; + case GPU_ID_MAKE(GPU_ID_PI_T86X, 1, 0, 0): + case GPU_ID_MAKE(GPU_ID_PI_T86X, 1, 0, 8): + issues = base_hw_issues_t86x_r1p0; + break; + case GPU_ID_MAKE(GPU_ID_PI_T86X, 2, 0, 0): + issues = base_hw_issues_t86x_r2p0; + break; + case GPU_ID_MAKE(GPU_ID_PI_T83X, 0, 1, 0): + issues = base_hw_issues_t83x_r0p1; + break; + case GPU_ID_MAKE(GPU_ID_PI_T83X, 1, 0, 0): + case GPU_ID_MAKE(GPU_ID_PI_T83X, 1, 0, 8): + issues = base_hw_issues_t83x_r1p0; + break; + case GPU_ID_MAKE(GPU_ID_PI_T82X, 0, 0, 0): + issues = base_hw_issues_t82x_r0p0; + break; + case GPU_ID_MAKE(GPU_ID_PI_T82X, 0, 1, 0): + issues = base_hw_issues_t82x_r0p1; + break; + case GPU_ID_MAKE(GPU_ID_PI_T82X, 1, 0, 0): + case GPU_ID_MAKE(GPU_ID_PI_T82X, 1, 0, 8): + issues = base_hw_issues_t82x_r1p0; + break; + default: + dev_err(kbdev->dev, "Unknown GPU ID %x", gpu_id); + return -EINVAL; + } } else { /* Software model */ - switch (gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT) { - case GPU_ID_PI_T60X: - issues = base_hw_issues_model_t60x; - break; - case GPU_ID_PI_T62X: - issues = base_hw_issues_model_t62x; - break; - case GPU_ID_PI_T72X: - issues = base_hw_issues_model_t72x; - break; - case GPU_ID_PI_T76X: - issues = base_hw_issues_model_t76x; - break; - case GPU_ID_PI_TFRX: - issues = base_hw_issues_model_tFRx; - break; - case GPU_ID_PI_T86X: - issues = base_hw_issues_model_t86x; - break; 
- case GPU_ID_PI_T83X: - issues = base_hw_issues_model_t83x; - break; - case GPU_ID_PI_T82X: - issues = base_hw_issues_model_t82x; - break; - default: - dev_err(kbdev->dev, "Unknown GPU ID %x", gpu_id); - return -EINVAL; - } + switch (product_id) { + case GPU_ID_PI_T60X: + issues = base_hw_issues_model_t60x; + break; + case GPU_ID_PI_T62X: + issues = base_hw_issues_model_t62x; + break; + case GPU_ID_PI_T72X: + issues = base_hw_issues_model_t72x; + break; + case GPU_ID_PI_T76X: + issues = base_hw_issues_model_t76x; + break; + case GPU_ID_PI_TFRX: + issues = base_hw_issues_model_tFRx; + break; + case GPU_ID_PI_T86X: + issues = base_hw_issues_model_t86x; + break; + case GPU_ID_PI_T83X: + issues = base_hw_issues_model_t83x; + break; + case GPU_ID_PI_T82X: + issues = base_hw_issues_model_t82x; + break; + default: + dev_err(kbdev->dev, "Unknown GPU ID %x", + gpu_id); + return -EINVAL; + } } dev_info(kbdev->dev, "GPU identified as 0x%04x r%dp%d status %d", (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, (gpu_id & GPU_ID_VERSION_MAJOR) >> GPU_ID_VERSION_MAJOR_SHIFT, (gpu_id & GPU_ID_VERSION_MINOR) >> GPU_ID_VERSION_MINOR_SHIFT, (gpu_id & GPU_ID_VERSION_STATUS) >> GPU_ID_VERSION_STATUS_SHIFT); diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h index f93ca9d86802..cf8a8131c22e 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h +++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -32,4 +32,16 @@ void kbase_backend_gpuprops_get(struct kbase_device *kbdev, struct kbase_gpuprops_regdump *regdump); +/** + * kbase_backend_gpuprops_get - Fill @regdump with GPU properties read from GPU + * @kbdev: Device pointer + * @regdump: Pointer to struct kbase_gpuprops_regdump structure + * + * This function reads GPU properties that are dependent on the hardware + * features bitmask + */ +void kbase_backend_gpuprops_get_features(struct kbase_device *kbdev, + struct kbase_gpuprops_regdump *regdump); + + #endif /* _KBASE_HWACCESS_GPUPROPS_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h index dbdcd3def220..71c7d495c40a 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h +++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h @@ -126,10 +126,13 @@ void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev); * * @param kbdev The kbase device structure for the device (must be a * valid pointer) - * @param new_core_mask The core mask to use + * @param new_core_mask_js0 The core mask to use for job slot 0 + * @param new_core_mask_js0 The core mask to use for job slot 1 + * @param new_core_mask_js0 The core mask to use for job slot 2 */ void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, - u64 new_core_mask); + u64 new_core_mask_js0, u64 new_core_mask_js1, + u64 new_core_mask_js2); /** diff --git a/drivers/gpu/arm/midgard/mali_kbase_instr.c b/drivers/gpu/arm/midgard/mali_kbase_instr.c index 314ae0819d50..fda317b90176 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_instr.c +++ b/drivers/gpu/arm/midgard/mali_kbase_instr.c @@ -66,18 +66,10 @@ int kbase_instr_hwcnt_enable(struct kbase_context *kctx, struct kbase_uk_hwcnt_setup *setup) { struct kbase_device *kbdev; - bool access_allowed; int err; kbdev = kctx->kbdev; - /* 
Determine if the calling task has access to this capability */ - access_allowed = kbase_security_has_capability(kctx, - KBASE_SEC_INSTR_HW_COUNTERS_COLLECT, - KBASE_SEC_FLAG_NOAUDIT); - if (!access_allowed) - return -EINVAL; - /* Mark the context as active so the GPU is kept turned on */ /* A suspend won't happen here, because we're in a syscall from a * userspace thread. */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_ipa.c b/drivers/gpu/arm/midgard/mali_kbase_ipa.c index 433103c0d331..6ac97eb7937c 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_ipa.c +++ b/drivers/gpu/arm/midgard/mali_kbase_ipa.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -21,50 +21,39 @@ #define NR_IPA_GROUPS 8 +struct kbase_ipa_context; + /** * struct ipa_group - represents a single IPA group * @name: name of the IPA group * @capacitance: capacitance constant for IPA group + * @calc_power: function to calculate power for IPA group */ struct ipa_group { const char *name; u32 capacitance; + u32 (*calc_power)(struct kbase_ipa_context *, + struct ipa_group *); }; +#include + /** * struct kbase_ipa_context - IPA context per device - * @kbdev: pointer to kbase device - * @groups: array of IPA groups for this context - * @ipa_lock: protects the entire IPA context + * @kbdev: pointer to kbase device + * @groups: array of IPA groups for this context + * @vinstr_cli: vinstr client handle + * @vinstr_buffer: buffer to dump hardware counters onto + * @ipa_lock: protects the entire IPA context */ struct kbase_ipa_context { struct kbase_device *kbdev; struct ipa_group groups[NR_IPA_GROUPS]; + struct kbase_vinstr_client *vinstr_cli; + void *vinstr_buffer; struct mutex ipa_lock; }; -static struct ipa_group ipa_groups_def_v4[] = { - { .name = "group0", 
.capacitance = 0 }, - { .name = "group1", .capacitance = 0 }, - { .name = "group2", .capacitance = 0 }, - { .name = "group3", .capacitance = 0 }, - { .name = "group4", .capacitance = 0 }, - { .name = "group5", .capacitance = 0 }, - { .name = "group6", .capacitance = 0 }, - { .name = "group7", .capacitance = 0 }, -}; - -static struct ipa_group ipa_groups_def_v5[] = { - { .name = "group0", .capacitance = 0 }, - { .name = "group1", .capacitance = 0 }, - { .name = "group2", .capacitance = 0 }, - { .name = "group3", .capacitance = 0 }, - { .name = "group4", .capacitance = 0 }, - { .name = "group5", .capacitance = 0 }, - { .name = "group6", .capacitance = 0 }, - { .name = "group7", .capacitance = 0 }, -}; - static ssize_t show_ipa_group(struct device *dev, struct device_attribute *attr, char *buf) @@ -143,22 +132,7 @@ static struct attribute_group kbase_ipa_attr_group = { static void init_ipa_groups(struct kbase_ipa_context *ctx) { - struct kbase_device *kbdev = ctx->kbdev; - struct ipa_group *defs; - size_t i, len; - - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_V4)) { - defs = ipa_groups_def_v4; - len = ARRAY_SIZE(ipa_groups_def_v4); - } else { - defs = ipa_groups_def_v5; - len = ARRAY_SIZE(ipa_groups_def_v5); - } - - for (i = 0; i < len; i++) { - ctx->groups[i].name = defs[i].name; - ctx->groups[i].capacitance = defs[i].capacitance; - } + memcpy(ctx->groups, ipa_groups_def, sizeof(ctx->groups)); } #if defined(CONFIG_OF) && (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 6, 0)) @@ -229,6 +203,172 @@ static int reset_ipa_groups(struct kbase_ipa_context *ctx) return update_ipa_groups_from_dt(ctx); } +static inline u32 read_hwcnt(struct kbase_ipa_context *ctx, + u32 offset) +{ + u8 *p = ctx->vinstr_buffer; + + return *(u32 *)&p[offset]; +} + +static inline u32 add_saturate(u32 a, u32 b) +{ + if (U32_MAX - a < b) + return U32_MAX; + return a + b; +} + +/* + * Calculate power estimation based on hardware counter `c' + * across all shader cores. 
+ */ +static u32 calc_power_sc_single(struct kbase_ipa_context *ctx, + struct ipa_group *group, u32 c) +{ + struct kbase_device *kbdev = ctx->kbdev; + u64 core_mask; + u32 base = 0, r = 0; + + core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask; + while (core_mask != 0ull) { + if ((core_mask & 1ull) != 0ull) { + u64 n = read_hwcnt(ctx, base + c); + u32 d = read_hwcnt(ctx, GPU_ACTIVE); + u32 s = group->capacitance; + + r = add_saturate(r, div_u64(n * s, d)); + } + base += NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT; + core_mask >>= 1; + } + return r; +} + +/* + * Calculate power estimation based on hardware counter `c1' + * and `c2' across all shader cores. + */ +static u32 calc_power_sc_double(struct kbase_ipa_context *ctx, + struct ipa_group *group, u32 c1, u32 c2) +{ + struct kbase_device *kbdev = ctx->kbdev; + u64 core_mask; + u32 base = 0, r = 0; + + core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask; + while (core_mask != 0ull) { + if ((core_mask & 1ull) != 0ull) { + u64 n = read_hwcnt(ctx, base + c1); + u32 d = read_hwcnt(ctx, GPU_ACTIVE); + u32 s = group->capacitance; + + r = add_saturate(r, div_u64(n * s, d)); + n = read_hwcnt(ctx, base + c2); + r = add_saturate(r, div_u64(n * s, d)); + } + base += NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT; + core_mask >>= 1; + } + return r; +} + +static u32 calc_power_single(struct kbase_ipa_context *ctx, + struct ipa_group *group, u32 c) +{ + u64 n = read_hwcnt(ctx, c); + u32 d = read_hwcnt(ctx, GPU_ACTIVE); + u32 s = group->capacitance; + + return div_u64(n * s, d); +} + +static u32 calc_power_group0(struct kbase_ipa_context *ctx, + struct ipa_group *group) +{ + return calc_power_single(ctx, group, L2_ANY_LOOKUP); +} + +static u32 calc_power_group1(struct kbase_ipa_context *ctx, + struct ipa_group *group) +{ + return calc_power_single(ctx, group, TILER_ACTIVE); +} + +static u32 calc_power_group2(struct kbase_ipa_context *ctx, + struct ipa_group *group) +{ + return calc_power_sc_single(ctx, group, 
FRAG_ACTIVE); +} + +static u32 calc_power_group3(struct kbase_ipa_context *ctx, + struct ipa_group *group) +{ + return calc_power_sc_double(ctx, group, VARY_SLOT_32, + VARY_SLOT_16); +} + +static u32 calc_power_group4(struct kbase_ipa_context *ctx, + struct ipa_group *group) +{ + return calc_power_sc_single(ctx, group, TEX_COORD_ISSUE); +} + +static u32 calc_power_group5(struct kbase_ipa_context *ctx, + struct ipa_group *group) +{ + return calc_power_sc_single(ctx, group, EXEC_INSTR_COUNT); +} + +static u32 calc_power_group6(struct kbase_ipa_context *ctx, + struct ipa_group *group) +{ + return calc_power_sc_double(ctx, group, BEATS_RD_LSC, + BEATS_WR_LSC); +} + +static u32 calc_power_group7(struct kbase_ipa_context *ctx, + struct ipa_group *group) +{ + return calc_power_sc_single(ctx, group, EXEC_CORE_ACTIVE); +} + +static int attach_vinstr(struct kbase_ipa_context *ctx) +{ + struct kbase_device *kbdev = ctx->kbdev; + struct kbase_uk_hwcnt_reader_setup setup; + size_t dump_size; + + dump_size = kbase_vinstr_dump_size(kbdev); + ctx->vinstr_buffer = kzalloc(dump_size, GFP_KERNEL); + if (!ctx->vinstr_buffer) { + dev_err(kbdev->dev, "Failed to allocate IPA dump buffer"); + return -1; + } + + setup.jm_bm = ~0u; + setup.shader_bm = ~0u; + setup.tiler_bm = ~0u; + setup.mmu_l2_bm = ~0u; + ctx->vinstr_cli = kbase_vinstr_hwcnt_kernel_setup(kbdev->vinstr_ctx, + &setup, ctx->vinstr_buffer); + if (!ctx->vinstr_cli) { + dev_err(kbdev->dev, "Failed to register IPA with vinstr core"); + kfree(ctx->vinstr_buffer); + ctx->vinstr_buffer = NULL; + return -1; + } + return 0; +} + +static void detach_vinstr(struct kbase_ipa_context *ctx) +{ + if (ctx->vinstr_cli) + kbase_vinstr_detach_client(ctx->vinstr_cli); + ctx->vinstr_cli = NULL; + kfree(ctx->vinstr_buffer); + ctx->vinstr_buffer = NULL; +} + struct kbase_ipa_context *kbase_ipa_init(struct kbase_device *kbdev) { struct kbase_ipa_context *ctx; @@ -259,6 +399,33 @@ void kbase_ipa_term(struct kbase_ipa_context *ctx) { struct 
kbase_device *kbdev = ctx->kbdev; + detach_vinstr(ctx); sysfs_remove_group(&kbdev->dev->kobj, &kbase_ipa_attr_group); kfree(ctx); } + +u32 kbase_ipa_dynamic_power(struct kbase_ipa_context *ctx, int *err) +{ + struct ipa_group *group; + u32 power = 0; + size_t i; + + mutex_lock(&ctx->ipa_lock); + if (!ctx->vinstr_cli) { + *err = attach_vinstr(ctx); + if (*err < 0) + goto err0; + } + *err = kbase_vinstr_hwc_dump(ctx->vinstr_cli, + BASE_HWCNT_READER_EVENT_MANUAL); + if (*err) + goto err0; + for (i = 0; i < ARRAY_SIZE(ctx->groups); i++) { + group = &ctx->groups[i]; + power = add_saturate(power, group->calc_power(ctx, group)); + } +err0: + mutex_unlock(&ctx->ipa_lock); + return power; +} +KBASE_EXPORT_TEST_API(kbase_ipa_dynamic_power); diff --git a/drivers/gpu/arm/midgard/mali_kbase_ipa.h b/drivers/gpu/arm/midgard/mali_kbase_ipa.h index ed123759ff3c..e2234d150b0b 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_ipa.h +++ b/drivers/gpu/arm/midgard/mali_kbase_ipa.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -30,3 +30,12 @@ struct kbase_ipa_context *kbase_ipa_init(struct kbase_device *kbdev); * @ctx: pointer to the IPA context */ void kbase_ipa_term(struct kbase_ipa_context *ctx); + +/** + * kbase_ipa_dynamic_power - calculate power + * @ctx: pointer to the IPA context + * @err: 0 on success, negative on failure + * + * Return: returns power consumption as mw @ 1GHz @ 1V + */ +u32 kbase_ipa_dynamic_power(struct kbase_ipa_context *ctx, int *err); diff --git a/drivers/gpu/arm/midgard/mali_kbase_ipa_tables.h b/drivers/gpu/arm/midgard/mali_kbase_ipa_tables.h new file mode 100644 index 000000000000..101abfe565a7 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_ipa_tables.h @@ -0,0 +1,104 @@ +/* + * + * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + + + +#define NR_BYTES_PER_CNT 4 +#define NR_CNT_PER_BLOCK 64 + +#define JM_BASE (0 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT) +#define TILER_BASE (1 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT) +#define MMU_BASE (2 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT) +#define SC0_BASE (3 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT) + +#define GPU_ACTIVE (JM_BASE + NR_BYTES_PER_CNT * 6) +#define TILER_ACTIVE (TILER_BASE + NR_BYTES_PER_CNT * 45) +#define L2_ANY_LOOKUP (MMU_BASE + NR_BYTES_PER_CNT * 25) +#define FRAG_ACTIVE (SC0_BASE + NR_BYTES_PER_CNT * 4) +#define EXEC_CORE_ACTIVE (SC0_BASE + NR_BYTES_PER_CNT * 26) +#define EXEC_INSTR_COUNT (SC0_BASE + NR_BYTES_PER_CNT * 28) +#define TEX_COORD_ISSUE (SC0_BASE + NR_BYTES_PER_CNT * 40) +#define VARY_SLOT_32 (SC0_BASE + NR_BYTES_PER_CNT * 50) +#define VARY_SLOT_16 (SC0_BASE + NR_BYTES_PER_CNT * 51) +#define BEATS_RD_LSC (SC0_BASE + NR_BYTES_PER_CNT * 56) +#define BEATS_WR_LSC (SC0_BASE + NR_BYTES_PER_CNT * 61) + +static u32 calc_power_group0(struct kbase_ipa_context *ctx, + struct ipa_group *group); +static u32 calc_power_group1(struct kbase_ipa_context *ctx, + struct ipa_group *group); +static u32 calc_power_group2(struct kbase_ipa_context *ctx, + struct ipa_group *group); +static u32 calc_power_group3(struct kbase_ipa_context *ctx, + struct ipa_group *group); +static u32 calc_power_group4(struct kbase_ipa_context *ctx, + struct ipa_group *group); +static u32 calc_power_group5(struct kbase_ipa_context *ctx, + struct ipa_group *group); +static u32 calc_power_group6(struct kbase_ipa_context *ctx, + struct ipa_group *group); +static u32 calc_power_group7(struct kbase_ipa_context *ctx, + struct ipa_group *group); + +static struct ipa_group ipa_groups_def[] = { + /* L2 */ + { + .name = "group0", + .capacitance = 687, + .calc_power = calc_power_group0, + }, + /* TILER */ + { + .name = "group1", + .capacitance = 0, + .calc_power = calc_power_group1, + }, + /* FRAG */ + { + .name = "group2", + .capacitance = 23, + .calc_power = 
calc_power_group2, + }, + /* VARY */ + { + .name = "group3", + .capacitance = 108, + .calc_power = calc_power_group3, + }, + /* TEX */ + { + .name = "group4", + .capacitance = 128, + .calc_power = calc_power_group4, + }, + /* EXEC INSTR */ + { + .name = "group5", + .capacitance = 249, + .calc_power = calc_power_group5, + }, + /* LSC */ + { + .name = "group6", + .capacitance = 0, + .calc_power = calc_power_group6, + }, + /* EXEC OVERHEAD */ + { + .name = "group7", + .capacitance = 29, + .calc_power = calc_power_group7, + }, +}; diff --git a/drivers/gpu/arm/midgard/mali_kbase_jd.c b/drivers/gpu/arm/midgard/mali_kbase_jd.c index dd2d187d5cd9..7e5da2944304 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_jd.c +++ b/drivers/gpu/arm/midgard/mali_kbase_jd.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -198,6 +199,122 @@ static void kbase_cancel_kds_wait_job(struct kbase_jd_atom *katom) } #endif /* CONFIG_KDS */ +static int kbase_jd_user_buf_map(struct kbase_context *kctx, + struct kbase_va_region *reg) +{ + long pinned_pages; + struct kbase_mem_phy_alloc *alloc; + struct page **pages; + phys_addr_t *pa; + long i; + int err = -ENOMEM; + unsigned long address; + struct task_struct *owner; + struct device *dev; + unsigned long offset; + unsigned long local_size; + + alloc = reg->gpu_alloc; + pa = kbase_get_gpu_phy_pages(reg); + address = alloc->imported.user_buf.address; + owner = alloc->imported.user_buf.owner; + + KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF); + + pages = alloc->imported.user_buf.pages; + + down_read(&owner->mm->mmap_sem); + pinned_pages = get_user_pages(owner, owner->mm, + address, + alloc->imported.user_buf.nr_pages, + reg->flags & KBASE_REG_GPU_WR, + 0, pages, NULL); + up_read(&owner->mm->mmap_sem); + + if (pinned_pages <= 0) + return pinned_pages; + + if (pinned_pages != alloc->imported.user_buf.nr_pages) { + for (i = 0; i < pinned_pages; i++) + put_page(pages[i]); + return -ENOMEM; + } + + dev = 
kctx->kbdev->dev; + offset = address & ~PAGE_MASK; + local_size = alloc->imported.user_buf.size; + + for (i = 0; i < pinned_pages; i++) { + dma_addr_t dma_addr; + unsigned long min; + + /* if page already is private, we can't store our + * private data. */ + if (PagePrivate(pages[i])) + goto unwind; + + min = MIN(PAGE_SIZE - offset, local_size); + dma_addr = dma_map_page(dev, pages[i], + offset, min, + DMA_BIDIRECTIONAL); + if (dma_mapping_error(dev, dma_addr)) + goto unwind; + + kbase_set_dma_addr(pages[i], dma_addr); + pa[i] = page_to_phys(pages[i]); + + local_size -= min; + offset = 0; + } + + alloc->nents = pinned_pages; + + err = kbase_mmu_insert_pages(kctx, reg->start_pfn, pa, + kbase_reg_current_backed_size(reg), + reg->flags); + if (err == 0) + return 0; + + alloc->nents = 0; + /* fall down */ +unwind: + while (i--) { + dma_unmap_page(kctx->kbdev->dev, kbase_dma_addr(pages[i]), + PAGE_SIZE, DMA_BIDIRECTIONAL); + put_page(pages[i]); + pages[i] = NULL; + } + + return err; +} + +static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, + struct kbase_mem_phy_alloc *alloc, bool writeable) +{ + long i; + struct page **pages; + unsigned long size = alloc->imported.user_buf.size; + + KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF); + pages = alloc->imported.user_buf.pages; + for (i = 0; i < alloc->imported.user_buf.nr_pages; i++) { + unsigned long local_size; + dma_addr_t dma_addr = kbase_dma_addr(pages[i]); + + local_size = MIN(size, PAGE_SIZE - (dma_addr & ~PAGE_MASK)); + dma_unmap_page(kctx->kbdev->dev, dma_addr, local_size, + DMA_BIDIRECTIONAL); + ClearPagePrivate(pages[i]); + if (writeable) + set_page_dirty_lock(pages[i]); + put_page(pages[i]); + pages[i] = NULL; + + size -= local_size; + } + alloc->nents = 0; +} + #ifdef CONFIG_DMA_SHARED_BUFFER static int kbase_jd_umm_map(struct kbase_context *kctx, struct kbase_va_region *reg) { @@ -316,11 +433,11 @@ static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom) res_no 
= katom->nr_extres; while (res_no-- > 0) { - struct kbase_mem_phy_alloc *alloc; + struct kbase_mem_phy_alloc *alloc = katom->extres[res_no].alloc; - alloc = katom->extres[res_no].alloc; + switch (alloc->type) { #ifdef CONFIG_DMA_SHARED_BUFFER - if (alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) { + case KBASE_MEM_TYPE_IMPORTED_UMM: { alloc->imported.umm.current_mapping_usage_count--; if (0 == alloc->imported.umm.current_mapping_usage_count) { @@ -339,8 +456,34 @@ static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom) kbase_jd_umm_unmap(katom->kctx, alloc); } } -#endif /* CONFIG_DMA_SHARED_BUFFER */ - kbase_mem_phy_alloc_put(alloc); + break; +#endif /* CONFIG_DMA_SHARED_BUFFER */ + case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { + alloc->imported.user_buf.current_mapping_usage_count--; + + if (0 == alloc->imported.user_buf.current_mapping_usage_count) { + struct kbase_va_region *reg; + + reg = kbase_region_tracker_find_region_base_address( + katom->kctx, + katom->extres[res_no].gpu_address); + + if (reg && reg->gpu_alloc == alloc) + kbase_mmu_teardown_pages( + katom->kctx, + reg->start_pfn, + kbase_reg_current_backed_size(reg)); + + kbase_jd_user_buf_unmap(katom->kctx, + alloc, + reg->flags & KBASE_REG_GPU_WR); + } + } + break; + default: + break; + } + kbase_mem_phy_alloc_put(katom->extres[res_no].alloc); } kfree(katom->extres); katom->extres = NULL; @@ -437,7 +580,8 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st struct kbase_va_region *reg; res = &input_extres[res_no]; - reg = kbase_region_tracker_find_region_enclosing_address(katom->kctx, + reg = kbase_region_tracker_find_region_enclosing_address( + katom->kctx, res->ext_resource & ~BASE_EXT_RES_ACCESS_EXCLUSIVE); /* did we find a matching region object? 
*/ if (NULL == reg || (reg->flags & KBASE_REG_FREE)) { @@ -456,8 +600,29 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st /* decide what needs to happen for this resource */ switch (reg->gpu_alloc->type) { - case BASE_MEM_IMPORT_TYPE_UMP: - { + case BASE_MEM_IMPORT_TYPE_USER_BUFFER: { + reg->gpu_alloc->imported.user_buf.current_mapping_usage_count++; + if (1 == reg->gpu_alloc->imported.user_buf.current_mapping_usage_count) { + /* use a local variable to not pollute + * err_ret_val with a potential success + * value as some other gotos depend on + * the default error code stored in + * err_ret_val */ + int tmp; + + tmp = kbase_jd_user_buf_map(katom->kctx, + reg); + if (0 != tmp) { + /* failed to map this buffer, + * roll back */ + err_ret_val = tmp; + reg->gpu_alloc->imported.user_buf.current_mapping_usage_count--; + goto failed_loop; + } + } + } + break; + case BASE_MEM_IMPORT_TYPE_UMP: { #if defined(CONFIG_KDS) && defined(CONFIG_UMP) struct kds_resource *kds_res; @@ -468,10 +633,9 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE); #endif /*defined(CONFIG_KDS) && defined(CONFIG_UMP) */ break; - } + } #ifdef CONFIG_DMA_SHARED_BUFFER - case BASE_MEM_IMPORT_TYPE_UMM: - { + case BASE_MEM_IMPORT_TYPE_UMM: { #ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS struct kds_resource *kds_res; @@ -495,7 +659,7 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st } } break; - } + } #endif default: goto failed_loop; @@ -926,6 +1090,9 @@ bool jd_submit_atom(struct kbase_context *kctx, katom->event_code = BASE_JD_EVENT_JOB_CONFIG_FAULT; katom->status = KBASE_JD_ATOM_STATE_COMPLETED; #if defined(CONFIG_MALI_MIPE_ENABLED) + /* Wrong dependency setup. Atom will be sent + * back to user space. Do not record any + * dependencies. 
*/ kbase_tlstream_tl_new_atom( katom, kbase_jd_atom_id(kctx, katom)); @@ -956,7 +1123,7 @@ bool jd_submit_atom(struct kbase_context *kctx, if (dep_atom->event_code == BASE_JD_EVENT_DONE) continue; /* don't stop this atom if it has an order dependency - * only to the failed one, try to submit it throught + * only to the failed one, try to submit it through * the normal path */ if (dep_atom_type == BASE_JD_DEP_TYPE_ORDER && @@ -974,6 +1141,9 @@ bool jd_submit_atom(struct kbase_context *kctx, katom->event_code = dep_atom->event_code; katom->status = KBASE_JD_ATOM_STATE_QUEUED; #if defined(CONFIG_MALI_MIPE_ENABLED) + /* This atom is going through soft replay or + * will be sent back to user space. Do not record any + * dependencies. */ kbase_tlstream_tl_new_atom( katom, kbase_jd_atom_id(kctx, katom)); @@ -1003,10 +1173,18 @@ bool jd_submit_atom(struct kbase_context *kctx, katom->status = KBASE_JD_ATOM_STATE_QUEUED; #if defined(CONFIG_MALI_MIPE_ENABLED) + /* Create a new atom recording all dependencies it was set up with. 
*/ kbase_tlstream_tl_new_atom( katom, kbase_jd_atom_id(kctx, katom)); kbase_tlstream_tl_ret_atom_ctx(katom, kctx); + for (i = 0; i < 2; i++) + if (BASE_JD_DEP_TYPE_INVALID != kbase_jd_katom_dep_type( + &katom->dep[i])) + kbase_tlstream_tl_dep_atom_atom( + (void *)kbase_jd_katom_dep_atom( + &katom->dep[i]), + (void *)katom); #endif /* Reject atoms with job chain = NULL, as these cause issues with soft-stop */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_js.c b/drivers/gpu/arm/midgard/mali_kbase_js.c index 54b8d9bcd1e1..9a366b255b1e 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_js.c +++ b/drivers/gpu/arm/midgard/mali_kbase_js.c @@ -1730,7 +1730,6 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( #endif #if defined(CONFIG_MALI_MIPE_ENABLED) kbase_tlstream_tl_nret_as_ctx(&kbdev->as[kctx->as_nr], kctx); - kbase_tlstream_tl_nret_gpu_ctx(kbdev, kctx); #endif kbase_backend_release_ctx_irq(kbdev, kctx); @@ -2077,7 +2076,6 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, kbase_trace_mali_mmu_as_in_use(kctx->as_nr); #endif #if defined(CONFIG_MALI_MIPE_ENABLED) - kbase_tlstream_tl_ret_gpu_ctx(kbdev, kctx); kbase_tlstream_tl_ret_as_ctx(&kbdev->as[kctx->as_nr], kctx); #endif @@ -2781,6 +2779,10 @@ void kbase_js_complete_atom(struct kbase_jd_atom *katom, ktime_t *end_timestamp) &kbdev->gpu_props.props.raw_props.js_features[ katom->slot_nr]); kbase_tlstream_tl_nret_atom_as(katom, &kbdev->as[kctx->as_nr]); + kbase_tlstream_tl_nret_ctx_lpu( + kctx, + &kbdev->gpu_props.props.raw_props.js_features[ + katom->slot_nr]); #endif /* Calculate the job's time used */ if (end_timestamp != NULL) { diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c b/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c index 8891bff70c60..e6e611b9f415 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c +++ b/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c @@ -209,13 +209,6 @@ void kbasep_js_ctx_attr_set_initial_attrs(struct kbase_device *kbdev, 
struct kba /* Transfer attributes held in the context flags for contexts that have submit enabled */ - if ((js_kctx_info->ctx.flags & KBASE_CTX_FLAG_HINT_ONLY_COMPUTE) != false) { - /* Compute context */ - runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE); - } - /* NOTE: Whether this is a non-compute context depends on the jobs being - * run, e.g. it might be submitting jobs with BASE_JD_REQ_ONLY_COMPUTE */ - /* ... More attributes can be added here ... */ /* The context should not have been scheduled yet, so ASSERT if this caused diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_defs.h b/drivers/gpu/arm/midgard/mali_kbase_js_defs.h index d65b494a70c9..04f7809f79d3 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_js_defs.h +++ b/drivers/gpu/arm/midgard/mali_kbase_js_defs.h @@ -50,10 +50,7 @@ enum { KBASE_CTX_FLAG_SUBMIT_DISABLED = (1u << 0), /** Set if the context uses an address space and should be kept scheduled in */ - KBASE_CTX_FLAG_PRIVILEGED = (1u << 1), - - /** Kernel-side equivalent of BASE_CONTEXT_HINT_ONLY_COMPUTE. Non-mutable after creation flags set */ - KBASE_CTX_FLAG_HINT_ONLY_COMPUTE = (1u << 2) + KBASE_CTX_FLAG_PRIVILEGED = (1u << 1) /* NOTE: Add flags for other things, such as 'is scheduled', and 'is dying' */ }; @@ -126,15 +123,10 @@ typedef void (*kbasep_js_policy_ctx_job_cb)(struct kbase_device *kbdev, struct k * - The runpool holds a refcount of how many contexts in the runpool have this * attribute. * - The context holds a refcount of how many atoms have this attribute. - * - * Examples of use: - * - Finding out when there are a mix of @ref BASE_CONTEXT_HINT_ONLY_COMPUTE - * and ! @ref BASE_CONTEXT_HINT_ONLY_COMPUTE contexts in the runpool */ enum kbasep_js_ctx_attr { /** Attribute indicating a context that contains Compute jobs. 
That is, - * @ref BASE_CONTEXT_HINT_ONLY_COMPUTE is \b set and/or the context has jobs of type - * @ref BASE_JD_REQ_ONLY_COMPUTE + * the context has jobs of type @ref BASE_JD_REQ_ONLY_COMPUTE * * @note A context can be both 'Compute' and 'Non Compute' if it contains * both types of jobs. diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem.c b/drivers/gpu/arm/midgard/mali_kbase_mem.c index 2909f20c08b2..90714c55fde2 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_mem.c +++ b/drivers/gpu/arm/midgard/mali_kbase_mem.c @@ -1236,6 +1236,9 @@ void kbase_mem_kref_free(struct kref *kref) dma_buf_put(alloc->imported.umm.dma_buf); break; #endif + case KBASE_MEM_TYPE_IMPORTED_USER_BUF: + kfree(alloc->imported.user_buf.pages); + break; case KBASE_MEM_TYPE_TB:{ void *tb; diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem.h b/drivers/gpu/arm/midgard/mali_kbase_mem.h index 1839cced237e..e2422a377f16 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_mem.h +++ b/drivers/gpu/arm/midgard/mali_kbase_mem.h @@ -75,6 +75,7 @@ enum kbase_memory_type { KBASE_MEM_TYPE_NATIVE, KBASE_MEM_TYPE_IMPORTED_UMP, KBASE_MEM_TYPE_IMPORTED_UMM, + KBASE_MEM_TYPE_IMPORTED_USER_BUF, KBASE_MEM_TYPE_ALIAS, KBASE_MEM_TYPE_TB, KBASE_MEM_TYPE_RAW @@ -136,6 +137,14 @@ struct kbase_mem_phy_alloc { } alias; /* Used by type = (KBASE_MEM_TYPE_NATIVE, KBASE_MEM_TYPE_TB) */ struct kbase_context *kctx; + struct { + unsigned long address; + unsigned long size; + unsigned long nr_pages; + struct page **pages; + unsigned int current_mapping_usage_count; + struct task_struct *owner; + } user_buf; } imported; }; diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c index 3e4481a77e15..20dabeffa4c4 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c +++ b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c @@ -26,12 +26,13 @@ #include #include #include +#include #include #include #include #if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) #include -#endif +#endif /* 
LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0) */ #ifdef CONFIG_DMA_SHARED_BUFFER #include #endif /* defined(CONFIG_DMA_SHARED_BUFFER) */ @@ -89,12 +90,12 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages } if ((*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0 && - kctx->kbdev->system_coherency != COHERENCY_ACE) { + !kbase_device_is_cpu_coherent(kctx->kbdev)) { dev_warn(dev, "kbase_mem_alloc call required coherent mem when unavailable"); goto bad_flags; } if ((*flags & BASE_MEM_COHERENT_SYSTEM) != 0 && - kctx->kbdev->system_coherency != COHERENCY_ACE) { + !kbase_device_is_cpu_coherent(kctx->kbdev)) { /* Remove COHERENT_SYSTEM flag if coherent mem is unavailable */ *flags &= ~BASE_MEM_COHERENT_SYSTEM; } @@ -141,20 +142,29 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages /* mmap needed to setup VA? */ if (*flags & BASE_MEM_SAME_VA) { + unsigned long prot = PROT_NONE; + unsigned long va_size = va_pages << PAGE_SHIFT; + unsigned long va_map = va_size; + unsigned long cookie; + unsigned long cpu_addr; + /* Bind to a cookie */ if (!kctx->cookies) { dev_err(dev, "No cookies available for allocation!"); + kbase_gpu_vm_unlock(kctx); goto no_cookie; } /* return a cookie */ - *gpu_va = __ffs(kctx->cookies); - kctx->cookies &= ~(1UL << *gpu_va); - BUG_ON(kctx->pending_regions[*gpu_va]); - kctx->pending_regions[*gpu_va] = reg; + cookie = __ffs(kctx->cookies); + kctx->cookies &= ~(1UL << cookie); + BUG_ON(kctx->pending_regions[cookie]); + kctx->pending_regions[cookie] = reg; + + kbase_gpu_vm_unlock(kctx); /* relocate to correct base */ - *gpu_va += PFN_DOWN(BASE_MEM_COOKIE_BASE); - *gpu_va <<= PAGE_SHIFT; + cookie += PFN_DOWN(BASE_MEM_COOKIE_BASE); + cookie <<= PAGE_SHIFT; /* See if we must align memory due to GPU PC bits vs CPU VA */ if ((*flags & BASE_MEM_PROT_GPU_EX) && @@ -162,21 +172,97 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages *va_alignment = gpu_pc_bits; 
reg->flags |= KBASE_REG_ALIGNED; } + + /* + * Pre-10.1 UKU userland calls mmap for us so return the + * unaligned address and skip the map. + */ + if (kctx->api_version < KBASE_API_VERSION(10, 1)) { + *gpu_va = (u64) cookie; + return reg; + } + + /* + * GPUCORE-2190: + * + * We still need to return alignment for old userspace. + */ + if (*va_alignment) + va_map += 3 * (1UL << *va_alignment); + + if (*flags & BASE_MEM_PROT_CPU_RD) + prot |= PROT_READ; + if (*flags & BASE_MEM_PROT_CPU_WR) + prot |= PROT_WRITE; + + cpu_addr = vm_mmap(kctx->filp, 0, va_map, prot, MAP_SHARED, + cookie); + if (IS_ERR_VALUE(cpu_addr)) + goto no_mmap; + + /* + * If we had to allocate extra VA space to force the + * alignment release it. + */ + if (*va_alignment) { + unsigned long alignment = 1UL << *va_alignment; + unsigned long align_mask = alignment - 1; + unsigned long addr; + unsigned long addr_end; + unsigned long aligned_addr; + unsigned long aligned_addr_end; + + addr = cpu_addr; + addr_end = addr + va_map; + + aligned_addr = (addr + align_mask) & + ~((u64) align_mask); + aligned_addr_end = aligned_addr + va_size; + + if ((aligned_addr_end & BASE_MEM_MASK_4GB) == 0) { + /* + * Can't end at 4GB boundary on some GPUs as + * it will halt the shader. + */ + aligned_addr += 2 * alignment; + aligned_addr_end += 2 * alignment; + } else if ((aligned_addr & BASE_MEM_MASK_4GB) == 0) { + /* + * Can't start at 4GB boundary on some GPUs as + * it will halt the shader. + */ + aligned_addr += alignment; + aligned_addr_end += alignment; + } + + /* anything to chop off at the start? */ + if (addr != aligned_addr) + vm_munmap(addr, aligned_addr - addr); + + /* anything at the end? 
*/ + if (addr_end != aligned_addr_end) + vm_munmap(aligned_addr_end, + addr_end - aligned_addr_end); + + *gpu_va = (u64) aligned_addr; + } else + *gpu_va = (u64) cpu_addr; } else /* we control the VA */ { if (kbase_gpu_mmap(kctx, reg, 0, va_pages, 1) != 0) { dev_warn(dev, "Failed to map memory on GPU"); + kbase_gpu_vm_unlock(kctx); goto no_mmap; } /* return real GPU VA */ *gpu_va = reg->start_pfn << PAGE_SHIFT; + + kbase_gpu_vm_unlock(kctx); } - kbase_gpu_vm_unlock(kctx); return reg; no_mmap: no_cookie: - kbase_gpu_vm_unlock(kctx); no_mem: kbase_mem_phy_alloc_put(reg->cpu_alloc); kbase_mem_phy_alloc_put(reg->gpu_alloc); @@ -352,10 +438,6 @@ static struct kbase_va_region *kbase_mem_from_ump(struct kbase_context *kctx, um ump_alloc_flags cpu_flags; ump_alloc_flags gpu_flags; - KBASE_DEBUG_ASSERT(kctx); - KBASE_DEBUG_ASSERT(va_pages); - KBASE_DEBUG_ASSERT(flags); - if (*flags & BASE_MEM_SECURE) goto bad_flags; @@ -554,6 +636,106 @@ no_buf: } #endif /* CONFIG_DMA_SHARED_BUFFER */ + +static struct kbase_va_region *kbase_mem_from_user_buffer( + struct kbase_context *kctx, unsigned long address, + unsigned long size, u64 *va_pages, u64 *flags) +{ + struct kbase_va_region *reg; + long faulted_pages; + int zone = KBASE_REG_ZONE_CUSTOM_VA; + + *va_pages = (PAGE_ALIGN(address + size) >> PAGE_SHIFT) - + PFN_DOWN(address); + if (!*va_pages) + goto bad_size; + + if (*va_pages > (UINT64_MAX / PAGE_SIZE)) + /* 64-bit address range is the max */ + goto bad_size; + + /* SAME_VA generally not supported with imported memory (no known use cases) */ + *flags &= ~BASE_MEM_SAME_VA; + +#ifdef CONFIG_64BIT + if (!kctx->is_compat) { + /* 64-bit tasks must MMAP anyway, but not expose this address to + * clients */ + *flags |= BASE_MEM_NEED_MMAP; + zone = KBASE_REG_ZONE_SAME_VA; + } +#endif + reg = kbase_alloc_free_region(kctx, 0, *va_pages, zone); + + if (!reg) + goto no_region; + + reg->gpu_alloc = kbase_alloc_create(*va_pages, + KBASE_MEM_TYPE_IMPORTED_USER_BUF); + if 
(IS_ERR_OR_NULL(reg->gpu_alloc)) + goto no_alloc_obj; + + reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); + + reg->flags &= ~KBASE_REG_FREE; + reg->flags |= KBASE_REG_GPU_NX; /* User-buffers are always No eXecute */ + reg->flags &= ~KBASE_REG_GROWABLE; /* Cannot be grown */ + + if (*flags & BASE_MEM_PROT_CPU_WR) + reg->flags |= KBASE_REG_CPU_WR; + + if (*flags & BASE_MEM_PROT_CPU_RD) + reg->flags |= KBASE_REG_CPU_RD; + + if (*flags & BASE_MEM_PROT_GPU_WR) + reg->flags |= KBASE_REG_GPU_WR; + + if (*flags & BASE_MEM_PROT_GPU_RD) + reg->flags |= KBASE_REG_GPU_RD; + + down_read(¤t->mm->mmap_sem); + + /* A sanity check that get_user_pages will work on the memory */ + /* (so the initial import fails on weird memory regions rather than */ + /* the job failing when we try to handle the external resources). */ + /* It doesn't take a reference to the pages (because the page list is NULL). */ + /* We can't really store the page list because that would involve */ + /* keeping the pages pinned - instead we pin/unpin around the job */ + /* (as part of the external resources handling code) */ + faulted_pages = get_user_pages(current, current->mm, address, *va_pages, + reg->flags & KBASE_REG_GPU_WR, 0, NULL, NULL); + up_read(¤t->mm->mmap_sem); + + if (faulted_pages != *va_pages) + goto fault_mismatch; + + reg->gpu_alloc->imported.user_buf.size = size; + reg->gpu_alloc->imported.user_buf.address = address; + reg->gpu_alloc->imported.user_buf.nr_pages = faulted_pages; + reg->gpu_alloc->imported.user_buf.pages = kmalloc_array(faulted_pages, + sizeof(struct page *), GFP_KERNEL); + reg->gpu_alloc->imported.user_buf.owner = current; + + if (!reg->gpu_alloc->imported.user_buf.pages) + goto no_page_array; + + reg->gpu_alloc->nents = 0; + reg->extent = 0; + + return reg; + +no_page_array: +fault_mismatch: + kbase_mem_phy_alloc_put(reg->gpu_alloc); +no_alloc_obj: + kfree(reg); +no_region: +bad_size: + return NULL; + +} + + u64 kbase_mem_alias(struct kbase_context *kctx, u64 
*flags, u64 stride, u64 nents, struct base_mem_aliasing_info *ai, u64 *num_pages) @@ -736,7 +918,9 @@ bad_flags: return 0; } -int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, int handle, u64 *gpu_va, u64 *va_pages, u64 *flags) +int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, + void __user *phandle, u64 *gpu_va, u64 *va_pages, + u64 *flags) { struct kbase_va_region *reg; @@ -759,19 +943,53 @@ int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, switch (type) { #ifdef CONFIG_UMP - case BASE_MEM_IMPORT_TYPE_UMP: - reg = kbase_mem_from_ump(kctx, (ump_secure_id)handle, va_pages, flags); - break; + case BASE_MEM_IMPORT_TYPE_UMP: { + ump_secure_id id; + + if (get_user(id, (ump_secure_id __user *)phandle)) + reg = NULL; + else + reg = kbase_mem_from_ump(kctx, id, va_pages, flags); + } + break; #endif /* CONFIG_UMP */ #ifdef CONFIG_DMA_SHARED_BUFFER - case BASE_MEM_IMPORT_TYPE_UMM: - reg = kbase_mem_from_umm(kctx, handle, va_pages, flags); - break; + case BASE_MEM_IMPORT_TYPE_UMM: { + int fd; + + if (get_user(fd, (int __user *)phandle)) + reg = NULL; + else + reg = kbase_mem_from_umm(kctx, fd, va_pages, flags); + } + break; #endif /* CONFIG_DMA_SHARED_BUFFER */ - default: + case BASE_MEM_IMPORT_TYPE_USER_BUFFER: { + struct base_mem_import_user_buffer user_buffer; + void __user *uptr; + + if (copy_from_user(&user_buffer, phandle, + sizeof(user_buffer))) { + reg = NULL; + } else { +#ifdef CONFIG_COMPAT + if (kctx->is_compat) + uptr = compat_ptr(user_buffer.ptr.compat_value); + else +#endif + uptr = user_buffer.ptr.value; + + reg = kbase_mem_from_user_buffer(kctx, + (unsigned long)uptr, user_buffer.length, + va_pages, flags); + } + break; + } + default: { reg = NULL; break; } + } if (!reg) goto no_reg; diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h index 1d854152704b..6a139fd70234 100644 --- 
a/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h +++ b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h @@ -34,7 +34,9 @@ struct kbase_hwc_dma_mapping { struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages, u64 commit_pages, u64 extent, u64 *flags, u64 *gpu_va, u16 *va_alignment); int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, int query, u64 *const pages); -int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, int handle, u64 *gpu_va, u64 *va_pages, u64 *flags); +int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, + void __user *phandle, u64 *gpu_va, u64 *va_pages, + u64 *flags); u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, u64 nents, struct base_mem_aliasing_info *ai, u64 *num_pages); int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned int flags, unsigned int mask); int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages, enum base_backing_threshold_status *failure_reason); diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c b/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c index a049205d0e90..153cd4efac49 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c +++ b/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c @@ -24,7 +24,13 @@ #include #include -/* Backwards compatibility with kernels using the old carveout allocator */ +/* This function is only provided for backwards compatibility with kernels + * which use the old carveout allocator. + * + * The forward declaration is to keep sparse happy. 
+ */ +int __init kbase_carveout_mem_reserve( + phys_addr_t size); int __init kbase_carveout_mem_reserve(phys_addr_t size) { return 0; @@ -73,6 +79,8 @@ static void kbase_mem_pool_add_locked(struct kbase_mem_pool *pool, list_add(&p->lru, &pool->page_list); pool->cur_size++; + zone_page_state_add(1, page_zone(p), NR_SLAB_RECLAIMABLE); + pool_dbg(pool, "added page\n"); } @@ -86,8 +94,14 @@ static void kbase_mem_pool_add(struct kbase_mem_pool *pool, struct page *p) static void kbase_mem_pool_add_list_locked(struct kbase_mem_pool *pool, struct list_head *page_list, size_t nr_pages) { + struct page *p; + lockdep_assert_held(&pool->pool_lock); + list_for_each_entry(p, page_list, lru) { + zone_page_state_add(1, page_zone(p), NR_SLAB_RECLAIMABLE); + } + list_splice(page_list, &pool->page_list); pool->cur_size += nr_pages; @@ -115,6 +129,8 @@ static struct page *kbase_mem_pool_remove_locked(struct kbase_mem_pool *pool) list_del_init(&p->lru); pool->cur_size--; + zone_page_state_add(-1, page_zone(p), NR_SLAB_RECLAIMABLE); + pool_dbg(pool, "removed page\n"); return p; diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c index bf60c1920294..a443d002a1ac 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c +++ b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c @@ -19,19 +19,6 @@ #ifdef CONFIG_DEBUG_FS -/* mam_profile file name max length 22 based on format _\0 */ -#define KBASEP_DEBUGFS_FNAME_SIZE_MAX (10+1+10+1) - -void kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data, - size_t size) -{ - spin_lock(&kctx->mem_profile_lock); - kfree(kctx->mem_profile_data); - kctx->mem_profile_data = data; - kctx->mem_profile_size = size; - spin_unlock(&kctx->mem_profile_lock); -} - /** Show callback for the @c mem_profile debugfs file. 
* * This function is called to get the contents of the @c mem_profile debugfs @@ -40,21 +27,23 @@ void kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data, * @param sfile The debugfs entry * @param data Data associated with the entry * - * @return 0 if successfully prints data in debugfs entry file - * -1 if it encountered an error + * @return 0 if it successfully prints data in debugfs entry file, non-zero otherwise */ static int kbasep_mem_profile_seq_show(struct seq_file *sfile, void *data) { struct kbase_context *kctx = sfile->private; + int err = 0; - KBASE_DEBUG_ASSERT(kctx != NULL); + mutex_lock(&kctx->mem_profile_lock); - spin_lock(&kctx->mem_profile_lock); - seq_write(sfile, kctx->mem_profile_data, kctx->mem_profile_size); - seq_putc(sfile, '\n'); - spin_unlock(&kctx->mem_profile_lock); + err = seq_write(sfile, kctx->mem_profile_data, kctx->mem_profile_size); - return 0; + if (!err) + err = seq_putc(sfile, '\n'); + + mutex_unlock(&kctx->mem_profile_lock); + + return err; } /* @@ -72,34 +61,60 @@ static const struct file_operations kbasep_mem_profile_debugfs_fops = { .release = single_release, }; -void kbasep_mem_profile_debugfs_add(struct kbase_context *kctx) +int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data, + size_t size) { - KBASE_DEBUG_ASSERT(kctx != NULL); + int err = 0; + + mutex_lock(&kctx->mem_profile_lock); - spin_lock_init(&kctx->mem_profile_lock); + dev_dbg(kctx->kbdev->dev, "initialised: %d", + kctx->mem_profile_initialized); - debugfs_create_file("mem_profile", S_IRUGO, kctx->kctx_dentry, kctx, - &kbasep_mem_profile_debugfs_fops); + if (!kctx->mem_profile_initialized) { + if (!debugfs_create_file("mem_profile", S_IRUGO, + kctx->kctx_dentry, kctx, + &kbasep_mem_profile_debugfs_fops)) { + err = -EAGAIN; + } else { + kctx->mem_profile_initialized = true; + } + } + + if (kctx->mem_profile_initialized) { + kfree(kctx->mem_profile_data); + kctx->mem_profile_data = data; + kctx->mem_profile_size = 
size; + } + + dev_dbg(kctx->kbdev->dev, "returning: %d, initialised: %d", + err, kctx->mem_profile_initialized); + + mutex_unlock(&kctx->mem_profile_lock); + + return err; } void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx) { - KBASE_DEBUG_ASSERT(kctx != NULL); + mutex_lock(&kctx->mem_profile_lock); + + dev_dbg(kctx->kbdev->dev, "initialised: %d", + kctx->mem_profile_initialized); - spin_lock(&kctx->mem_profile_lock); kfree(kctx->mem_profile_data); kctx->mem_profile_data = NULL; - spin_unlock(&kctx->mem_profile_lock); + kctx->mem_profile_size = 0; + + mutex_unlock(&kctx->mem_profile_lock); } #else /* CONFIG_DEBUG_FS */ -/** - * @brief Stub function for when debugfs is disabled - */ -void kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data, - size_t size) +int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data, + size_t size) { kfree(data); + return 0; } #endif /* CONFIG_DEBUG_FS */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h index 205bd378c8ec..9555197f305c 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h +++ b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -30,29 +30,31 @@ #include #include -/** - * @brief Add new entry to Mali memory profile debugfs - */ -void kbasep_mem_profile_debugfs_add(struct kbase_context *kctx); - /** * @brief Remove entry from Mali memory profile debugfs */ void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx); /** - * @brief Insert data to debugfs file, so it can be read by userspce + * @brief Insert @p data to the debugfs file so it can be read by userspace + * + * The function takes ownership of @p data and frees it later when new data + * is inserted. * - * Function takes ownership of @c data and frees it later when new data - * are inserted. + * If the debugfs entry corresponding to the @p kctx doesn't exist, + * an attempt will be made to create it. * - * @param kctx Context to which file data should be inserted - * @param data NULL-terminated string to be inserted to mem_profile file, - without trailing new line character - * @param size @c buf length + * @param kctx The context whose debugfs file @p data should be inserted to + * @param data A NULL-terminated string to be inserted to the debugfs file, + * without the trailing new line character + * @param size The length of the @p data string + * @return 0 if @p data inserted correctly + * -EAGAIN in case of error + * @post @ref mem_profile_initialized will be set to @c true + * the first time this function succeeds. 
*/ -void kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data, - size_t size); +int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data, + size_t size); #endif /*_KBASE_MEM_PROFILE_DEBUGFS_H*/ diff --git a/drivers/gpu/arm/midgard/mali_kbase_mmu.c b/drivers/gpu/arm/midgard/mali_kbase_mmu.c index 2666cdbf4a5e..f16c92a3979a 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_mmu.c +++ b/drivers/gpu/arm/midgard/mali_kbase_mmu.c @@ -261,7 +261,7 @@ void page_fault_worker(struct work_struct *data) #endif #if defined(CONFIG_MALI_MIPE_ENABLED) kbase_tlstream_aux_pagefault( - as_no, + kctx->id, atomic_read(&kctx->used_pages)); #endif diff --git a/drivers/gpu/arm/midgard/mali_kbase_security.c b/drivers/gpu/arm/midgard/mali_kbase_security.c deleted file mode 100644 index a0bb3529baf5..000000000000 --- a/drivers/gpu/arm/midgard/mali_kbase_security.c +++ /dev/null @@ -1,76 +0,0 @@ -/* - * - * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * A copy of the licence is included with the program, and can also be obtained - * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. 
- * - */ - - - - - -/** - * @file mali_kbase_security.c - * Base kernel security capability API - */ - -#include - -static inline bool kbasep_am_i_root(void) -{ -#if KBASE_HWCNT_DUMP_BYPASS_ROOT - return true; -#else - /* Check if root */ -#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0) - if (uid_eq(current_euid(), GLOBAL_ROOT_UID)) - return true; -#else - if (current_euid() == 0) - return true; -#endif /*LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)*/ - return false; -#endif /*KBASE_HWCNT_DUMP_BYPASS_ROOT*/ -} - -/** - * kbase_security_has_capability - see mali_kbase_caps.h for description. - */ - -bool kbase_security_has_capability(struct kbase_context *kctx, enum kbase_security_capability cap, u32 flags) -{ - /* Assume failure */ - bool access_allowed = false; - bool audit = KBASE_SEC_FLAG_AUDIT & flags; - - KBASE_DEBUG_ASSERT(NULL != kctx); - CSTD_UNUSED(kctx); - - /* Detect unsupported flags */ - KBASE_DEBUG_ASSERT(((~KBASE_SEC_FLAG_MASK) & flags) == 0); - - /* Determine if access is allowed for the given cap */ - switch (cap) { - case KBASE_SEC_MODIFY_PRIORITY: - case KBASE_SEC_INSTR_HW_COUNTERS_COLLECT: - /* Access is granted only if the caller is privileged */ - access_allowed = kbasep_am_i_root(); - break; - } - - /* Report problem if requested */ - if (!access_allowed && audit) - dev_warn(kctx->kbdev->dev, "Security capability failure: %d, %p", cap, (void *)kctx); - - return access_allowed; -} - -KBASE_EXPORT_TEST_API(kbase_security_has_capability); diff --git a/drivers/gpu/arm/midgard/mali_kbase_security.h b/drivers/gpu/arm/midgard/mali_kbase_security.h deleted file mode 100644 index 024a7ee1aaba..000000000000 --- a/drivers/gpu/arm/midgard/mali_kbase_security.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * - * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. 
- * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * A copy of the licence is included with the program, and can also be obtained - * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. - * - */ - - - - - -/** - * @file mali_kbase_security.h - * Base kernel security capability APIs - */ - -#ifndef _KBASE_SECURITY_H_ -#define _KBASE_SECURITY_H_ - -/* Security flags */ -#define KBASE_SEC_FLAG_NOAUDIT (0u << 0) /* Silently handle privilege failure */ -#define KBASE_SEC_FLAG_AUDIT (1u << 0) /* Write audit message on privilege failure */ -#define KBASE_SEC_FLAG_MASK (KBASE_SEC_FLAG_AUDIT) /* Mask of all valid flag bits */ - -/* List of unique capabilities that have security access privileges */ -enum kbase_security_capability { - /* Instrumentation Counters access privilege */ - KBASE_SEC_INSTR_HW_COUNTERS_COLLECT = 1, - KBASE_SEC_MODIFY_PRIORITY - /* Add additional access privileges here */ -}; - -/** - * kbase_security_has_capability - determine whether a task has a particular effective capability - * @param[in] kctx The task context. - * @param[in] cap The capability to check for. - * @param[in] flags Additional configuration information - * Such as whether to write an audit message or not. - * @return true if success (capability is allowed), false otherwise. 
- */ - -bool kbase_security_has_capability(struct kbase_context *kctx, enum kbase_security_capability cap, u32 flags); - -#endif /* _KBASE_SECURITY_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_smc.c b/drivers/gpu/arm/midgard/mali_kbase_smc.c index 17455fe00d79..43175c85988f 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_smc.c +++ b/drivers/gpu/arm/midgard/mali_kbase_smc.c @@ -25,16 +25,21 @@ static noinline u64 invoke_smc_fid(u64 function_id, u64 arg0, u64 arg1, u64 arg2) { + register u64 x0 asm("x0") = function_id; + register u64 x1 asm("x1") = arg0; + register u64 x2 asm("x2") = arg1; + register u64 x3 asm("x3") = arg2; + asm volatile( __asmeq("%0", "x0") __asmeq("%1", "x1") __asmeq("%2", "x2") __asmeq("%3", "x3") - "smc #0\n" - : "+r" (function_id) - : "r" (arg0), "r" (arg1), "r" (arg2)); + "smc #0\n" + : "+r" (x0) + : "r" (x1), "r" (x2), "r" (x3)); - return function_id; + return x0; } u64 kbase_invoke_smc_fid(u32 fid, u64 arg0, u64 arg1, u64 arg2) diff --git a/drivers/gpu/arm/midgard/mali_kbase_tlstream.c b/drivers/gpu/arm/midgard/mali_kbase_tlstream.c index 99428d1e660e..07804d1b9f0e 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_tlstream.c +++ b/drivers/gpu/arm/midgard/mali_kbase_tlstream.c @@ -32,8 +32,8 @@ /*****************************************************************************/ -/* The version of timeline stream. */ -#define KBASEP_TLSTREAM_VERSION 1 +/* The version of swtrace protocol used in timeline stream. */ +#define SWTRACE_VERSION 3 /* The maximum expected length of string in tracepoint descriptor. */ #define STRLEN_MAX 64 /* bytes */ @@ -48,7 +48,7 @@ #define AUTOFLUSH_INTERVAL 1000 /* ms */ /* The maximum size of a single packet used by timeline. */ -#define PACKET_SIZE 2048 /* bytes */ +#define PACKET_SIZE 4096 /* bytes */ /* The number of packets used by one timeline stream. */ #define PACKET_COUNT 16 @@ -120,7 +120,7 @@ enum tl_packet_type { }; /* Message ids of trace events that are recorded in the timeline stream. 
*/ -enum tl_msg_id { +enum tl_msg_id_obj { /* Timeline object events. */ KBASE_TL_NEW_CTX, KBASE_TL_NEW_GPU, @@ -131,23 +131,26 @@ enum tl_msg_id { KBASE_TL_DEL_ATOM, KBASE_TL_LIFELINK_LPU_GPU, KBASE_TL_LIFELINK_AS_GPU, - KBASE_TL_RET_GPU_CTX, + KBASE_TL_RET_CTX_LPU, KBASE_TL_RET_ATOM_CTX, KBASE_TL_RET_ATOM_LPU, - KBASE_TL_NRET_GPU_CTX, + KBASE_TL_NRET_CTX_LPU, KBASE_TL_NRET_ATOM_CTX, KBASE_TL_NRET_ATOM_LPU, KBASE_TL_RET_AS_CTX, KBASE_TL_NRET_AS_CTX, KBASE_TL_RET_ATOM_AS, KBASE_TL_NRET_ATOM_AS, + KBASE_TL_DEP_ATOM_ATOM, KBASE_TL_ATTRIB_ATOM_CONFIG, KBASE_TL_ATTRIB_AS_CONFIG, - /* Job dump specific events (part of timeline stream). */ - KBASE_JD_GPU_SOFT_RESET, + /* Job dump specific events. */ + KBASE_JD_GPU_SOFT_RESET +}; - /* Timeline non-object events. */ +/* Message ids of trace events that are recorded in the auxiliary stream. */ +enum tl_msg_id_aux { KBASE_AUX_PM_STATE, KBASE_AUX_JOB_SOFTSTOP, KBASE_AUX_PAGEFAULT, @@ -326,11 +329,11 @@ static const struct tp_desc tp_desc_obj[] = { "address_space,gpu" }, { - KBASE_TL_RET_GPU_CTX, - __stringify(KBASE_TL_RET_GPU_CTX), - "gpu is retained by context", + KBASE_TL_RET_CTX_LPU, + __stringify(KBASE_TL_RET_CTX_LPU), + "context is retained by lpu", "@pp", - "gpu,ctx" + "ctx,lpu" }, { KBASE_TL_RET_ATOM_CTX, @@ -343,22 +346,22 @@ static const struct tp_desc tp_desc_obj[] = { KBASE_TL_RET_ATOM_LPU, __stringify(KBASE_TL_RET_ATOM_LPU), "atom is retained by lpu", - "@pp", - "atom,lpu" + "@pps", + "atom,lpu,attrib_match_list" }, { - KBASE_TL_NRET_GPU_CTX, - __stringify(KBASE_TL_NRET_GPU_CTX), - "gpu is released by context", + KBASE_TL_NRET_CTX_LPU, + __stringify(KBASE_TL_NRET_CTX_LPU), + "context is released by lpu", "@pp", - "gpu,ctx" + "ctx,lpu" }, { KBASE_TL_NRET_ATOM_CTX, __stringify(KBASE_TL_NRET_ATOM_CTX), "atom is released by context", "@pp", - "atom,context" + "atom,ctx" }, { KBASE_TL_NRET_ATOM_LPU, @@ -395,6 +398,13 @@ static const struct tp_desc tp_desc_obj[] = { "@pp", "atom,address_space" }, + { + 
KBASE_TL_DEP_ATOM_ATOM, + __stringify(KBASE_TL_DEP_ATOM_ATOM), + "atom2 depends on atom1", + "@pp", + "atom1,atom2" + }, { KBASE_TL_ATTRIB_ATOM_CONFIG, __stringify(KBASE_TL_ATTRIB_ATOM_CONFIG), @@ -439,7 +449,7 @@ static const struct tp_desc tp_desc_aux[] = { __stringify(KBASE_AUX_PAGEFAULT), "Page fault", "@II", - "as_id,page_cnt" + "ctx_nr,page_cnt" }, { KBASE_AUX_PAGESALLOC, @@ -811,7 +821,7 @@ static size_t kbasep_tlstream_msgbuf_submit( * * Return: pointer to the buffer where message can be stored * - * Warning: Stream must be relased with kbasep_tlstream_msgbuf_release(). + * Warning: Stream must be released with kbasep_tlstream_msgbuf_release(). * Only atomic operations are allowed while stream is locked * (i.e. do not use any operation that may sleep). */ @@ -921,7 +931,6 @@ static void kbasep_tlstream_flush_stream(enum tl_stream_type stype) */ static void kbasep_tlstream_autoflush_timer_callback(unsigned long data) { - u64 timestamp = kbasep_tlstream_get_timestamp(); enum tl_stream_type stype; int rcode; @@ -944,16 +953,17 @@ static void kbasep_tlstream_autoflush_timer_callback(unsigned long data) wb_idx = wb_idx_raw % PACKET_COUNT; wb_size = atomic_read(&stream->buffer[wb_idx].size); - if ( - (wb_size > min_size) && - ( - timestamp - stream->last_write_time > - AUTOFLUSH_TIMEOUT)) { + if (wb_size > min_size) { + u64 timestamp = kbasep_tlstream_get_timestamp(); - wb_size = kbasep_tlstream_msgbuf_submit( - stream, wb_idx_raw, wb_size); - wb_idx = (wb_idx_raw + 1) % PACKET_COUNT; - atomic_set(&stream->buffer[wb_idx].size, wb_size); + if (timestamp - stream->last_write_time + > AUTOFLUSH_TIMEOUT) { + wb_size = kbasep_tlstream_msgbuf_submit( + stream, wb_idx_raw, wb_size); + wb_idx = (wb_idx_raw + 1) % PACKET_COUNT; + atomic_set(&stream->buffer[wb_idx].size, + wb_size); + } } spin_unlock_irqrestore(&stream->lock, flags); } @@ -1071,8 +1081,10 @@ static ssize_t kbasep_tlstream_read( break; } - /* Verify if there was no overflow in selected stream. 
Make sure - * that if incorrect size was used we will know about it. */ + /* If the rbi still points to the packet we just processed + * then there was no overflow so we add the copied size to + * copy_len and move rbi on to the next packet + */ smp_rmb(); if (atomic_read(&tl_stream[stype]->rbi) == rb_idx_raw) { copy_len += rb_size; @@ -1140,7 +1152,7 @@ static void kbasep_tlstream_timeline_header( const struct tp_desc *tp_desc, u32 tp_count) { - const u8 tv = KBASEP_TLSTREAM_VERSION; /* tlstream version */ + const u8 tv = SWTRACE_VERSION; /* protocol version */ const u8 ps = sizeof(void *); /* pointer size */ size_t msg_size = sizeof(tv) + sizeof(ps) + sizeof(tp_count); char *buffer; @@ -1571,11 +1583,11 @@ void kbase_tlstream_tl_del_atom(void *atom) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void kbase_tlstream_tl_ret_gpu_ctx(void *gpu, void *context) +void kbase_tlstream_tl_ret_ctx_lpu(void *context, void *lpu) { - const u32 msg_id = KBASE_TL_RET_GPU_CTX; + const u32 msg_id = KBASE_TL_RET_CTX_LPU; const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(gpu) + sizeof(context); + sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(lpu); unsigned long flags; char *buffer; size_t pos = 0; @@ -1587,10 +1599,10 @@ void kbase_tlstream_tl_ret_gpu_ctx(void *gpu, void *context) pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &gpu, sizeof(gpu)); pos = kbasep_tlstream_write_bytes( buffer, pos, &context, sizeof(context)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &lpu, sizeof(lpu)); KBASE_DEBUG_ASSERT(msg_size == pos); kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); @@ -1621,11 +1633,15 @@ void kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void kbase_tlstream_tl_ret_atom_lpu(void *atom, void *lpu) +void 
kbase_tlstream_tl_ret_atom_lpu( + void *atom, void *lpu, const char *attrib_match_list) { const u32 msg_id = KBASE_TL_RET_ATOM_LPU; + const size_t msg_s0 = sizeof(u32) + sizeof(char) + + strnlen(attrib_match_list, STRLEN_MAX); const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(lpu); + sizeof(msg_id) + sizeof(u64) + + sizeof(atom) + sizeof(lpu) + msg_s0; unsigned long flags; char *buffer; size_t pos = 0; @@ -1641,16 +1657,18 @@ void kbase_tlstream_tl_ret_atom_lpu(void *atom, void *lpu) buffer, pos, &atom, sizeof(atom)); pos = kbasep_tlstream_write_bytes( buffer, pos, &lpu, sizeof(lpu)); + pos = kbasep_tlstream_write_string( + buffer, pos, attrib_match_list, msg_s0); KBASE_DEBUG_ASSERT(msg_size == pos); kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void kbase_tlstream_tl_nret_gpu_ctx(void *gpu, void *context) +void kbase_tlstream_tl_nret_ctx_lpu(void *context, void *lpu) { - const u32 msg_id = KBASE_TL_NRET_GPU_CTX; + const u32 msg_id = KBASE_TL_NRET_CTX_LPU; const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(gpu) + sizeof(context); + sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(lpu); unsigned long flags; char *buffer; size_t pos = 0; @@ -1662,10 +1680,10 @@ void kbase_tlstream_tl_nret_gpu_ctx(void *gpu, void *context) pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &gpu, sizeof(gpu)); pos = kbasep_tlstream_write_bytes( buffer, pos, &context, sizeof(context)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &lpu, sizeof(lpu)); KBASE_DEBUG_ASSERT(msg_size == pos); kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); @@ -1696,6 +1714,31 @@ void kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } +void kbase_tlstream_tl_dep_atom_atom(void *atom1, void *atom2) +{ + const u32 msg_id = 
KBASE_TL_DEP_ATOM_ATOM; + const size_t msg_size = + sizeof(msg_id) + sizeof(u64) + sizeof(atom1) + sizeof(atom2); + unsigned long flags; + char *buffer; + size_t pos = 0; + + buffer = kbasep_tlstream_msgbuf_acquire( + TL_STREAM_TYPE_OBJ, + msg_size, &flags); + KBASE_DEBUG_ASSERT(buffer); + + pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_tlstream_write_timestamp(buffer, pos); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &atom1, sizeof(atom1)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &atom2, sizeof(atom2)); + KBASE_DEBUG_ASSERT(msg_size == pos); + + kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); +} + void kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu) { const u32 msg_id = KBASE_TL_NRET_ATOM_LPU; @@ -1955,11 +1998,11 @@ void kbase_tlstream_aux_job_softstop(u32 js_id) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); } -void kbase_tlstream_aux_pagefault(u32 mmu_as, u32 page_count) +void kbase_tlstream_aux_pagefault(u32 ctx_nr, u32 page_count) { const u32 msg_id = KBASE_AUX_PAGEFAULT; const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(mmu_as) + + sizeof(msg_id) + sizeof(u64) + sizeof(ctx_nr) + sizeof(page_count); unsigned long flags; char *buffer; @@ -1971,7 +2014,7 @@ void kbase_tlstream_aux_pagefault(u32 mmu_as, u32 page_count) pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes(buffer, pos, &mmu_as, sizeof(mmu_as)); + pos = kbasep_tlstream_write_bytes(buffer, pos, &ctx_nr, sizeof(ctx_nr)); pos = kbasep_tlstream_write_bytes( buffer, pos, &page_count, sizeof(page_count)); KBASE_DEBUG_ASSERT(msg_size == pos); diff --git a/drivers/gpu/arm/midgard/mali_kbase_tlstream.h b/drivers/gpu/arm/midgard/mali_kbase_tlstream.h index 30171044ee1d..b99a6fc93d54 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_tlstream.h +++ b/drivers/gpu/arm/midgard/mali_kbase_tlstream.h @@ 
-215,14 +215,14 @@ void kbase_tlstream_tl_del_ctx(void *context); void kbase_tlstream_tl_del_atom(void *atom); /** - * kbase_tlstream_tl_ret_gpu_ctx - retain GPU by context - * @gpu: name of the GPU object + * kbase_tlstream_tl_ret_ctx_lpu - retain context by LPU * @context: name of the context object + * @lpu: name of the Logical Processing Unit object * - * Function emits a timeline message informing that GPU object is being held - * by context and must not be deleted unless it is released. + * Function emits a timeline message informing that context is being held + * by LPU and must not be deleted unless it is released. */ -void kbase_tlstream_tl_ret_gpu_ctx(void *gpu, void *context); +void kbase_tlstream_tl_ret_ctx_lpu(void *context, void *lpu); /** * kbase_tlstream_tl_ret_atom_ctx - retain atom by context @@ -236,23 +236,25 @@ void kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context); /** * kbase_tlstream_tl_ret_atom_lpu - retain atom by LPU - * @atom: name of the atom object - * @lpu: name of the Logical Processing Unit object + * @atom: name of the atom object + * @lpu: name of the Logical Processing Unit object + * @attrib_match_list: list containing match operator attributes * * Function emits a timeline message informing that atom object is being held * by LPU and must not be deleted unless it is released. */ -void kbase_tlstream_tl_ret_atom_lpu(void *atom, void *lpu); +void kbase_tlstream_tl_ret_atom_lpu( + void *atom, void *lpu, const char *attrib_match_list); /** - * kbase_tlstream_tl_nret_gpu_ctx - release GPU by context - * @gpu: name of the GPU object + * kbase_tlstream_tl_nret_ctx_lpu - release context by LPU * @context: name of the context object + * @lpu: name of the Logical Processing Unit object * - * Function emits a timeline message informing that GPU object is being released - * by context. + * Function emits a timeline message informing that context is being released + * by LPU object. 
*/ -void kbase_tlstream_tl_nret_gpu_ctx(void *gpu, void *context); +void kbase_tlstream_tl_nret_ctx_lpu(void *context, void *lpu); /** * kbase_tlstream_tl_nret_atom_ctx - release atom by context @@ -314,6 +316,16 @@ void kbase_tlstream_tl_ret_atom_as(void *atom, void *as); */ void kbase_tlstream_tl_nret_atom_as(void *atom, void *as); +/** + * kbase_tlstream_tl_dep_atom_atom - parent atom depends on child atom + * @atom1: name of the child atom object + * @atom2: name of the parent atom object that depends on child atom + * + * Function emits a timeline message informing that parent atom waits for + * child atom object to be completed before starting its execution. + */ +void kbase_tlstream_tl_dep_atom_atom(void *atom1, void *atom2); + /** * kbase_tlstream_tl_attrib_atom_config - atom job slot attributes * @atom: name of the atom object @@ -363,10 +375,10 @@ void kbase_tlstream_aux_job_softstop(u32 js_id); /** * kbase_tlstream_aux_pagefault - timeline message: MMU page fault event * resulting in new pages being mapped - * @mmu_as: MMU address space number + * @ctx_nr: kernel context number * @page_count: number of currently used pages */ -void kbase_tlstream_aux_pagefault(u32 mmu_as, u32 page_count); +void kbase_tlstream_aux_pagefault(u32 ctx_nr, u32 page_count); /** * kbase_tlstream_aux_pagesalloc - timeline message: total number of allocated diff --git a/drivers/gpu/arm/midgard/mali_kbase_uku.h b/drivers/gpu/arm/midgard/mali_kbase_uku.h index 5943acb19874..39514685b752 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_uku.h +++ b/drivers/gpu/arm/midgard/mali_kbase_uku.h @@ -41,8 +41,13 @@ #include "mali_kbase_gpuprops_types.h" +/* + * 10.1: + * - Do mmap in kernel for SAME_VA memory allocations rather than + * calling back into the kernel as a 2nd stage of the allocation request. 
+ */ #define BASE_UK_VERSION_MAJOR 10 -#define BASE_UK_VERSION_MINOR 0 +#define BASE_UK_VERSION_MINOR 1 struct kbase_uk_mem_alloc { union uk_header header; @@ -354,7 +359,7 @@ struct kbase_uk_tlstream_flush { #if MALI_UNIT_TEST /** - * struct kbase_uk_tlstream_acquire - User/Kernel space data exchange structure + * struct kbase_uk_tlstream_test - User/Kernel space data exchange structure * @header: UK structure header * @tpw_count: number of trace point writers in each context * @msg_delay: time delay between tracepoints from one writer in milliseconds @@ -375,7 +380,7 @@ struct kbase_uk_tlstream_test { }; /** - * struct kbase_uk_tlstream_acquire - User/Kernel space data exchange structure + * struct kbase_uk_tlstream_stats - User/Kernel space data exchange structure * @header: UK structure header * @bytes_collected: number of bytes read by user * @bytes_generated: number of bytes generated by tracepoints @@ -393,6 +398,20 @@ struct kbase_uk_tlstream_stats { #endif /* MALI_UNIT_TEST */ #endif /* MALI_KTLSTREAM_ENABLED */ +/** + * struct kbase_uk_prfcnt_values - data for the KBASE_FUNC_SET_PRFCNT_VALUES ioctl + * @header: UK structure header + * @data: Counter samples for the dummy model + * @size: Size of the counter sample data + */ +struct kbase_uk_prfcnt_values { + union uk_header header; + /* IN */ + u32 *data; + u32 size; +}; + + enum kbase_uk_function_id { KBASE_FUNC_MEM_ALLOC = (UK_FUNC_ID + 0), KBASE_FUNC_MEM_IMPORT = (UK_FUNC_ID + 1), @@ -456,6 +475,10 @@ enum kbase_uk_function_id { KBASE_FUNC_HWCNT_READER_SETUP = (UK_FUNC_ID + 36), +#ifdef SUPPORT_MALI_NO_MALI + KBASE_FUNC_SET_PRFCNT_VALUES = (UK_FUNC_ID + 37), +#endif + KBASE_FUNC_MAX }; diff --git a/drivers/gpu/arm/midgard/mali_kbase_vinstr.c b/drivers/gpu/arm/midgard/mali_kbase_vinstr.c index bfa8bfa0e14f..d3d27e2958d7 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_vinstr.c +++ b/drivers/gpu/arm/midgard/mali_kbase_vinstr.c @@ -115,6 +115,7 @@ struct kbase_vinstr_context { * @dump_size: size 
of one dump buffer in bytes * @bitmap: bitmap request for JM, TILER, SHADER and MMU counters * @legacy_buffer: userspace hwcnt dump buffer (legacy interface) + * @kernel_buffer: kernel hwcnt dump buffer (kernel client interface) * @accum_buffer: temporary accumulation buffer for preserving counters * @dump_time: next time this clients shall request hwcnt dump * @dump_interval: interval between periodic hwcnt dumps @@ -134,6 +135,7 @@ struct kbase_vinstr_client { size_t dump_size; u32 bitmap[4]; void __user *legacy_buffer; + void *kernel_buffer; void *accum_buffer; u64 dump_time; u32 dump_interval; @@ -225,11 +227,11 @@ static void hwcnt_bitmap_union(u32 dst[4], u32 src[4]) dst[MMU_L2_HWCNT_BM] |= src[MMU_L2_HWCNT_BM]; } -static size_t kbasep_vinstr_dump_size(struct kbase_vinstr_context *vinstr_ctx) +size_t kbase_vinstr_dump_size(struct kbase_device *kbdev) { - struct kbase_device *kbdev = vinstr_ctx->kctx->kbdev; size_t dump_size; +#ifndef CONFIG_MALI_NO_MALI if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_V4)) { u32 nr_cg; @@ -237,7 +239,9 @@ static size_t kbasep_vinstr_dump_size(struct kbase_vinstr_context *vinstr_ctx) dump_size = nr_cg * NR_CNT_BLOCKS_PER_GROUP * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT; - } else { + } else +#endif /* CONFIG_MALI_NO_MALI */ + { /* assume v5 for now */ base_gpu_props *props = &kbdev->gpu_props.props; u32 nr_l2 = props->l2_props.num_l2_slices; @@ -251,6 +255,13 @@ static size_t kbasep_vinstr_dump_size(struct kbase_vinstr_context *vinstr_ctx) } return dump_size; } +KBASE_EXPORT_TEST_API(kbase_vinstr_dump_size); + +static size_t kbasep_vinstr_dump_size_ctx( + struct kbase_vinstr_context *vinstr_ctx) +{ + return kbase_vinstr_dump_size(vinstr_ctx->kctx->kbdev); +} static int kbasep_vinstr_map_kernel_dump_buffer( struct kbase_vinstr_context *vinstr_ctx) @@ -261,7 +272,7 @@ static int kbasep_vinstr_map_kernel_dump_buffer( u16 va_align = 0; flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_WR; - vinstr_ctx->dump_size = 
kbasep_vinstr_dump_size(vinstr_ctx); + vinstr_ctx->dump_size = kbasep_vinstr_dump_size_ctx(vinstr_ctx); nr_pages = PFN_UP(vinstr_ctx->dump_size); reg = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, @@ -352,22 +363,22 @@ static void kbasep_vinstr_destroy_kctx(struct kbase_vinstr_context *vinstr_ctx) /** * kbasep_vinstr_attach_client - Attach a client to the vinstr core - * @vinstr_ctx: vinstr context - * @buffer_count: requested number of dump buffers - * @bitmap: bitmaps describing which counters should be enabled - * @argp: pointer where notification descriptor shall be stored + * @vinstr_ctx: vinstr context + * @buffer_count: requested number of dump buffers + * @bitmap: bitmaps describing which counters should be enabled + * @argp: pointer where notification descriptor shall be stored + * @kernel_buffer: pointer to kernel side buffer * * Return: vinstr opaque client handle or NULL on failure */ static struct kbase_vinstr_client *kbasep_vinstr_attach_client( struct kbase_vinstr_context *vinstr_ctx, u32 buffer_count, - u32 bitmap[4], void *argp) + u32 bitmap[4], void *argp, void *kernel_buffer) { struct task_struct *thread = NULL; struct kbase_vinstr_client *cli; KBASE_DEBUG_ASSERT(vinstr_ctx); - KBASE_DEBUG_ASSERT(argp); KBASE_DEBUG_ASSERT(buffer_count >= 0); KBASE_DEBUG_ASSERT(buffer_count <= MAX_BUFFER_COUNT); KBASE_DEBUG_ASSERT(!(buffer_count & (buffer_count - 1))); @@ -405,7 +416,7 @@ static struct kbase_vinstr_client *kbasep_vinstr_attach_client( /* The GPU resets the counter block every time there is a request * to dump it. We need a per client kernel buffer for accumulating * the counters. 
*/ - cli->dump_size = kbasep_vinstr_dump_size(vinstr_ctx); + cli->dump_size = kbasep_vinstr_dump_size_ctx(vinstr_ctx); cli->accum_buffer = kzalloc(cli->dump_size, GFP_KERNEL); if (!cli->accum_buffer) goto error; @@ -437,6 +448,8 @@ static struct kbase_vinstr_client *kbasep_vinstr_attach_client( O_RDONLY | O_CLOEXEC); if (0 > *fd) goto error; + } else if (kernel_buffer) { + cli->kernel_buffer = kernel_buffer; } else { cli->legacy_buffer = (void __user *)argp; } @@ -475,11 +488,7 @@ error: return NULL; } -/** - * kbasep_vinstr_detach_client - Detach a client from the vinstr core - * @cli: Pointer to vinstr client - */ -static void kbasep_vinstr_detach_client(struct kbase_vinstr_client *cli) +void kbase_vinstr_detach_client(struct kbase_vinstr_client *cli) { struct kbase_vinstr_context *vinstr_ctx; struct kbase_vinstr_client *iter, *tmp; @@ -540,6 +549,7 @@ static void kbasep_vinstr_detach_client(struct kbase_vinstr_client *cli) if (thread) kthread_stop(thread); } +KBASE_EXPORT_TEST_API(kbase_vinstr_detach_client); /* Accumulate counters in the dump buffer */ static void accum_dump_buffer(void *dst, void *src, size_t dump_size) @@ -702,9 +712,12 @@ static void patch_dump_buffer_hdr_v5( static void accum_clients(struct kbase_vinstr_context *vinstr_ctx) { struct kbase_vinstr_client *iter; - int v4; + int v4 = 0; +#ifndef CONFIG_MALI_NO_MALI v4 = kbase_hw_has_feature(vinstr_ctx->kbdev, BASE_HW_FEATURE_V4); +#endif + list_for_each_entry(iter, &vinstr_ctx->idle_clients, list) { /* Don't bother accumulating clients whose hwcnt requests * have not yet been honoured. */ @@ -791,6 +804,11 @@ static int kbasep_vinstr_collect_and_accumulate( { int rcode; +#ifdef CONFIG_MALI_NO_MALI + /* The dummy model needs the CPU mapping. */ + gpu_model_set_dummy_prfcnt_base_cpu(vinstr_ctx->cpu_va); +#endif + /* Request HW counters dump. * Disable preemption to make dump timestamp more accurate. 
*/ preempt_disable(); @@ -865,6 +883,23 @@ static int kbasep_vinstr_fill_dump_buffer_legacy( return rcode; } +/** + * kbasep_vinstr_fill_dump_buffer_kernel - copy accumulated counters to buffer + * allocated in kernel space + * @cli: requesting client + * + * Return: zero on success + * + * This is part of the kernel client interface. + */ +static int kbasep_vinstr_fill_dump_buffer_kernel( + struct kbase_vinstr_client *cli) +{ + memcpy(cli->kernel_buffer, cli->accum_buffer, cli->dump_size); + + return 0; +} + /** * kbasep_vinstr_reprogram - reprogram hwcnt set collected by inst * @vinstr_ctx: vinstr context @@ -910,6 +945,8 @@ static int kbasep_vinstr_update_client( if (cli->buffer_count) rcode = kbasep_vinstr_fill_dump_buffer( cli, timestamp, event_id); + else if (cli->kernel_buffer) + rcode = kbasep_vinstr_fill_dump_buffer_kernel(cli); else rcode = kbasep_vinstr_fill_dump_buffer_legacy(cli); @@ -1299,14 +1336,18 @@ static long kbasep_vinstr_hwcnt_reader_ioctl_disable_event( static long kbasep_vinstr_hwcnt_reader_ioctl_get_hwver( struct kbase_vinstr_client *cli, u32 __user *hwver) { +#ifndef CONFIG_MALI_NO_MALI struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx; - u32 ver; +#endif + + u32 ver = 5; +#ifndef CONFIG_MALI_NO_MALI KBASE_DEBUG_ASSERT(vinstr_ctx); + if (kbase_hw_has_feature(vinstr_ctx->kbdev, BASE_HW_FEATURE_V4)) + ver = 4; +#endif - ver = 4; - if (!kbase_hw_has_feature(vinstr_ctx->kbdev, BASE_HW_FEATURE_V4)) - ver = 5; return put_user(ver, hwver); } @@ -1451,7 +1492,7 @@ static int kbasep_vinstr_hwcnt_reader_release(struct inode *inode, cli = filp->private_data; KBASE_DEBUG_ASSERT(cli); - kbasep_vinstr_detach_client(cli); + kbase_vinstr_detach_client(cli); return 0; } @@ -1525,7 +1566,8 @@ int kbase_vinstr_hwcnt_reader_setup(struct kbase_vinstr_context *vinstr_ctx, vinstr_ctx, setup->buffer_count, bitmap, - &setup->fd); + &setup->fd, + NULL); if (!cli) return -ENOMEM; @@ -1557,7 +1599,8 @@ int kbase_vinstr_legacy_hwc_setup( vinstr_ctx, 0, bitmap, 
- (void *)(long)setup->dump_buffer); + (void *)(long)setup->dump_buffer, + NULL); if (!(*cli)) return -ENOMEM; @@ -1565,13 +1608,37 @@ int kbase_vinstr_legacy_hwc_setup( if (!*cli) return -EINVAL; - kbasep_vinstr_detach_client(*cli); + kbase_vinstr_detach_client(*cli); *cli = NULL; } return 0; } +struct kbase_vinstr_client *kbase_vinstr_hwcnt_kernel_setup( + struct kbase_vinstr_context *vinstr_ctx, + struct kbase_uk_hwcnt_reader_setup *setup, + void *kernel_buffer) +{ + u32 bitmap[4]; + + if (!vinstr_ctx || !setup || !kernel_buffer) + return NULL; + + bitmap[SHADER_HWCNT_BM] = setup->shader_bm; + bitmap[TILER_HWCNT_BM] = setup->tiler_bm; + bitmap[MMU_L2_HWCNT_BM] = setup->mmu_l2_bm; + bitmap[JM_HWCNT_BM] = setup->jm_bm; + + return kbasep_vinstr_attach_client( + vinstr_ctx, + 0, + bitmap, + NULL, + kernel_buffer); +} +KBASE_EXPORT_TEST_API(kbase_vinstr_hwcnt_kernel_setup); + int kbase_vinstr_hwc_dump(struct kbase_vinstr_client *cli, enum base_hwcnt_reader_event event_id) { @@ -1615,6 +1682,7 @@ exit: return rcode; } +KBASE_EXPORT_TEST_API(kbase_vinstr_hwc_dump); int kbase_vinstr_hwc_clear(struct kbase_vinstr_client *cli) { @@ -1658,6 +1726,11 @@ void kbase_vinstr_hwc_suspend(struct kbase_vinstr_context *vinstr_ctx) KBASE_DEBUG_ASSERT(vinstr_ctx); mutex_lock(&vinstr_ctx->lock); + if (!vinstr_ctx->nclients || vinstr_ctx->suspended) { + mutex_unlock(&vinstr_ctx->lock); + return; + } + kbasep_vinstr_collect_and_accumulate(vinstr_ctx, &unused); vinstr_ctx->suspended = true; vinstr_ctx->suspended_clients = vinstr_ctx->waiting_clients; @@ -1670,6 +1743,11 @@ void kbase_vinstr_hwc_resume(struct kbase_vinstr_context *vinstr_ctx) KBASE_DEBUG_ASSERT(vinstr_ctx); mutex_lock(&vinstr_ctx->lock); + if (!vinstr_ctx->nclients || !vinstr_ctx->suspended) { + mutex_unlock(&vinstr_ctx->lock); + return; + } + vinstr_ctx->suspended = false; vinstr_ctx->waiting_clients = vinstr_ctx->suspended_clients; vinstr_ctx->reprogram = true; diff --git a/drivers/gpu/arm/midgard/mali_kbase_vinstr.h 
b/drivers/gpu/arm/midgard/mali_kbase_vinstr.h index 12340e5c647d..d32462aec653 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_vinstr.h +++ b/drivers/gpu/arm/midgard/mali_kbase_vinstr.h @@ -66,6 +66,22 @@ int kbase_vinstr_legacy_hwc_setup( struct kbase_vinstr_client **cli, struct kbase_uk_hwcnt_setup *setup); +/** + * kbase_vinstr_hwcnt_kernel_setup - configure hw counters for kernel side + * client + * @vinstr_ctx: vinstr context + * @setup: reader's configuration + * @kernel_buffer: pointer to dump buffer + * + * setup->buffer_count and setup->fd are not used for kernel side clients. + * + * Return: pointer to client structure, or NULL on failure + */ +struct kbase_vinstr_client *kbase_vinstr_hwcnt_kernel_setup( + struct kbase_vinstr_context *vinstr_ctx, + struct kbase_uk_hwcnt_reader_setup *setup, + void *kernel_buffer); + /** * kbase_vinstr_hwc_dump - issue counter dump for vinstr client * @cli: pointer to vinstr client @@ -100,5 +116,19 @@ void kbase_vinstr_hwc_suspend(struct kbase_vinstr_context *vinstr_ctx); */ void kbase_vinstr_hwc_resume(struct kbase_vinstr_context *vinstr_ctx); +/** + * kbase_vinstr_dump_size - Return required size of dump buffer + * @kbdev: device pointer + * + * Return : buffer size in bytes + */ +size_t kbase_vinstr_dump_size(struct kbase_device *kbdev); + +/** + * kbase_vinstr_detach_client - Detach a client from the vinstr core + * @cli: Pointer to vinstr client + */ +void kbase_vinstr_detach_client(struct kbase_vinstr_client *cli); + #endif /* _KBASE_VINSTR_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_midg_coherency.h b/drivers/gpu/arm/midgard/mali_midg_coherency.h new file mode 100644 index 000000000000..a509cbd5f175 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_midg_coherency.h @@ -0,0 +1,26 @@ +/* + * + * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +#ifndef _MIDG_COHERENCY_H_ +#define _MIDG_COHERENCY_H_ + +#define COHERENCY_ACE_LITE 0 +#define COHERENCY_ACE 1 +#define COHERENCY_NONE 31 +#define COHERENCY_FEATURE_BIT(x) (1 << (x)) + +#endif /* _MIDG_COHERENCY_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_midg_regmap.h b/drivers/gpu/arm/midgard/mali_midg_regmap.h index c3def83dba24..e261d1432052 100644 --- a/drivers/gpu/arm/midgard/mali_midg_regmap.h +++ b/drivers/gpu/arm/midgard/mali_midg_regmap.h @@ -15,11 +15,12 @@ - - #ifndef _MIDGARD_REGMAP_H_ #define _MIDGARD_REGMAP_H_ +#include "mali_midg_coherency.h" +#include "mali_kbase_gpu_id.h" + /* * Begin Register Offsets */ @@ -441,39 +442,6 @@ /* Outer coherent, write alloc inner */ #define AS_MEMATTR_INDEX_OUTER_WA 4 -/* GPU_ID register */ -#define GPU_ID_VERSION_STATUS_SHIFT 0 -#define GPU_ID_VERSION_MINOR_SHIFT 4 -#define GPU_ID_VERSION_MAJOR_SHIFT 12 -#define GPU_ID_VERSION_PRODUCT_ID_SHIFT 16 -#define GPU_ID_VERSION_STATUS (0xF << GPU_ID_VERSION_STATUS_SHIFT) -#define GPU_ID_VERSION_MINOR (0xFF << GPU_ID_VERSION_MINOR_SHIFT) -#define GPU_ID_VERSION_MAJOR (0xF << GPU_ID_VERSION_MAJOR_SHIFT) -#define GPU_ID_VERSION_PRODUCT_ID (0xFFFF << GPU_ID_VERSION_PRODUCT_ID_SHIFT) - -/* Values for GPU_ID_VERSION_PRODUCT_ID bitfield */ -#define GPU_ID_PI_T60X 0x6956 -#define GPU_ID_PI_T62X 0x0620 -#define GPU_ID_PI_T76X 0x0750 -#define GPU_ID_PI_T72X 0x0720 -#define GPU_ID_PI_TFRX 0x0880 -#define GPU_ID_PI_T86X 0x0860 -#define GPU_ID_PI_T82X 0x0820 -#define GPU_ID_PI_T83X 0x0830 - -/* Values for 
GPU_ID_VERSION_STATUS field for PRODUCT_ID GPU_ID_PI_T60X */ -#define GPU_ID_S_15DEV0 0x1 -#define GPU_ID_S_EAC 0x2 - -/* Helper macro to create a GPU_ID assuming valid values for id, major, minor, status */ -#define GPU_ID_MAKE(id, major, minor, status) \ - (((id) << GPU_ID_VERSION_PRODUCT_ID_SHIFT) | \ - ((major) << GPU_ID_VERSION_MAJOR_SHIFT) | \ - ((minor) << GPU_ID_VERSION_MINOR_SHIFT) | \ - ((status) << GPU_ID_VERSION_STATUS_SHIFT)) - -/* End GPU_ID register */ - /* JS_FEATURES register */ #define JS_FEATURE_NULL_JOB (1u << 1) @@ -520,13 +488,6 @@ /* End THREAD_* registers */ -/* COHERENCY_* values*/ -#define COHERENCY_ACE_LITE 0 -#define COHERENCY_ACE 1 -#define COHERENCY_NONE 31 -#define COHERENCY_FEATURE_BIT(x) (1 << (x)) -/* End COHERENCY_* values */ - /* SHADER_CONFIG register */ #define SC_ALT_COUNTERS (1ul << 3) diff --git a/drivers/gpu/arm/midgard/platform/devicetree/Kbuild b/drivers/gpu/arm/midgard/platform/devicetree/Kbuild index 679945bec523..b5a49f36e558 100644 --- a/drivers/gpu/arm/midgard/platform/devicetree/Kbuild +++ b/drivers/gpu/arm/midgard/platform/devicetree/Kbuild @@ -13,4 +13,10 @@ # - +ifeq ($(CONFIG_MALI_MIDGARD),y) +obj-y += mali_kbase_runtime_pm.c +obj-y += mali_kbase_config_devicetree.c +else ifeq ($(CONFIG_MALI_MIDGARD),m) +SRC += platform/devicetree/mali_kbase_runtime_pm.c +SRC += platform/devicetree/mali_kbase_config_devicetree.c +endif diff --git a/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c b/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c index 046302e8e41f..b2a7c93f12a9 100644 --- a/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c +++ b/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c @@ -15,9 +15,17 @@ +#include + int kbase_platform_early_init(void) { /* Nothing needed at this stage */ return 0; } +static struct kbase_platform_config dummy_platform_config; + +struct kbase_platform_config 
*kbase_get_platform_config(void) +{ + return &dummy_platform_config; +} diff --git a/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h b/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h index f92961c53a01..d4813f7f8a35 100644 --- a/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h +++ b/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h @@ -70,12 +70,6 @@ */ #define PLATFORM_FUNCS (NULL) -/** Power model for IPA - * - * Attached value: pointer to @ref mali_pa_model_ops - */ -#define POWER_MODEL_CALLBACKS (NULL) - extern struct kbase_pm_callback_conf pm_callbacks; /** diff --git a/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c b/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c index 0b7e39502b2b..aa4376afd3ba 100644 --- a/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c +++ b/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c @@ -45,14 +45,7 @@ int kbase_device_runtime_init(struct kbase_device *kbdev) { dev_dbg(kbdev->dev, "kbase_device_runtime_init\n"); pm_runtime_enable(kbdev->dev); -#ifdef CONFIG_MALI_MIDGARD_DEBUG_SYS - { - int err = kbase_platform_create_sysfs_file(kbdev->dev); - - if (err) - return err; - } -#endif /* CONFIG_MALI_MIDGARD_DEBUG_SYS */ + return 0; } diff --git a/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_juno_soc.c b/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_juno_soc.c index 82669510e941..3baf3d96d41a 100644 --- a/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_juno_soc.c +++ b/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_juno_soc.c @@ -16,9 +16,6 @@ #include -#ifdef CONFIG_DEVFREQ_THERMAL -#include -#endif #include #include #include @@ -68,74 +65,6 @@ struct kbase_pm_callback_conf pm_callbacks = { .power_resume_callback = NULL }; -#ifdef CONFIG_DEVFREQ_THERMAL - -#define FALLBACK_STATIC_TEMPERATURE 55000 - -static 
unsigned long juno_model_static_power(unsigned long voltage) -{ - struct thermal_zone_device *tz; - unsigned long temperature, temp; - unsigned long temp_squared, temp_cubed, temp_scaling_factor; - const unsigned long coefficient = (410UL << 20) / (729000000UL >> 10); - const unsigned long voltage_cubed = (voltage * voltage * voltage) >> 10; - - tz = thermal_zone_get_zone_by_name("gpu"); - if (IS_ERR(tz)) { - pr_warn_ratelimited("Error getting gpu thermal zone (%ld), not yet ready?\n", - PTR_ERR(tz)); - temperature = FALLBACK_STATIC_TEMPERATURE; - } else { - int ret; - - ret = tz->ops->get_temp(tz, &temperature); - if (ret) { - pr_warn_ratelimited("Error reading temperature for gpu thermal zone: %d\n", - ret); - temperature = FALLBACK_STATIC_TEMPERATURE; - } - } - - /* Calculate the temperature scaling factor. To be applied to the - * voltage scaled power. - */ - temp = temperature / 1000; - temp_squared = temp * temp; - temp_cubed = temp_squared * temp; - temp_scaling_factor = - (2 * temp_cubed) - - (80 * temp_squared) - + (4700 * temp) - + 32000; - - return (((coefficient * voltage_cubed) >> 20) - * temp_scaling_factor) - / 1000000; -} - -static unsigned long juno_model_dynamic_power(unsigned long freq, - unsigned long voltage) -{ - /* The inputs: freq (f) is in Hz, and voltage (v) in mV. - * The coefficient (c) is in mW/(MHz mV mV). 
- * - * This function calculates the dynamic power after this formula: - * Pdyn (mW) = c (mW/(MHz*mV*mV)) * v (mV) * v (mV) * f (MHz) - */ - const unsigned long v2 = (voltage * voltage) / 1000; /* m*(V*V) */ - const unsigned long f_mhz = freq / 1000000; /* MHz */ - const unsigned long coefficient = 3600; /* mW/(MHz*mV*mV) */ - - return (coefficient * v2 * f_mhz) / 1000000; /* mW */ -} - -struct devfreq_cooling_ops juno_model_ops = { - .get_static_power = juno_model_static_power, - .get_dynamic_power = juno_model_dynamic_power, -}; - -#endif /* CONFIG_DEVFREQ_THERMAL */ - /* * Juno Secure Mode integration */ diff --git a/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_platform.h b/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_platform.h index fa5e9e9a5b11..5fc6d9e1c312 100644 --- a/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_platform.h +++ b/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_platform.h @@ -70,16 +70,6 @@ */ #define PLATFORM_FUNCS (NULL) -/** Power model for IPA - * - * Attached value: pointer to @ref mali_pa_model_ops - */ -#ifdef CONFIG_DEVFREQ_THERMAL -#define POWER_MODEL_CALLBACKS (&juno_model_ops) -#else -#define POWER_MODEL_CALLBACKS (NULL) -#endif - /** * Secure mode switch * diff --git a/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h index ac5060af6a7e..6384586371d0 100644 --- a/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h +++ b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h @@ -81,12 +81,6 @@ */ #define PLATFORM_FUNCS (NULL) -/** Power model for IPA - * - * Attached value: pointer to @ref mali_pa_model_ops - */ -#define POWER_MODEL_CALLBACKS (NULL) - /** * Secure mode switch * diff --git a/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h 
b/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h index 11c320ecc1f7..2b91d72bd93c 100644 --- a/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h +++ b/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h @@ -70,12 +70,6 @@ */ #define PLATFORM_FUNCS (NULL) -/** Power model for IPA - * - * Attached value: pointer to @ref mali_pa_model_ops - */ -#define POWER_MODEL_CALLBACKS (NULL) - /** * Secure mode switch * diff --git a/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h index 9bc2985fef0f..d269c2591490 100644 --- a/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h +++ b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h @@ -72,12 +72,6 @@ */ #define PLATFORM_FUNCS (NULL) -/** Power model for IPA - * - * Attached value: pointer to @ref mali_pa_model_ops - */ -#define POWER_MODEL_CALLBACKS (NULL) - /** * Secure mode switch * -- 2.34.1