From 69db1b530898a8bfc559bd8035a95c20d54dcdb9 Mon Sep 17 00:00:00 2001
From: Jacob Chen
Date: Thu, 12 Jan 2017 11:09:48 +0800
Subject: [PATCH] MALI: rockchip: linux: upgrade to DDK r13p0-00rel0

Since r9p0 can't recover from the error "DATA_INVALID_FAULT", we have
to upgrade to r13p0.

Change-Id: Iac820870159def15dd4c214d0d98f81f81480340
Signed-off-by: Jacob Chen
---
 drivers/gpu/arm/midgard_for_linux/Kbuild | 25 +-
 drivers/gpu/arm/midgard_for_linux/Kconfig | 87 +-
 drivers/gpu/arm/midgard_for_linux/Makefile | 6 +-
 .../gpu/mali_kbase_cache_policy_backend.c | 9 +-
 .../gpu/mali_kbase_cache_policy_backend.h | 10 +-
 .../backend/gpu/mali_kbase_devfreq.c | 13 +-
 .../backend/gpu/mali_kbase_device_hw.c | 6 +-
 .../backend/gpu/mali_kbase_gpu.c | 7 +-
 .../backend/gpu/mali_kbase_gpuprops_backend.c | 14 +-
 .../backend/gpu/mali_kbase_instr_backend.c | 88 +-
 .../backend/gpu/mali_kbase_instr_defs.h | 6 +-
 .../backend/gpu/mali_kbase_irq_linux.c | 8 +-
 .../backend/gpu/mali_kbase_jm_defs.h | 9 +-
 .../backend/gpu/mali_kbase_jm_hw.c | 380 ++--
 .../backend/gpu/mali_kbase_jm_internal.h | 4 +-
 .../backend/gpu/mali_kbase_jm_rb.c | 289 ++-
 .../backend/gpu/mali_kbase_js_affinity.c | 23 +-
 .../backend/gpu/mali_kbase_js_affinity.h | 10 +-
 .../backend/gpu/mali_kbase_js_backend.c | 22 +-
 .../backend/gpu/mali_kbase_mmu_hw_direct.c | 50 +-
 .../backend/gpu/mali_kbase_pm_backend.c | 3 +-
 .../backend/gpu/mali_kbase_pm_coarse_demand.c | 5 +-
 .../backend/gpu/mali_kbase_pm_defs.h | 11 +-
 .../backend/gpu/mali_kbase_pm_demand.c | 5 +-
 .../backend/gpu/mali_kbase_pm_driver.c | 247 ++-
 .../backend/gpu/mali_kbase_pm_internal.h | 20 +-
 .../backend/gpu/mali_kbase_pm_policy.c | 63 +-
 .../gpu/mali_kbase_power_model_simple.c | 15 +-
 .../gpu/mali_kbase_power_model_simple.h | 6 +-
 .../backend/gpu/mali_kbase_time.c | 9 +-
 .../mali_base_hwconfig_features.h | 36 +-
 .../mali_base_hwconfig_issues.h | 198 +-
 .../arm/midgard_for_linux/mali_base_kernel.h | 314 ++-
 .../gpu/arm/midgard_for_linux/mali_kbase.h | 16 +-
 .../mali_kbase_cache_policy.c | 12 +-
 .../arm/midgard_for_linux/mali_kbase_config.h | 19 +-
 .../mali_kbase_config_defaults.h | 13 +-
 .../midgard_for_linux/mali_kbase_context.c | 58 +-
 .../midgard_for_linux/mali_kbase_context.h | 90 +
 .../midgard_for_linux/mali_kbase_core_linux.c | 1750 ++++++++---------
 .../mali_kbase_debug_job_fault.c | 68 +-
 .../mali_kbase_debug_job_fault.h | 3 +-
 .../mali_kbase_debug_mem_view.c | 32 +-
 .../arm/midgard_for_linux/mali_kbase_defs.h | 384 +++-
 .../arm/midgard_for_linux/mali_kbase_device.c | 31 +-
 .../arm/midgard_for_linux/mali_kbase_event.c | 59 +-
 .../midgard_for_linux/mali_kbase_gator_api.c | 239 +--
 .../mali_kbase_gator_hwcnt_names.h | 6 +-
 .../mali_kbase_gator_hwcnt_names_thex.h | 291 +++
 .../arm/midgard_for_linux/mali_kbase_gpu_id.h | 8 +-
 .../mali_kbase_gpu_memory_debugfs.c | 4 +-
 .../mali_kbase_gpu_memory_debugfs.h | 9 +-
 .../mali_kbase_gpuprops_types.h | 2 +-
 .../gpu/arm/midgard_for_linux/mali_kbase_hw.c | 49 +-
 .../mali_kbase_hwaccess_jm.h | 37 +-
 .../arm/midgard_for_linux/mali_kbase_ipa.c | 6 +-
 .../gpu/arm/midgard_for_linux/mali_kbase_jd.c | 939 ++++-----
 .../gpu/arm/midgard_for_linux/mali_kbase_js.c | 978 ++++-----
 .../gpu/arm/midgard_for_linux/mali_kbase_js.h | 35 +-
 .../midgard_for_linux/mali_kbase_js_defs.h | 7 +-
 .../mali_kbase_js_policy_cfs.c | 7 +-
 .../arm/midgard_for_linux/mali_kbase_mem.c | 1286 +++++++++++-
 .../arm/midgard_for_linux/mali_kbase_mem.h | 216 +-
 .../midgard_for_linux/mali_kbase_mem_linux.c | 753 ++++++-
 .../midgard_for_linux/mali_kbase_mem_linux.h | 143 +-
.../midgard_for_linux/mali_kbase_mem_pool.c | 41 +- .../mali_kbase_mem_profile_debugfs.c | 4 +- .../mali_kbase_mem_profile_debugfs.h | 3 +- .../arm/midgard_for_linux/mali_kbase_mmu.c | 503 +++-- .../mali_kbase_mmu_mode_lpae.c | 27 +- .../mali_kbase_platform_fake.c | 6 +- .../gpu/arm/midgard_for_linux/mali_kbase_pm.c | 19 +- .../mali_kbase_regs_history_debugfs.c | 130 ++ .../mali_kbase_regs_history_debugfs.h | 50 + .../arm/midgard_for_linux/mali_kbase_replay.c | 232 ++- .../midgard_for_linux/mali_kbase_softjobs.c | 1227 +++++++++++- .../arm/midgard_for_linux/mali_kbase_sync.h | 11 +- .../midgard_for_linux/mali_kbase_sync_user.c | 3 +- .../midgard_for_linux/mali_kbase_tlstream.c | 330 +++- .../midgard_for_linux/mali_kbase_tlstream.h | 181 +- .../mali_kbase_trace_timeline.c | 4 +- .../mali_kbase_trace_timeline.h | 4 +- .../arm/midgard_for_linux/mali_kbase_uku.h | 74 +- .../arm/midgard_for_linux/mali_kbase_vinstr.c | 401 +++- .../arm/midgard_for_linux/mali_kbase_vinstr.h | 37 +- .../arm/midgard_for_linux/mali_linux_trace.h | 28 +- .../arm/midgard_for_linux/mali_midg_regmap.h | 78 +- .../gpu/arm/midgard_for_linux/mali_timeline.h | 3 +- .../devicetree/mali_kbase_config_platform.h | 8 +- .../juno_soc/mali_kbase_config_juno_soc.c | 86 +- .../juno_soc/mali_kbase_config_platform.h | 10 +- .../vexpress/mali_kbase_config_platform.h | 21 +- .../vexpress/mali_kbase_config_vexpress.c | 2 +- .../vexpress/mali_kbase_cpu_vexpress.c | 77 +- .../vexpress/mali_kbase_cpu_vexpress.h | 26 +- .../mali_kbase_config_platform.h | 8 +- .../mali_kbase_config_platform.h | 8 +- drivers/gpu/arm/midgard_for_linux/sconscript | 15 +- 98 files changed, 9331 insertions(+), 3874 deletions(-) create mode 100644 drivers/gpu/arm/midgard_for_linux/mali_kbase_context.h create mode 100644 drivers/gpu/arm/midgard_for_linux/mali_kbase_gator_hwcnt_names_thex.h create mode 100644 drivers/gpu/arm/midgard_for_linux/mali_kbase_regs_history_debugfs.c create mode 100644 drivers/gpu/arm/midgard_for_linux/mali_kbase_regs_history_debugfs.h diff --git a/drivers/gpu/arm/midgard_for_linux/Kbuild b/drivers/gpu/arm/midgard_for_linux/Kbuild index aa22f35cc1e2..3c6d8ef78d74 100644 --- a/drivers/gpu/arm/midgard_for_linux/Kbuild +++ b/drivers/gpu/arm/midgard_for_linux/Kbuild @@ -16,7 +16,7 @@ include drivers/gpu/arm/midgard_for_linux/default_config.mk # Driver version string which is returned to userspace via an ioctl -MALI_RELEASE_NAME ?= "r9p0-05rel0" +MALI_RELEASE_NAME ?= "r13p0-00rel0" # Paths required for build KBASE_PATH = $(src) @@ -89,7 +89,6 @@ SRC := \ mali_kbase_context.c \ mali_kbase_pm.c \ mali_kbase_config.c \ - mali_kbase_instr.c \ mali_kbase_vinstr.c \ mali_kbase_softjobs.c \ mali_kbase_10969_workaround.c \ @@ -105,25 +104,27 @@ SRC := \ mali_kbase_replay.c \ mali_kbase_mem_profile_debugfs.c \ mali_kbase_mmu_mode_lpae.c \ + mali_kbase_mmu_mode_aarch64.c \ mali_kbase_disjoint_events.c \ mali_kbase_gator_api.c \ mali_kbase_debug_mem_view.c \ mali_kbase_debug_job_fault.c \ mali_kbase_smc.c \ mali_kbase_mem_pool.c \ - mali_kbase_mem_pool_debugfs.c + mali_kbase_mem_pool_debugfs.c \ + mali_kbase_tlstream.c \ + mali_kbase_strings.c \ + mali_kbase_as_fault_debugfs.c -ifeq ($(CONFIG_MALI_MIPE_ENABLED),y) - SRC += mali_kbase_tlstream.c - ifeq ($(MALI_UNIT_TEST),1) - SRC += mali_kbase_tlstream_test.c - endif +ifeq ($(MALI_UNIT_TEST),1) + SRC += mali_kbase_tlstream_test.c endif ifeq ($(MALI_CUSTOMER_RELEASE),0) SRC += mali_kbase_regs_dump_debugfs.c endif + # Job Scheduler Policy: Completely Fair Scheduler SRC += mali_kbase_js_policy_cfs.c @@ -201,13 
+202,7 @@ obj-$(CONFIG_MALI_MIDGARD) += mali_kbase.o # Tell the Linux build system to enable building of our .c files mali_kbase-y := $(SRC:.c=.o) -ifneq ($(wildcard $(src)/internal/Kbuild),) -ifeq ($(MALI_CUSTOMER_RELEASE),0) -# This include may set MALI_BACKEND_PATH and CONFIG_MALI_BACKEND_REAL -include $(src)/internal/Kbuild -mali_kbase-y += $(INTERNAL:.c=.o) -endif -endif +mali_kbase-$(CONFIG_MALI_DMA_FENCE) += mali_kbase_dma_fence.o MALI_BACKEND_PATH ?= backend CONFIG_MALI_BACKEND ?= gpu diff --git a/drivers/gpu/arm/midgard_for_linux/Kconfig b/drivers/gpu/arm/midgard_for_linux/Kconfig index 4f70e31d7b25..201832b262d4 100644 --- a/drivers/gpu/arm/midgard_for_linux/Kconfig +++ b/drivers/gpu/arm/midgard_for_linux/Kconfig @@ -16,6 +16,7 @@ menuconfig MALI_MIDGARD tristate "Mali Midgard series support" + select GPU_TRACEPOINTS if ANDROID default n help Enable this option to build support for a ARM Mali Midgard GPU. @@ -23,38 +24,15 @@ menuconfig MALI_MIDGARD To compile this driver as a module, choose M here: this will generate a single module, called mali_kbase. -choice - prompt "Streamline support" - depends on MALI_MIDGARD - default MALI_TIMELINE_DISABLED - help - Select streamline support configuration. - -config MALI_TIMELINE_DISABLED - bool "Streamline support disabled" - help - Disable support for ARM Streamline Performance Analyzer. - - Timeline support will not be included in - kernel code. - Debug stream will not be generated. - config MALI_GATOR_SUPPORT bool "Streamline support via Gator" + depends on MALI_MIDGARD + default n help Adds diagnostic support for use with the ARM Streamline Performance Analyzer. You will need the Gator device driver already loaded before loading this driver when enabling Streamline debug support. - -config MALI_MIPE_ENABLED - bool "Streamline support via MIPE" - help - Adds diagnostic support for use with the ARM Streamline Performance Analyzer. - - Stream will be transmitted directly to Mali GPU library. - Compatible version of the library is required to read debug stream generated by kernel. - -endchoice + This is a legacy interface required by older versions of Streamline. config MALI_MIDGARD_DVFS bool "Enable legacy DVFS" @@ -81,6 +59,15 @@ config MALI_DEVFREQ governor, the frequency of Mali will be dynamically selected from the available OPPs. +config MALI_DMA_FENCE + bool "DMA_BUF fence support for Mali" + depends on MALI_MIDGARD && !KDS + default n + help + Support DMA_BUF fences for Mali. + + This option should only be enabled if KDS is not present and + the Linux Kernel has built in support for DMA_BUF fences. # MALI_EXPERT configuration options @@ -92,13 +79,18 @@ menuconfig MALI_EXPERT Enabling this option and modifying the default settings may produce a driver with performance or other limitations. -config MALI_DEBUG_SHADER_SPLIT_FS - bool "Allow mapping of shader cores via sysfs" +config MALI_PRFCNT_SET_SECONDARY + bool "Use secondary set of performance counters" depends on MALI_MIDGARD && MALI_EXPERT default n help - Select this option to provide a sysfs entry for runtime configuration of shader - core affinity masks. + Select this option to use secondary set of performance counters. Kernel + features that depend on an access to the primary set of counters may + become unavailable. Enabling this option will prevent power management + from working optimally and may cause instrumentation tools to return + bogus results. + + If unsure, say N. 
config MALI_PLATFORM_FAKE bool "Enable fake platform device support" @@ -162,6 +154,23 @@ config MALI_DEBUG help Select this option for increased checking and reporting of errors. +config MALI_FENCE_DEBUG + bool "Debug sync fence usage" + depends on MALI_MIDGARD && MALI_EXPERT && SYNC + default y if MALI_DEBUG + help + Select this option to enable additional checking and reporting on the + use of sync fences in the Mali driver. + + This will add a 3s timeout to all sync fence waits in the Mali + driver, so that when work for Mali has been waiting on a sync fence + for a long time a debug message will be printed, detailing what fence + is causing the block, and which dependent Mali atoms are blocked as a + result of this. + + The timeout can be changed at runtime through the js_soft_timeout + device attribute, where the timeout is specified in milliseconds. + config MALI_NO_MALI bool "No Mali" depends on MALI_MIDGARD && MALI_EXPERT @@ -198,11 +207,19 @@ config MALI_SYSTEM_TRACE minimal overhead when not in use. Enable only if you know what you are doing. -config MALI_GPU_TRACEPOINTS - bool "Enable GPU tracepoints" - depends on MALI_MIDGARD && ANDROID - select GPU_TRACEPOINTS +config MALI_GPU_MMU_AARCH64 + bool "Use AArch64 page tables" + depends on MALI_MIDGARD && MALI_EXPERT + default n help - Enables GPU tracepoints using Android trace event definitions. + Use AArch64 format page tables for the GPU instead of LPAE-style. + The two formats have the same functionality and performance but a + future GPU may deprecate or remove the legacy LPAE-style format. + + The LPAE-style format is supported on all Midgard and current Bifrost + GPUs. Enabling AArch64 format restricts the driver to only supporting + Bifrost GPUs. + + If in doubt, say N. source "drivers/gpu/arm/midgard/platform/Kconfig" diff --git a/drivers/gpu/arm/midgard_for_linux/Makefile b/drivers/gpu/arm/midgard_for_linux/Makefile index d4d5de4cd512..e1625e6dba79 100644 --- a/drivers/gpu/arm/midgard_for_linux/Makefile +++ b/drivers/gpu/arm/midgard_for_linux/Makefile @@ -1,5 +1,5 @@ # -# (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -26,10 +26,6 @@ ifeq ($(MALI_UNIT_TEST), 1) EXTRA_SYMBOLS += $(KBASE_PATH_RELATIVE)/tests/internal/src/kernel_assert_module/linux/Module.symvers endif -ifneq ($(wildcard $(CURDIR)/internal/Makefile.in),) -include $(CURDIR)/internal/Makefile.in -endif - ifeq ($(MALI_BUS_LOG), 1) #Add bus logger symbols EXTRA_SYMBOLS += $(BUSLOG_PATH_RELATIVE)/drivers/base/bus_logger/Module.symvers diff --git a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_cache_policy_backend.c b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_cache_policy_backend.c index 92a14fa1bae1..c6862539c8dd 100644 --- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_cache_policy_backend.c +++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_cache_policy_backend.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -16,7 +16,12 @@ #include "backend/gpu/mali_kbase_cache_policy_backend.h" -#include #include +void kbase_cache_set_coherency_mode(struct kbase_device *kbdev, + u32 mode) +{ + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG)) + kbase_reg_write(kbdev, COHERENCY_ENABLE, mode, NULL); +} diff --git a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_cache_policy_backend.h b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_cache_policy_backend.h index 42069fc88a1f..fe9869109a82 100644 --- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_cache_policy_backend.h +++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_cache_policy_backend.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -22,5 +22,13 @@ #include "mali_kbase.h" #include "mali_base_kernel.h" +/** + * kbase_cache_set_coherency_mode() - Sets the system coherency mode + * in the GPU. + * @kbdev: Device pointer + * @mode: Coherency mode. COHERENCY_ACE/ACE_LITE + */ +void kbase_cache_set_coherency_mode(struct kbase_device *kbdev, + u32 mode); #endif /* _KBASE_CACHE_POLICY_H_ */ diff --git a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_devfreq.c b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_devfreq.c index 86227d996257..ad05fe5bea8d 100644 --- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_devfreq.c +++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_devfreq.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -135,6 +135,14 @@ kbase_devfreq_status(struct device *dev, struct devfreq_dev_status *stat) stat->private_data = NULL; +#ifdef CONFIG_DEVFREQ_THERMAL +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) + if (kbdev->devfreq_cooling) + memcpy(&kbdev->devfreq_cooling->last_status, stat, + sizeof(*stat)); +#endif +#endif + return 0; } @@ -205,7 +213,8 @@ int kbase_devfreq_init(struct kbase_device *kbdev) dp = &kbdev->devfreq_profile; dp->initial_freq = kbdev->current_freq; - dp->polling_ms = 100; + /* .KP : set devfreq_dvfs_interval_in_ms */ + dp->polling_ms = 20; dp->target = kbase_devfreq_target; dp->get_dev_status = kbase_devfreq_status; dp->get_cur_freq = kbase_devfreq_cur_freq; diff --git a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_device_hw.c b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_device_hw.c index 83d5ec9f7a93..b9238a305177 100644 --- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_device_hw.c +++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_device_hw.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -33,7 +33,9 @@ void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value, KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID); KBASE_DEBUG_ASSERT(kbdev->dev != NULL); dev_dbg(kbdev->dev, "w: reg %04x val %08x", offset, value); + writel(value, kbdev->reg + offset); + if (kctx && kctx->jctx.tb) kbase_device_trace_register_access(kctx, REG_WRITE, offset, value); @@ -48,7 +50,9 @@ u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset, KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered); KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID); KBASE_DEBUG_ASSERT(kbdev->dev != NULL); + val = readl(kbdev->reg + offset); + dev_dbg(kbdev->dev, "r: reg %04x val %08x", offset, val); if (kctx && kctx->jctx.tb) kbase_device_trace_register_access(kctx, REG_READ, offset, val); diff --git a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_gpu.c b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_gpu.c index 72a98d0f7952..d578fd78e825 100644 --- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_gpu.c +++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_gpu.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -20,7 +20,6 @@ * Register-based HW access backend APIs */ #include -#include #include #include #include @@ -81,7 +80,6 @@ int kbase_backend_late_init(struct kbase_device *kbdev) if (err) goto fail_timer; -/* Currently disabled on the prototype */ #ifdef CONFIG_MALI_DEBUG #ifndef CONFIG_MALI_NO_MALI if (kbasep_common_test_interrupt_handlers(kbdev) != 0) { @@ -101,12 +99,13 @@ int kbase_backend_late_init(struct kbase_device *kbdev) return 0; fail_job_slot: -/* Currently disabled on the prototype */ + #ifdef CONFIG_MALI_DEBUG #ifndef CONFIG_MALI_NO_MALI fail_interrupt_test: #endif /* !CONFIG_MALI_NO_MALI */ #endif /* CONFIG_MALI_DEBUG */ + kbase_backend_timer_term(kbdev); fail_timer: kbase_hwaccess_pm_halt(kbdev); diff --git a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_gpuprops_backend.c b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_gpuprops_backend.c index 705b1ebfa87f..d410cd297889 100644 --- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_gpuprops_backend.c +++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_gpuprops_backend.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -86,8 +86,20 @@ void kbase_backend_gpuprops_get(struct kbase_device *kbdev, void kbase_backend_gpuprops_get_features(struct kbase_device *kbdev, struct kbase_gpuprops_regdump *regdump) { + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG)) { + /* Ensure we can access the GPU registers */ + kbase_pm_register_access_enable(kbdev); + + regdump->coherency_features = kbase_reg_read(kbdev, + GPU_CONTROL_REG(COHERENCY_FEATURES), NULL); + + /* We're done accessing the GPU registers for now. 
*/ + kbase_pm_register_access_disable(kbdev); + } else { + /* Pre COHERENCY_FEATURES we only supported ACE_LITE */ regdump->coherency_features = COHERENCY_FEATURE_BIT(COHERENCY_NONE) | COHERENCY_FEATURE_BIT(COHERENCY_ACE_LITE); + } } diff --git a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_instr_backend.c b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_instr_backend.c index 2c987071a77c..3f06a10f7fed 100644 --- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_instr_backend.c +++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_instr_backend.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -23,6 +23,7 @@ #include #include +#include #include #include #include @@ -40,14 +41,6 @@ static void kbasep_instr_hwcnt_cacheclean(struct kbase_device *kbdev) u32 irq_mask; spin_lock_irqsave(&kbdev->hwcnt.lock, flags); - /* Wait for any reset to complete */ - while (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING) { - spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); - wait_event(kbdev->hwcnt.backend.cache_clean_wait, - kbdev->hwcnt.backend.state != - KBASE_INSTR_STATE_RESETTING); - spin_lock_irqsave(&kbdev->hwcnt.lock, flags); - } KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_REQUEST_CLEAN); @@ -74,18 +67,14 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, { unsigned long flags, pm_flags; int err = -EINVAL; - struct kbasep_js_device_data *js_devdata; u32 irq_mask; int ret; u64 shader_cores_needed; - - KBASE_DEBUG_ASSERT(NULL == kbdev->hwcnt.suspended_kctx); + u32 prfcnt_config; shader_cores_needed = kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER); - js_devdata = &kbdev->js_data; - /* alignment failure */ if ((setup->dump_buffer == 0ULL) || (setup->dump_buffer & (2048 - 1))) goto out_err; @@ -100,14 +89,6 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, spin_lock_irqsave(&kbdev->hwcnt.lock, flags); - if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING) { - /* GPU is being reset */ - spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); - wait_event(kbdev->hwcnt.backend.wait, - kbdev->hwcnt.backend.triggered != 0); - spin_lock_irqsave(&kbdev->hwcnt.lock, flags); - } - if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) { /* Instrumentation is already enabled */ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); @@ -125,10 +106,6 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, kbdev->hwcnt.kctx = kctx; /* Remember the dump address so we can reprogram it later */ kbdev->hwcnt.addr = setup->dump_buffer; - /* Remember all the settings for suspend/resume */ - if (&kbdev->hwcnt.suspended_state != setup) - memcpy(&kbdev->hwcnt.suspended_state, setup, - sizeof(kbdev->hwcnt.suspended_state)); /* Request the clean */ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN; @@ -151,9 +128,22 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, kbase_pm_request_l2_caches(kbdev); /* Configure */ + prfcnt_config = kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT; +#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY + { + u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; + u32 product_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) + >> GPU_ID_VERSION_PRODUCT_ID_SHIFT; + int arch_v6 = 
GPU_ID_IS_NEW_FORMAT(product_id); + + if (arch_v6) + prfcnt_config |= 1 << PRFCNT_CONFIG_SETSELECT_SHIFT; + } +#endif + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), - (kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT) - | PRFCNT_CONFIG_MODE_OFF, kctx); + prfcnt_config | PRFCNT_CONFIG_MODE_OFF, kctx); + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), setup->dump_buffer & 0xFFFFFFFF, kctx); kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI), @@ -174,8 +164,7 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, setup->tiler_bm, kctx); kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), - (kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT) | - PRFCNT_CONFIG_MODE_MANUAL, kctx); + prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL, kctx); /* If HW has PRLAM-8186 we can now re-enable the tiler HW counters dump */ @@ -185,14 +174,6 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, spin_lock_irqsave(&kbdev->hwcnt.lock, flags); - if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING) { - /* GPU is being reset */ - spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); - wait_event(kbdev->hwcnt.backend.wait, - kbdev->hwcnt.backend.triggered != 0); - spin_lock_irqsave(&kbdev->hwcnt.lock, flags); - } - kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; kbdev->hwcnt.backend.triggered = 1; wake_up(&kbdev->hwcnt.backend.wait); @@ -359,15 +340,11 @@ void kbasep_cache_clean_worker(struct work_struct *data) spin_lock_irqsave(&kbdev->hwcnt.lock, flags); /* Wait for our condition, and any reset to complete */ - while (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING || - kbdev->hwcnt.backend.state == - KBASE_INSTR_STATE_CLEANING) { + while (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) { spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); wait_event(kbdev->hwcnt.backend.cache_clean_wait, - (kbdev->hwcnt.backend.state != - KBASE_INSTR_STATE_RESETTING && kbdev->hwcnt.backend.state != - KBASE_INSTR_STATE_CLEANING)); + KBASE_INSTR_STATE_CLEANING); spin_lock_irqsave(&kbdev->hwcnt.lock, flags); } KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state == @@ -400,9 +377,6 @@ void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev) &kbdev->hwcnt.backend.cache_clean_work); KBASE_DEBUG_ASSERT(ret); } - /* NOTE: In the state KBASE_INSTR_STATE_RESETTING, We're in a reset, - * and the instrumentation state hasn't been restored yet - - * kbasep_reset_timeout_worker() will do the rest of the work */ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); } @@ -430,10 +404,6 @@ void kbase_clean_caches_done(struct kbase_device *kbdev) kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANED; wake_up(&kbdev->hwcnt.backend.cache_clean_wait); } - /* NOTE: In the state KBASE_INSTR_STATE_RESETTING, We're in a - * reset, and the instrumentation state hasn't been restored yet - * - kbasep_reset_timeout_worker() will do the rest of the work - */ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); } @@ -451,14 +421,6 @@ int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx) spin_lock_irqsave(&kbdev->hwcnt.lock, flags); - if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING) { - /* GPU is being reset */ - spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); - wait_event(kbdev->hwcnt.backend.wait, - kbdev->hwcnt.backend.triggered != 0); - spin_lock_irqsave(&kbdev->hwcnt.lock, flags); - } - if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) { err = -EINVAL; kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; @@ -482,14 +444,6 @@ int kbase_instr_hwcnt_clear(struct 
kbase_context *kctx) spin_lock_irqsave(&kbdev->hwcnt.lock, flags); - if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING) { - /* GPU is being reset */ - spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); - wait_event(kbdev->hwcnt.backend.wait, - kbdev->hwcnt.backend.triggered != 0); - spin_lock_irqsave(&kbdev->hwcnt.lock, flags); - } - /* Check it's the context previously set up and we're not already * dumping */ if (kbdev->hwcnt.kctx != kctx || kbdev->hwcnt.backend.state != diff --git a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_instr_defs.h b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_instr_defs.h index 23bd80a5a150..4794672da8f0 100644 --- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_instr_defs.h +++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_instr_defs.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014, 2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -39,10 +39,6 @@ enum kbase_instr_state { /* Cache clean completed, and either a) a dump is complete, or * b) instrumentation can now be setup. */ KBASE_INSTR_STATE_CLEANED, - /* kbasep_reset_timeout_worker() has started (but not compelted) a - * reset. This generally indicates the current action should be aborted, - * and kbasep_reset_timeout_worker() will handle the cleanup */ - KBASE_INSTR_STATE_RESETTING, /* An error has occured during DUMPING (page fault). */ KBASE_INSTR_STATE_FAULT }; diff --git a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_irq_linux.c b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_irq_linux.c index 49c72f90aac6..b891b12a3299 100644 --- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_irq_linux.c +++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_irq_linux.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -28,7 +28,6 @@ #define MMU_IRQ_TAG 1 #define GPU_IRQ_TAG 2 - static void *kbase_tag(void *ptr, u32 tag) { return (void *)(((uintptr_t) ptr) | tag); @@ -39,9 +38,6 @@ static void *kbase_untag(void *ptr) return (void *)(((uintptr_t) ptr) & ~3); } - - - static irqreturn_t kbase_job_irq_handler(int irq, void *data) { unsigned long flags; @@ -151,13 +147,13 @@ static irqreturn_t kbase_gpu_irq_handler(int irq, void *data) return IRQ_HANDLED; } + static irq_handler_t kbase_handler_table[] = { [JOB_IRQ_TAG] = kbase_job_irq_handler, [MMU_IRQ_TAG] = kbase_mmu_irq_handler, [GPU_IRQ_TAG] = kbase_gpu_irq_handler, }; - #ifdef CONFIG_MALI_DEBUG #define JOB_IRQ_HANDLER JOB_IRQ_TAG #define MMU_IRQ_HANDLER MMU_IRQ_TAG diff --git a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_jm_defs.h b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_jm_defs.h index 8ccc440171a2..83d477898c5e 100644 --- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_jm_defs.h +++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_jm_defs.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -71,6 +71,7 @@ struct slot_rb { * @reset_work: Work item for performing the reset * @reset_wait: Wait event signalled when the reset is complete * @reset_timer: Timeout for soft-stops before the reset + * @timeouts_updated: Have timeout values just been updated? * * The kbasep_js_device_data::runpool_irq::lock (a spinlock) must be held when * accessing this structure @@ -97,11 +98,15 @@ struct kbase_backend_data { /* The GPU reset process is currently occuring (timeout has expired or * kbasep_try_reset_gpu_early was called) */ #define KBASE_RESET_GPU_HAPPENING 3 - +/* Reset the GPU silently, used when resetting the GPU as part of normal + * behavior (e.g. when exiting protected mode). */ +#define KBASE_RESET_GPU_SILENT 4 struct workqueue_struct *reset_workq; struct work_struct reset_work; wait_queue_head_t reset_wait; struct hrtimer reset_timer; + + bool timeouts_updated; }; /** diff --git a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_jm_hw.c b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_jm_hw.c index 33d6aef0ec72..00900a99a898 100644 --- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_jm_hw.c +++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_jm_hw.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -15,8 +15,6 @@ - - /* * Base kernel job manager APIs */ @@ -27,11 +25,9 @@ #if defined(CONFIG_MALI_GATOR_SUPPORT) #include #endif -#if defined(CONFIG_MALI_MIPE_ENABLED) #include -#endif +#include #include -#include #include #include #include @@ -87,14 +83,31 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, * start */ cfg = kctx->as_nr; + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION)) + cfg |= JS_CONFIG_ENABLE_FLUSH_REDUCTION; + #ifndef CONFIG_MALI_COH_GPU - cfg |= JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE; - cfg |= JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE; -#endif + if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_START)) + cfg |= JS_CONFIG_START_FLUSH_NO_ACTION; + else + cfg |= JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE; + + if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_END)) + cfg |= JS_CONFIG_END_FLUSH_NO_ACTION; + else + cfg |= JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE; +#endif /* CONFIG_MALI_COH_GPU */ + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10649) || + !kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3982)) + cfg |= JS_CONFIG_START_MMU; - cfg |= JS_CONFIG_START_MMU; cfg |= JS_CONFIG_THREAD_PRI(8); + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE) && + (katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED)) + cfg |= JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK; + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) { if (!kbdev->hwaccess.backend.slot_rb[js].job_chain_flag) { @@ -111,6 +124,9 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_CONFIG_NEXT), cfg, kctx); + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION)) + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_FLUSH_ID_NEXT), + katom->flush_id, kctx); /* Write an approximate start timestamp. * It's approximate because there might be a job in the HEAD register. 
@@ -130,7 +146,6 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, GATOR_MAKE_EVENT(GATOR_JOB_SLOT_START, js), kctx, kbase_jd_atom_id(kctx, katom)); #endif -#if defined(CONFIG_MALI_MIPE_ENABLED) kbase_tlstream_tl_attrib_atom_config(katom, jc_head, katom->affinity, cfg); kbase_tlstream_tl_ret_ctx_lpu( @@ -142,7 +157,6 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, katom, &kbdev->gpu_props.props.raw_props.js_features[js], "ctx_nr,atom_nr"); -#endif #ifdef CONFIG_GPU_TRACEPOINTS if (!kbase_backend_nr_atoms_submitted(kbdev, js)) { /* If this is the only job on the slot, trace it as starting */ @@ -204,6 +218,24 @@ static void kbasep_job_slot_update_head_start_timestamp( } } +/** + * kbasep_trace_tl_nret_atom_lpu - Call nret_atom_lpu timeline tracepoint + * @kbdev: kbase device + * @i: job slot + * + * Get kbase atom by calling kbase_gpu_inspect for given job slot. + * Then use obtained katom and name of slot associated with the given + * job slot number in tracepoint call to the instrumentation module + * informing that given atom is no longer executed on given lpu (job slot). + */ +static void kbasep_trace_tl_nret_atom_lpu(struct kbase_device *kbdev, int i) +{ + struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, i, 0); + + kbase_tlstream_tl_nret_atom_lpu(katom, + &kbdev->gpu_props.props.raw_props.js_features[i]); +} + void kbase_job_done(struct kbase_device *kbdev, u32 done) { unsigned long flags; @@ -264,9 +296,12 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) GATOR_JOB_SLOT_SOFT_STOPPED, i), NULL, 0); #endif -#if defined(CONFIG_MALI_MIPE_ENABLED) + kbase_tlstream_aux_job_softstop(i); -#endif + + kbasep_trace_tl_nret_atom_lpu( + kbdev, i); + /* Soft-stopped job - read the value of * JS_TAIL so that the job chain can * be resumed */ @@ -437,19 +472,21 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) KBASE_EXPORT_TEST_API(kbase_job_done); static bool kbasep_soft_stop_allowed(struct kbase_device *kbdev, - u16 core_reqs) + struct kbase_jd_atom *katom) { bool soft_stops_allowed = true; - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408)) { - if ((core_reqs & BASE_JD_REQ_T) != 0) + if (kbase_jd_katom_is_protected(katom)) { + soft_stops_allowed = false; + } else if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408)) { + if ((katom->core_req & BASE_JD_REQ_T) != 0) soft_stops_allowed = false; } return soft_stops_allowed; } static bool kbasep_hard_stop_allowed(struct kbase_device *kbdev, - u16 core_reqs) + base_jd_core_req core_reqs) { bool hard_stops_allowed = true; @@ -463,7 +500,7 @@ static bool kbasep_hard_stop_allowed(struct kbase_device *kbdev, void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, int js, u32 action, - u16 core_reqs, + base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom) { struct kbase_context *kctx = target_katom->kctx; @@ -486,12 +523,13 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, if (action == JS_COMMAND_SOFT_STOP) { bool soft_stop_allowed = kbasep_soft_stop_allowed(kbdev, - core_reqs); + target_katom); if (!soft_stop_allowed) { #ifdef CONFIG_MALI_DEBUG - dev_dbg(kbdev->dev, "Attempt made to soft-stop a job that cannot be soft-stopped. core_reqs = 0x%X", - (unsigned int)core_reqs); + dev_dbg(kbdev->dev, + "Attempt made to soft-stop a job that cannot be soft-stopped. 
core_reqs = 0x%X", + (unsigned int)core_reqs); #endif /* CONFIG_MALI_DEBUG */ return; } @@ -499,9 +537,51 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, /* We are about to issue a soft stop, so mark the atom as having * been soft stopped */ target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED; - } - if (action == JS_COMMAND_HARD_STOP) { + /* Mark the point where we issue the soft-stop command */ + kbase_tlstream_aux_issue_job_softstop(target_katom); + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) { + int i; + + for (i = 0; + i < kbase_backend_nr_atoms_submitted(kbdev, js); + i++) { + struct kbase_jd_atom *katom; + + katom = kbase_gpu_inspect(kbdev, js, i); + + KBASE_DEBUG_ASSERT(katom); + + /* For HW_ISSUE_8316, only 'bad' jobs attacking + * the system can cause this issue: normally, + * all memory should be allocated in multiples + * of 4 pages, and growable memory should be + * changed size in multiples of 4 pages. + * + * Whilst such 'bad' jobs can be cleared by a + * GPU reset, the locking up of a uTLB entry + * caused by the bad job could also stall other + * ASs, meaning that other ASs' jobs don't + * complete in the 'grace' period before the + * reset. We don't want to lose other ASs' jobs + * when they would normally complete fine, so we + * must 'poke' the MMU regularly to help other + * ASs complete */ + kbase_as_poking_timer_retain_atom( + kbdev, katom->kctx, katom); + } + } + + if (kbase_hw_has_feature( + kbdev, + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) { + action = (target_katom->atom_flags & + KBASE_KATOM_FLAGS_JOBCHAIN) ? + JS_COMMAND_SOFT_STOP_1 : + JS_COMMAND_SOFT_STOP_0; + } + } else if (action == JS_COMMAND_HARD_STOP) { bool hard_stop_allowed = kbasep_hard_stop_allowed(kbdev, core_reqs); @@ -525,55 +605,21 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, * hard-stop fails, so it is safe to just return and * ignore the hard-stop request. */ - dev_warn(kbdev->dev, "Attempt made to hard-stop a job that cannot be hard-stopped. core_reqs = 0x%X", - (unsigned int)core_reqs); + dev_warn(kbdev->dev, + "Attempt made to hard-stop a job that cannot be hard-stopped. core_reqs = 0x%X", + (unsigned int)core_reqs); return; } target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_HARD_STOPPED; - } - - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316) && - action == JS_COMMAND_SOFT_STOP) { - int i; - for (i = 0; i < kbase_backend_nr_atoms_submitted(kbdev, js); - i++) { - struct kbase_jd_atom *katom; - - katom = kbase_gpu_inspect(kbdev, js, i); - - KBASE_DEBUG_ASSERT(katom); - - /* For HW_ISSUE_8316, only 'bad' jobs attacking the - * system can cause this issue: normally, all memory - * should be allocated in multiples of 4 pages, and - * growable memory should be changed size in multiples - * of 4 pages. - * - * Whilst such 'bad' jobs can be cleared by a GPU reset, - * the locking up of a uTLB entry caused by the bad job - * could also stall other ASs, meaning that other ASs' - * jobs don't complete in the 'grace' period before the - * reset. We don't want to lose other ASs' jobs when - * they would normally complete fine, so we must 'poke' - * the MMU regularly to help other ASs complete */ - kbase_as_poking_timer_retain_atom(kbdev, katom->kctx, - katom); - } - } - - if (kbase_hw_has_feature(kbdev, + if (kbase_hw_has_feature( + kbdev, BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) { - if (action == JS_COMMAND_SOFT_STOP) - action = (target_katom->atom_flags & - KBASE_KATOM_FLAGS_JOBCHAIN) ? 
- JS_COMMAND_SOFT_STOP_1 : - JS_COMMAND_SOFT_STOP_0; - else action = (target_katom->atom_flags & - KBASE_KATOM_FLAGS_JOBCHAIN) ? - JS_COMMAND_HARD_STOP_1 : - JS_COMMAND_HARD_STOP_0; + KBASE_KATOM_FLAGS_JOBCHAIN) ? + JS_COMMAND_HARD_STOP_1 : + JS_COMMAND_HARD_STOP_0; + } } kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND), action, kctx); @@ -699,7 +745,6 @@ void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx, struct kbase_jd_atom *target_katom) { struct kbase_device *kbdev; - struct kbasep_js_device_data *js_devdata; int js = target_katom->slot_nr; int priority = target_katom->sched_priority; int i; @@ -707,7 +752,6 @@ void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx, KBASE_DEBUG_ASSERT(kctx != NULL); kbdev = kctx->kbdev; KBASE_DEBUG_ASSERT(kbdev != NULL); - js_devdata = &kbdev->js_data; lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); @@ -825,6 +869,13 @@ u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev) { u32 flush_id = 0; + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION)) { + mutex_lock(&kbdev->pm.lock); + if (kbdev->pm.backend.gpu_powered) + flush_id = kbase_reg_read(kbdev, + GPU_CONTROL_REG(LATEST_FLUSH), NULL); + mutex_unlock(&kbdev->pm.lock); + } return flush_id; } @@ -1032,7 +1083,7 @@ void kbase_job_slot_hardstop(struct kbase_context *kctx, int js, * state when the soft/hard-stop action is complete */ void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action, - u16 core_reqs, struct kbase_jd_atom *target_katom) + base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom) { u32 hw_action = action & JS_COMMAND_MASK; @@ -1044,7 +1095,7 @@ void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action, /* For soft-stop, don't enter if soft-stop not allowed, or isn't * causing disjoint */ if (hw_action == JS_COMMAND_SOFT_STOP && - !(kbasep_soft_stop_allowed(kbdev, core_reqs) && + !(kbasep_soft_stop_allowed(kbdev, target_katom) && (action & JS_COMMAND_SW_CAUSES_DISJOINT))) return; @@ -1109,26 +1160,6 @@ static void kbase_debug_dump_registers(struct kbase_device *kbdev) kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG), NULL)); } -static void kbasep_save_hwcnt_setup(struct kbase_device *kbdev, - struct kbase_context *kctx, - struct kbase_uk_hwcnt_setup *hwcnt_setup) -{ - hwcnt_setup->dump_buffer = - kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), kctx) & - 0xffffffff; - hwcnt_setup->dump_buffer |= (u64) - kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI), kctx) << - 32; - hwcnt_setup->jm_bm = - kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN), kctx); - hwcnt_setup->shader_bm = - kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN), kctx); - hwcnt_setup->tiler_bm = - kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), kctx); - hwcnt_setup->mmu_l2_bm = - kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN), kctx); -} - static void kbasep_reset_timeout_worker(struct work_struct *data) { unsigned long flags, mmu_flags; @@ -1136,10 +1167,8 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) int i; ktime_t end_timestamp = ktime_get(); struct kbasep_js_device_data *js_devdata; - struct kbase_uk_hwcnt_setup hwcnt_setup = { {0} }; - enum kbase_instr_state bckp_state; bool try_schedule = false; - bool restore_hwc = false; + bool silent = false; KBASE_DEBUG_ASSERT(data); @@ -1149,8 +1178,16 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) KBASE_DEBUG_ASSERT(kbdev); js_devdata = &kbdev->js_data; + if 
(atomic_read(&kbdev->hwaccess.backend.reset_gpu) == + KBASE_RESET_GPU_SILENT) + silent = true; + KBASE_TRACE_ADD(kbdev, JM_BEGIN_RESET_WORKER, NULL, NULL, 0u, 0); + /* Suspend vinstr. + * This call will block until vinstr is suspended. */ + kbase_vinstr_suspend(kbdev->vinstr_ctx); + /* Make sure the timer has completed - this cannot be done from * interrupt context, so this cannot be done within * kbasep_try_reset_gpu_early. */ @@ -1200,39 +1237,14 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) * assume that anything that is still left on the GPU is stuck there and * we'll kill it when we reset the GPU */ - dev_err(kbdev->dev, "Resetting GPU (allowing up to %d ms)", + if (!silent) + dev_err(kbdev->dev, "Resetting GPU (allowing up to %d ms)", RESET_TIMEOUT); - spin_lock_irqsave(&kbdev->hwcnt.lock, flags); - - if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING) { - /* the same interrupt handler preempted itself */ - /* GPU is being reset */ - spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); - wait_event(kbdev->hwcnt.backend.wait, - kbdev->hwcnt.backend.triggered != 0); - spin_lock_irqsave(&kbdev->hwcnt.lock, flags); - } - /* Save the HW counters setup */ - if (kbdev->hwcnt.kctx != NULL) { - struct kbase_context *kctx = kbdev->hwcnt.kctx; - - if (kctx->jctx.sched_info.ctx.is_scheduled) { - kbasep_save_hwcnt_setup(kbdev, kctx, &hwcnt_setup); - - restore_hwc = true; - } - } - /* Output the state of some interesting registers to help in the * debugging of GPU resets */ - kbase_debug_dump_registers(kbdev); - - bckp_state = kbdev->hwcnt.backend.state; - kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_RESETTING; - kbdev->hwcnt.backend.triggered = 0; - - spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + if (!silent) + kbase_debug_dump_registers(kbdev); /* Reset the GPU */ kbase_pm_init_hw(kbdev, 0); @@ -1272,101 +1284,14 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) kbase_disjoint_state_down(kbdev); wake_up(&kbdev->hwaccess.backend.reset_wait); - dev_err(kbdev->dev, "Reset complete"); + if (!silent) + dev_err(kbdev->dev, "Reset complete"); if (js_devdata->nr_contexts_pullable > 0 && !kbdev->poweroff_pending) try_schedule = true; mutex_unlock(&js_devdata->runpool_mutex); - spin_lock_irqsave(&kbdev->hwcnt.lock, flags); - /* Restore the HW counters setup */ - if (restore_hwc) { - struct kbase_context *kctx = kbdev->hwcnt.kctx; - - kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), - (kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT) | - PRFCNT_CONFIG_MODE_OFF, kctx); - kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), - hwcnt_setup.dump_buffer & 0xFFFFFFFF, kctx); - kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI), - hwcnt_setup.dump_buffer >> 32, kctx); - kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN), - hwcnt_setup.jm_bm, kctx); - kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN), - hwcnt_setup.shader_bm, kctx); - kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN), - hwcnt_setup.mmu_l2_bm, kctx); - - /* Due to PRLAM-8186 we need to disable the Tiler before we - * enable the HW counter dump. 
*/ - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186)) - kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), - 0, kctx); - else - kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), - hwcnt_setup.tiler_bm, kctx); - - kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), - (kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT) | - PRFCNT_CONFIG_MODE_MANUAL, kctx); - - /* If HW has PRLAM-8186 we can now re-enable the tiler HW - * counters dump */ - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186)) - kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), - hwcnt_setup.tiler_bm, kctx); - } - kbdev->hwcnt.backend.state = bckp_state; - switch (kbdev->hwcnt.backend.state) { - /* Cases for waking kbasep_cache_clean_worker worker */ - case KBASE_INSTR_STATE_CLEANED: - /* Cache-clean IRQ occurred, but we reset: - * Wakeup incase the waiter saw RESETTING */ - case KBASE_INSTR_STATE_REQUEST_CLEAN: - /* After a clean was requested, but before the regs were - * written: - * Wakeup incase the waiter saw RESETTING */ - wake_up(&kbdev->hwcnt.backend.cache_clean_wait); - break; - case KBASE_INSTR_STATE_CLEANING: - /* Either: - * 1) We've not got the Cache-clean IRQ yet: it was lost, or: - * 2) We got it whilst resetting: it was voluntarily lost - * - * So, move to the next state and wakeup: */ - kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANED; - wake_up(&kbdev->hwcnt.backend.cache_clean_wait); - break; - - /* Cases for waking anyone else */ - case KBASE_INSTR_STATE_DUMPING: - /* If dumping, abort the dump, because we may've lost the IRQ */ - kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; - kbdev->hwcnt.backend.triggered = 1; - wake_up(&kbdev->hwcnt.backend.wait); - break; - case KBASE_INSTR_STATE_DISABLED: - case KBASE_INSTR_STATE_IDLE: - case KBASE_INSTR_STATE_FAULT: - /* Every other reason: wakeup in that state */ - kbdev->hwcnt.backend.triggered = 1; - wake_up(&kbdev->hwcnt.backend.wait); - break; - - /* Unhandled cases */ - case KBASE_INSTR_STATE_RESETTING: - default: - BUG(); - break; - } - spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); - - /* Resume the vinstr core */ - kbase_vinstr_hwc_resume(kbdev->vinstr_ctx); - - /* Note: counter dumping may now resume */ - mutex_lock(&kbdev->pm.lock); /* Find out what cores are required now */ @@ -1386,6 +1311,10 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) } kbase_pm_context_idle(kbdev); + + /* Release vinstr */ + kbase_vinstr_resume(kbdev->vinstr_ctx); + KBASE_TRACE_ADD(kbdev, JM_END_RESET_WORKER, NULL, NULL, 0u, 0); } @@ -1467,7 +1396,7 @@ static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev) * * Return: * The function returns a boolean which should be interpreted as follows: - * true - Prepared for reset, kbase_reset_gpu should be called. + * true - Prepared for reset, kbase_reset_gpu_locked should be called. * false - Another thread is performing a reset, kbase_reset_gpu should * not be called. 
*/ @@ -1561,4 +1490,29 @@ void kbase_reset_gpu_locked(struct kbase_device *kbdev) /* Try resetting early */ kbasep_try_reset_gpu_early_locked(kbdev); } + +void kbase_reset_gpu_silent(struct kbase_device *kbdev) +{ + if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu, + KBASE_RESET_GPU_NOT_PENDING, + KBASE_RESET_GPU_SILENT) != + KBASE_RESET_GPU_NOT_PENDING) { + /* Some other thread is already resetting the GPU */ + return; + } + + kbase_disjoint_state_up(kbdev); + + queue_work(kbdev->hwaccess.backend.reset_workq, + &kbdev->hwaccess.backend.reset_work); +} + +bool kbase_reset_gpu_active(struct kbase_device *kbdev) +{ + if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) == + KBASE_RESET_GPU_NOT_PENDING) + return false; + + return true; +} #endif /* KBASE_GPU_RESET_EN */ diff --git a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_jm_internal.h b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_jm_internal.h index eb068d40283b..8f1e5615ea43 100644 --- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_jm_internal.h +++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_jm_internal.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -96,7 +96,7 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, int js, u32 action, - u16 core_reqs, + base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom); /** diff --git a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_jm_rb.c b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_jm_rb.c index c0168c74f815..da7c4df7d277 100644 --- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_jm_rb.c +++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_jm_rb.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -24,11 +24,11 @@ #include #include #include +#include #include #include #include #include -#include #include /* Return whether the specified ringbuffer is empty. 
HW access lock must be @@ -592,7 +592,7 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev, case KBASE_ATOM_GPU_RB_READY: /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ - case KBASE_ATOM_GPU_RB_WAITING_SECURE_MODE: + case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_ENTRY: /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ case KBASE_ATOM_GPU_RB_WAITING_AFFINITY: @@ -603,6 +603,9 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev, case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE: break; + case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_EXIT: + /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ + case KBASE_ATOM_GPU_RB_WAITING_BLOCKED: /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ @@ -654,53 +657,145 @@ static inline bool kbase_gpu_rmu_workaround(struct kbase_device *kbdev, int js) return true; } -static bool kbase_gpu_in_secure_mode(struct kbase_device *kbdev) +static inline bool kbase_gpu_in_protected_mode(struct kbase_device *kbdev) { - return kbdev->secure_mode; + return kbdev->protected_mode; } -static int kbase_gpu_secure_mode_enable(struct kbase_device *kbdev) +static int kbase_gpu_protected_mode_enter(struct kbase_device *kbdev) { int err = -EINVAL; lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); - WARN_ONCE(!kbdev->secure_ops, - "Cannot enable secure mode: secure callbacks not specified.\n"); + WARN_ONCE(!kbdev->protected_ops, + "Cannot enter protected mode: protected callbacks not specified.\n"); - if (kbdev->secure_ops) { - /* Switch GPU to secure mode */ - err = kbdev->secure_ops->secure_mode_enable(kbdev); + if (kbdev->protected_ops) { + /* Switch GPU to protected mode */ + err = kbdev->protected_ops->protected_mode_enter(kbdev); if (err) - dev_warn(kbdev->dev, "Failed to enable secure mode: %d\n", err); + dev_warn(kbdev->dev, "Failed to enable protected mode: %d\n", + err); else - kbdev->secure_mode = true; + kbdev->protected_mode = true; } return err; } -static int kbase_gpu_secure_mode_disable(struct kbase_device *kbdev) +static int kbase_gpu_protected_mode_reset(struct kbase_device *kbdev) { - int err = -EINVAL; - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); - WARN_ONCE(!kbdev->secure_ops, - "Cannot disable secure mode: secure callbacks not specified.\n"); + WARN_ONCE(!kbdev->protected_ops, + "Cannot exit protected mode: protected callbacks not specified.\n"); - if (kbdev->secure_ops) { - /* Switch GPU to non-secure mode */ - err = kbdev->secure_ops->secure_mode_disable(kbdev); + if (!kbdev->protected_ops) + return -EINVAL; - if (err) - dev_warn(kbdev->dev, "Failed to disable secure mode: %d\n", err); - else - kbdev->secure_mode = false; + kbdev->protected_mode_transition = true; + kbase_reset_gpu_silent(kbdev); + + return 0; +} + +static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, + struct kbase_jd_atom **katom, int idx, int js) +{ + int err = 0; + + switch (katom[idx]->exit_protected_state) { + case KBASE_ATOM_EXIT_PROTECTED_CHECK: + /* + * If the atom ahead of this one hasn't got to being + * submitted yet then bail. + */ + if (idx == 1 && + (katom[0]->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED && + katom[0]->gpu_rb_state != KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB)) + return -EAGAIN; + + /* If we're not exiting protected mode then we're done here. */ + if (!(kbase_gpu_in_protected_mode(kbdev) && + !kbase_jd_katom_is_protected(katom[idx]))) + return 0; + + /* + * If there is a transition in progress, or work still + * on the GPU try again later. 
+ */ + if (kbdev->protected_mode_transition || + kbase_gpu_atoms_submitted_any(kbdev)) + return -EAGAIN; + + /* + * Exiting protected mode requires a reset, but first the L2 + * needs to be powered down to ensure it's not active when the + * reset is issued. + */ + katom[idx]->exit_protected_state = + KBASE_ATOM_EXIT_PROTECTED_IDLE_L2; + + /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ + + case KBASE_ATOM_EXIT_PROTECTED_IDLE_L2: + if (kbase_pm_get_active_cores(kbdev, KBASE_PM_CORE_L2) || + kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_L2)) { + /* + * The L2 is still powered, wait for all the users to + * finish with it before doing the actual reset. + */ + return -EAGAIN; + } + katom[idx]->exit_protected_state = + KBASE_ATOM_EXIT_PROTECTED_RESET; + + /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ + + case KBASE_ATOM_EXIT_PROTECTED_RESET: + /* Issue the reset to the GPU */ + err = kbase_gpu_protected_mode_reset(kbdev); + if (err) { + /* Failed to exit protected mode, fail atom */ + katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID; + kbase_gpu_mark_atom_for_return(kbdev, katom[idx]); + /* Only return if head atom or previous atom + * already removed - as atoms must be returned + * in order */ + if (idx == 0 || katom[0]->gpu_rb_state == + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { + kbase_gpu_dequeue_atom(kbdev, js, NULL); + kbase_jm_return_atom_to_js(kbdev, katom[idx]); + } + + kbase_vinstr_resume(kbdev->vinstr_ctx); + + return -EINVAL; + } + + katom[idx]->exit_protected_state = + KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT; + + /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ + + case KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT: + if (kbase_reset_gpu_active(kbdev)) + return -EAGAIN; + + /* protected mode sanity checks */ + KBASE_DEBUG_ASSERT_MSG( + kbase_jd_katom_is_protected(katom[idx]) == kbase_gpu_in_protected_mode(kbdev), + "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)", + kbase_jd_katom_is_protected(katom[idx]), kbase_gpu_in_protected_mode(kbdev)); + KBASE_DEBUG_ASSERT_MSG( + (kbase_jd_katom_is_protected(katom[idx]) && js == 0) || + !kbase_jd_katom_is_protected(katom[idx]), + "Protected atom on JS%d not supported", js); } - return err; + return 0; } void kbase_gpu_slot_update(struct kbase_device *kbdev) @@ -719,6 +814,7 @@ void kbase_gpu_slot_update(struct kbase_device *kbdev) for (idx = 0; idx < SLOT_RB_SIZE; idx++) { bool cores_ready; + int ret; if (!katom[idx]) continue; @@ -735,10 +831,48 @@ void kbase_gpu_slot_update(struct kbase_device *kbdev) break; katom[idx]->gpu_rb_state = - KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE; + KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_EXIT; /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ + + case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_EXIT: + /* + * Exiting protected mode must be done before + * the references on the cores are taken as + * a power down the L2 is required which + * can't happen after the references for this + * atom are taken. + */ + ret = kbase_jm_exit_protected_mode(kbdev, + katom, idx, js); + if (ret) + break; + + katom[idx]->gpu_rb_state = + KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE; + + /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ + case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE: + if (katom[idx]->will_fail_event_code) { + kbase_gpu_mark_atom_for_return(kbdev, + katom[idx]); + /* Set EVENT_DONE so this atom will be + completed, not unpulled. 
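The kbase_jm_exit_protected_mode() switch above is a resumable state machine: each call either advances CHECK to IDLE_L2 to RESET to RESET_WAIT, falling through on success, or returns -EAGAIN so the slot update retries later. A self-contained sketch of that shape, with the driver's checks replaced by stub predicates (stubs are assumptions for illustration only):

#include <stdbool.h>

enum exit_state {
	EXIT_CHECK,
	EXIT_IDLE_L2,
	EXIT_RESET,
	EXIT_RESET_WAIT,
	EXIT_DONE
};

/* Stand-ins for the L2 power state and reset status checks. */
static bool l2_still_powered(void)   { return false; }
static bool reset_still_active(void) { return false; }

/* Returns 0 when finished, 1 ("try again later") when it must be re-run. */
static int step_exit_protected(enum exit_state *s)
{
	switch (*s) {
	case EXIT_CHECK:
		*s = EXIT_IDLE_L2;
		/* fall through */
	case EXIT_IDLE_L2:
		if (l2_still_powered())
			return 1;	/* wait for the L2 to power down */
		*s = EXIT_RESET;
		/* fall through */
	case EXIT_RESET:
		/* the driver issues its silent reset at this point */
		*s = EXIT_RESET_WAIT;
		/* fall through */
	case EXIT_RESET_WAIT:
		if (reset_still_active())
			return 1;	/* reset not complete yet */
		*s = EXIT_DONE;
		/* fall through */
	case EXIT_DONE:
		break;
	}
	return 0;
}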
*/ + katom[idx]->event_code = + BASE_JD_EVENT_DONE; + /* Only return if head atom or previous + * atom already removed - as atoms must + * be returned in order. */ + if (idx == 0 || katom[0]->gpu_rb_state == + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { + kbase_gpu_dequeue_atom(kbdev, js, NULL); + kbase_jm_return_atom_to_js(kbdev, katom[idx]); + } + break; + } + + cores_ready = kbasep_js_job_check_ref_cores(kbdev, js, katom[idx]); @@ -765,12 +899,28 @@ void kbase_gpu_slot_update(struct kbase_device *kbdev) break; katom[idx]->gpu_rb_state = - KBASE_ATOM_GPU_RB_WAITING_SECURE_MODE; + KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_ENTRY; /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ - case KBASE_ATOM_GPU_RB_WAITING_SECURE_MODE: - if (kbase_gpu_in_secure_mode(kbdev) != kbase_jd_katom_is_secure(katom[idx])) { + case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_ENTRY: + + /* Only submit if head atom or previous atom + * already submitted */ + if (idx == 1 && + (katom[0]->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED && + katom[0]->gpu_rb_state != KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB)) + break; + + /* + * If the GPU is transitioning protected mode + * then bail now and we'll be called when the + * new state has settled. + */ + if (kbdev->protected_mode_transition) + break; + + if (!kbase_gpu_in_protected_mode(kbdev) && kbase_jd_katom_is_protected(katom[idx])) { int err = 0; /* Not in correct mode, take action */ @@ -784,16 +934,26 @@ void kbase_gpu_slot_update(struct kbase_device *kbdev) */ break; } - - /* No jobs running, so we can switch GPU mode right now */ - if (kbase_jd_katom_is_secure(katom[idx])) { - err = kbase_gpu_secure_mode_enable(kbdev); - } else { - err = kbase_gpu_secure_mode_disable(kbdev); + if (kbase_vinstr_try_suspend(kbdev->vinstr_ctx) < 0) { + /* + * We can't switch now because + * the vinstr core state switch + * is not done yet. + */ + break; } + /* Once reaching this point GPU must be + * switched to protected mode or vinstr + * re-enabled. */ + /* No jobs running, so we can switch GPU mode right now */ + err = kbase_gpu_protected_mode_enter(kbdev); if (err) { - /* Failed to switch secure mode, fail atom */ + /* + * Failed to switch into protected mode, resume + * vinstr core and fail atom. 
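Both the protected-mode entry path just above and the exit path earlier gate on the same ordering rule for the two-entry slot ringbuffer: the atom at index 1 may only act once the head atom has been submitted or has already left the ringbuffer, which keeps atoms strictly in order. A small sketch of that predicate; the enum values are illustrative, not the driver's full state list.

#include <stdbool.h>

enum rb_state { RB_NOT_IN_SLOT_RB, RB_WAITING, RB_SUBMITTED };

static bool may_act_on_entry(int idx, enum rb_state head_state)
{
	if (idx == 0)
		return true;	/* the head entry is never blocked by itself */

	return head_state == RB_SUBMITTED ||
	       head_state == RB_NOT_IN_SLOT_RB;
}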
+ */ + kbase_vinstr_resume(kbdev->vinstr_ctx); katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID; kbase_gpu_mark_atom_for_return(kbdev, katom[idx]); /* Only return if head atom or previous atom @@ -808,22 +968,18 @@ void kbase_gpu_slot_update(struct kbase_device *kbdev) } } - /* Secure mode sanity checks */ + /* Protected mode sanity checks */ KBASE_DEBUG_ASSERT_MSG( - kbase_jd_katom_is_secure(katom[idx]) == kbase_gpu_in_secure_mode(kbdev), - "Secure mode of atom (%d) doesn't match secure mode of GPU (%d)", - kbase_jd_katom_is_secure(katom[idx]), kbase_gpu_in_secure_mode(kbdev)); - KBASE_DEBUG_ASSERT_MSG( - (kbase_jd_katom_is_secure(katom[idx]) && js == 0) || - !kbase_jd_katom_is_secure(katom[idx]), - "Secure atom on JS%d not supported", js); - + kbase_jd_katom_is_protected(katom[idx]) == kbase_gpu_in_protected_mode(kbdev), + "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)", + kbase_jd_katom_is_protected(katom[idx]), kbase_gpu_in_protected_mode(kbdev)); katom[idx]->gpu_rb_state = KBASE_ATOM_GPU_RB_READY; /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ case KBASE_ATOM_GPU_RB_READY: + /* Only submit if head atom or previous atom * already submitted */ if (idx == 1 && @@ -944,8 +1100,16 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, } katom = kbase_gpu_dequeue_atom(kbdev, js, end_timestamp); - kbase_timeline_job_slot_done(kbdev, katom->kctx, katom, js, 0); + kbase_tlstream_tl_nret_atom_lpu( + katom, + &kbdev->gpu_props.props.raw_props.js_features[ + katom->slot_nr]); + kbase_tlstream_tl_nret_atom_as(katom, &kbdev->as[kctx->as_nr]); + kbase_tlstream_tl_nret_ctx_lpu( + kctx, + &kbdev->gpu_props.props.raw_props.js_features[ + katom->slot_nr]); if (completion_code == BASE_JD_EVENT_STOPPED) { struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js, @@ -1098,13 +1262,34 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp) for (idx = 0; idx < 2; idx++) { struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0); + bool keep_in_jm_rb = false; - if (katom) { - kbase_gpu_release_atom(kbdev, katom, NULL); - kbase_gpu_dequeue_atom(kbdev, js, NULL); - katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; - kbase_jm_complete(kbdev, katom, end_timestamp); + if (!katom) + continue; + + if (katom->gpu_rb_state < KBASE_ATOM_GPU_RB_SUBMITTED) + keep_in_jm_rb = true; + + kbase_gpu_release_atom(kbdev, katom, NULL); + + /* + * If the atom wasn't on HW when the reset was issued + * then leave it in the RB and next time we're kicked + * it will be processed again from the starting state. + */ + if (keep_in_jm_rb) { + katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED; + katom->exit_protected_state = KBASE_ATOM_EXIT_PROTECTED_CHECK; + continue; } + + /* + * The atom was on the HW when the reset was issued + * all we can do is fail the atom. + */ + kbase_gpu_dequeue_atom(kbdev, js, NULL); + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + kbase_jm_complete(kbdev, katom, end_timestamp); } } } diff --git a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_js_affinity.c b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_js_affinity.c index 6a49669af630..d665420ab380 100644 --- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_js_affinity.c +++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_js_affinity.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. 
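The reworked kbase_backend_reset() loop above draws one line: an atom that had not yet reached the hardware when the reset was issued stays in the ringbuffer (with its core-reference and protected-exit state rewound) and is processed again from scratch, while an atom that was already on the hardware can only be failed. A small sketch of that decision, with the per-atom state rewinding reduced to a flag:

#include <stdbool.h>

enum atom_outcome { ATOM_KEEP_IN_RB, ATOM_CANCELLED };

/* 'submitted' stands in for gpu_rb_state >= KBASE_ATOM_GPU_RB_SUBMITTED. */
static enum atom_outcome handle_atom_on_reset(bool submitted)
{
	if (!submitted) {
		/* not on HW yet: rewind its state and let it run again later */
		return ATOM_KEEP_IN_RB;
	}

	/* it was on the HW when the reset hit: all we can do is fail it */
	return ATOM_CANCELLED;
}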
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -23,6 +23,7 @@ #include #include "mali_kbase_js_affinity.h" +#include "mali_kbase_hw.h" #include @@ -114,9 +115,14 @@ bool kbase_js_choose_affinity(u64 * const affinity, if ((core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) == BASE_JD_REQ_T) { spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); - /* Tiler only job, bit 0 needed to enable tiler but no shader - * cores required */ - *affinity = 1; + /* If the hardware supports XAFFINITY then we'll only enable + * the tiler (which is the default so this is a no-op), + * otherwise enable shader core 0. */ + if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY)) + *affinity = 1; + else + *affinity = 0; + return true; } @@ -172,9 +178,12 @@ bool kbase_js_choose_affinity(u64 * const affinity, if (*affinity == 0) return false; - /* Enable core 0 if tiler required */ - if (core_req & BASE_JD_REQ_T) - *affinity = *affinity | 1; + /* Enable core 0 if tiler required for hardware without XAFFINITY + * support (notes above) */ + if (core_req & BASE_JD_REQ_T) { + if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY)) + *affinity = *affinity | 1; + } return true; } diff --git a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_js_affinity.h b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_js_affinity.h index 3026e6a58303..fbffa3b40962 100644 --- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_js_affinity.h +++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_js_affinity.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -24,14 +24,6 @@ #ifndef _KBASE_JS_AFFINITY_H_ #define _KBASE_JS_AFFINITY_H_ -#ifdef CONFIG_MALI_DEBUG_SHADER_SPLIT_FS -/* Import the external affinity mask variables */ -extern u64 mali_js0_affinity_mask; -extern u64 mali_js1_affinity_mask; -extern u64 mali_js2_affinity_mask; -#endif /* CONFIG_MALI_DEBUG_SHADER_SPLIT_FS */ - - /** * kbase_js_can_run_job_on_slot_no_lock - Decide whether it is possible to * submit a job to a particular job slot in the current status diff --git a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_js_backend.c b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_js_backend.c index 1e9a7e4c466d..a23deb4ca20c 100644 --- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_js_backend.c +++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_js_backend.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -138,6 +138,17 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer) js_devdata->gpu_reset_ticks_ss; } + /* If timeouts have been changed then ensure + * that atom tick count is not greater than the + * new soft_stop timeout. This ensures that + * atoms do not miss any of the timeouts due to + * races between this worker and the thread + * changing the timeouts. 
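The affinity changes above reduce to one rule: on hardware with the XAFFINITY feature a tiler-only job needs no shader cores at all, while older hardware still needs shader core 0 enabled for the tiler. A minimal sketch of both decisions, with the feature bit passed in as a plain flag rather than read via kbase_hw_has_feature():

#include <stdbool.h>
#include <stdint.h>

/* Affinity for a job that only needs the tiler. */
static uint64_t tiler_only_affinity(bool has_xaffinity)
{
	return has_xaffinity ? 0 : 1;	/* bit 0 == shader core 0 */
}

/* Add the tiler's core-0 requirement to a computed shader affinity. */
static uint64_t with_tiler_requirement(uint64_t affinity, bool needs_tiler,
				       bool has_xaffinity)
{
	if (needs_tiler && !has_xaffinity)
		affinity |= 1;
	return affinity;
}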
*/ + if (backend->timeouts_updated && + ticks > soft_stop_ticks) + ticks = atom->sched_info.cfs.ticks = + soft_stop_ticks; + /* Job is Soft-Stoppable */ if (ticks == soft_stop_ticks) { int disjoint_threshold = @@ -257,6 +268,8 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer) HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns), HRTIMER_MODE_REL); + backend->timeouts_updated = false; + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); return HRTIMER_NORESTART; @@ -335,3 +348,10 @@ void kbase_backend_timer_resume(struct kbase_device *kbdev) kbase_backend_ctx_count_changed(kbdev); } +void kbase_backend_timeouts_changed(struct kbase_device *kbdev) +{ + struct kbase_backend_data *backend = &kbdev->hwaccess.backend; + + backend->timeouts_updated = true; +} + diff --git a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_mmu_hw_direct.c b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_mmu_hw_direct.c index 4fd13e2de63e..4a3572d971a6 100644 --- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_mmu_hw_direct.c +++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_mmu_hw_direct.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -20,11 +20,9 @@ #include #include #include -#if defined(CONFIG_MALI_MIPE_ENABLED) #include -#endif -#include #include +#include static inline u64 lock_region(struct kbase_device *kbdev, u64 pfn, u32 num_pages) @@ -154,6 +152,9 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) AS_FAULTADDRESS_LO), kctx); + /* report the fault to debugfs */ + kbase_as_fault_debugfs_new(kbdev, as_no); + /* record the fault status */ as->fault_status = kbase_reg_read(kbdev, MMU_AS_REG(as_no, @@ -165,6 +166,15 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) KBASE_MMU_FAULT_TYPE_BUS : KBASE_MMU_FAULT_TYPE_PAGE; +#ifdef CONFIG_MALI_GPU_MMU_AARCH64 + as->fault_extra_addr = kbase_reg_read(kbdev, + MMU_AS_REG(as_no, AS_FAULTEXTRA_HI), + kctx); + as->fault_extra_addr <<= 32; + as->fault_extra_addr |= kbase_reg_read(kbdev, + MMU_AS_REG(as_no, AS_FAULTEXTRA_LO), + kctx); +#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */ if (kbase_as_has_bus_fault(as)) { /* Mark bus fault as handled. 
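The new AS_FAULTEXTRA read above follows the usual split-register pattern for 64-bit values on this GPU: read the HI word, shift it into the top half, then OR in the LO word. Sketch with the two register reads replaced by plain parameters; fault_extra_addr then amounts to combine_hi_lo(read(AS_FAULTEXTRA_HI), read(AS_FAULTEXTRA_LO)).

#include <stdint.h>

static uint64_t combine_hi_lo(uint32_t hi, uint32_t lo)
{
	return ((uint64_t)hi << 32) | lo;
}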
@@ -203,10 +213,36 @@ void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as, struct kbase_context *kctx) { struct kbase_mmu_setup *current_setup = &as->current_setup; -#ifdef CONFIG_MALI_MIPE_ENABLED u32 transcfg = 0; -#endif +#ifdef CONFIG_MALI_GPU_MMU_AARCH64 + transcfg = current_setup->transcfg & 0xFFFFFFFFUL; + + /* Set flag AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK */ + /* Clear PTW_MEMATTR bits */ + transcfg &= ~AS_TRANSCFG_PTW_MEMATTR_MASK; + /* Enable correct PTW_MEMATTR bits */ + transcfg |= AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK; + + if (kbdev->system_coherency == COHERENCY_ACE) { + /* Set flag AS_TRANSCFG_PTW_SH_OS (outer shareable) */ + /* Clear PTW_SH bits */ + transcfg = (transcfg & ~AS_TRANSCFG_PTW_SH_MASK); + /* Enable correct PTW_SH bits */ + transcfg = (transcfg | AS_TRANSCFG_PTW_SH_OS); + } + + kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_LO), + transcfg, kctx); + kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_HI), + (current_setup->transcfg >> 32) & 0xFFFFFFFFUL, kctx); + +#else /* CONFIG_MALI_GPU_MMU_AARCH64 */ + + if (kbdev->system_coherency == COHERENCY_ACE) + current_setup->transtab |= AS_TRANSTAB_LPAE_SHARE_OUTER; + +#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */ kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_LO), current_setup->transtab & 0xFFFFFFFFUL, kctx); @@ -218,12 +254,10 @@ void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as, kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_HI), (current_setup->memattr >> 32) & 0xFFFFFFFFUL, kctx); -#if defined(CONFIG_MALI_MIPE_ENABLED) kbase_tlstream_tl_attrib_as_config(as, current_setup->transtab, current_setup->memattr, transcfg); -#endif write_cmd(kbdev, as->number, AS_COMMAND_UPDATE, kctx); } diff --git a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_backend.c b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_backend.c index 947a7ed285d6..711e44c7f80a 100644 --- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_backend.c +++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_backend.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -168,6 +168,7 @@ bool kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend) /* Force all cores off */ kbdev->pm.backend.desired_shader_state = 0; + kbdev->pm.backend.desired_tiler_state = 0; /* Force all cores to be unavailable, in the situation where * transitions are in progress for some cores but not others, diff --git a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_coarse_demand.c b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_coarse_demand.c index 487391168e25..f891fa225a89 100644 --- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_coarse_demand.c +++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_coarse_demand.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. 
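The AS_TRANSCFG programming above is a pair of read-modify-write field updates: clear the whole PTW_MEMATTR (or PTW_SH) field using its mask, then OR in the wanted encoding. A generic sketch of that idiom; the mask and value parameters here are placeholders, not the real register layout.

#include <stdint.h>

static uint32_t set_reg_field(uint32_t reg, uint32_t field_mask,
			      uint32_t field_val)
{
	reg &= ~field_mask;		/* clear the field            */
	reg |= field_val & field_mask;	/* then set the desired value */
	return reg;
}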
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -35,7 +35,8 @@ static u64 coarse_demand_get_core_mask(struct kbase_device *kbdev) static bool coarse_demand_get_core_active(struct kbase_device *kbdev) { if (0 == kbdev->pm.active_count && !(kbdev->shader_needed_bitmap | - kbdev->shader_inuse_bitmap)) + kbdev->shader_inuse_bitmap) && !kbdev->tiler_needed_cnt + && !kbdev->tiler_inuse_cnt) return false; return true; diff --git a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_defs.h b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_defs.h index 60e40915869c..e8f96fe6c514 100644 --- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_defs.h +++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_defs.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -192,12 +192,14 @@ union kbase_pm_ca_policy_data { * @gpu_poweroff_pending: number of poweroff timer ticks until the GPU is * powered off * @shader_poweroff_pending_time: number of poweroff timer ticks until shaders - * are powered off + * and/or timers are powered off * @gpu_poweroff_timer: Timer for powering off GPU * @gpu_poweroff_wq: Workqueue to power off GPU on when timer fires * @gpu_poweroff_work: Workitem used on @gpu_poweroff_wq * @shader_poweroff_pending: Bit mask of shaders to be powered off on next * timer callback + * @tiler_poweroff_pending: Bit mask of tilers to be powered off on next timer + * callback * @poweroff_timer_needed: true if the poweroff timer is currently required, * false otherwise * @poweroff_timer_running: true if the poweroff timer is currently running, @@ -219,9 +221,6 @@ union kbase_pm_ca_policy_data { * &struct kbase_pm_callback_conf * @callback_power_runtime_idle: Optional callback when the GPU may be idle. See * &struct kbase_pm_callback_conf - * @callback_cci_snoop_ctrl: Callback when the GPU L2 power may transition. - * If enable is set then snoops should be enabled - * otherwise snoops should be disabled * * Note: * During an IRQ, @ca_current_policy or @pm_current_policy can be NULL when the @@ -277,6 +276,7 @@ struct kbase_pm_backend_data { struct work_struct gpu_poweroff_work; u64 shader_poweroff_pending; + u64 tiler_poweroff_pending; bool poweroff_timer_needed; bool poweroff_timer_running; @@ -288,7 +288,6 @@ struct kbase_pm_backend_data { int (*callback_power_runtime_on)(struct kbase_device *kbdev); void (*callback_power_runtime_off)(struct kbase_device *kbdev); int (*callback_power_runtime_idle)(struct kbase_device *kbdev); - }; diff --git a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_demand.c b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_demand.c index 9dac2303bd00..81322fd0dd17 100644 --- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_demand.c +++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_demand.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. 
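The coarse-demand change above, and the identical change to the demand policy in the hunk that follows, extend the "may the GPU go inactive?" test to the tiler: the GPU now stays active while either shader cores or the tiler are needed or in use. A sketch of the combined predicate:

#include <stdbool.h>
#include <stdint.h>

static bool gpu_core_active(int active_count,
			    uint64_t shader_needed, uint64_t shader_inuse,
			    unsigned int tiler_needed, unsigned int tiler_inuse)
{
	if (active_count == 0 &&
	    !(shader_needed | shader_inuse) &&
	    !tiler_needed && !tiler_inuse)
		return false;

	return true;
}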
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -37,7 +37,8 @@ static u64 demand_get_core_mask(struct kbase_device *kbdev) static bool demand_get_core_active(struct kbase_device *kbdev) { if (0 == kbdev->pm.active_count && !(kbdev->shader_needed_bitmap | - kbdev->shader_inuse_bitmap)) + kbdev->shader_inuse_bitmap) && !kbdev->tiler_needed_cnt + && !kbdev->tiler_inuse_cnt) return false; return true; diff --git a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_driver.c b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_driver.c index 73b6cff641f6..046ebcb7b508 100644 --- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_driver.c +++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_driver.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -27,11 +27,8 @@ #if defined(CONFIG_MALI_GATOR_SUPPORT) #include #endif -#if defined(CONFIG_MALI_MIPE_ENABLED) #include -#endif #include -#include #include #include #include @@ -99,6 +96,39 @@ static u32 core_type_to_reg(enum kbase_pm_core_type core_type, return (u32)core_type + (u32)action; } +#ifdef CONFIG_ARM64 +static void mali_cci_flush_l2(struct kbase_device *kbdev) +{ + const u32 mask = CLEAN_CACHES_COMPLETED | RESET_COMPLETED; + u32 loops = KBASE_CLEAN_CACHE_MAX_LOOPS; + u32 raw; + + /* + * Note that we don't take the cache flush mutex here since + * we expect to be the last user of the L2, all other L2 users + * would have dropped their references, to initiate L2 power + * down, L2 power down being the only valid place for this + * to be called from. + */ + + kbase_reg_write(kbdev, + GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_CLEAN_INV_CACHES, + NULL); + + raw = kbase_reg_read(kbdev, + GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), + NULL); + + /* Wait for cache flush to complete before continuing, exit on + * gpu resets or loop expiry. */ + while (((raw & mask) == 0) && --loops) { + raw = kbase_reg_read(kbdev, + GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), + NULL); + } +} +#endif /** * kbase_pm_invoke - Invokes an action on a core set @@ -134,7 +164,7 @@ static void kbase_pm_invoke(struct kbase_device *kbdev, kbase_trace_mali_pm_power_off(core_type, cores); } #endif -#if defined(CONFIG_MALI_MIPE_ENABLED) + if (cores) { u64 state = kbase_pm_get_state(kbdev, core_type, ACTION_READY); @@ -144,7 +174,7 @@ static void kbase_pm_invoke(struct kbase_device *kbdev, state &= ~cores; kbase_tlstream_aux_pm_state(core_type, state); } -#endif + /* Tracing */ if (cores) { if (action == ACTION_PWRON) @@ -177,6 +207,8 @@ static void kbase_pm_invoke(struct kbase_device *kbdev, case KBASE_PM_CORE_L2: KBASE_TRACE_ADD(kbdev, PM_PWROFF_L2, NULL, NULL, 0u, lo); + /* disable snoops before L2 is turned off */ + kbase_pm_cache_snoop_disable(kbdev); break; default: break; @@ -404,6 +436,12 @@ static bool kbase_pm_transition_core_type(struct kbase_device *kbdev, /* All are ready, none will be turned off, and none are * transitioning */ kbdev->pm.backend.l2_powered = 1; + /* + * Ensure snoops are enabled after L2 is powered up, + * note that kbase keeps track of the snoop state, so + * safe to repeatedly call. 
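mali_cci_flush_l2() above is a bounded poll: issue the clean-and-invalidate command, then re-read GPU_IRQ_RAWSTAT until the completion bits appear or the loop budget runs out, so a wedged GPU cannot hang the power-down path. A user-space sketch of the same loop; the bit positions and the register-read stub are illustrative assumptions.

#include <stdbool.h>
#include <stdint.h>

#define CLEAN_CACHES_DONE  (1u << 17)	/* assumption: illustrative bits */
#define RESET_DONE         (1u << 8)
#define MAX_CLEAN_LOOPS    100000u

static uint32_t read_irq_rawstat(void) { return CLEAN_CACHES_DONE; } /* stub */

static bool wait_for_clean(void)
{
	const uint32_t mask = CLEAN_CACHES_DONE | RESET_DONE;
	uint32_t loops = MAX_CLEAN_LOOPS;
	uint32_t raw = read_irq_rawstat();

	while ((raw & mask) == 0 && --loops)
		raw = read_irq_rawstat();

	return (raw & mask) != 0;	/* false means we gave up */
}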
+ */ + kbase_pm_cache_snoop_enable(kbdev); if (kbdev->l2_users_count > 0) { /* Notify any registered l2 cache users * (optimized out when no users waiting) */ @@ -471,10 +509,12 @@ KBASE_EXPORT_TEST_API(kbase_pm_transition_core_type); * @present: The bit mask of present caches * @cores_powered: A bit mask of cores (or L2 caches) that are desired to * be powered + * @tilers_powered: The bit mask of tilers that are desired to be powered * * Return: A bit mask of the caches that should be turned on */ -static u64 get_desired_cache_status(u64 present, u64 cores_powered) +static u64 get_desired_cache_status(u64 present, u64 cores_powered, + u64 tilers_powered) { u64 desired = 0; @@ -497,6 +537,10 @@ static u64 get_desired_cache_status(u64 present, u64 cores_powered) present &= ~bit_mask; } + /* Power up the required L2(s) for the tiler */ + if (tilers_powered) + desired |= 1; + return desired; } @@ -509,6 +553,7 @@ MOCKABLE(kbase_pm_check_transitions_nolock) (struct kbase_device *kbdev) bool in_desired_state = true; u64 desired_l2_state; u64 cores_powered; + u64 tilers_powered; u64 tiler_available_bitmap; u64 shader_available_bitmap; u64 shader_ready_bitmap; @@ -542,6 +587,10 @@ MOCKABLE(kbase_pm_check_transitions_nolock) (struct kbase_device *kbdev) cores_powered |= kbdev->pm.backend.desired_shader_state; + /* Work out which tilers want to be powered */ + tilers_powered = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_TILER); + tilers_powered |= kbdev->pm.backend.desired_tiler_state; + /* If there are l2 cache users registered, keep all l2s powered even if * all other cores are off. */ if (kbdev->l2_users_count > 0) @@ -549,17 +598,11 @@ MOCKABLE(kbase_pm_check_transitions_nolock) (struct kbase_device *kbdev) desired_l2_state = get_desired_cache_status( kbdev->gpu_props.props.raw_props.l2_present, - cores_powered); + cores_powered, tilers_powered); /* If any l2 cache is on, then enable l2 #0, for use by job manager */ - if (0 != desired_l2_state) { + if (0 != desired_l2_state) desired_l2_state |= 1; - /* Also enable tiler if l2 cache is powered */ - kbdev->pm.backend.desired_tiler_state = - kbdev->gpu_props.props.raw_props.tiler_present; - } else { - kbdev->pm.backend.desired_tiler_state = 0; - } prev_l2_available_bitmap = kbdev->l2_available_bitmap; in_desired_state &= kbase_pm_transition_core_type(kbdev, @@ -665,7 +708,7 @@ MOCKABLE(kbase_pm_check_transitions_nolock) (struct kbase_device *kbdev) kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_TILER)); #endif -#if defined(CONFIG_MALI_MIPE_ENABLED) + kbase_tlstream_aux_pm_state( KBASE_PM_CORE_L2, kbase_pm_get_ready_cores( @@ -679,7 +722,6 @@ MOCKABLE(kbase_pm_check_transitions_nolock) (struct kbase_device *kbdev) kbase_pm_get_ready_cores( kbdev, KBASE_PM_CORE_TILER)); -#endif KBASE_TRACE_ADD(kbdev, PM_DESIRED_REACHED, NULL, NULL, kbdev->pm.backend.gpu_in_desired_state, @@ -976,6 +1018,7 @@ bool kbase_pm_clock_off(struct kbase_device *kbdev, bool is_suspend) return false; } + kbase_pm_cache_snoop_disable(kbdev); /* The GPU power may be turned off from this point */ kbdev->pm.backend.gpu_powered = false; @@ -1058,18 +1101,20 @@ static void kbase_pm_hw_issues_detect(struct kbase_device *kbdev) if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10327)) kbdev->hw_quirks_sc |= SC_SDC_DISABLE_OQ_DISCARD; +#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY /* Enable alternative hardware counter selection if configured. */ - if (DEFAULT_ALTERNATIVE_HWC) + if (!GPU_ID_IS_NEW_FORMAT(prod_id)) kbdev->hw_quirks_sc |= SC_ALT_COUNTERS; +#endif /* Needed due to MIDBASE-2795. 
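With the tilers_powered argument added above, the desired-L2 rule becomes: power every L2 that backs a wanted shader core, and additionally L2 #0 whenever the tiler is wanted. A deliberately simplified sketch that assumes a single core group behind L2 bit 0; the real function walks the per-core-group bitmaps in the 'present' mask.

#include <stdint.h>

static uint64_t desired_l2_state(uint64_t l2_present, uint64_t cores_powered,
				 uint64_t tilers_powered)
{
	uint64_t desired = 0;

	if (cores_powered)
		desired |= l2_present & 1;	/* simplification: one core group */

	if (tilers_powered)
		desired |= 1;			/* the tiler sits behind L2 #0 */

	return desired;
}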
ENABLE_TEXGRD_FLAGS. See PRLAM-10797. */ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10797)) kbdev->hw_quirks_sc |= SC_ENABLE_TEXGRD_FLAGS; if (!kbase_hw_has_issue(kbdev, GPUCORE_1619)) { - if (prod_id < 0x760 || prod_id == 0x6956) /* T60x, T62x, T72x */ + if (prod_id < 0x750 || prod_id == 0x6956) /* T60x, T62x, T72x */ kbdev->hw_quirks_sc |= SC_LS_ATTR_CHECK_DISABLE; - else if (prod_id >= 0x760 && prod_id <= 0x880) /* T76x, T8xx */ + else if (prod_id >= 0x750 && prod_id <= 0x880) /* T76x, T8xx */ kbdev->hw_quirks_sc |= SC_LS_ALLOW_ATTR_TYPES; } @@ -1094,6 +1139,12 @@ static void kbase_pm_hw_issues_detect(struct kbase_device *kbdev) kbdev->hw_quirks_mmu |= (DEFAULT_AWID_LIMIT & 0x3) << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT; + if (kbdev->system_coherency == COHERENCY_ACE) { + /* Allow memory configuration disparity to be ignored, we + * optimize the use of shared memory and thus we expect + * some disparity in the memory configuration */ + kbdev->hw_quirks_mmu |= L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY; + } /* Only for T86x/T88x-based products after r2p0 */ if (prod_id >= 0x860 && prod_id <= 0x880 && major >= 2) { @@ -1158,51 +1209,42 @@ static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev) } - -int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) +void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev) { - unsigned long irq_flags; - struct kbasep_reset_timeout_data rtdata; - - KBASE_DEBUG_ASSERT(NULL != kbdev); - lockdep_assert_held(&kbdev->pm.lock); - - /* Ensure the clock is on before attempting to access the hardware */ - if (!kbdev->pm.backend.gpu_powered) { - if (kbdev->pm.backend.callback_power_on) - kbdev->pm.backend.callback_power_on(kbdev); - - spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, - irq_flags); - kbdev->pm.backend.gpu_powered = true; - spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, - irq_flags); + if ((kbdev->system_coherency == COHERENCY_ACE) && + !kbdev->cci_snoop_enabled) { +#ifdef CONFIG_ARM64 + if (kbdev->snoop_enable_smc != 0) + kbase_invoke_smc_fid(kbdev->snoop_enable_smc, 0, 0, 0); +#endif /* CONFIG_ARM64 */ + dev_dbg(kbdev->dev, "MALI - CCI Snoops - Enabled\n"); + kbdev->cci_snoop_enabled = true; } +} - /* Ensure interrupts are off to begin with, this also clears any - * outstanding interrupts */ - kbase_pm_disable_interrupts(kbdev); - /* Prepare for the soft-reset */ - kbdev->pm.backend.reset_done = false; +void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev) +{ + if ((kbdev->system_coherency == COHERENCY_ACE) && + kbdev->cci_snoop_enabled) { +#ifdef CONFIG_ARM64 + if (kbdev->snoop_disable_smc != 0) { + mali_cci_flush_l2(kbdev); + kbase_invoke_smc_fid(kbdev->snoop_disable_smc, 0, 0, 0); + } +#endif /* CONFIG_ARM64 */ + dev_dbg(kbdev->dev, "MALI - CCI Snoops Disabled\n"); + kbdev->cci_snoop_enabled = false; + } +} - /* The cores should be made unavailable due to the reset */ - spin_lock_irqsave(&kbdev->pm.power_change_lock, irq_flags); - if (kbdev->shader_available_bitmap != 0u) - KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL, - NULL, 0u, (u32)0u); - if (kbdev->tiler_available_bitmap != 0u) - KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, - NULL, NULL, 0u, (u32)0u); - kbdev->shader_available_bitmap = 0u; - kbdev->tiler_available_bitmap = 0u; - kbdev->l2_available_bitmap = 0u; - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, irq_flags); +static int kbase_pm_reset_do_normal(struct kbase_device *kbdev) +{ + struct kbasep_reset_timeout_data rtdata; - /* Soft reset the GPU */ 
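kbase_pm_cache_snoop_enable()/_disable() above are written to be idempotent: they only issue the SMC when the system is ACE-coherent and the cached snoop state actually changes, which is what makes them safe to call around every L2 transition and again before a reset. Sketch with the SMC and the L2 flush stubbed out (the stubs and flags are stand-ins, not driver API):

#include <stdbool.h>

static bool ace_coherent   = true;	/* assumption: system_coherency == ACE */
static bool snoops_enabled = false;

static void smc_set_snoops(bool on) { (void)on; }	/* stand-in for the SMC  */
static void flush_l2(void)          { }			/* stand-in for L2 flush */

static void cache_snoop_enable(void)
{
	if (ace_coherent && !snoops_enabled) {
		smc_set_snoops(true);
		snoops_enabled = true;
	}
}

static void cache_snoop_disable(void)
{
	if (ace_coherent && snoops_enabled) {
		flush_l2();		/* the driver flushes the L2 first */
		smc_set_snoops(false);
		snoops_enabled = false;
	}
}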
KBASE_TRACE_ADD(kbdev, CORE_GPU_SOFT_RESET, NULL, NULL, 0u, 0); -#if defined(CONFIG_MALI_MIPE_ENABLED) + kbase_tlstream_jd_gpu_soft_reset(kbdev); -#endif + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_SOFT_RESET, NULL); @@ -1228,7 +1270,7 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) /* GPU has been reset */ hrtimer_cancel(&rtdata.timer); destroy_hrtimer_on_stack(&rtdata.timer); - goto out; + return 0; } /* No interrupt has been received - check if the RAWSTAT register says @@ -1264,7 +1306,7 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) /* GPU has been reset */ hrtimer_cancel(&rtdata.timer); destroy_hrtimer_on_stack(&rtdata.timer); - goto out; + return 0; } destroy_hrtimer_on_stack(&rtdata.timer); @@ -1272,16 +1314,90 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) dev_err(kbdev->dev, "Failed to hard-reset the GPU (timed out after %d ms)\n", RESET_TIMEOUT); - /* The GPU still hasn't reset, give up */ return -EINVAL; +} + +static int kbase_pm_reset_do_protected(struct kbase_device *kbdev) +{ + KBASE_TRACE_ADD(kbdev, CORE_GPU_SOFT_RESET, NULL, NULL, 0u, 0); + kbase_tlstream_jd_gpu_soft_reset(kbdev); + + return kbdev->protected_ops->protected_mode_reset(kbdev); +} + +int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) +{ + unsigned long irq_flags; + int err; + bool resume_vinstr = false; + + KBASE_DEBUG_ASSERT(NULL != kbdev); + lockdep_assert_held(&kbdev->pm.lock); + + /* Ensure the clock is on before attempting to access the hardware */ + if (!kbdev->pm.backend.gpu_powered) { + if (kbdev->pm.backend.callback_power_on) + kbdev->pm.backend.callback_power_on(kbdev); + + spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, + irq_flags); + kbdev->pm.backend.gpu_powered = true; + spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, + irq_flags); + } + + /* Ensure interrupts are off to begin with, this also clears any + * outstanding interrupts */ + kbase_pm_disable_interrupts(kbdev); + /* Ensure cache snoops are disabled before reset. 
*/ + kbase_pm_cache_snoop_disable(kbdev); + /* Prepare for the soft-reset */ + kbdev->pm.backend.reset_done = false; + + /* The cores should be made unavailable due to the reset */ + spin_lock_irqsave(&kbdev->pm.power_change_lock, irq_flags); + if (kbdev->shader_available_bitmap != 0u) + KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL, + NULL, 0u, (u32)0u); + if (kbdev->tiler_available_bitmap != 0u) + KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, + NULL, NULL, 0u, (u32)0u); + kbdev->shader_available_bitmap = 0u; + kbdev->tiler_available_bitmap = 0u; + kbdev->l2_available_bitmap = 0u; + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, irq_flags); -out: + /* Soft reset the GPU */ + if (kbdev->protected_mode_support && + kbdev->protected_ops->protected_mode_reset) + err = kbase_pm_reset_do_protected(kbdev); + else + err = kbase_pm_reset_do_normal(kbdev); + + spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, irq_flags); + if (kbdev->protected_mode) + resume_vinstr = true; + kbdev->protected_mode_transition = false; + kbdev->protected_mode = false; + spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, irq_flags); + + if (err) + goto exit; if (flags & PM_HW_ISSUES_DETECT) kbase_pm_hw_issues_detect(kbdev); kbase_pm_hw_issues_apply(kbdev); + kbase_cache_set_coherency_mode(kbdev, kbdev->system_coherency); + + /* Sanity check protected mode was left after reset */ + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) { + u32 gpu_status = kbase_reg_read(kbdev, + GPU_CONTROL_REG(GPU_STATUS), NULL); + + WARN_ON(gpu_status & GPU_STATUS_PROTECTED_MODE_ACTIVE); + } /* If cycle counter was in use re-enable it, enable_irqs will only be * false when called from kbase_pm_powerup */ @@ -1309,7 +1425,12 @@ out: if (flags & PM_ENABLE_IRQS) kbase_pm_enable_interrupts(kbdev); - return 0; +exit: + /* If GPU is leaving protected mode resume vinstr operation. */ + if (kbdev->vinstr_ctx && resume_vinstr) + kbase_vinstr_resume(kbdev->vinstr_ctx); + + return err; } /** diff --git a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_internal.h b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_internal.h index 943eda567cb5..aa51b8cdef8f 100644 --- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_internal.h +++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_internal.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -501,5 +501,23 @@ void kbase_pm_power_changed(struct kbase_device *kbdev); void kbase_pm_metrics_update(struct kbase_device *kbdev, ktime_t *now); +/** + * kbase_pm_cache_snoop_enable - Allow CPU snoops on the GPU + * If the GPU does not have coherency this is a no-op + * @kbdev: Device pointer + * + * This function should be called after L2 power up. + */ + +void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev); + +/** + * kbase_pm_cache_snoop_disable - Prevent CPU snoops on the GPU + * If the GPU does not have coherency this is a no-op + * @kbdev: Device pointer + * + * This function should be called before L2 power off. 
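The restructured kbase_pm_init_hw() above now dispatches between the normal soft/hard reset path and the protected-mode reset callback, and remembers whether vinstr has to be resumed because the reset took the GPU out of protected mode. A control-flow sketch with every helper stubbed; the names here are stand-ins, not the driver's symbols.

#include <stdbool.h>

static bool protected_reset_available = false;	/* assumption */
static bool gpu_in_protected_mode     = false;

static int  do_normal_reset(void)    { return 0; }
static int  do_protected_reset(void) { return 0; }
static void vinstr_resume(void)      { }

static int init_hw_reset(void)
{
	bool resume_vinstr = gpu_in_protected_mode;
	int err;

	if (protected_reset_available)
		err = do_protected_reset();
	else
		err = do_normal_reset();

	/* whatever happens, the reset leaves protected mode */
	gpu_in_protected_mode = false;

	if (resume_vinstr)
		vinstr_resume();	/* counters were parked for protected mode */

	return err;
}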
+ */ +void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev); #endif /* _KBASE_BACKEND_PM_INTERNAL_H_ */ diff --git a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_policy.c b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_policy.c index 343436fc353d..4d006028089a 100644 --- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_policy.c +++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_policy.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -21,7 +21,6 @@ #include #include -#include #include #include #include @@ -155,16 +154,22 @@ static inline void kbase_timeline_pm_cores_func(struct kbase_device *kbdev, static void kbasep_pm_do_poweroff_cores(struct kbase_device *kbdev) { u64 prev_shader_state = kbdev->pm.backend.desired_shader_state; + u64 prev_tiler_state = kbdev->pm.backend.desired_tiler_state; lockdep_assert_held(&kbdev->pm.power_change_lock); kbdev->pm.backend.desired_shader_state &= ~kbdev->pm.backend.shader_poweroff_pending; + kbdev->pm.backend.desired_tiler_state &= + ~kbdev->pm.backend.tiler_poweroff_pending; kbdev->pm.backend.shader_poweroff_pending = 0; + kbdev->pm.backend.tiler_poweroff_pending = 0; - if (prev_shader_state != kbdev->pm.backend.desired_shader_state - || kbdev->pm.backend.ca_in_transition) { + if (prev_shader_state != kbdev->pm.backend.desired_shader_state || + prev_tiler_state != + kbdev->pm.backend.desired_tiler_state || + kbdev->pm.backend.ca_in_transition) { bool cores_are_available; KBASE_TIMELINE_PM_CHECKTRANS(kbdev, @@ -202,7 +207,8 @@ kbasep_pm_do_gpu_poweroff_callback(struct hrtimer *timer) queue_work(kbdev->pm.backend.gpu_poweroff_wq, &kbdev->pm.backend.gpu_poweroff_work); - if (kbdev->pm.backend.shader_poweroff_pending) { + if (kbdev->pm.backend.shader_poweroff_pending || + kbdev->pm.backend.tiler_poweroff_pending) { kbdev->pm.backend.shader_poweroff_pending_time--; KBASE_DEBUG_ASSERT( @@ -327,6 +333,7 @@ void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev) kbdev->pm.backend.gpu_poweroff_pending = 0; kbdev->pm.backend.shader_poweroff_pending = 0; + kbdev->pm.backend.tiler_poweroff_pending = 0; kbdev->pm.backend.shader_poweroff_pending_time = 0; spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); @@ -381,8 +388,10 @@ void kbase_pm_update_active(struct kbase_device *kbdev) * when there are contexts active */ KBASE_DEBUG_ASSERT(pm->active_count == 0); - if (backend->shader_poweroff_pending) { + if (backend->shader_poweroff_pending || + backend->tiler_poweroff_pending) { backend->shader_poweroff_pending = 0; + backend->tiler_poweroff_pending = 0; backend->shader_poweroff_pending_time = 0; } @@ -441,6 +450,7 @@ void kbase_pm_update_active(struct kbase_device *kbdev) void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev) { u64 desired_bitmap; + u64 desired_tiler_bitmap; bool cores_are_available; bool do_poweroff = false; @@ -453,23 +463,37 @@ void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev) kbdev->pm.backend.pm_current_policy->get_core_mask(kbdev); desired_bitmap &= kbase_pm_ca_get_core_mask(kbdev); - /* Enable core 0 if tiler required, regardless of core availability */ if (kbdev->tiler_needed_cnt > 0 || kbdev->tiler_inuse_cnt > 0) - desired_bitmap |= 1; + desired_tiler_bitmap = 1; + else + 
desired_tiler_bitmap = 0; + + if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY)) { + /* Unless XAFFINITY is supported, enable core 0 if tiler + * required, regardless of core availability */ + if (kbdev->tiler_needed_cnt > 0 || kbdev->tiler_inuse_cnt > 0) + desired_bitmap |= 1; + } if (kbdev->pm.backend.desired_shader_state != desired_bitmap) KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_DESIRED, NULL, NULL, 0u, (u32)desired_bitmap); /* Are any cores being powered on? */ if (~kbdev->pm.backend.desired_shader_state & desired_bitmap || + ~kbdev->pm.backend.desired_tiler_state & desired_tiler_bitmap || kbdev->pm.backend.ca_in_transition) { /* Check if we are powering off any cores before updating shader * state */ - if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap) { + if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap || + kbdev->pm.backend.desired_tiler_state & + ~desired_tiler_bitmap) { /* Start timer to power off cores */ kbdev->pm.backend.shader_poweroff_pending |= (kbdev->pm.backend.desired_shader_state & ~desired_bitmap); + kbdev->pm.backend.tiler_poweroff_pending |= + (kbdev->pm.backend.desired_tiler_state & + ~desired_tiler_bitmap); if (kbdev->pm.poweroff_shader_ticks) kbdev->pm.backend.shader_poweroff_pending_time = @@ -479,21 +503,28 @@ void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev) } kbdev->pm.backend.desired_shader_state = desired_bitmap; + kbdev->pm.backend.desired_tiler_state = desired_tiler_bitmap; /* If any cores are being powered on, transition immediately */ cores_are_available = kbase_pm_check_transitions_nolock(kbdev); - } else if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap) { + } else if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap || + kbdev->pm.backend.desired_tiler_state & + ~desired_tiler_bitmap) { /* Start timer to power off cores */ kbdev->pm.backend.shader_poweroff_pending |= (kbdev->pm.backend.desired_shader_state & ~desired_bitmap); + kbdev->pm.backend.tiler_poweroff_pending |= + (kbdev->pm.backend.desired_tiler_state & + ~desired_tiler_bitmap); if (kbdev->pm.poweroff_shader_ticks) kbdev->pm.backend.shader_poweroff_pending_time = kbdev->pm.poweroff_shader_ticks; else kbasep_pm_do_poweroff_cores(kbdev); } else if (kbdev->pm.active_count == 0 && desired_bitmap != 0 && - kbdev->pm.backend.poweroff_timer_needed) { + desired_tiler_bitmap != 0 && + kbdev->pm.backend.poweroff_timer_needed) { /* If power policy is keeping cores on despite there being no * active contexts then disable poweroff timer as it isn't * required. 
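The policy changes running through this hunk track the tiler's desired power state separately from the shaders and give it its own poweroff-pending mask, so a deferred power-off only completes once neither shaders nor the tiler are still pending. A minimal sketch of the tiler side of that bookkeeping, reduced to a single update step:

#include <stdbool.h>
#include <stdint.h>

struct pm_tiler_state {
	uint64_t desired;		/* 0 or 1: there is a single tiler */
	uint64_t poweroff_pending;	/* waiting on the poweroff timer   */
};

static void update_tiler(struct pm_tiler_state *pm, bool tiler_wanted)
{
	uint64_t new_desired = tiler_wanted ? 1 : 0;

	/* something wanted before but no longer: defer its power-off */
	pm->poweroff_pending |= pm->desired & ~new_desired;
	pm->desired = new_desired;

	/* and never power off what is wanted again in the meantime */
	pm->poweroff_pending &= ~pm->desired;
}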
@@ -504,11 +535,17 @@ void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev) /* Ensure timer does not power off wanted cores and make sure to power * off unwanted cores */ - if (kbdev->pm.backend.shader_poweroff_pending != 0) { + if (kbdev->pm.backend.shader_poweroff_pending || + kbdev->pm.backend.tiler_poweroff_pending) { kbdev->pm.backend.shader_poweroff_pending &= ~(kbdev->pm.backend.desired_shader_state & desired_bitmap); - if (kbdev->pm.backend.shader_poweroff_pending == 0) + kbdev->pm.backend.tiler_poweroff_pending &= + ~(kbdev->pm.backend.desired_tiler_state & + desired_tiler_bitmap); + + if (!kbdev->pm.backend.shader_poweroff_pending && + !kbdev->pm.backend.tiler_poweroff_pending) kbdev->pm.backend.shader_poweroff_pending_time = 0; } diff --git a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_power_model_simple.c b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_power_model_simple.c index 9d3eb10bd3c9..d965033905ca 100644 --- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_power_model_simple.c +++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_power_model_simple.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -36,7 +36,12 @@ static struct thermal_zone_device *gpu_tz; static unsigned long model_static_power(unsigned long voltage) { - int temperature, temp; +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) + unsigned long temperature; +#else + int temperature; +#endif + unsigned long temp; unsigned long temp_squared, temp_cubed, temp_scaling_factor; const unsigned long voltage_cubed = (voltage * voltage * voltage) >> 10; @@ -85,7 +90,11 @@ static unsigned long model_dynamic_power(unsigned long freq, return (dynamic_coefficient * v2 * f_mhz) / 1000000; /* mW */ } +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) +struct devfreq_cooling_ops power_model_simple_ops = { +#else struct devfreq_cooling_power power_model_simple_ops = { +#endif .get_static_power = model_static_power, .get_dynamic_power = model_dynamic_power, }; @@ -150,7 +159,7 @@ int kbase_power_model_simple_init(struct kbase_device *kbdev) dynamic_coefficient = (((dynamic_power * 1000) / voltage_squared) * 1000) / frequency; - if (of_property_read_u32_array(power_model_node, "ts", ts, 4)) { + if (of_property_read_u32_array(power_model_node, "ts", (u32 *)ts, 4)) { dev_err(kbdev->dev, "ts in power_model not available\n"); return -EINVAL; } diff --git a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_power_model_simple.h b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_power_model_simple.h index 17eede4d917c..9b5e69a9323b 100644 --- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_power_model_simple.h +++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_power_model_simple.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. 
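The simple power model touched above computes dynamic power as coefficient x V^2 x f; the driver keeps everything in scaled integer arithmetic and returns milliwatts. A plain floating-point sketch of the same formula, with units chosen for readability rather than matching the driver's fixed-point scaling. For example, a coefficient of 2.5 mW/(V^2*MHz) at 0.9 V and 500 MHz gives about 1012 mW.

/* P_dyn = C * V^2 * f: coeff in mW/(V^2*MHz), voltage in V, freq in MHz */
static double dynamic_power_mw(double coeff, double voltage_v, double freq_mhz)
{
	return coeff * voltage_v * voltage_v * freq_mhz;
}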
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -38,6 +38,10 @@ */ int kbase_power_model_simple_init(struct kbase_device *kbdev); +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) +extern struct devfreq_cooling_ops power_model_simple_ops; +#else extern struct devfreq_cooling_power power_model_simple_ops; +#endif #endif /* _BASE_POWER_MODEL_SIMPLE_H_ */ diff --git a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_time.c b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_time.c index 4bcde85f3ee1..d992989123e8 100644 --- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_time.c +++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_time.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -74,9 +74,10 @@ void kbase_wait_write_flush(struct kbase_context *kctx) { u32 base_count = 0; - /* A suspend won't happen here, because we're in a syscall from a - * userspace thread */ - + /* + * The caller must be holding onto the kctx or the call is from + * userspace. + */ kbase_pm_context_active(kctx->kbdev); kbase_pm_request_gpu_cycle_counter(kctx->kbdev); diff --git a/drivers/gpu/arm/midgard_for_linux/mali_base_hwconfig_features.h b/drivers/gpu/arm/midgard_for_linux/mali_base_hwconfig_features.h index 2102f43348cb..f7c0ff674906 100644 --- a/drivers/gpu/arm/midgard_for_linux/mali_base_hwconfig_features.h +++ b/drivers/gpu/arm/midgard_for_linux/mali_base_hwconfig_features.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -27,6 +27,7 @@ enum base_hw_feature { BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, BASE_HW_FEATURE_33BIT_VA, + BASE_HW_FEATURE_XAFFINITY, BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, BASE_HW_FEATURE_MRT, BASE_HW_FEATURE_BRNDOUT_CC, @@ -46,6 +47,9 @@ enum base_hw_feature { BASE_HW_FEATURE_BRNDOUT_KILL, BASE_HW_FEATURE_WARPING, BASE_HW_FEATURE_V4, + BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_PROTECTED_MODE, + BASE_HW_FEATURE_COHERENCY_REG, BASE_HW_FEATURE_END }; @@ -84,6 +88,7 @@ static const enum base_hw_feature base_hw_features_t72x[] = { static const enum base_hw_feature base_hw_features_t76x[] = { BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_XAFFINITY, BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, BASE_HW_FEATURE_BRNDOUT_CC, @@ -101,6 +106,7 @@ static const enum base_hw_feature base_hw_features_t76x[] = { static const enum base_hw_feature base_hw_features_tFxx[] = { BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_XAFFINITY, BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, BASE_HW_FEATURE_BRNDOUT_CC, @@ -121,6 +127,7 @@ static const enum base_hw_feature base_hw_features_t83x[] = { BASE_HW_FEATURE_33BIT_VA, BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_XAFFINITY, BASE_HW_FEATURE_WARPING, BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, @@ -142,6 +149,7 @@ static const enum base_hw_feature base_hw_features_t82x[] = { BASE_HW_FEATURE_33BIT_VA, BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_XAFFINITY, BASE_HW_FEATURE_WARPING, BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, @@ -159,5 +167,31 @@ static const enum base_hw_feature base_hw_features_t82x[] = { BASE_HW_FEATURE_END }; +static const enum base_hw_feature base_hw_features_tMIx[] = { + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, + BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_XAFFINITY, + BASE_HW_FEATURE_WARPING, + BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, + BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, + BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, + BASE_HW_FEATURE_BRNDOUT_CC, + BASE_HW_FEATURE_BRNDOUT_KILL, + BASE_HW_FEATURE_LD_ST_LEA_TEX, + BASE_HW_FEATURE_LD_ST_TILEBUFFER, + BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, + BASE_HW_FEATURE_MRT, + BASE_HW_FEATURE_MSAA_16X, + BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, + BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, + BASE_HW_FEATURE_T7XX_PAIRING_RULES, + BASE_HW_FEATURE_TEST4_DATUM_MODE, + BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_PROTECTED_MODE, + BASE_HW_FEATURE_COHERENCY_REG, + BASE_HW_FEATURE_END +}; + + #endif /* _BASE_HWCONFIG_FEATURES_H_ */ diff --git a/drivers/gpu/arm/midgard_for_linux/mali_base_hwconfig_issues.h b/drivers/gpu/arm/midgard_for_linux/mali_base_hwconfig_issues.h index 66c2dc76fdb3..149f44cb8674 100644 --- a/drivers/gpu/arm/midgard_for_linux/mali_base_hwconfig_issues.h +++ b/drivers/gpu/arm/midgard_for_linux/mali_base_hwconfig_issues.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -71,6 +71,7 @@ enum base_hw_issue { BASE_HW_ISSUE_10487, BASE_HW_ISSUE_10607, BASE_HW_ISSUE_10632, + BASE_HW_ISSUE_10649, BASE_HW_ISSUE_10676, BASE_HW_ISSUE_10682, BASE_HW_ISSUE_10684, @@ -90,6 +91,7 @@ enum base_hw_issue { BASE_HW_ISSUE_11035, BASE_HW_ISSUE_11042, BASE_HW_ISSUE_11051, + BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_26, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, @@ -100,7 +102,17 @@ enum base_hw_issue { BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, + BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_T76X_3979, + BASE_HW_ISSUE_T76X_3982, + BASE_HW_ISSUE_TMIX_7891, + BASE_HW_ISSUE_TMIX_7940, + BASE_HW_ISSUE_TMIX_8042, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TMIX_8138, + BASE_HW_ISSUE_TMIX_8206, + BASE_HW_ISSUE_TMIX_8343, GPUCORE_1619, BASE_HW_ISSUE_END }; @@ -153,6 +165,7 @@ static const enum base_hw_issue base_hw_issues_t60x_r0p0_15dev0[] = { BASE_HW_ISSUE_10487, BASE_HW_ISSUE_10607, BASE_HW_ISSUE_10632, + BASE_HW_ISSUE_10649, BASE_HW_ISSUE_10676, BASE_HW_ISSUE_10682, BASE_HW_ISSUE_10684, @@ -166,7 +179,9 @@ static const enum base_hw_issue base_hw_issues_t60x_r0p0_15dev0[] = { BASE_HW_ISSUE_11020, BASE_HW_ISSUE_11035, BASE_HW_ISSUE_11051, + BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1909, + BASE_HW_ISSUE_T76X_3964, GPUCORE_1619, BASE_HW_ISSUE_END }; @@ -192,6 +207,7 @@ static const enum base_hw_issue base_hw_issues_t60x_r0p0_eac[] = { BASE_HW_ISSUE_10487, BASE_HW_ISSUE_10607, BASE_HW_ISSUE_10632, + BASE_HW_ISSUE_10649, BASE_HW_ISSUE_10676, BASE_HW_ISSUE_10682, BASE_HW_ISSUE_10684, @@ -203,7 +219,9 @@ static const enum base_hw_issue base_hw_issues_t60x_r0p0_eac[] = { BASE_HW_ISSUE_11020, BASE_HW_ISSUE_11035, BASE_HW_ISSUE_11051, + BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1909, + BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_END }; @@ -226,6 +244,7 @@ static const enum base_hw_issue base_hw_issues_t60x_r0p1[] = { BASE_HW_ISSUE_10487, BASE_HW_ISSUE_10607, BASE_HW_ISSUE_10632, + BASE_HW_ISSUE_10649, BASE_HW_ISSUE_10676, BASE_HW_ISSUE_10682, BASE_HW_ISSUE_10684, @@ -236,8 +255,10 @@ static const enum base_hw_issue base_hw_issues_t60x_r0p1[] = { BASE_HW_ISSUE_11020, BASE_HW_ISSUE_11035, BASE_HW_ISSUE_11051, + BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, + BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_END }; @@ -252,6 +273,7 @@ static const enum base_hw_issue base_hw_issues_t62x_r0p1[] = { BASE_HW_ISSUE_10487, BASE_HW_ISSUE_10607, BASE_HW_ISSUE_10632, + BASE_HW_ISSUE_10649, BASE_HW_ISSUE_10676, BASE_HW_ISSUE_10682, BASE_HW_ISSUE_10684, @@ -267,6 +289,7 @@ static const enum base_hw_issue base_hw_issues_t62x_r0p1[] = { BASE_HW_ISSUE_11035, BASE_HW_ISSUE_11042, BASE_HW_ISSUE_11051, + BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_END @@ -277,6 +300,7 @@ static const enum base_hw_issue base_hw_issues_t62x_r1p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10471, BASE_HW_ISSUE_10472, + BASE_HW_ISSUE_10649, BASE_HW_ISSUE_10684, BASE_HW_ISSUE_10821, BASE_HW_ISSUE_10883, @@ -288,8 +312,10 @@ static const enum base_hw_issue base_hw_issues_t62x_r1p0[] = { BASE_HW_ISSUE_11024, BASE_HW_ISSUE_11042, BASE_HW_ISSUE_11051, + BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, + BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_END }; @@ -298,6 +324,7 @@ static const enum base_hw_issue base_hw_issues_t62x_r1p1[] = { BASE_HW_ISSUE_9435, 
BASE_HW_ISSUE_10471, BASE_HW_ISSUE_10472, + BASE_HW_ISSUE_10649, BASE_HW_ISSUE_10684, BASE_HW_ISSUE_10821, BASE_HW_ISSUE_10883, @@ -307,6 +334,7 @@ static const enum base_hw_issue base_hw_issues_t62x_r1p1[] = { BASE_HW_ISSUE_11012, BASE_HW_ISSUE_11042, BASE_HW_ISSUE_11051, + BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_END @@ -321,6 +349,7 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p0[] = { BASE_HW_ISSUE_11024, BASE_HW_ISSUE_11042, BASE_HW_ISSUE_11051, + BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_26, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, @@ -331,7 +360,11 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p0[] = { BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, + BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_T76X_3979, + BASE_HW_ISSUE_T76X_3982, + BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -344,6 +377,7 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p1[] = { BASE_HW_ISSUE_11024, BASE_HW_ISSUE_11042, BASE_HW_ISSUE_11051, + BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_26, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, @@ -354,7 +388,11 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p1[] = { BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, + BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_T76X_3979, + BASE_HW_ISSUE_T76X_3982, + BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -365,6 +403,7 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p1_50rel0[] = { BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11042, BASE_HW_ISSUE_11051, + BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_26, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, @@ -375,7 +414,11 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p1_50rel0[] = { BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, + BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_T76X_3979, + BASE_HW_ISSUE_T76X_3982, + BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -388,6 +431,7 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p2[] = { BASE_HW_ISSUE_11024, BASE_HW_ISSUE_11042, BASE_HW_ISSUE_11051, + BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_26, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, @@ -398,7 +442,11 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p2[] = { BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, + BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_T76X_3979, + BASE_HW_ISSUE_T76X_3982, + BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -409,6 +457,7 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p3[] = { BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11042, BASE_HW_ISSUE_11051, + BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_26, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, @@ -419,7 +468,11 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p3[] = { BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, + BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_T76X_3979, + BASE_HW_ISSUE_T76X_3982, + BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -430,6 +483,7 @@ static const enum base_hw_issue base_hw_issues_t76x_r1p0[] = { BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11042, BASE_HW_ISSUE_11051, + BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, @@ -437,7 +491,11 @@ static const enum base_hw_issue base_hw_issues_t76x_r1p0[] = { BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, 
BASE_HW_ISSUE_T76X_3960, + BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_T76X_3979, + BASE_HW_ISSUE_T76X_3982, + BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -445,6 +503,7 @@ static const enum base_hw_issue base_hw_issues_t72x_r0p0[] = { BASE_HW_ISSUE_6402, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10471, + BASE_HW_ISSUE_10649, BASE_HW_ISSUE_10684, BASE_HW_ISSUE_10797, BASE_HW_ISSUE_10821, @@ -452,8 +511,10 @@ static const enum base_hw_issue base_hw_issues_t72x_r0p0[] = { BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11042, BASE_HW_ISSUE_11051, + BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, + BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_END }; @@ -461,6 +522,7 @@ static const enum base_hw_issue base_hw_issues_t72x_r1p0[] = { BASE_HW_ISSUE_6402, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10471, + BASE_HW_ISSUE_10649, BASE_HW_ISSUE_10684, BASE_HW_ISSUE_10797, BASE_HW_ISSUE_10821, @@ -468,8 +530,10 @@ static const enum base_hw_issue base_hw_issues_t72x_r1p0[] = { BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11042, BASE_HW_ISSUE_11051, + BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, + BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_END }; @@ -477,6 +541,7 @@ static const enum base_hw_issue base_hw_issues_t72x_r1p1[] = { BASE_HW_ISSUE_6402, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10471, + BASE_HW_ISSUE_10649, BASE_HW_ISSUE_10684, BASE_HW_ISSUE_10797, BASE_HW_ISSUE_10821, @@ -484,8 +549,10 @@ static const enum base_hw_issue base_hw_issues_t72x_r1p1[] = { BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11042, BASE_HW_ISSUE_11051, + BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, + BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_END }; @@ -494,11 +561,13 @@ static const enum base_hw_issue base_hw_issues_model_t72x[] = { BASE_HW_ISSUE_6402, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10471, + BASE_HW_ISSUE_10649, BASE_HW_ISSUE_10797, BASE_HW_ISSUE_11042, BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, + BASE_HW_ISSUE_T76X_3964, GPUCORE_1619, BASE_HW_ISSUE_END }; @@ -515,6 +584,10 @@ static const enum base_hw_issue base_hw_issues_model_t76x[] = { BASE_HW_ISSUE_T76X_3086, BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_T76X_3793, + BASE_HW_ISSUE_T76X_3964, + BASE_HW_ISSUE_T76X_3979, + BASE_HW_ISSUE_T76X_3982, + BASE_HW_ISSUE_TMIX_7891, GPUCORE_1619, BASE_HW_ISSUE_END }; @@ -525,6 +598,7 @@ static const enum base_hw_issue base_hw_issues_model_t60x[] = { BASE_HW_ISSUE_8778, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10472, + BASE_HW_ISSUE_10649, BASE_HW_ISSUE_10931, BASE_HW_ISSUE_11012, BASE_HW_ISSUE_11020, @@ -532,6 +606,7 @@ static const enum base_hw_issue base_hw_issues_model_t60x[] = { BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, + BASE_HW_ISSUE_T76X_3964, GPUCORE_1619, BASE_HW_ISSUE_END }; @@ -541,6 +616,7 @@ static const enum base_hw_issue base_hw_issues_model_t62x[] = { BASE_HW_ISSUE_6402, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10472, + BASE_HW_ISSUE_10649, BASE_HW_ISSUE_10931, BASE_HW_ISSUE_11012, BASE_HW_ISSUE_11020, @@ -549,6 +625,7 @@ static const enum base_hw_issue base_hw_issues_model_t62x[] = { BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, + BASE_HW_ISSUE_T76X_3964, GPUCORE_1619, BASE_HW_ISSUE_END }; @@ -559,6 +636,7 @@ static const enum base_hw_issue base_hw_issues_tFRx_r0p1[] = { BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11051, + BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, @@ -566,7 +644,11 @@ static const enum base_hw_issue base_hw_issues_tFRx_r0p1[] = { 
BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, + BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_T76X_3979, + BASE_HW_ISSUE_T76X_3982, + BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -576,13 +658,18 @@ static const enum base_hw_issue base_hw_issues_tFRx_r0p2[] = { BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11051, + BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, + BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_T76X_3979, + BASE_HW_ISSUE_T76X_3982, + BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -592,12 +679,16 @@ static const enum base_hw_issue base_hw_issues_tFRx_r1p0[] = { BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11051, + BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_T76X_3979, + BASE_HW_ISSUE_T76X_3982, + BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -607,12 +698,16 @@ static const enum base_hw_issue base_hw_issues_tFRx_r2p0[] = { BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11051, + BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_T76X_3979, + BASE_HW_ISSUE_T76X_3982, + BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -624,6 +719,10 @@ static const enum base_hw_issue base_hw_issues_model_tFRx[] = { BASE_HW_ISSUE_T76X_3086, BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_T76X_3793, + BASE_HW_ISSUE_T76X_3964, + BASE_HW_ISSUE_T76X_3979, + BASE_HW_ISSUE_T76X_3982, + BASE_HW_ISSUE_TMIX_7891, GPUCORE_1619, BASE_HW_ISSUE_END }; @@ -634,13 +733,18 @@ static const enum base_hw_issue base_hw_issues_t86x_r0p2[] = { BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11051, + BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, + BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_T76X_3979, + BASE_HW_ISSUE_T76X_3982, + BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -650,12 +754,16 @@ static const enum base_hw_issue base_hw_issues_t86x_r1p0[] = { BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11051, + BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_T76X_3979, + BASE_HW_ISSUE_T76X_3982, + BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -665,12 +773,16 @@ static const enum base_hw_issue base_hw_issues_t86x_r2p0[] = { BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11051, + BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_T76X_3979, + BASE_HW_ISSUE_T76X_3982, + BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -682,6 +794,9 @@ static const enum base_hw_issue base_hw_issues_model_t86x[] = { BASE_HW_ISSUE_T76X_3086, BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_T76X_3793, + BASE_HW_ISSUE_T76X_3979, + BASE_HW_ISSUE_TMIX_7891, + BASE_HW_ISSUE_T76X_3982, GPUCORE_1619, BASE_HW_ISSUE_END }; @@ -692,6 +807,7 @@ static const enum base_hw_issue base_hw_issues_t83x_r0p1[] = { BASE_HW_ISSUE_10883, 
BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11051, + BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, @@ -699,6 +815,9 @@ static const enum base_hw_issue base_hw_issues_t83x_r0p1[] = { BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, + BASE_HW_ISSUE_T76X_3979, + BASE_HW_ISSUE_T76X_3982, + BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -708,12 +827,16 @@ static const enum base_hw_issue base_hw_issues_t83x_r1p0[] = { BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11051, + BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, + BASE_HW_ISSUE_T76X_3979, + BASE_HW_ISSUE_T76X_3982, + BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -721,11 +844,14 @@ static const enum base_hw_issue base_hw_issues_model_t83x[] = { BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_T76X_3793, + BASE_HW_ISSUE_T76X_3964, + BASE_HW_ISSUE_T76X_3979, + BASE_HW_ISSUE_T76X_3982, + BASE_HW_ISSUE_TMIX_7891, GPUCORE_1619, BASE_HW_ISSUE_END }; @@ -736,6 +862,7 @@ static const enum base_hw_issue base_hw_issues_t82x_r0p0[] = { BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11051, + BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, @@ -743,6 +870,10 @@ static const enum base_hw_issue base_hw_issues_t82x_r0p0[] = { BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, + BASE_HW_ISSUE_T76X_3964, + BASE_HW_ISSUE_T76X_3979, + BASE_HW_ISSUE_T76X_3982, + BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -752,6 +883,7 @@ static const enum base_hw_issue base_hw_issues_t82x_r0p1[] = { BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11051, + BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, @@ -759,6 +891,9 @@ static const enum base_hw_issue base_hw_issues_t82x_r0p1[] = { BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, + BASE_HW_ISSUE_T76X_3979, + BASE_HW_ISSUE_T76X_3982, + BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -768,12 +903,16 @@ static const enum base_hw_issue base_hw_issues_t82x_r1p0[] = { BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11051, + BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, + BASE_HW_ISSUE_T76X_3979, + BASE_HW_ISSUE_T76X_3982, + BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -781,15 +920,68 @@ static const enum base_hw_issue base_hw_issues_model_t82x[] = { BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_T76X_3793, + BASE_HW_ISSUE_T76X_3979, + BASE_HW_ISSUE_T76X_3982, + BASE_HW_ISSUE_TMIX_7891, + GPUCORE_1619, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tMIx_r0p0_05dev0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10682, + BASE_HW_ISSUE_10821, + BASE_HW_ISSUE_11054, + BASE_HW_ISSUE_T76X_3700, + BASE_HW_ISSUE_T76X_3953, + BASE_HW_ISSUE_T76X_3982, + BASE_HW_ISSUE_TMIX_7891, + BASE_HW_ISSUE_TMIX_8042, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TMIX_8138, + BASE_HW_ISSUE_TMIX_8343, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue 
base_hw_issues_tMIx_r0p0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10682, + BASE_HW_ISSUE_10821, + BASE_HW_ISSUE_11054, + BASE_HW_ISSUE_T76X_3700, + BASE_HW_ISSUE_T76X_3982, + BASE_HW_ISSUE_TMIX_7891, + BASE_HW_ISSUE_TMIX_7940, + BASE_HW_ISSUE_TMIX_8042, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TMIX_8138, + BASE_HW_ISSUE_TMIX_8206, + BASE_HW_ISSUE_TMIX_8343, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_model_tMIx[] = { + BASE_HW_ISSUE_5736, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_T76X_3700, + BASE_HW_ISSUE_T76X_3982, + BASE_HW_ISSUE_TMIX_7891, + BASE_HW_ISSUE_TMIX_7940, + BASE_HW_ISSUE_TMIX_8042, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TMIX_8138, + BASE_HW_ISSUE_TMIX_8206, + BASE_HW_ISSUE_TMIX_8343, GPUCORE_1619, BASE_HW_ISSUE_END }; + + #endif /* _BASE_HWCONFIG_ISSUES_H_ */ diff --git a/drivers/gpu/arm/midgard_for_linux/mali_base_kernel.h b/drivers/gpu/arm/midgard_for_linux/mali_base_kernel.h index 212100dbff63..749dd9a1cc9e 100644 --- a/drivers/gpu/arm/midgard_for_linux/mali_base_kernel.h +++ b/drivers/gpu/arm/midgard_for_linux/mali_base_kernel.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -42,7 +42,14 @@ /* Support UK9 IOCTLS */ #define BASE_LEGACY_UK9_SUPPORT 1 -typedef u64 base_mem_handle; +/* Support UK10_2 IOCTLS */ +#define BASE_LEGACY_UK10_2_SUPPORT 1 + +typedef struct base_mem_handle { + struct { + u64 handle; + } basep; +} base_mem_handle; #include "mali_base_mem_priv.h" #include "mali_kbase_profiling_gator_api.h" @@ -62,6 +69,10 @@ typedef u64 base_mem_handle; #define BASEP_JD_SEM_MASK_IN_WORD(x) (1 << ((x) & (BASEP_JD_SEM_PER_WORD - 1))) #define BASEP_JD_SEM_ARRAY_SIZE BASEP_JD_SEM_WORD_NR(BASE_JD_ATOM_COUNT) +/* Set/reset values for a software event */ +#define BASE_JD_SOFT_EVENT_SET ((unsigned char)1) +#define BASE_JD_SOFT_EVENT_RESET ((unsigned char)0) + #define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 3 #define BASE_MAX_COHERENT_GROUPS 16 @@ -162,8 +173,13 @@ enum { /* IN */ BASE_MEM_COHERENT_SYSTEM_REQUIRED = (1U << 15), /**< Page coherence Outer shareable, required. */ - BASE_MEM_SECURE = (1U << 16) /**< Secure memory */ - + BASE_MEM_SECURE = (1U << 16), /**< Secure memory */ + BASE_MEM_DONT_NEED = (1U << 17), /**< Not needed physical + memory */ + BASE_MEM_IMPORT_SHARED = (1U << 18), /**< Must use shared CPU/GPU zone + (SAME_VA zone) but doesn't + require the addresses to + be the same */ }; /** @@ -171,7 +187,7 @@ enum { * * Must be kept in sync with the ::base_mem_alloc_flags flags */ -#define BASE_MEM_FLAGS_NR_BITS 17 +#define BASE_MEM_FLAGS_NR_BITS 19 /** * A mask for all output bits, excluding IN/OUT bits. @@ -184,6 +200,13 @@ enum { #define BASE_MEM_FLAGS_INPUT_MASK \ (((1 << BASE_MEM_FLAGS_NR_BITS) - 1) & ~BASE_MEM_FLAGS_OUTPUT_MASK) +/** + * A mask for all the flags which are modifiable via the base_mem_set_flags + * interface. + */ +#define BASE_MEM_FLAGS_MODIFIABLE \ + (BASE_MEM_DONT_NEED | BASE_MEM_COHERENT_SYSTEM | \ + BASE_MEM_COHERENT_LOCAL) /** * enum base_mem_import_type - Memory types supported by @a base_mem_import @@ -224,14 +247,32 @@ struct base_mem_import_user_buffer { }; /** - * @brief Invalid memory handle type. - * Return value from functions returning @a base_mem_handle on error. + * @brief Invalid memory handle. 
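 *
 * With the handle now wrapped in a structure, validity has to be tested on the
 * inner value rather than on the handle itself. A minimal sketch, assuming a
 * hypothetical helper name (only basep.handle and BASEP_MEM_INVALID_HANDLE
 * come from this header):
 *
 *   static inline bool example_mem_handle_is_invalid(base_mem_handle h)
 *   {
 *           return h.basep.handle == BASEP_MEM_INVALID_HANDLE;
 *   }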
+ * + * Return value from functions returning @ref base_mem_handle on error. + * + * @warning @ref base_mem_handle_new_invalid must be used instead of this macro + * in C++ code or other situations where compound literals cannot be used. */ -#define BASE_MEM_INVALID_HANDLE (0ull << 12) +#define BASE_MEM_INVALID_HANDLE ((base_mem_handle) { {BASEP_MEM_INVALID_HANDLE} }) + +/** + * @brief Special write-alloc memory handle. + * + * A special handle is used to represent a region where a special page is mapped + * with a write-alloc cache setup, typically used when the write result of the + * GPU isn't needed, but the GPU must write anyway. + * + * @warning @ref base_mem_handle_new_write_alloc must be used instead of this macro + * in C++ code or other situations where compound literals cannot be used. + */ +#define BASE_MEM_WRITE_ALLOC_PAGES_HANDLE ((base_mem_handle) { {BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE} }) + +#define BASEP_MEM_INVALID_HANDLE (0ull << 12) #define BASE_MEM_MMU_DUMP_HANDLE (1ull << 12) #define BASE_MEM_TRACE_BUFFER_HANDLE (2ull << 12) #define BASE_MEM_MAP_TRACKING_HANDLE (3ull << 12) -#define BASE_MEM_WRITE_ALLOC_PAGES_HANDLE (4ull << 12) +#define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE (4ull << 12) /* reserved handles ..-64< for future special handles */ #define BASE_MEM_COOKIE_BASE (64ul << 12) #define BASE_MEM_FIRST_FREE_ADDRESS ((BITS_PER_LONG << 12) + \ @@ -240,6 +281,7 @@ struct base_mem_import_user_buffer { /* Mask to detect 4GB boundary alignment */ #define BASE_MEM_MASK_4GB 0xfffff000UL + /* Bit mask of cookies used for for memory allocation setup */ #define KBASE_COOKIE_MASK ~1UL /* bit 0 is reserved */ @@ -251,7 +293,6 @@ typedef enum base_backing_threshold_status { BASE_BACKING_THRESHOLD_OK = 0, /**< Resize successful */ BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE = -1, /**< Not a growable tmem object */ BASE_BACKING_THRESHOLD_ERROR_OOM = -2, /**< Increase failed due to an out-of-memory condition */ - BASE_BACKING_THRESHOLD_ERROR_MAPPED = -3, /**< Resize attempted on buffer while it was mapped, which is not permitted */ BASE_BACKING_THRESHOLD_ERROR_INVALID_ARGUMENTS = -4 /**< Invalid arguments (not tmem, illegal size request, etc.) */ } base_backing_threshold_status; @@ -355,6 +396,28 @@ struct base_mem_aliasing_info { u64 length; }; +/** + * struct base_jit_alloc_info - Structure which describes a JIT allocation + * request. + * @gpu_alloc_addr: The GPU virtual address to write the JIT + * allocated GPU virtual address to. + * @va_pages: The minimum number of virtual pages required. + * @commit_pages: The minimum number of physical pages which + * should back the allocation. + * @extent: Granularity of physical pages to grow the + * allocation by during a fault. + * @id: Unique ID provided by the caller, this is used + * to pair allocation and free requests. + * Zero is not a valid value. + */ +struct base_jit_alloc_info { + u64 gpu_alloc_addr; + u64 va_pages; + u64 commit_pages; + u64 extent; + u8 id; +}; + /** * @brief Job dependency type. * @@ -381,11 +444,20 @@ typedef u8 base_jd_dep_type; * Special case is ::BASE_JD_REQ_DEP, which is used to express complex * dependencies, and that doesn't execute anything on the hardware. 
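 *
 * A minimal illustration of how these requirement bits are meant to be
 * combined into a single mask (the choice of a fragment job that skips the
 * start-of-job cache maintenance is an assumption made for the example only):
 *
 *   base_jd_core_req req = BASE_JD_REQ_FS | BASE_JD_REQ_SKIP_CACHE_START;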
*/ -typedef u16 base_jd_core_req; +typedef u32 base_jd_core_req; /* Requirements that come from the HW */ -#define BASE_JD_REQ_DEP 0 /**< No requirement, dependency only */ -#define BASE_JD_REQ_FS (1U << 0) /**< Requires fragment shaders */ + +/** + * No requirement, dependency only + */ +#define BASE_JD_REQ_DEP ((base_jd_core_req)0) + +/** + * Requires fragment shaders + */ +#define BASE_JD_REQ_FS ((base_jd_core_req)1 << 0) + /** * Requires compute shaders * This covers any of the following Midgard Job types: @@ -397,28 +469,38 @@ typedef u16 base_jd_core_req; * job is specifically just the "Compute Shader" job type, and not the "Vertex * Shader" nor the "Geometry Shader" job type. */ -#define BASE_JD_REQ_CS (1U << 1) -#define BASE_JD_REQ_T (1U << 2) /**< Requires tiling */ -#define BASE_JD_REQ_CF (1U << 3) /**< Requires cache flushes */ -#define BASE_JD_REQ_V (1U << 4) /**< Requires value writeback */ +#define BASE_JD_REQ_CS ((base_jd_core_req)1 << 1) +#define BASE_JD_REQ_T ((base_jd_core_req)1 << 2) /**< Requires tiling */ +#define BASE_JD_REQ_CF ((base_jd_core_req)1 << 3) /**< Requires cache flushes */ +#define BASE_JD_REQ_V ((base_jd_core_req)1 << 4) /**< Requires value writeback */ /* SW-only requirements - the HW does not expose these as part of the job slot capabilities */ /* Requires fragment job with AFBC encoding */ -#define BASE_JD_REQ_FS_AFBC (1U << 13) +#define BASE_JD_REQ_FS_AFBC ((base_jd_core_req)1 << 13) + +/** + * SW-only requirement: coalesce completion events. + * If this bit is set then completion of this atom will not cause an event to + * be sent to userspace, whether successful or not; completion events will be + * deferred until an atom completes which does not have this bit set. + * + * This bit may not be used in combination with BASE_JD_REQ_EXTERNAL_RESOURCES. + */ +#define BASE_JD_REQ_EVENT_COALESCE ((base_jd_core_req)1 << 5) /** * SW Only requirement: the job chain requires a coherent core group. We don't * mind which coherent core group is used. */ -#define BASE_JD_REQ_COHERENT_GROUP (1U << 6) +#define BASE_JD_REQ_COHERENT_GROUP ((base_jd_core_req)1 << 6) /** * SW Only requirement: The performance counters should be enabled only when * they are needed, to reduce power consumption. */ -#define BASE_JD_REQ_PERMON (1U << 7) +#define BASE_JD_REQ_PERMON ((base_jd_core_req)1 << 7) /** * SW Only requirement: External resources are referenced by this atom. @@ -426,14 +508,16 @@ typedef u16 base_jd_core_req; * but should instead be part of a NULL jobs inserted into the dependency tree. * The first pre_dep object must be configured for the external resouces to use, * the second pre_dep object can be used to create other dependencies. + * + * This bit may not be used in combination with BASE_JD_REQ_EVENT_COALESCE. */ -#define BASE_JD_REQ_EXTERNAL_RESOURCES (1U << 8) +#define BASE_JD_REQ_EXTERNAL_RESOURCES ((base_jd_core_req)1 << 8) /** * SW Only requirement: Software defined job. Jobs with this bit set will not be submitted * to the hardware but will cause some action to happen within the driver */ -#define BASE_JD_REQ_SOFT_JOB (1U << 9) +#define BASE_JD_REQ_SOFT_JOB ((base_jd_core_req)1 << 9) #define BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME (BASE_JD_REQ_SOFT_JOB | 0x1) #define BASE_JD_REQ_SOFT_FENCE_TRIGGER (BASE_JD_REQ_SOFT_JOB | 0x2) @@ -476,6 +560,66 @@ typedef u16 base_jd_core_req; * - Priority is inherited from the replay job. */ #define BASE_JD_REQ_SOFT_REPLAY (BASE_JD_REQ_SOFT_JOB | 0x4) +/** + * SW only requirement: event wait/trigger job. 
+ * + * - BASE_JD_REQ_SOFT_EVENT_WAIT: this job will block until the event is set. + * - BASE_JD_REQ_SOFT_EVENT_SET: this job sets the event, thus unblocks the + * other waiting jobs. It completes immediately. + * - BASE_JD_REQ_SOFT_EVENT_RESET: this job resets the event, making it + * possible for other jobs to wait upon. It completes immediately. + */ +#define BASE_JD_REQ_SOFT_EVENT_WAIT (BASE_JD_REQ_SOFT_JOB | 0x5) +#define BASE_JD_REQ_SOFT_EVENT_SET (BASE_JD_REQ_SOFT_JOB | 0x6) +#define BASE_JD_REQ_SOFT_EVENT_RESET (BASE_JD_REQ_SOFT_JOB | 0x7) + +#define BASE_JD_REQ_SOFT_DEBUG_COPY (BASE_JD_REQ_SOFT_JOB | 0x8) + +/** + * SW only requirement: Just In Time allocation + * + * This job requests a JIT allocation based on the request in the + * @base_jit_alloc_info structure which is passed via the jc element of + * the atom. + * + * It should be noted that the id entry in @base_jit_alloc_info must not + * be reused until it has been released via @BASE_JD_REQ_SOFT_JIT_FREE. + * + * Should this soft job fail it is expected that a @BASE_JD_REQ_SOFT_JIT_FREE + * soft job to free the JIT allocation is still made. + * + * The job will complete immediately. + */ +#define BASE_JD_REQ_SOFT_JIT_ALLOC (BASE_JD_REQ_SOFT_JOB | 0x9) +/** + * SW only requirement: Just In Time free + * + * This job requests a JIT allocation created by @BASE_JD_REQ_SOFT_JIT_ALLOC + * to be freed. The ID of the JIT allocation is passed via the jc element of + * the atom. + * + * The job will complete immediately. + */ +#define BASE_JD_REQ_SOFT_JIT_FREE (BASE_JD_REQ_SOFT_JOB | 0xa) + +/** + * SW only requirement: Map external resource + * + * This job requests external resource(s) are mapped once the dependencies + * of the job have been satisfied. The list of external resources are + * passed via the jc element of the atom which is a pointer to a + * @base_external_resource_list. + */ +#define BASE_JD_REQ_SOFT_EXT_RES_MAP (BASE_JD_REQ_SOFT_JOB | 0xb) +/** + * SW only requirement: Unmap external resource + * + * This job requests external resource(s) are unmapped once the dependencies + * of the job has been satisfied. The list of external resources are + * passed via the jc element of the atom which is a pointer to a + * @base_external_resource_list. + */ +#define BASE_JD_REQ_SOFT_EXT_RES_UNMAP (BASE_JD_REQ_SOFT_JOB | 0xc) /** * HW Requirement: Requires Compute shaders (but not Vertex or Geometry Shaders) @@ -485,54 +629,79 @@ typedef u16 base_jd_core_req; * In contrast to @ref BASE_JD_REQ_CS, this does \b not indicate that the Job * Chain contains 'Geometry Shader' or 'Vertex Shader' jobs. */ -#define BASE_JD_REQ_ONLY_COMPUTE (1U << 10) +#define BASE_JD_REQ_ONLY_COMPUTE ((base_jd_core_req)1 << 10) /** * HW Requirement: Use the base_jd_atom::device_nr field to specify a * particular core group * - * If both BASE_JD_REQ_COHERENT_GROUP and this flag are set, this flag takes priority + * If both @ref BASE_JD_REQ_COHERENT_GROUP and this flag are set, this flag takes priority * - * This is only guaranteed to work for BASE_JD_REQ_ONLY_COMPUTE atoms. + * This is only guaranteed to work for @ref BASE_JD_REQ_ONLY_COMPUTE atoms. * * If the core availability policy is keeping the required core group turned off, then - * the job will fail with a BASE_JD_EVENT_PM_EVENT error code. + * the job will fail with a @ref BASE_JD_EVENT_PM_EVENT error code. 
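 *
 * A hedged sketch of the intended usage (the atom variable stands for a
 * struct base_jd_atom_v2 being prepared by user space and is an assumption of
 * this example): targeting core group 1 from a compute-only chain might look
 * like
 *
 *   atom.core_req = BASE_JD_REQ_ONLY_COMPUTE |
 *                   BASE_JD_REQ_SPECIFIC_COHERENT_GROUP;
 *   atom.device_nr = 1;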
*/ -#define BASE_JD_REQ_SPECIFIC_COHERENT_GROUP (1U << 11) +#define BASE_JD_REQ_SPECIFIC_COHERENT_GROUP ((base_jd_core_req)1 << 11) /** * SW Flag: If this bit is set then the successful completion of this atom * will not cause an event to be sent to userspace */ -#define BASE_JD_REQ_EVENT_ONLY_ON_FAILURE (1U << 12) +#define BASE_JD_REQ_EVENT_ONLY_ON_FAILURE ((base_jd_core_req)1 << 12) /** * SW Flag: If this bit is set then completion of this atom will not cause an * event to be sent to userspace, whether successful or not. */ -#define BASEP_JD_REQ_EVENT_NEVER (1U << 14) +#define BASEP_JD_REQ_EVENT_NEVER ((base_jd_core_req)1 << 14) /** -* These requirement bits are currently unused in base_jd_core_req (currently a u16) -*/ - -#define BASEP_JD_REQ_RESERVED_BIT5 (1U << 5) -#define BASEP_JD_REQ_RESERVED_BIT15 (1U << 15) + * SW Flag: Skip GPU cache clean and invalidation before starting a GPU job. + * + * If this bit is set then the GPU's cache will not be cleaned and invalidated + * until a GPU job starts which does not have this bit set or a job completes + * which does not have the @ref BASE_JD_REQ_SKIP_CACHE_END bit set. Do not use if + * the CPU may have written to memory addressed by the job since the last job + * without this bit set was submitted. + */ +#define BASE_JD_REQ_SKIP_CACHE_START ((base_jd_core_req)1 << 15) /** -* Mask of all the currently unused requirement bits in base_jd_core_req. -*/ + * SW Flag: Skip GPU cache clean and invalidation after a GPU job completes. + * + * If this bit is set then the GPU's cache will not be cleaned and invalidated + * until a GPU job completes which does not have this bit set or a job starts + * which does not have the @ref BASE_JD_REQ_SKIP_CACHE_START bit set. Do not use if + * the CPU may read from or partially overwrite memory addressed by the job + * before the next job without this bit set completes. + */ +#define BASE_JD_REQ_SKIP_CACHE_END ((base_jd_core_req)1 << 16) -#define BASEP_JD_REQ_RESERVED (BASEP_JD_REQ_RESERVED_BIT5 | \ - BASEP_JD_REQ_RESERVED_BIT15) +/** + * These requirement bits are currently unused in base_jd_core_req + */ +#define BASEP_JD_REQ_RESERVED \ + (~(BASE_JD_REQ_ATOM_TYPE | BASE_JD_REQ_EXTERNAL_RESOURCES | \ + BASE_JD_REQ_EVENT_ONLY_ON_FAILURE | BASEP_JD_REQ_EVENT_NEVER | \ + BASE_JD_REQ_EVENT_COALESCE | \ + BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP | \ + BASE_JD_REQ_FS_AFBC | BASE_JD_REQ_PERMON | \ + BASE_JD_REQ_SKIP_CACHE_START | BASE_JD_REQ_SKIP_CACHE_END)) /** * Mask of all bits in base_jd_core_req that control the type of the atom. * * This allows dependency only atoms to have flags set */ -#define BASEP_JD_REQ_ATOM_TYPE (~(BASEP_JD_REQ_RESERVED | BASE_JD_REQ_EVENT_ONLY_ON_FAILURE |\ - BASE_JD_REQ_EXTERNAL_RESOURCES | BASEP_JD_REQ_EVENT_NEVER)) +#define BASE_JD_REQ_ATOM_TYPE \ + (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T | BASE_JD_REQ_CF | \ + BASE_JD_REQ_V | BASE_JD_REQ_SOFT_JOB | BASE_JD_REQ_ONLY_COMPUTE) + +/** + * Mask of all bits in base_jd_core_req that control the type of a soft job. + */ +#define BASE_JD_REQ_SOFT_JOB_TYPE (BASE_JD_REQ_SOFT_JOB | 0x1f) /** * @brief States to model state machine processed by kbasep_js_job_check_ref_cores(), which @@ -636,18 +805,26 @@ struct base_dependency { base_jd_dep_type dependency_type; /**< Dependency type */ }; +/* This structure has changed since UK 10.2 for which base_jd_core_req was a u16 value.
+ * In order to keep the size of the structure same, padding field has been adjusted + * accordingly and core_req field of a u32 type (to which UK 10.3 base_jd_core_req defines) + * is added at the end of the structure. Place in the structure previously occupied by u16 core_req + * is kept but renamed to compat_core_req and as such it can be used in ioctl call for job submission + * as long as UK 10.2 legacy is supported. Once when this support ends, this field can be left + * for possible future use. */ typedef struct base_jd_atom_v2 { u64 jc; /**< job-chain GPU address */ struct base_jd_udata udata; /**< user data */ kbase_pointer extres_list; /**< list of external resources */ u16 nr_extres; /**< nr of external resources */ - base_jd_core_req core_req; /**< core requirements */ + u16 compat_core_req; /**< core requirements which correspond to the legacy support for UK 10.2 */ struct base_dependency pre_dep[2]; /**< pre-dependencies, one need to use SETTER function to assign this field, this is done in order to reduce possibility of improper assigment of a dependency field */ base_atom_id atom_number; /**< unique number to identify the atom */ base_jd_prio prio; /**< Atom priority. Refer to @ref base_jd_prio for more details */ u8 device_nr; /**< coregroup when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP specified */ - u8 padding[5]; + u8 padding[1]; + base_jd_core_req core_req; /**< core requirements */ } base_jd_atom_v2; #ifdef BASE_LEGACY_UK6_SUPPORT @@ -656,14 +833,14 @@ struct base_jd_atom_v2_uk6 { struct base_jd_udata udata; /**< user data */ kbase_pointer extres_list; /**< list of external resources */ u16 nr_extres; /**< nr of external resources */ - base_jd_core_req core_req; /**< core requirements */ + u16 core_req; /**< core requirements */ base_atom_id pre_dep[2]; /**< pre-dependencies */ base_atom_id atom_number; /**< unique number to identify the atom */ base_jd_prio prio; /**< priority - smaller is higher priority */ u8 device_nr; /**< coregroup when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP specified */ u8 padding[7]; }; -#endif +#endif /* BASE_LEGACY_UK6_SUPPORT */ typedef enum base_external_resource_access { BASE_EXT_RES_ACCESS_SHARED, @@ -674,6 +851,31 @@ typedef struct base_external_resource { u64 ext_resource; } base_external_resource; + +/** + * The maximum number of external resources which can be mapped/unmapped + * in a single request. + */ +#define BASE_EXT_RES_COUNT_MAX 10 + +/** + * struct base_external_resource_list - Structure which describes a list of + * external resources. + * @count: The number of resources. + * @ext_res: Array of external resources which is + * sized at allocation time. + */ +struct base_external_resource_list { + u64 count; + struct base_external_resource ext_res[1]; +}; + +struct base_jd_debug_copy_buffer { + u64 address; + u64 size; + struct base_external_resource extres; +}; + /** * @brief Setter for a dependency structure * @@ -1450,7 +1652,7 @@ typedef struct mali_base_gpu_props { * Flags to pass to ::base_context_init. * Flags can be ORed together to enable multiple things. * - * These share the same space as @ref basep_context_private_flags, and so must + * These share the same space as BASEP_CONTEXT_FLAG_*, and so must * not collide with them. */ enum base_context_create_flags { @@ -1479,7 +1681,7 @@ enum base_context_create_flags { #define BASE_CONTEXT_CREATE_KERNEL_FLAGS \ ((u32)BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) -/** +/* * Private flags used on the base context * * These start at bit 31, and run down to zero. 
@@ -1487,10 +1689,8 @@ enum base_context_create_flags { * They share the same space as @ref base_context_create_flags, and so must * not collide with them. */ -enum basep_context_private_flags { - /** Private flag tracking whether job descriptor dumping is disabled */ - BASEP_CONTEXT_FLAG_JOB_DUMP_DISABLED = (1 << 31) -}; +/** Private flag tracking whether job descriptor dumping is disabled */ +#define BASEP_CONTEXT_FLAG_JOB_DUMP_DISABLED ((u32)(1 << 31)) /** @} end group base_user_api_core */ @@ -1563,9 +1763,21 @@ typedef struct base_jd_replay_payload { * Core requirements for the fragment job chain */ base_jd_core_req fragment_core_req; +} base_jd_replay_payload; +#ifdef BASE_LEGACY_UK10_2_SUPPORT +typedef struct base_jd_replay_payload_uk10_2 { + u64 tiler_jc_list; + u64 fragment_jc; + u64 tiler_heap_free; + u16 fragment_hierarchy_mask; + u16 tiler_hierarchy_mask; + u32 hierarchy_default_weight; + u16 tiler_core_req; + u16 fragment_core_req; u8 padding[4]; -} base_jd_replay_payload; +} base_jd_replay_payload_uk10_2; +#endif /* BASE_LEGACY_UK10_2_SUPPORT */ /** * @brief An entry in the linked list of job chains to be replayed. This must diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase.h b/drivers/gpu/arm/midgard_for_linux/mali_kbase.h index fe58341fdef8..b6d28fea9987 100644 --- a/drivers/gpu/arm/midgard_for_linux/mali_kbase.h +++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -45,6 +45,7 @@ #include #include +#include "mali_kbase_strings.h" #include "mali_kbase_pm.h" #include "mali_kbase_mem_lowlevel.h" #include "mali_kbase_defs.h" @@ -139,7 +140,6 @@ void kbase_jd_done_worker(struct work_struct *data); void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr, ktime_t *end_timestamp, kbasep_js_atom_done_code done_code); void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom); -void kbase_jd_evict(struct kbase_device *kbdev, struct kbase_jd_atom *katom); void kbase_jd_zap_context(struct kbase_context *kctx); bool jd_done_nolock(struct kbase_jd_atom *katom, struct list_head *completed_jobs_ctx); @@ -147,6 +147,7 @@ void kbase_jd_free_external_resources(struct kbase_jd_atom *katom); bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *user_atom, struct kbase_jd_atom *katom); +void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom); void kbase_job_done(struct kbase_device *kbdev, u32 done); @@ -174,7 +175,7 @@ void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js, void kbase_job_slot_hardstop(struct kbase_context *kctx, int js, struct kbase_jd_atom *target_katom); void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action, - u16 core_reqs, struct kbase_jd_atom *target_katom); + base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom); void kbase_job_check_leave_disjoint(struct kbase_device *kbdev, struct kbase_jd_atom *target_katom); @@ -191,9 +192,17 @@ int kbase_prepare_soft_job(struct kbase_jd_atom *katom); void kbase_finish_soft_job(struct kbase_jd_atom *katom); void kbase_cancel_soft_job(struct kbase_jd_atom *katom); void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev); +void kbasep_add_waiting_soft_job(struct kbase_jd_atom *katom); +void 
kbasep_remove_waiting_soft_job(struct kbase_jd_atom *katom); +int kbase_soft_event_update(struct kbase_context *kctx, + u64 event, + unsigned char new_status); bool kbase_replay_process(struct kbase_jd_atom *katom); +void kbasep_soft_job_timeout_worker(unsigned long data); +void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt); + /* api used internally for register access. Contains validation and tracing */ void kbase_device_trace_register_access(struct kbase_context *kctx, enum kbase_reg_access_type type, u16 reg_offset, u32 reg_value); int kbase_device_trace_buffer_install( @@ -204,7 +213,6 @@ void kbase_device_trace_buffer_uninstall(struct kbase_context *kctx); void kbase_os_reg_write(struct kbase_device *kbdev, u16 offset, u32 value); u32 kbase_os_reg_read(struct kbase_device *kbdev, u16 offset); - void kbasep_as_do_poke(struct work_struct *work); /** Returns the name associated with a Mali exception code diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_cache_policy.c b/drivers/gpu/arm/midgard_for_linux/mali_kbase_cache_policy.c index 2fb5e3edf49f..c67b3e97f1af 100644 --- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_cache_policy.c +++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_cache_policy.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -43,6 +43,11 @@ u32 kbase_cache_enabled(u32 flags, u32 nr_pages) void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle, size_t size, enum dma_data_direction dir) { +/* Check if kernel is using coherency with GPU */ +#ifdef CONFIG_MALI_COH_KERN + if (kbdev->system_coherency == COHERENCY_ACE) + return; +#endif /* CONFIG_MALI_COH_KERN */ dma_sync_single_for_device(kbdev->dev, handle, size, dir); } @@ -50,5 +55,10 @@ void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle, void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle, size_t size, enum dma_data_direction dir) { +/* Check if kernel is using coherency with GPU */ +#ifdef CONFIG_MALI_COH_KERN + if (kbdev->system_coherency == COHERENCY_ACE) + return; +#endif /* CONFIG_MALI_COH_KERN */ dma_sync_single_for_cpu(kbdev->dev, handle, size, dir); } diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_config.h b/drivers/gpu/arm/midgard_for_linux/mali_kbase_config.h index 816e45c4d02d..356d52bcd774 100644 --- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_config.h +++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_config.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -45,13 +45,6 @@ * @{ */ -#if !MALI_CUSTOMER_RELEASE -/* This flag is set for internal builds so we can run tests without credentials. */ -#define KBASE_HWCNT_DUMP_BYPASS_ROOT 1 -#else -#define KBASE_HWCNT_DUMP_BYPASS_ROOT 0 -#endif - #include /* Forward declaration of struct kbase_device */ @@ -105,7 +98,7 @@ struct kbase_pm_callback_conf { * The system integrator can decide whether to either do nothing, just switch off * the clocks to the GPU, or to completely power down the GPU. 
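 *
 * A minimal sketch of a clock-gating variant (the platform data type and its
 * gpu_clk member are assumptions, not defined by this driver):
 *
 *   static void example_power_off_callback(struct kbase_device *kbdev)
 *   {
 *           struct example_platform_data *pd = kbdev->platform_context;
 *
 *           clk_disable_unprepare(pd->gpu_clk);
 *   }
 *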
* The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the - * platform \em callbacks responsiblity to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf). + * platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf). */ void (*power_off_callback)(struct kbase_device *kbdev); @@ -115,7 +108,7 @@ struct kbase_pm_callback_conf { * succeed. The return value specifies whether the GPU was powered down since the call to power_off_callback. * If the GPU state has been lost then this function must return 1, otherwise it should return 0. * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the - * platform \em callbacks responsiblity to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf). + * platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf). * * The return value of the first call to this function is ignored. * @@ -160,7 +153,7 @@ struct kbase_pm_callback_conf { * The runtime calls can be triggered by calls from @ref power_off_callback and @ref power_on_callback. * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature. * - * @return 0 on success, else int erro code. + * @return 0 on success, else int error code. */ int (*power_runtime_init_callback)(struct kbase_device *kbdev); @@ -203,8 +196,8 @@ struct kbase_pm_callback_conf { * Returning 0 will cause the runtime PM core to conduct a regular * autosuspend. * - * This callback is optional and if not provided regular ausosuspend - * will triggered. + * This callback is optional and if not provided regular autosuspend + * will be triggered. * * Note: The Linux kernel must have CONFIG_PM_RUNTIME enabled to use * this feature. diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_config_defaults.h b/drivers/gpu/arm/midgard_for_linux/mali_kbase_config_defaults.h index bd48ed96e962..9b00cce9b2b3 100644 --- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_config_defaults.h +++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_config_defaults.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2013-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -148,11 +148,6 @@ enum { */ #define DEFAULT_AWID_LIMIT KBASE_AID_32 -/** - * Default setting for using alternative hardware counters. - */ -#define DEFAULT_ALTERNATIVE_HWC false - /** * Default UMP device mapping. A UMP_DEVICE__SHIFT value which * defines which UMP device this GPU should be mapped to. @@ -220,6 +215,12 @@ enum { */ #define DEFAULT_JS_HARD_STOP_TICKS_DUMPING (15000) /* 1500s */ +/* + * Default timeout for some software jobs, after which the software event wait + * jobs will be cancelled. 
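 *
 * The value is expressed in milliseconds. A hedged sketch of how a platform
 * might apply it (the soft_job_timeout_ms field is taken from the sysfs
 * handler added later in this patch):
 *
 *   atomic_set(&kbdev->js_data.soft_job_timeout_ms,
 *              DEFAULT_JS_SOFT_JOB_TIMEOUT);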
+ */ +#define DEFAULT_JS_SOFT_JOB_TIMEOUT ((u32)3000) /* 3s */ + /* * Default minimum number of scheduling ticks before the GPU is reset to clear a * "stuck" job diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_context.c b/drivers/gpu/arm/midgard_for_linux/mali_kbase_context.c index 798979963937..344a1f16de8a 100644 --- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_context.c +++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_context.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -23,8 +23,7 @@ #include #include -#include - +#include /** * kbase_create_context() - Create a kernel base context. @@ -65,6 +64,8 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat) kctx->process_mm = NULL; atomic_set(&kctx->nonmapped_pages, 0); kctx->slots_pullable = 0; + kctx->tgid = current->tgid; + kctx->pid = current->pid; err = kbase_mem_pool_init(&kctx->mem_pool, kbdev->mem_pool_max_size_default, @@ -72,11 +73,15 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat) if (err) goto free_kctx; + err = kbase_mem_evictable_init(kctx); + if (err) + goto free_pool; + atomic_set(&kctx->used_pages, 0); err = kbase_jd_init(kctx); if (err) - goto free_pool; + goto deinit_evictable; err = kbasep_js_kctx_init(kctx); if (err) @@ -86,16 +91,22 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat) if (err) goto free_jd; + atomic_set(&kctx->drain_pending, 0); + mutex_init(&kctx->reg_lock); INIT_LIST_HEAD(&kctx->waiting_soft_jobs); + spin_lock_init(&kctx->waiting_soft_jobs_lock); #ifdef CONFIG_KDS INIT_LIST_HEAD(&kctx->waiting_kds_resource); #endif + err = kbase_dma_fence_init(kctx); + if (err) + goto free_event; err = kbase_mmu_init(kctx); if (err) - goto free_event; + goto term_dma_fence; kctx->pgd = kbase_mmu_alloc_pgd(kctx); if (!kctx->pgd) @@ -105,8 +116,6 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat) if (!kctx->aliasing_sink_page) goto no_sink_page; - kctx->tgid = current->tgid; - kctx->pid = current->pid; init_waitqueue_head(&kctx->event_queue); kctx->cookies = KBASE_COOKIE_MASK; @@ -115,6 +124,14 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat) err = kbase_region_tracker_init(kctx); if (err) goto no_region_tracker; + + err = kbase_sticky_resource_init(kctx); + if (err) + goto no_sticky; + + err = kbase_jit_init(kctx); + if (err) + goto no_jit; #ifdef CONFIG_GPU_TRACEPOINTS atomic_set(&kctx->jctx.work_id, 0); #endif @@ -126,8 +143,18 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat) mutex_init(&kctx->vinstr_cli_lock); + setup_timer(&kctx->soft_job_timeout, + kbasep_soft_job_timeout_worker, + (uintptr_t)kctx); + return kctx; +no_jit: + kbase_gpu_vm_lock(kctx); + kbase_sticky_resource_term(kctx); + kbase_gpu_vm_unlock(kctx); +no_sticky: + kbase_region_tracker_term(kctx); no_region_tracker: kbase_mem_pool_free(&kctx->mem_pool, kctx->aliasing_sink_page, false); no_sink_page: @@ -137,12 +164,16 @@ no_sink_page: kbase_gpu_vm_unlock(kctx); free_mmu: kbase_mmu_term(kctx); +term_dma_fence: + kbase_dma_fence_term(kctx); free_event: kbase_event_cleanup(kctx); free_jd: /* Safe to call this one even when didn't initialize (assuming kctx was sufficiently zeroed) */ kbasep_js_kctx_term(kctx); kbase_jd_exit(kctx); +deinit_evictable: + kbase_mem_evictable_deinit(kctx); 
free_pool: kbase_mem_pool_term(&kctx->mem_pool); free_kctx: @@ -188,8 +219,18 @@ void kbase_destroy_context(struct kbase_context *kctx) kbase_jd_zap_context(kctx); kbase_event_cleanup(kctx); + /* + * JIT must be terminated before the code below as it must be called + * without the region lock being held. + * The code above ensures no new JIT allocations can be made by + * by the time we get to this point of context tear down. + */ + kbase_jit_term(kctx); + kbase_gpu_vm_lock(kctx); + kbase_sticky_resource_term(kctx); + /* MMU is disabled as part of scheduling out the context */ kbase_mmu_free_pgd(kctx); @@ -219,12 +260,15 @@ void kbase_destroy_context(struct kbase_context *kctx) kbase_pm_context_idle(kbdev); + kbase_dma_fence_term(kctx); + kbase_mmu_term(kctx); pages = atomic_read(&kctx->used_pages); if (pages != 0) dev_warn(kbdev->dev, "%s: %d pages in use!\n", __func__, pages); + kbase_mem_evictable_deinit(kctx); kbase_mem_pool_term(&kctx->mem_pool); WARN_ON(atomic_read(&kctx->nonmapped_pages) != 0); diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_context.h b/drivers/gpu/arm/midgard_for_linux/mali_kbase_context.h new file mode 100644 index 000000000000..a3f5bb0ce0da --- /dev/null +++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_context.h @@ -0,0 +1,90 @@ +/* + * + * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +#ifndef _KBASE_CONTEXT_H_ +#define _KBASE_CONTEXT_H_ + +#include + + +int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags); + +/** + * kbase_ctx_flag - Check if @flag is set on @kctx + * @kctx: Pointer to kbase context to check + * @flag: Flag to check + * + * Return: true if @flag is set on @kctx, false if not. + */ +static inline bool kbase_ctx_flag(struct kbase_context *kctx, + enum kbase_context_flags flag) +{ + return atomic_read(&kctx->flags) & flag; +} + +/** + * kbase_ctx_flag_clear - Clear @flag on @kctx + * @kctx: Pointer to kbase context + * @flag: Flag to clear + * + * Clear the @flag on @kctx. This is done atomically, so other flags being + * cleared or set at the same time will be safe. + * + * Some flags have locking requirements, check the documentation for the + * respective flags. + */ +static inline void kbase_ctx_flag_clear(struct kbase_context *kctx, + enum kbase_context_flags flag) +{ +#if KERNEL_VERSION(4, 3, 0) > LINUX_VERSION_CODE + /* + * Earlier kernel versions doesn't have atomic_andnot() or + * atomic_and(). atomic_clear_mask() was only available on some + * architectures and removed on arm in v3.13 on arm and arm64. + * + * Use a compare-exchange loop to clear the flag on pre 4.3 kernels, + * when atomic_andnot() becomes available. + */ + int old, new; + + do { + old = atomic_read(&kctx->flags); + new = old & ~flag; + + } while (atomic_cmpxchg(&kctx->flags, old, new) != old); +#else + atomic_andnot(flag, &kctx->flags); +#endif +} + +/** + * kbase_ctx_flag_set - Set @flag on @kctx + * @kctx: Pointer to kbase context + * @flag: Flag to clear + * + * Set the @flag on @kctx. 
This is done atomically, so other flags being + * cleared or set at the same time will be safe. + * + * Some flags have locking requirements, check the documentation for the + * respective flags. + */ +static inline void kbase_ctx_flag_set(struct kbase_context *kctx, + enum kbase_context_flags flag) +{ + atomic_or(flag, &kctx->flags); +} +#endif /* _KBASE_CONTEXT_H_ */ diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_core_linux.c b/drivers/gpu/arm/midgard_for_linux/mali_kbase_core_linux.c index ab6f5e45eacc..ee59504cd4e8 100644 --- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_core_linux.c +++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_core_linux.c @@ -1,7 +1,6 @@ - /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -18,13 +17,10 @@ #include "platform/rk/custom_log.h" #include -#include #include #include #include -#include #include -#include #include #ifdef CONFIG_MALI_DEVFREQ #include @@ -63,12 +59,10 @@ #include #include #include -#include #include #include /* is_compat_task */ #include #include -#include #ifdef CONFIG_MALI_PLATFORM_DEVICETREE #include #endif /* CONFIG_MALI_PLATFORM_DEVICETREE */ @@ -88,9 +82,6 @@ #include -#ifdef CONFIG_MACH_MANTA -#include -#endif #if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)) #include @@ -98,9 +89,9 @@ #include #endif -#if defined(CONFIG_MALI_MIPE_ENABLED) #include -#endif + +#include /* GPU IRQ Tags */ #define JOB_IRQ_TAG 0 @@ -112,12 +103,9 @@ static struct kbase_exported_test_data shared_kernel_test_data; EXPORT_SYMBOL(shared_kernel_test_data); #endif /* MALI_UNIT_TEST */ -#define KBASE_DRV_NAME "mali" /** rk_ext : version of rk_ext on mali_ko, aka. rk_ko_ver. */ #define ROCKCHIP_VERSION (13) -static const char kbase_drv_name[] = KBASE_DRV_NAME; - static int kbase_dev_nr; static DEFINE_MUTEX(kbase_dev_list_lock); @@ -160,204 +148,8 @@ static int kds_resource_release(struct inode *inode, struct file *file) } return 0; } - -static int kbasep_kds_allocate_resource_list_data(struct kbase_context *kctx, struct base_external_resource *ext_res, int num_elems, struct kbase_kds_resource_list_data *resources_list) -{ - struct base_external_resource *res = ext_res; - int res_id; - - /* assume we have to wait for all */ - - KBASE_DEBUG_ASSERT(0 != num_elems); - resources_list->kds_resources = kmalloc_array(num_elems, - sizeof(struct kds_resource *), GFP_KERNEL); - - if (NULL == resources_list->kds_resources) - return -ENOMEM; - - KBASE_DEBUG_ASSERT(0 != num_elems); - resources_list->kds_access_bitmap = kzalloc( - sizeof(unsigned long) * - ((num_elems + BITS_PER_LONG - 1) / BITS_PER_LONG), - GFP_KERNEL); - - if (NULL == resources_list->kds_access_bitmap) { - kfree(resources_list->kds_access_bitmap); - return -ENOMEM; - } - - kbase_gpu_vm_lock(kctx); - for (res_id = 0; res_id < num_elems; res_id++, res++) { - int exclusive; - struct kbase_va_region *reg; - struct kds_resource *kds_res = NULL; - - exclusive = res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE; - reg = kbase_region_tracker_find_region_enclosing_address(kctx, res->ext_resource & ~BASE_EXT_RES_ACCESS_EXCLUSIVE); - - /* did we find a matching region object? 
*/ - if (NULL == reg || (reg->flags & KBASE_REG_FREE)) - break; - - /* no need to check reg->alloc as only regions with an alloc has - * a size, and kbase_region_tracker_find_region_enclosing_address - * only returns regions with size > 0 */ - switch (reg->gpu_alloc->type) { -#if defined(CONFIG_UMP) && defined(CONFIG_KDS) - case KBASE_MEM_TYPE_IMPORTED_UMP: - kds_res = ump_dd_kds_resource_get(reg->gpu_alloc->imported.ump_handle); - break; -#endif /* defined(CONFIG_UMP) && defined(CONFIG_KDS) */ - default: - break; - } - - /* no kds resource for the region ? */ - if (!kds_res) - break; - - resources_list->kds_resources[res_id] = kds_res; - - if (exclusive) - set_bit(res_id, resources_list->kds_access_bitmap); - } - kbase_gpu_vm_unlock(kctx); - - /* did the loop run to completion? */ - if (res_id == num_elems) - return 0; - - /* Clean up as the resource list is not valid. */ - kfree(resources_list->kds_resources); - kfree(resources_list->kds_access_bitmap); - - return -EINVAL; -} - -static bool kbasep_validate_kbase_pointer( - struct kbase_context *kctx, union kbase_pointer *p) -{ - if (kctx->is_compat) { - if (p->compat_value == 0) - return false; - } else { - if (NULL == p->value) - return false; - } - return true; -} - -static int kbase_external_buffer_lock(struct kbase_context *kctx, - struct kbase_uk_ext_buff_kds_data *args, u32 args_size) -{ - struct base_external_resource *ext_res_copy; - size_t ext_resource_size; - int ret = -EINVAL; - int fd = -EBADF; - struct base_external_resource __user *ext_res_user; - int __user *file_desc_usr; - struct kbasep_kds_resource_set_file_data *fdata; - struct kbase_kds_resource_list_data resource_list_data; - - if (args_size != sizeof(struct kbase_uk_ext_buff_kds_data)) - return -EINVAL; - - /* Check user space has provided valid data */ - if (!kbasep_validate_kbase_pointer(kctx, &args->external_resource) || - !kbasep_validate_kbase_pointer(kctx, &args->file_descriptor) || - (0 == args->num_res) || - (args->num_res > KBASE_MAXIMUM_EXT_RESOURCES)) - return -EINVAL; - - ext_resource_size = sizeof(struct base_external_resource) * args->num_res; - - KBASE_DEBUG_ASSERT(0 != ext_resource_size); - ext_res_copy = kmalloc(ext_resource_size, GFP_KERNEL); - - if (!ext_res_copy) - return -EINVAL; -#ifdef CONFIG_COMPAT - if (kctx->is_compat) { - ext_res_user = compat_ptr(args->external_resource.compat_value); - file_desc_usr = compat_ptr(args->file_descriptor.compat_value); - } else { -#endif /* CONFIG_COMPAT */ - ext_res_user = args->external_resource.value; - file_desc_usr = args->file_descriptor.value; -#ifdef CONFIG_COMPAT - } -#endif /* CONFIG_COMPAT */ - - /* Copy the external resources to lock from user space */ - if (copy_from_user(ext_res_copy, ext_res_user, ext_resource_size)) - goto out; - - /* Allocate data to be stored in the file */ - fdata = kmalloc(sizeof(*fdata), GFP_KERNEL); - - if (!fdata) { - ret = -ENOMEM; - goto out; - } - - /* Parse given elements and create resource and access lists */ - ret = kbasep_kds_allocate_resource_list_data(kctx, - ext_res_copy, args->num_res, &resource_list_data); - if (!ret) { - long err; - - fdata->lock = NULL; - - fd = anon_inode_getfd("kds_ext", &kds_resource_fops, fdata, 0); - - err = copy_to_user(file_desc_usr, &fd, sizeof(fd)); - - /* If the file descriptor was valid and we successfully copied - * it to user space, then we can try and lock the requested - * kds resources. 
- */ - if ((fd >= 0) && (0 == err)) { - struct kds_resource_set *lock; - - lock = kds_waitall(args->num_res, - resource_list_data.kds_access_bitmap, - resource_list_data.kds_resources, - KDS_WAIT_BLOCKING); - - if (!lock) { - ret = -EINVAL; - } else if (IS_ERR(lock)) { - ret = PTR_ERR(lock); - } else { - ret = 0; - fdata->lock = lock; - } - } else { - ret = -EINVAL; - } - - kfree(resource_list_data.kds_resources); - kfree(resource_list_data.kds_access_bitmap); - } - - if (ret) { - /* If the file was opened successfully then close it which will - * clean up the file data, otherwise we clean up the file data - * ourself. - */ - if (fd >= 0) - sys_close(fd); - else - kfree(fdata); - } -out: - kfree(ext_res_copy); - - return ret; -} #endif /* CONFIG_KDS */ -#ifdef CONFIG_MALI_MIPE_ENABLED static void kbase_create_timeline_objects(struct kbase_context *kctx) { struct kbase_device *kbdev = kctx->kbdev; @@ -397,7 +189,8 @@ static void kbase_create_timeline_objects(struct kbase_context *kctx) list_for_each_entry(element, &kbdev->kctx_list, link) { kbase_tlstream_tl_summary_new_ctx( element->kctx, - (u32)(element->kctx->id)); + (u32)(element->kctx->id), + (u32)(element->kctx->tgid)); } /* Before releasing the lock, reset body stream buffers. * This will prevent context creation message to be directed to both @@ -409,7 +202,6 @@ static void kbase_create_timeline_objects(struct kbase_context *kctx) * user space. */ kbase_tlstream_flush_streams(); } -#endif static void kbase_api_handshake(struct uku_version_check_args *version) { @@ -476,6 +268,34 @@ enum mali_error { MALI_ERROR_FUNCTION_FAILED, }; +enum { + inited_mem = (1u << 0), + inited_js = (1u << 1), + inited_pm_runtime_init = (1u << 2), +#ifdef CONFIG_MALI_DEVFREQ + inited_devfreq = (1u << 3), +#endif /* CONFIG_MALI_DEVFREQ */ + inited_tlstream = (1u << 4), + inited_backend_early = (1u << 5), + inited_backend_late = (1u << 6), + inited_device = (1u << 7), + inited_vinstr = (1u << 8), +#ifndef CONFIG_MALI_PRFCNT_SET_SECONDARY + inited_ipa = (1u << 9), +#endif /* CONFIG_MALI_PRFCNT_SET_SECONDARY */ + inited_job_fault = (1u << 10), + inited_misc_register = (1u << 11), + inited_get_device = (1u << 12), + inited_sysfs_group = (1u << 13), + inited_dev_list = (1u << 14), + inited_debugfs = (1u << 15), + inited_gpu_device = (1u << 16), + inited_registers_map = (1u << 17), + inited_power_control = (1u << 19), + inited_buslogger = (1u << 20) +}; + + #ifdef CONFIG_MALI_DEBUG #define INACTIVE_WAIT_MS (5000) @@ -561,6 +381,18 @@ static int kbase_dispatch(struct kbase_context *kctx, void * const args, u32 arg /* setup complete, perform normal operation */ switch (id) { + case KBASE_FUNC_MEM_JIT_INIT: + { + struct kbase_uk_mem_jit_init *jit_init = args; + + if (sizeof(*jit_init) != args_size) + goto bad_size; + + if (kbase_region_tracker_init_jit(kctx, + jit_init->va_pages)) + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + break; + } case KBASE_FUNC_MEM_ALLOC: { struct kbase_uk_mem_alloc *mem = args; @@ -569,6 +401,13 @@ static int kbase_dispatch(struct kbase_context *kctx, void * const args, u32 arg if (sizeof(*mem) != args_size) goto bad_size; +#if defined(CONFIG_64BIT) + if (!kctx->is_compat) { + /* force SAME_VA if a 64-bit client */ + mem->flags |= BASE_MEM_SAME_VA; + } +#endif + reg = kbase_mem_alloc(kctx, mem->va_pages, mem->commit_pages, mem->extent, &mem->flags, &mem->gpu_va, @@ -595,10 +434,13 @@ static int kbase_dispatch(struct kbase_context *kctx, void * const args, u32 arg break; } - if (kbase_mem_import(kctx, mem_import->type, phandle, - 
&mem_import->gpu_va, - &mem_import->va_pages, - &mem_import->flags)) { + if (kbase_mem_import(kctx, + (enum base_mem_import_type) + mem_import->type, + phandle, + &mem_import->gpu_va, + &mem_import->va_pages, + &mem_import->flags)) { mem_import->type = BASE_MEM_IMPORT_TYPE_INVALID; ukh->ret = MALI_ERROR_FUNCTION_FAILED; } @@ -779,7 +621,7 @@ copy_failed: if (sizeof(*sn) != args_size) goto bad_size; - if (sn->sset.basep_sset.mem_handle & ~PAGE_MASK) { + if (sn->sset.basep_sset.mem_handle.basep.handle & ~PAGE_MASK) { dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_SYNC: sn->sset.basep_sset.mem_handle: passed parameter is invalid"); ukh->ret = MALI_ERROR_FUNCTION_FAILED; break; @@ -954,26 +796,6 @@ copy_failed: break; } - case KBASE_FUNC_EXT_BUFFER_LOCK: - { -#ifdef CONFIG_KDS - ret = kbase_external_buffer_lock(kctx, - (struct kbase_uk_ext_buff_kds_data *)args, - args_size); - switch (ret) { - case 0: - ukh->ret = MALI_ERROR_NONE; - break; - case -ENOMEM: - ukh->ret = MALI_ERROR_OUT_OF_MEMORY; - break; - default: - ukh->ret = MALI_ERROR_FUNCTION_FAILED; - } -#endif /* CONFIG_KDS */ - break; - } - case KBASE_FUNC_SET_TEST_DATA: { #if MALI_UNIT_TEST @@ -1075,7 +897,7 @@ copy_failed: goto bad_size; if (add_data->len > KBASE_MEM_PROFILE_MAX_BUF_SIZE) { - dev_err(kbdev->dev, "buffer too big"); + dev_err(kbdev->dev, "buffer too big\n"); goto out_bad; } @@ -1119,7 +941,6 @@ copy_failed: } #endif /* CONFIG_MALI_NO_MALI */ -#ifdef CONFIG_MALI_MIPE_ENABLED case KBASE_FUNC_TLSTREAM_ACQUIRE: { struct kbase_uk_tlstream_acquire *tlstream_acquire = @@ -1179,7 +1000,6 @@ copy_failed: break; } #endif /* MALI_UNIT_TEST */ -#endif /* CONFIG_MALI_MIPE_ENABLED */ case KBASE_FUNC_GET_CONTEXT_ID: { @@ -1189,8 +1009,27 @@ copy_failed: break; } + case KBASE_FUNC_SOFT_EVENT_UPDATE: + { + struct kbase_uk_soft_event_update *update = args; + + if (sizeof(*update) != args_size) + goto bad_size; + + if (((update->new_status != BASE_JD_SOFT_EVENT_SET) && + (update->new_status != BASE_JD_SOFT_EVENT_RESET)) || + (update->flags != 0)) + goto out_bad; + + if (kbase_soft_event_update(kctx, update->evt, + update->new_status)) + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + + break; + } + default: - dev_err(kbdev->dev, "unknown ioctl %u", id); + dev_err(kbdev->dev, "unknown ioctl %u\n", id); goto out_bad; } @@ -1207,6 +1046,47 @@ static struct kbase_device *to_kbase_device(struct device *dev) return dev_get_drvdata(dev); } +static int assign_irqs(struct platform_device *pdev) +{ + struct kbase_device *kbdev = to_kbase_device(&pdev->dev); + int i; + + if (!kbdev) + return -ENODEV; + + /* 3 IRQ resources */ + for (i = 0; i < 3; i++) { + struct resource *irq_res; + int irqtag; + + irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, i); + if (!irq_res) { + dev_err(kbdev->dev, "No IRQ resource at index %d\n", i); + return -ENOENT; + } + +#ifdef CONFIG_OF + if (!strcmp(irq_res->name, "JOB")) { + irqtag = JOB_IRQ_TAG; + } else if (!strcmp(irq_res->name, "MMU")) { + irqtag = MMU_IRQ_TAG; + } else if (!strcmp(irq_res->name, "GPU")) { + irqtag = GPU_IRQ_TAG; + } else { + dev_err(&pdev->dev, "Invalid irq res name: '%s'\n", + irq_res->name); + return -EINVAL; + } +#else + irqtag = i; +#endif /* CONFIG_OF */ + kbdev->irqs[irqtag].irq = irq_res->start; + kbdev->irqs[irqtag].flags = irq_res->flags & IRQF_TRIGGER_MASK; + } + + return 0; +} + /* * API to acquire device list mutex and * return pointer to the device list head @@ -1309,7 +1189,8 @@ static int kbase_open(struct inode *inode, struct file *filp) 
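Editorial note: the new assign_irqs() helper above walks the three platform IRQ resources once at probe time and files them under logical tags (JOB/MMU/GPU when booted with a device tree, positional otherwise), so later code can request handlers by tag rather than by resource index. A rough sketch of how such a tagged table is typically consumed when installing handlers; the handler and the IRQF_SHARED choice are illustrative assumptions, not the driver's actual backend code:

#include <linux/device.h>
#include <linux/interrupt.h>

#define NR_GPU_IRQS 3

struct gpu_irq { int irq; unsigned long flags; };

/* Hypothetical handler: the real driver installs separate JOB/MMU/GPU
 * handlers from its backend IRQ code, which is not part of this hunk. */
static irqreturn_t demo_gpu_isr(int irq, void *data)
{
	return IRQ_HANDLED;
}

/* Install one handler per tagged entry filled in by an assign_irqs()-style
 * helper; .flags carries the trigger type parsed from the IRQ resource, so
 * registration preserves whatever polarity the device tree declared. */
static int demo_install_irqs(struct device *dev, struct gpu_irq *irqs)
{
	int i, err;

	for (i = 0; i < NR_GPU_IRQS; i++) {
		err = request_irq(irqs[i].irq, demo_gpu_isr,
				  irqs[i].flags | IRQF_SHARED,
				  dev_name(dev), irqs);
		if (err) {
			while (i--)
				free_irq(irqs[i].irq, irqs);
			return err;
		}
	}
	return 0;
}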
kbase_mem_pool_debugfs_add(kctx->kctx_dentry, &kctx->mem_pool); -#endif /* CONFIG_DEBUGFS */ + kbase_jit_debugfs_add(kctx); +#endif /* CONFIG_DEBUG_FS */ dev_dbg(kbdev->dev, "created base context\n"); @@ -1321,11 +1202,10 @@ static int kbase_open(struct inode *inode, struct file *filp) mutex_lock(&kbdev->kctx_list_lock); element->kctx = kctx; list_add(&element->link, &kbdev->kctx_list); -#ifdef CONFIG_MALI_MIPE_ENABLED kbase_tlstream_tl_new_ctx( element->kctx, - (u32)(element->kctx->id)); -#endif + (u32)(element->kctx->id), + (u32)(element->kctx->tgid)); mutex_unlock(&kbdev->kctx_list_lock); } else { /* we don't treat this as a fail - just warn about it */ @@ -1346,9 +1226,7 @@ static int kbase_release(struct inode *inode, struct file *filp) struct kbasep_kctx_list_element *element, *tmp; bool found_element = false; -#ifdef CONFIG_MALI_MIPE_ENABLED kbase_tlstream_tl_del_ctx(kctx); -#endif #ifdef CONFIG_DEBUG_FS debugfs_remove_recursive(kctx->kctx_dentry); @@ -1613,11 +1491,12 @@ static unsigned long kbase_get_unmapped_area(struct file *filp, flags); if (kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA)) { - info.high_limit = 1ul << 33; + info.high_limit = kctx->same_va_end << PAGE_SHIFT; info.align_mask = 0; info.align_offset = 0; } else { - info.high_limit = mm->mmap_base; + info.high_limit = min_t(unsigned long, mm->mmap_base, + (kctx->same_va_end << PAGE_SHIFT)); if (len >= SZ_2M) { info.align_offset = SZ_2M; info.align_mask = SZ_2M - 1; @@ -1661,7 +1540,6 @@ u32 kbase_os_reg_read(struct kbase_device *kbdev, u16 offset) } #endif /* !CONFIG_MALI_NO_MALI */ - /** Show callback for the @c power_policy sysfs file. * * This function is called to get the contents of the @c power_policy sysfs @@ -1984,6 +1862,89 @@ static ssize_t set_core_mask(struct device *dev, struct device_attribute *attr, */ static DEVICE_ATTR(core_mask, S_IRUGO | S_IWUSR, show_core_mask, set_core_mask); +/** + * set_soft_job_timeout() - Store callback for the soft_job_timeout sysfs + * file. + * + * @dev: The device this sysfs file is for. + * @attr: The attributes of the sysfs file. + * @buf: The value written to the sysfs file. + * @count: The number of bytes written to the sysfs file. + * + * This allows setting the timeout for software jobs. Waiting soft event wait + * jobs will be cancelled after this period expires, while soft fence wait jobs + * will print debug information if the fence debug feature is enabled. + * + * This is expressed in milliseconds. + * + * Return: count if the function succeeded. An error code on failure. + */ +static ssize_t set_soft_job_timeout(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct kbase_device *kbdev; + int soft_job_timeout_ms; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + if ((kstrtoint(buf, 0, &soft_job_timeout_ms) != 0) || + (soft_job_timeout_ms <= 0)) + return -EINVAL; + + atomic_set(&kbdev->js_data.soft_job_timeout_ms, + soft_job_timeout_ms); + + return count; +} + +/** + * show_soft_job_timeout() - Show callback for the soft_job_timeout sysfs + * file. + * + * This will return the timeout for the software jobs. + * + * @dev: The device this sysfs file is for. + * @attr: The attributes of the sysfs file. + * @buf: The output buffer for the sysfs file contents. + * + * Return: The number of bytes output to buf. 
+ */ +static ssize_t show_soft_job_timeout(struct device *dev, + struct device_attribute *attr, + char * const buf) +{ + struct kbase_device *kbdev; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + return scnprintf(buf, PAGE_SIZE, "%i\n", + atomic_read(&kbdev->js_data.soft_job_timeout_ms)); +} + +static DEVICE_ATTR(soft_job_timeout, S_IRUGO | S_IWUSR, + show_soft_job_timeout, set_soft_job_timeout); + +static u32 timeout_ms_to_ticks(struct kbase_device *kbdev, long timeout_ms, + int default_ticks, u32 old_ticks) +{ + if (timeout_ms > 0) { + u64 ticks = timeout_ms * 1000000ULL; + do_div(ticks, kbdev->js_data.scheduling_period_ns); + if (!ticks) + return 1; + return ticks; + } else if (timeout_ms < 0) { + return default_ticks; + } else { + return old_ticks; + } +} + /** Store callback for the @c js_timeouts sysfs file. * * This function is called to get the contents of the @c js_timeouts sysfs @@ -2028,99 +1989,45 @@ static ssize_t set_js_timeouts(struct device *dev, struct device_attribute *attr &js_reset_ms_cl, &js_reset_ms_dumping); if (items == 8) { - u64 ticks; - - if (js_soft_stop_ms >= 0) { - ticks = js_soft_stop_ms * 1000000ULL; - do_div(ticks, kbdev->js_data.scheduling_period_ns); - kbdev->js_soft_stop_ticks = ticks; - } else { - kbdev->js_soft_stop_ticks = -1; - } - - if (js_soft_stop_ms_cl >= 0) { - ticks = js_soft_stop_ms_cl * 1000000ULL; - do_div(ticks, kbdev->js_data.scheduling_period_ns); - kbdev->js_soft_stop_ticks_cl = ticks; - } else { - kbdev->js_soft_stop_ticks_cl = -1; - } - - if (js_hard_stop_ms_ss >= 0) { - ticks = js_hard_stop_ms_ss * 1000000ULL; - do_div(ticks, kbdev->js_data.scheduling_period_ns); - kbdev->js_hard_stop_ticks_ss = ticks; - } else { - kbdev->js_hard_stop_ticks_ss = -1; - } - - if (js_hard_stop_ms_cl >= 0) { - ticks = js_hard_stop_ms_cl * 1000000ULL; - do_div(ticks, kbdev->js_data.scheduling_period_ns); - kbdev->js_hard_stop_ticks_cl = ticks; - } else { - kbdev->js_hard_stop_ticks_cl = -1; - } - - if (js_hard_stop_ms_dumping >= 0) { - ticks = js_hard_stop_ms_dumping * 1000000ULL; - do_div(ticks, kbdev->js_data.scheduling_period_ns); - kbdev->js_hard_stop_ticks_dumping = ticks; - } else { - kbdev->js_hard_stop_ticks_dumping = -1; - } - - if (js_reset_ms_ss >= 0) { - ticks = js_reset_ms_ss * 1000000ULL; - do_div(ticks, kbdev->js_data.scheduling_period_ns); - kbdev->js_reset_ticks_ss = ticks; - } else { - kbdev->js_reset_ticks_ss = -1; - } - - if (js_reset_ms_cl >= 0) { - ticks = js_reset_ms_cl * 1000000ULL; - do_div(ticks, kbdev->js_data.scheduling_period_ns); - kbdev->js_reset_ticks_cl = ticks; - } else { - kbdev->js_reset_ticks_cl = -1; - } - - if (js_reset_ms_dumping >= 0) { - ticks = js_reset_ms_dumping * 1000000ULL; - do_div(ticks, kbdev->js_data.scheduling_period_ns); - kbdev->js_reset_ticks_dumping = ticks; - } else { - kbdev->js_reset_ticks_dumping = -1; - } - - kbdev->js_timeouts_updated = true; - - dev_dbg(kbdev->dev, "Overriding JS_SOFT_STOP_TICKS with %lu ticks (%lu ms)\n", - (unsigned long)kbdev->js_soft_stop_ticks, - js_soft_stop_ms); - dev_dbg(kbdev->dev, "Overriding JS_SOFT_STOP_TICKS_CL with %lu ticks (%lu ms)\n", - (unsigned long)kbdev->js_soft_stop_ticks_cl, - js_soft_stop_ms_cl); - dev_dbg(kbdev->dev, "Overriding JS_HARD_STOP_TICKS_SS with %lu ticks (%lu ms)\n", - (unsigned long)kbdev->js_hard_stop_ticks_ss, - js_hard_stop_ms_ss); - dev_dbg(kbdev->dev, "Overriding JS_HARD_STOP_TICKS_CL with %lu ticks (%lu ms)\n", - (unsigned long)kbdev->js_hard_stop_ticks_cl, - js_hard_stop_ms_cl); - dev_dbg(kbdev->dev, "Overriding 
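Editorial note: timeout_ms_to_ticks() above centralises the sysfs convention used by js_timeouts: a positive value is converted from milliseconds into scheduler ticks (never rounding below one tick), a negative value restores the compiled-in default, and zero keeps the current setting. A worked example of the positive branch, assuming a 100 ms (100,000,000 ns) scheduling period:

#include <linux/kernel.h>
#include <asm/div64.h>

/* Worked example: a 3000 ms request with a 100 ms (100,000,000 ns)
 * scheduling period comes out as 30 ticks; anything that rounds down to
 * zero is clamped to one tick so a timeout cannot be disabled by accident.
 * Only the positive-ms branch is shown here. */
static u32 demo_ms_to_ticks(long timeout_ms, u32 scheduling_period_ns)
{
	u64 ticks = (u64)timeout_ms * 1000000ULL;

	do_div(ticks, scheduling_period_ns);
	return ticks ? (u32)ticks : 1;	/* demo_ms_to_ticks(3000, 100000000) == 30 */
}

show_js_timeouts() further down performs the inverse conversion (ticks times period, divided by 1,000,000) through get_js_timeout_in_ms() when reporting the values back.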
JS_HARD_STOP_TICKS_DUMPING with %lu ticks (%lu ms)\n", - (unsigned long) - kbdev->js_hard_stop_ticks_dumping, - js_hard_stop_ms_dumping); - dev_dbg(kbdev->dev, "Overriding JS_RESET_TICKS_SS with %lu ticks (%lu ms)\n", - (unsigned long)kbdev->js_reset_ticks_ss, - js_reset_ms_ss); - dev_dbg(kbdev->dev, "Overriding JS_RESET_TICKS_CL with %lu ticks (%lu ms)\n", - (unsigned long)kbdev->js_reset_ticks_cl, - js_reset_ms_cl); - dev_dbg(kbdev->dev, "Overriding JS_RESET_TICKS_DUMPING with %lu ticks (%lu ms)\n", - (unsigned long)kbdev->js_reset_ticks_dumping, - js_reset_ms_dumping); + struct kbasep_js_device_data *js_data = &kbdev->js_data; + unsigned long flags; + + spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags); + +#define UPDATE_TIMEOUT(ticks_name, ms_name, default) do {\ + js_data->ticks_name = timeout_ms_to_ticks(kbdev, ms_name, \ + default, js_data->ticks_name); \ + dev_dbg(kbdev->dev, "Overriding " #ticks_name \ + " with %lu ticks (%lu ms)\n", \ + (unsigned long)js_data->ticks_name, \ + ms_name); \ + } while (0) + + UPDATE_TIMEOUT(soft_stop_ticks, js_soft_stop_ms, + DEFAULT_JS_SOFT_STOP_TICKS); + UPDATE_TIMEOUT(soft_stop_ticks_cl, js_soft_stop_ms_cl, + DEFAULT_JS_SOFT_STOP_TICKS_CL); + UPDATE_TIMEOUT(hard_stop_ticks_ss, js_hard_stop_ms_ss, + kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408) ? + DEFAULT_JS_HARD_STOP_TICKS_SS_8408 : + DEFAULT_JS_HARD_STOP_TICKS_SS); + UPDATE_TIMEOUT(hard_stop_ticks_cl, js_hard_stop_ms_cl, + DEFAULT_JS_HARD_STOP_TICKS_CL); + UPDATE_TIMEOUT(hard_stop_ticks_dumping, + js_hard_stop_ms_dumping, + DEFAULT_JS_HARD_STOP_TICKS_DUMPING); + UPDATE_TIMEOUT(gpu_reset_ticks_ss, js_reset_ms_ss, + kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408) ? + DEFAULT_JS_RESET_TICKS_SS_8408 : + DEFAULT_JS_RESET_TICKS_SS); + UPDATE_TIMEOUT(gpu_reset_ticks_cl, js_reset_ms_cl, + DEFAULT_JS_RESET_TICKS_CL); + UPDATE_TIMEOUT(gpu_reset_ticks_dumping, js_reset_ms_dumping, + DEFAULT_JS_RESET_TICKS_DUMPING); + + kbase_js_set_timeouts(kbdev); + + spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags); return count; } @@ -2131,6 +2038,16 @@ static ssize_t set_js_timeouts(struct device *dev, struct device_attribute *attr return -EINVAL; } +static unsigned long get_js_timeout_in_ms( + u32 scheduling_period_ns, + u32 ticks) +{ + u64 ms = (u64)ticks * scheduling_period_ns; + + do_div(ms, 1000000UL); + return ms; +} + /** Show callback for the @c js_timeouts sysfs file. * * This function is called to get the contents of the @c js_timeouts sysfs @@ -2147,7 +2064,6 @@ static ssize_t show_js_timeouts(struct device *dev, struct device_attribute *att { struct kbase_device *kbdev; ssize_t ret; - u64 ms; unsigned long js_soft_stop_ms; unsigned long js_soft_stop_ms_cl; unsigned long js_hard_stop_ms_ss; @@ -2156,90 +2072,34 @@ static ssize_t show_js_timeouts(struct device *dev, struct device_attribute *att unsigned long js_reset_ms_ss; unsigned long js_reset_ms_cl; unsigned long js_reset_ms_dumping; - unsigned long ticks; u32 scheduling_period_ns; kbdev = to_kbase_device(dev); if (!kbdev) return -ENODEV; - /* If no contexts have been scheduled since js_timeouts was last written - * to, the new timeouts might not have been latched yet. So check if an - * update is pending and use the new values if necessary. 
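Editorial note: the UPDATE_TIMEOUT() macro above collapses eight near-identical blocks in set_js_timeouts(); the whole update now runs under runpool_irq.lock and is pushed to the backend once through kbase_js_set_timeouts(). For reference, the first invocation expands, roughly, to the following in the context of that function (the stringised field name feeds the debug message):

	/* UPDATE_TIMEOUT(soft_stop_ticks, js_soft_stop_ms,
	 *		  DEFAULT_JS_SOFT_STOP_TICKS); expands roughly to: */
	js_data->soft_stop_ticks = timeout_ms_to_ticks(kbdev, js_soft_stop_ms,
			DEFAULT_JS_SOFT_STOP_TICKS, js_data->soft_stop_ticks);
	dev_dbg(kbdev->dev,
		"Overriding soft_stop_ticks with %lu ticks (%lu ms)\n",
		(unsigned long)js_data->soft_stop_ticks, js_soft_stop_ms);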
*/ - if (kbdev->js_timeouts_updated && kbdev->js_scheduling_period_ns > 0) - scheduling_period_ns = kbdev->js_scheduling_period_ns; - else - scheduling_period_ns = kbdev->js_data.scheduling_period_ns; + scheduling_period_ns = kbdev->js_data.scheduling_period_ns; - if (kbdev->js_timeouts_updated && kbdev->js_soft_stop_ticks > 0) - ticks = kbdev->js_soft_stop_ticks; - else - ticks = kbdev->js_data.soft_stop_ticks; - ms = (u64)ticks * scheduling_period_ns; - do_div(ms, 1000000UL); - js_soft_stop_ms = (unsigned long)ms; +#define GET_TIMEOUT(name) get_js_timeout_in_ms(\ + scheduling_period_ns, \ + kbdev->js_data.name) - if (kbdev->js_timeouts_updated && kbdev->js_soft_stop_ticks_cl > 0) - ticks = kbdev->js_soft_stop_ticks_cl; - else - ticks = kbdev->js_data.soft_stop_ticks_cl; - ms = (u64)ticks * scheduling_period_ns; - do_div(ms, 1000000UL); - js_soft_stop_ms_cl = (unsigned long)ms; + js_soft_stop_ms = GET_TIMEOUT(soft_stop_ticks); + js_soft_stop_ms_cl = GET_TIMEOUT(soft_stop_ticks_cl); + js_hard_stop_ms_ss = GET_TIMEOUT(hard_stop_ticks_ss); + js_hard_stop_ms_cl = GET_TIMEOUT(hard_stop_ticks_cl); + js_hard_stop_ms_dumping = GET_TIMEOUT(hard_stop_ticks_dumping); + js_reset_ms_ss = GET_TIMEOUT(gpu_reset_ticks_ss); + js_reset_ms_cl = GET_TIMEOUT(gpu_reset_ticks_cl); + js_reset_ms_dumping = GET_TIMEOUT(gpu_reset_ticks_dumping); - if (kbdev->js_timeouts_updated && kbdev->js_hard_stop_ticks_ss > 0) - ticks = kbdev->js_hard_stop_ticks_ss; - else - ticks = kbdev->js_data.hard_stop_ticks_ss; - ms = (u64)ticks * scheduling_period_ns; - do_div(ms, 1000000UL); - js_hard_stop_ms_ss = (unsigned long)ms; +#undef GET_TIMEOUT - if (kbdev->js_timeouts_updated && kbdev->js_hard_stop_ticks_cl > 0) - ticks = kbdev->js_hard_stop_ticks_cl; - else - ticks = kbdev->js_data.hard_stop_ticks_cl; - ms = (u64)ticks * scheduling_period_ns; - do_div(ms, 1000000UL); - js_hard_stop_ms_cl = (unsigned long)ms; - - if (kbdev->js_timeouts_updated && kbdev->js_hard_stop_ticks_dumping > 0) - ticks = kbdev->js_hard_stop_ticks_dumping; - else - ticks = kbdev->js_data.hard_stop_ticks_dumping; - ms = (u64)ticks * scheduling_period_ns; - do_div(ms, 1000000UL); - js_hard_stop_ms_dumping = (unsigned long)ms; - - if (kbdev->js_timeouts_updated && kbdev->js_reset_ticks_ss > 0) - ticks = kbdev->js_reset_ticks_ss; - else - ticks = kbdev->js_data.gpu_reset_ticks_ss; - ms = (u64)ticks * scheduling_period_ns; - do_div(ms, 1000000UL); - js_reset_ms_ss = (unsigned long)ms; - - if (kbdev->js_timeouts_updated && kbdev->js_reset_ticks_cl > 0) - ticks = kbdev->js_reset_ticks_cl; - else - ticks = kbdev->js_data.gpu_reset_ticks_cl; - ms = (u64)ticks * scheduling_period_ns; - do_div(ms, 1000000UL); - js_reset_ms_cl = (unsigned long)ms; - - if (kbdev->js_timeouts_updated && kbdev->js_reset_ticks_dumping > 0) - ticks = kbdev->js_reset_ticks_dumping; - else - ticks = kbdev->js_data.gpu_reset_ticks_dumping; - ms = (u64)ticks * scheduling_period_ns; - do_div(ms, 1000000UL); - js_reset_ms_dumping = (unsigned long)ms; - - ret = scnprintf(buf, PAGE_SIZE, "%lu %lu %lu %lu %lu %lu %lu %lu\n", - js_soft_stop_ms, js_soft_stop_ms_cl, - js_hard_stop_ms_ss, js_hard_stop_ms_cl, - js_hard_stop_ms_dumping, js_reset_ms_ss, - js_reset_ms_cl, js_reset_ms_dumping); + ret = scnprintf(buf, PAGE_SIZE, "%lu %lu %lu %lu %lu %lu %lu %lu\n", + js_soft_stop_ms, js_soft_stop_ms_cl, + js_hard_stop_ms_ss, js_hard_stop_ms_cl, + js_hard_stop_ms_dumping, js_reset_ms_ss, + js_reset_ms_cl, js_reset_ms_dumping); if (ret >= PAGE_SIZE) { buf[PAGE_SIZE - 2] = '\n'; @@ -2264,6 +2124,16 @@ static 
ssize_t show_js_timeouts(struct device *dev, struct device_attribute *att */ static DEVICE_ATTR(js_timeouts, S_IRUGO | S_IWUSR, show_js_timeouts, set_js_timeouts); +static u32 get_new_js_timeout( + u32 old_period, + u32 old_ticks, + u32 new_scheduling_period_ns) +{ + u64 ticks = (u64)old_period * (u64)old_ticks; + do_div(ticks, new_scheduling_period_ns); + return ticks?ticks:1; +} + /** * set_js_scheduling_period - Store callback for the js_scheduling_period sysfs * file @@ -2286,12 +2156,15 @@ static ssize_t set_js_scheduling_period(struct device *dev, unsigned int js_scheduling_period; u32 new_scheduling_period_ns; u32 old_period; - u64 ticks; + struct kbasep_js_device_data *js_data; + unsigned long flags; kbdev = to_kbase_device(dev); if (!kbdev) return -ENODEV; + js_data = &kbdev->js_data; + ret = kstrtouint(buf, 0, &js_scheduling_period); if (ret || !js_scheduling_period) { dev_err(kbdev->dev, "Couldn't process js_scheduling_period write operation.\n" @@ -2302,86 +2175,39 @@ static ssize_t set_js_scheduling_period(struct device *dev, new_scheduling_period_ns = js_scheduling_period * 1000000; /* Update scheduling timeouts */ - mutex_lock(&kbdev->js_data.runpool_mutex); + mutex_lock(&js_data->runpool_mutex); + spin_lock_irqsave(&js_data->runpool_irq.lock, flags); /* If no contexts have been scheduled since js_timeouts was last written * to, the new timeouts might not have been latched yet. So check if an * update is pending and use the new values if necessary. */ /* Use previous 'new' scheduling period as a base if present. */ - if (kbdev->js_timeouts_updated && kbdev->js_scheduling_period_ns) - old_period = kbdev->js_scheduling_period_ns; - else - old_period = kbdev->js_data.scheduling_period_ns; - - if (kbdev->js_timeouts_updated && kbdev->js_soft_stop_ticks > 0) - ticks = (u64)kbdev->js_soft_stop_ticks * old_period; - else - ticks = (u64)kbdev->js_data.soft_stop_ticks * - kbdev->js_data.scheduling_period_ns; - do_div(ticks, new_scheduling_period_ns); - kbdev->js_soft_stop_ticks = ticks ? ticks : 1; - - if (kbdev->js_timeouts_updated && kbdev->js_soft_stop_ticks_cl > 0) - ticks = (u64)kbdev->js_soft_stop_ticks_cl * old_period; - else - ticks = (u64)kbdev->js_data.soft_stop_ticks_cl * - kbdev->js_data.scheduling_period_ns; - do_div(ticks, new_scheduling_period_ns); - kbdev->js_soft_stop_ticks_cl = ticks ? ticks : 1; - - if (kbdev->js_timeouts_updated && kbdev->js_hard_stop_ticks_ss > 0) - ticks = (u64)kbdev->js_hard_stop_ticks_ss * old_period; - else - ticks = (u64)kbdev->js_data.hard_stop_ticks_ss * - kbdev->js_data.scheduling_period_ns; - do_div(ticks, new_scheduling_period_ns); - kbdev->js_hard_stop_ticks_ss = ticks ? ticks : 1; - - if (kbdev->js_timeouts_updated && kbdev->js_hard_stop_ticks_cl > 0) - ticks = (u64)kbdev->js_hard_stop_ticks_cl * old_period; - else - ticks = (u64)kbdev->js_data.hard_stop_ticks_cl * - kbdev->js_data.scheduling_period_ns; - do_div(ticks, new_scheduling_period_ns); - kbdev->js_hard_stop_ticks_cl = ticks ? ticks : 1; + old_period = js_data->scheduling_period_ns; - if (kbdev->js_timeouts_updated && kbdev->js_hard_stop_ticks_dumping > 0) - ticks = (u64)kbdev->js_hard_stop_ticks_dumping * old_period; - else - ticks = (u64)kbdev->js_data.hard_stop_ticks_dumping * - kbdev->js_data.scheduling_period_ns; - do_div(ticks, new_scheduling_period_ns); - kbdev->js_hard_stop_ticks_dumping = ticks ? 
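Editorial note: get_new_js_timeout() above is used by set_js_scheduling_period() to rescale every existing tick count when the scheduling period changes, so that the wall-clock length of each timeout is preserved rather than the raw number of ticks. A worked example with assumed numbers:

#include <linux/kernel.h>
#include <asm/div64.h>

/* 30 ticks at a 100,000,000 ns period represent 3 seconds of timeout;
 * rescaled to a 50,000,000 ns period that becomes 60 ticks, still 3 s. */
static u32 demo_rescale_ticks(u32 old_period_ns, u32 old_ticks,
			      u32 new_period_ns)
{
	u64 ticks = (u64)old_period_ns * (u64)old_ticks;

	do_div(ticks, new_period_ns);
	return ticks ? (u32)ticks : 1;	/* demo_rescale_ticks(100000000, 30, 50000000) == 60 */
}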
ticks : 1; +#define SET_TIMEOUT(name) \ + (js_data->name = get_new_js_timeout(\ + old_period, \ + kbdev->js_data.name, \ + new_scheduling_period_ns)) - if (kbdev->js_timeouts_updated && kbdev->js_reset_ticks_ss > 0) - ticks = (u64)kbdev->js_reset_ticks_ss * old_period; - else - ticks = (u64)kbdev->js_data.gpu_reset_ticks_ss * - kbdev->js_data.scheduling_period_ns; - do_div(ticks, new_scheduling_period_ns); - kbdev->js_reset_ticks_ss = ticks ? ticks : 1; + SET_TIMEOUT(soft_stop_ticks); + SET_TIMEOUT(soft_stop_ticks_cl); + SET_TIMEOUT(hard_stop_ticks_ss); + SET_TIMEOUT(hard_stop_ticks_cl); + SET_TIMEOUT(hard_stop_ticks_dumping); + SET_TIMEOUT(gpu_reset_ticks_ss); + SET_TIMEOUT(gpu_reset_ticks_cl); + SET_TIMEOUT(gpu_reset_ticks_dumping); - if (kbdev->js_timeouts_updated && kbdev->js_reset_ticks_cl > 0) - ticks = (u64)kbdev->js_reset_ticks_cl * old_period; - else - ticks = (u64)kbdev->js_data.gpu_reset_ticks_cl * - kbdev->js_data.scheduling_period_ns; - do_div(ticks, new_scheduling_period_ns); - kbdev->js_reset_ticks_cl = ticks ? ticks : 1; +#undef SET_TIMEOUT - if (kbdev->js_timeouts_updated && kbdev->js_reset_ticks_dumping > 0) - ticks = (u64)kbdev->js_reset_ticks_dumping * old_period; - else - ticks = (u64)kbdev->js_data.gpu_reset_ticks_dumping * - kbdev->js_data.scheduling_period_ns; - do_div(ticks, new_scheduling_period_ns); - kbdev->js_reset_ticks_dumping = ticks ? ticks : 1; + js_data->scheduling_period_ns = new_scheduling_period_ns; - kbdev->js_scheduling_period_ns = new_scheduling_period_ns; - kbdev->js_timeouts_updated = true; + kbase_js_set_timeouts(kbdev); - mutex_unlock(&kbdev->js_data.runpool_mutex); + spin_unlock_irqrestore(&js_data->runpool_irq.lock, flags); + mutex_unlock(&js_data->runpool_mutex); dev_dbg(kbdev->dev, "JS scheduling period: %dms\n", js_scheduling_period); @@ -2412,10 +2238,7 @@ static ssize_t show_js_scheduling_period(struct device *dev, if (!kbdev) return -ENODEV; - if (kbdev->js_timeouts_updated && kbdev->js_scheduling_period_ns > 0) - period = kbdev->js_scheduling_period_ns; - else - period = kbdev->js_data.scheduling_period_ns; + period = kbdev->js_data.scheduling_period_ns; ret = scnprintf(buf, PAGE_SIZE, "%d\n", period / 1000000); @@ -2715,6 +2538,8 @@ static ssize_t kbase_show_gpuinfo(struct device *dev, { .id = GPU_ID_PI_T83X, .name = "Mali-T83x" }, { .id = GPU_ID_PI_T86X, .name = "Mali-T86x" }, { .id = GPU_ID_PI_TFRX, .name = "Mali-T88x" }, + { .id = GPU_ID2_PRODUCT_TMIX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, + .name = "Mali-G71" }, }; const char *product_name = "(Unknown Mali GPU)"; struct kbase_device *kbdev; @@ -3042,26 +2867,42 @@ static DEVICE_ATTR(mem_pool_max_size, S_IRUGO | S_IWUSR, show_mem_pool_max_size, set_mem_pool_max_size); - -static int kbasep_secure_mode_init(struct kbase_device *kbdev) +static int kbasep_protected_mode_enter(struct kbase_device *kbdev) { + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_SET_PROTECTED_MODE, NULL); + return 0; +} -#ifdef SECURE_CALLBACKS - kbdev->secure_ops = SECURE_CALLBACKS; - kbdev->secure_mode_support = false; +static bool kbasep_protected_mode_supported(struct kbase_device *kbdev) +{ + return true; +} - if (kbdev->secure_ops) { - int err; +static struct kbase_protected_ops kbasep_protected_ops = { + .protected_mode_enter = kbasep_protected_mode_enter, + .protected_mode_reset = NULL, + .protected_mode_supported = kbasep_protected_mode_supported, +}; - /* Make sure secure mode is disabled on startup */ - err = kbdev->secure_ops->secure_mode_disable(kbdev); +static void 
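Editorial note: the protected-mode hooks above replace the old secure_ops/secure_mode plumbing. On GPUs that advertise BASE_HW_FEATURE_PROTECTED_MODE the driver enters protected mode itself by writing GPU_COMMAND_SET_PROTECTED_MODE, and kbasep_protected_mode_init() (whose body continues just below) otherwise falls back to a platform-supplied PROTECTED_CALLBACKS table. A hedged sketch of what such a platform table might look like; the SMC interface, function IDs and helper names are purely hypothetical, it assumes a kernel that provides arm_smccc_smc(), and the reset hook is left NULL as in the in-tree default:

#include <linux/arm-smccc.h>	/* hypothetical: assumes an SMC-based TEE interface */
#include <linux/errno.h>
#include <mali_kbase.h>

#define DEMO_SMC_GPU_PROTECTED_ENTER	0x82000100	/* hypothetical function ID */

static int demo_protected_mode_enter(struct kbase_device *kbdev)
{
	struct arm_smccc_res res;

	arm_smccc_smc(DEMO_SMC_GPU_PROTECTED_ENTER, 0, 0, 0, 0, 0, 0, 0, &res);
	return res.a0 ? -EIO : 0;
}

static bool demo_protected_mode_supported(struct kbase_device *kbdev)
{
	return true;
}

static struct kbase_protected_ops demo_protected_ops = {
	.protected_mode_enter = demo_protected_mode_enter,
	.protected_mode_reset = NULL,	/* reset hook omitted in this sketch */
	.protected_mode_supported = demo_protected_mode_supported,
};

A platform integration would then typically point PROTECTED_CALLBACKS at such a table from its mali_kbase_config_platform.h.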
kbasep_protected_mode_init(struct kbase_device *kbdev) +{ + kbdev->protected_ops = NULL; - /* secure_mode_disable() returns -EINVAL if not supported */ - kbdev->secure_mode_support = (err != -EINVAL); + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) { + /* Use native protected ops */ + kbdev->protected_ops = &kbasep_protected_ops; } +#ifdef PROTECTED_CALLBACKS + else + kbdev->protected_ops = PROTECTED_CALLBACKS; #endif - return 0; + if (kbdev->protected_ops) + kbdev->protected_mode_support = + kbdev->protected_ops->protected_mode_supported(kbdev); + else + kbdev->protected_mode_support = false; } #ifdef CONFIG_MALI_NO_MALI @@ -3100,11 +2941,146 @@ static int kbase_common_reg_map(struct kbase_device *kbdev) static void kbase_common_reg_unmap(struct kbase_device * const kbdev) { - iounmap(kbdev->reg); - release_mem_region(kbdev->reg_start, kbdev->reg_size); + if (kbdev->reg) { + iounmap(kbdev->reg); + release_mem_region(kbdev->reg_start, kbdev->reg_size); + kbdev->reg = NULL; + kbdev->reg_start = 0; + kbdev->reg_size = 0; + } } #endif /* CONFIG_MALI_NO_MALI */ +static int registers_map(struct kbase_device * const kbdev) +{ + + /* the first memory resource is the physical address of the GPU + * registers */ + struct platform_device *pdev = to_platform_device(kbdev->dev); + struct resource *reg_res; + int err; + + reg_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!reg_res) { + dev_err(kbdev->dev, "Invalid register resource\n"); + return -ENOENT; + } + + kbdev->reg_start = reg_res->start; + kbdev->reg_size = resource_size(reg_res); + + err = kbase_common_reg_map(kbdev); + if (err) { + dev_err(kbdev->dev, "Failed to map registers\n"); + return err; + } + + return 0; +} + +static void registers_unmap(struct kbase_device *kbdev) +{ + kbase_common_reg_unmap(kbdev); +} + +static int power_control_init(struct platform_device *pdev) +{ + struct kbase_device *kbdev = to_kbase_device(&pdev->dev); + int err = 0; + + if (!kbdev) + return -ENODEV; + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \ + && defined(CONFIG_REGULATOR) + kbdev->regulator = regulator_get_optional(kbdev->dev, "mali"); + if (IS_ERR_OR_NULL(kbdev->regulator)) { + err = PTR_ERR(kbdev->regulator); + kbdev->regulator = NULL; + if (err == -EPROBE_DEFER) { + dev_err(&pdev->dev, "Failed to get regulator\n"); + return err; + } + dev_info(kbdev->dev, + "Continuing without Mali regulator control\n"); + /* Allow probe to continue without regulator */ + } +#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */ + + kbdev->clock = clk_get(kbdev->dev, "clk_mali"); + if (IS_ERR_OR_NULL(kbdev->clock)) { + err = PTR_ERR(kbdev->clock); + kbdev->clock = NULL; + if (err == -EPROBE_DEFER) { + dev_err(&pdev->dev, "Failed to get clock\n"); + goto fail; + } + dev_info(kbdev->dev, "Continuing without Mali clock control\n"); + /* Allow probe to continue without clock. 
*/ + } else { + err = clk_prepare(kbdev->clock); + if (err) { + dev_err(kbdev->dev, + "Failed to prepare and enable clock (%d)\n", + err); + goto fail; + } + } + +#if defined(CONFIG_OF) && defined(CONFIG_PM_OPP) + /* Register the OPPs if they are available in device tree */ +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) + err = dev_pm_opp_of_add_table(kbdev->dev); +#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)) + err = of_init_opp_table(kbdev->dev); +#else + err = 0; +#endif /* LINUX_VERSION_CODE */ + if (err) + dev_dbg(kbdev->dev, "OPP table not found\n"); +#endif /* CONFIG_OF && CONFIG_PM_OPP */ + + return 0; + +fail: + +if (kbdev->clock != NULL) { + clk_put(kbdev->clock); + kbdev->clock = NULL; +} + +#ifdef CONFIG_REGULATOR + if (NULL != kbdev->regulator) { + regulator_put(kbdev->regulator); + kbdev->regulator = NULL; + } +#endif + + return err; +} + +static void power_control_term(struct kbase_device *kbdev) +{ +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) + dev_pm_opp_of_remove_table(kbdev->dev); +#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)) + of_free_opp_table(kbdev->dev); +#endif + + if (kbdev->clock) { + clk_unprepare(kbdev->clock); + clk_put(kbdev->clock); + kbdev->clock = NULL; + } + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \ + && defined(CONFIG_REGULATOR) + if (kbdev->regulator) { + regulator_put(kbdev->regulator); + kbdev->regulator = NULL; + } +#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */ +} #ifdef CONFIG_DEBUG_FS @@ -3145,30 +3121,6 @@ MAKE_QUIRK_ACCESSORS(mmu); #endif /* KBASE_GPU_RESET_EN */ -static int kbasep_secure_mode_seq_show(struct seq_file *m, void *p) -{ - struct kbase_device *kbdev = m->private; - - if (!kbdev->secure_mode_support) - seq_puts(m, "unsupported\n"); - else - seq_printf(m, "%s\n", kbdev->secure_mode ? 
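Editorial note: power_control_init() above only acquires and prepares its resources (the optional "mali" regulator, the "clk_mali" clock and the device-tree OPP table). Unlike the old probe path it no longer calls clk_prepare_enable(), leaving the enable/disable pair to the power-management callbacks. A hedged sketch of how such callbacks would typically use what was acquired here; the helper names are illustrative, and the real Rockchip platform glue is outside this hunk:

#include <linux/clk.h>
#include <mali_kbase.h>

/* Hypothetical power on/off helpers built on what power_control_init()
 * acquired: the clock is already clk_prepare()d at probe time, so the
 * fast clk_enable()/clk_disable() pair is all that is needed here.
 * (A regulator, when present, would be enabled/disabled the same way.) */
static int demo_power_on(struct kbase_device *kbdev)
{
	if (kbdev->clock)
		return clk_enable(kbdev->clock);
	return 0;
}

static void demo_power_off(struct kbase_device *kbdev)
{
	if (kbdev->clock)
		clk_disable(kbdev->clock);
}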
"Y" : "N"); - - return 0; -} - -static int kbasep_secure_mode_debugfs_open(struct inode *in, struct file *file) -{ - return single_open(file, kbasep_secure_mode_seq_show, in->i_private); -} - -static const struct file_operations kbasep_secure_mode_debugfs_fops = { - .open = kbasep_secure_mode_debugfs_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int kbase_device_debugfs_init(struct kbase_device *kbdev) { struct dentry *debugfs_ctx_defaults_directory; @@ -3204,6 +3156,7 @@ static int kbase_device_debugfs_init(struct kbase_device *kbdev) kbase_debug_job_fault_debugfs_init(kbdev); kbasep_gpu_memory_debugfs_init(kbdev); + kbase_as_fault_debugfs_init(kbdev); #if KBASE_GPU_RESET_EN debugfs_create_file("quirks_sc", 0644, kbdev->mali_debugfs_directory, kbdev, @@ -3234,10 +3187,6 @@ static int kbase_device_debugfs_init(struct kbase_device *kbdev) kbasep_trace_timeline_debugfs_init(kbdev); #endif /* CONFIG_MALI_TRACE_TIMELINE */ - debugfs_create_file("secure_mode", S_IRUGO, - kbdev->mali_debugfs_directory, kbdev, - &kbasep_secure_mode_debugfs_fops); - return 0; out: @@ -3316,535 +3265,412 @@ static void kbase_logging_started_cb(void *data) } #endif +static struct attribute *kbase_attrs[] = { +#ifdef CONFIG_MALI_DEBUG + &dev_attr_debug_command.attr, + &dev_attr_js_softstop_always.attr, +#endif +#if !MALI_CUSTOMER_RELEASE + &dev_attr_force_replay.attr, +#endif + &dev_attr_js_timeouts.attr, + &dev_attr_soft_job_timeout.attr, + &dev_attr_gpuinfo.attr, + &dev_attr_dvfs_period.attr, + &dev_attr_pm_poweroff.attr, + &dev_attr_reset_timeout.attr, + &dev_attr_js_scheduling_period.attr, + &dev_attr_power_policy.attr, + &dev_attr_core_availability_policy.attr, + &dev_attr_core_mask.attr, + &dev_attr_mem_pool_size.attr, + &dev_attr_mem_pool_max_size.attr, + NULL +}; + +static const struct attribute_group kbase_attr_group = { + .attrs = kbase_attrs, +}; -static int kbase_common_device_init(struct kbase_device *kbdev) +static int kbase_platform_device_remove(struct platform_device *pdev) { - int err; - struct mali_base_gpu_core_props *core_props; - enum { - inited_mem = (1u << 0), - inited_js = (1u << 1), - inited_pm_runtime_init = (1u << 6), -#ifdef CONFIG_MALI_DEVFREQ - inited_devfreq = (1u << 9), -#endif /* CONFIG_MALI_DEVFREQ */ -#ifdef CONFIG_MALI_MIPE_ENABLED - inited_tlstream = (1u << 10), -#endif /* CONFIG_MALI_MIPE_ENABLED */ - inited_backend_early = (1u << 11), - inited_backend_late = (1u << 12), - inited_device = (1u << 13), - inited_vinstr = (1u << 19), - inited_ipa = (1u << 20), - inited_job_fault = (1u << 21) - }; + struct kbase_device *kbdev = to_kbase_device(&pdev->dev); + const struct list_head *dev_list; - int inited = 0; - u32 gpu_id; -#if defined(CONFIG_MALI_PLATFORM_VEXPRESS) - u32 ve_logic_tile = 0; -#endif /* CONFIG_MALI_PLATFORM_VEXPRESS */ + if (!kbdev) + return -ENODEV; - dev_set_drvdata(kbdev->dev, kbdev); +#ifdef CONFIG_MALI_FPGA_BUS_LOGGER + if (kbdev->inited_subsys & inited_buslogger) { + bl_core_client_unregister(kbdev->buslogger); + kbdev->inited_subsys &= ~inited_buslogger; + } +#endif - err = kbase_backend_early_init(kbdev); - if (err) - goto out_partial; - inited |= inited_backend_early; + if (kbdev->inited_subsys & inited_sysfs_group) { + sysfs_remove_group(&kbdev->dev->kobj, &kbase_attr_group); + kbdev->inited_subsys &= ~inited_sysfs_group; + } - scnprintf(kbdev->devname, DEVNAME_SIZE, "%s%d", kbase_drv_name, - kbase_dev_nr); + if (kbdev->inited_subsys & inited_dev_list) { + dev_list = kbase_dev_list_get(); + list_del(&kbdev->entry); 
+ kbase_dev_list_put(dev_list); + kbdev->inited_subsys &= ~inited_dev_list; + } - kbase_disjoint_init(kbdev); + if (kbdev->inited_subsys & inited_misc_register) { + misc_deregister(&kbdev->mdev); + kbdev->inited_subsys &= ~inited_misc_register; + } - /* obtain min/max configured gpu frequencies */ - core_props = &(kbdev->gpu_props.props.core_props); + if (kbdev->inited_subsys & inited_get_device) { + put_device(kbdev->dev); + kbdev->inited_subsys &= ~inited_get_device; + } - /* For versatile express platforms, min and max values of GPU frequency - * depend on the type of the logic tile; these values may not be known - * at the build time so in some cases a platform config file with wrong - * GPU freguency values may be included; to ensure the correct value of - * min and max GPU frequency is obtained, the type of the logic tile is - * read from the corresponding register on the platform and frequency - * values assigned accordingly.*/ -#if defined(CONFIG_MALI_PLATFORM_VEXPRESS) - ve_logic_tile = kbase_get_platform_logic_tile_type(); - - switch (ve_logic_tile) { - case 0x217: - /* Virtex 6, HBI0217 */ - core_props->gpu_freq_khz_min = VE_VIRTEX6_GPU_FREQ_MIN; - core_props->gpu_freq_khz_max = VE_VIRTEX6_GPU_FREQ_MAX; - break; - case 0x247: - /* Virtex 7, HBI0247 */ - core_props->gpu_freq_khz_min = VE_VIRTEX7_GPU_FREQ_MIN; - core_props->gpu_freq_khz_max = VE_VIRTEX7_GPU_FREQ_MAX; - break; - default: - /* all other logic tiles, i.e., Virtex 5 HBI0192 - * or unsuccessful reading from the platform - - * fall back to the config_platform default */ - core_props->gpu_freq_khz_min = GPU_FREQ_KHZ_MIN; - core_props->gpu_freq_khz_max = GPU_FREQ_KHZ_MAX; - break; + if (kbdev->inited_subsys & inited_debugfs) { + kbase_device_debugfs_term(kbdev); + kbdev->inited_subsys &= ~inited_debugfs; } -#else - core_props->gpu_freq_khz_min = GPU_FREQ_KHZ_MIN; - core_props->gpu_freq_khz_max = GPU_FREQ_KHZ_MAX; -#endif /* CONFIG_MALI_PLATFORM_VEXPRESS */ - kbdev->gpu_props.irq_throttle_time_us = DEFAULT_IRQ_THROTTLE_TIME_US; + if (kbdev->inited_subsys & inited_job_fault) { + kbase_debug_job_fault_dev_term(kbdev); + kbdev->inited_subsys &= ~inited_job_fault; + } - err = kbase_device_init(kbdev); - if (err) { - dev_err(kbdev->dev, "Can't initialize device (%d)\n", err); - goto out_partial; +#ifndef CONFIG_MALI_PRFCNT_SET_SECONDARY + if (kbdev->inited_subsys & inited_ipa) { + kbase_ipa_term(kbdev->ipa_ctx); + kbdev->inited_subsys &= ~inited_ipa; } +#endif /* CONFIG_MALI_PRFCNT_SET_SECONDARY */ - inited |= inited_device; + if (kbdev->inited_subsys & inited_vinstr) { + kbase_vinstr_term(kbdev->vinstr_ctx); + kbdev->inited_subsys &= ~inited_vinstr; + } - if (kbdev->pm.callback_power_runtime_init) { - err = kbdev->pm.callback_power_runtime_init(kbdev); - if (err) - goto out_partial; +#ifdef CONFIG_MALI_DEVFREQ + if (kbdev->inited_subsys & inited_devfreq) { + kbase_devfreq_term(kbdev); + kbdev->inited_subsys &= ~inited_devfreq; + } +#endif - inited |= inited_pm_runtime_init; + if (kbdev->inited_subsys & inited_backend_late) { + kbase_backend_late_term(kbdev); + kbdev->inited_subsys &= ~inited_backend_late; } - err = kbase_mem_init(kbdev); - if (err) - goto out_partial; + if (kbdev->inited_subsys & inited_tlstream) { + kbase_tlstream_term(); + kbdev->inited_subsys &= ~inited_tlstream; + } - inited |= inited_mem; + /* Bring job and mem sys to a halt before we continue termination */ - gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; - gpu_id &= GPU_ID_VERSION_PRODUCT_ID; - gpu_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT; + if 
(kbdev->inited_subsys & inited_js) + kbasep_js_devdata_halt(kbdev); - kbase_device_coherency_init(kbdev, gpu_id); + if (kbdev->inited_subsys & inited_mem) + kbase_mem_halt(kbdev); - err = kbasep_secure_mode_init(kbdev); - if (err) - goto out_partial; + if (kbdev->inited_subsys & inited_js) { + kbasep_js_devdata_term(kbdev); + kbdev->inited_subsys &= ~inited_js; + } - err = kbasep_js_devdata_init(kbdev); - if (err) - goto out_partial; + if (kbdev->inited_subsys & inited_mem) { + kbase_mem_term(kbdev); + kbdev->inited_subsys &= ~inited_mem; + } - inited |= inited_js; + if (kbdev->inited_subsys & inited_pm_runtime_init) { + kbdev->pm.callback_power_runtime_term(kbdev); + kbdev->inited_subsys &= ~inited_pm_runtime_init; + } -#ifdef CONFIG_MALI_MIPE_ENABLED - err = kbase_tlstream_init(); - if (err) { - dev_err(kbdev->dev, "Couldn't initialize timeline stream\n"); - goto out_partial; + if (kbdev->inited_subsys & inited_device) { + kbase_device_term(kbdev); + kbdev->inited_subsys &= ~inited_device; } - inited |= inited_tlstream; -#endif /* CONFIG_MALI_MIPE_ENABLED */ - err = kbase_backend_late_init(kbdev); - if (err) - goto out_partial; - inited |= inited_backend_late; - -#ifdef CONFIG_MALI_DEVFREQ - err = kbase_devfreq_init(kbdev); - if (err) { - dev_err(kbdev->dev, "Couldn't initialize devfreq\n"); - goto out_partial; + if (kbdev->inited_subsys & inited_backend_early) { + kbase_backend_early_term(kbdev); + kbdev->inited_subsys &= ~inited_backend_early; } - inited |= inited_devfreq; -#endif /* CONFIG_MALI_DEVFREQ */ - kbdev->vinstr_ctx = kbase_vinstr_init(kbdev); - if (!kbdev->vinstr_ctx) { - dev_err(kbdev->dev, "Can't initialize virtual instrumentation core\n"); - goto out_partial; + if (kbdev->inited_subsys & inited_power_control) { + power_control_term(kbdev); + kbdev->inited_subsys &= ~inited_power_control; } - inited |= inited_vinstr; - - kbdev->ipa_ctx = kbase_ipa_init(kbdev); - if (!kbdev->ipa_ctx) { - dev_err(kbdev->dev, "Can't initialize IPA\n"); - goto out_partial; + if (kbdev->inited_subsys & inited_registers_map) { + registers_unmap(kbdev); + kbdev->inited_subsys &= ~inited_registers_map; } - inited |= inited_ipa; - - err = kbase_debug_job_fault_dev_init(kbdev); - if (err) - goto out_partial; - - inited |= inited_job_fault; - - err = kbase_device_debugfs_init(kbdev); - if (err) - goto out_partial; - - /* intialise the kctx list */ - mutex_init(&kbdev->kctx_list_lock); - INIT_LIST_HEAD(&kbdev->kctx_list); - - kbdev->mdev.minor = MISC_DYNAMIC_MINOR; - kbdev->mdev.name = kbdev->devname; - kbdev->mdev.fops = &kbase_fops; - kbdev->mdev.parent = get_device(kbdev->dev); - - err = misc_register(&kbdev->mdev); - if (err) { - dev_err(kbdev->dev, "Couldn't register misc dev %s\n", kbdev->devname); - goto out_misc; - } - - { - const struct list_head *dev_list = kbase_dev_list_get(); - - list_add(&kbdev->entry, &kbase_dev_list); - kbase_dev_list_put(dev_list); +#ifdef CONFIG_MALI_NO_MALI + if (kbdev->inited_subsys & inited_gpu_device) { + gpu_device_destroy(kbdev); + kbdev->inited_subsys &= ~inited_gpu_device; } +#endif /* CONFIG_MALI_NO_MALI */ - dev_info(kbdev->dev, "Probed as %s\n", dev_name(kbdev->mdev.this_device)); + if (kbdev->inited_subsys != 0) + dev_err(kbdev->dev, "Missing sub system termination\n"); - kbase_dev_nr++; + kbase_device_free(kbdev); return 0; - -out_misc: - put_device(kbdev->dev); - kbase_device_debugfs_term(kbdev); -out_partial: - if (inited & inited_job_fault) - kbase_debug_job_fault_dev_term(kbdev); - if (inited & inited_ipa) - kbase_ipa_term(kbdev->ipa_ctx); - if 
(inited & inited_vinstr) - kbase_vinstr_term(kbdev->vinstr_ctx); -#ifdef CONFIG_MALI_DEVFREQ - if (inited & inited_devfreq) - kbase_devfreq_term(kbdev); -#endif /* CONFIG_MALI_DEVFREQ */ - if (inited & inited_backend_late) - kbase_backend_late_term(kbdev); -#ifdef CONFIG_MALI_MIPE_ENABLED - if (inited & inited_tlstream) - kbase_tlstream_term(); -#endif /* CONFIG_MALI_MIPE_ENABLED */ - - if (inited & inited_js) - kbasep_js_devdata_halt(kbdev); - - if (inited & inited_mem) - kbase_mem_halt(kbdev); - - if (inited & inited_js) - kbasep_js_devdata_term(kbdev); - - if (inited & inited_mem) - kbase_mem_term(kbdev); - - if (inited & inited_pm_runtime_init) { - if (kbdev->pm.callback_power_runtime_term) - kbdev->pm.callback_power_runtime_term(kbdev); - } - - if (inited & inited_device) - kbase_device_term(kbdev); - - if (inited & inited_backend_early) - kbase_backend_early_term(kbdev); - - return err; } +extern void kbase_platform_rk_shutdown(struct kbase_device *kbdev); +static void kbase_platform_device_shutdown(struct platform_device *pdev) +{ + struct kbase_device *kbdev = to_kbase_device(&pdev->dev); -static struct attribute *kbase_attrs[] = { -#ifdef CONFIG_MALI_DEBUG - &dev_attr_debug_command.attr, - &dev_attr_js_softstop_always.attr, -#endif -#if !MALI_CUSTOMER_RELEASE - &dev_attr_force_replay.attr, -#endif - &dev_attr_js_timeouts.attr, - &dev_attr_gpuinfo.attr, - &dev_attr_dvfs_period.attr, - &dev_attr_pm_poweroff.attr, - &dev_attr_reset_timeout.attr, - &dev_attr_js_scheduling_period.attr, - &dev_attr_power_policy.attr, - &dev_attr_core_availability_policy.attr, - &dev_attr_core_mask.attr, - &dev_attr_mem_pool_size.attr, - &dev_attr_mem_pool_max_size.attr, - NULL -}; - -static const struct attribute_group kbase_attr_group = { - .attrs = kbase_attrs, -}; - -static int kbase_common_device_remove(struct kbase_device *kbdev); + kbase_platform_rk_shutdown(kbdev); +} static int kbase_platform_device_probe(struct platform_device *pdev) { struct kbase_device *kbdev; - struct resource *reg_res; + struct mali_base_gpu_core_props *core_props; + u32 gpu_id; + const struct list_head *dev_list; int err = 0; - int i; #ifdef CONFIG_OF err = kbase_platform_early_init(); if (err) { dev_err(&pdev->dev, "Early platform initialization failed\n"); + kbase_platform_device_remove(pdev); return err; } #endif kbdev = kbase_device_alloc(); if (!kbdev) { - dev_err(&pdev->dev, "Can't allocate device\n"); - err = -ENOMEM; - goto out; + dev_err(&pdev->dev, "Allocate device failed\n"); + kbase_platform_device_remove(pdev); + return -ENOMEM; } + + kbdev->dev = &pdev->dev; + dev_set_drvdata(kbdev->dev, kbdev); + #ifdef CONFIG_MALI_NO_MALI err = gpu_device_create(kbdev); if (err) { - dev_err(&pdev->dev, "Can't initialize dummy model\n"); - goto out_midg; + dev_err(&pdev->dev, "Dummy model initialization failed\n"); + kbase_platform_device_remove(pdev); + return err; } + kbdev->inited_subsys |= inited_gpu_device; #endif /* CONFIG_MALI_NO_MALI */ - kbdev->dev = &pdev->dev; - /* 3 IRQ resources */ - for (i = 0; i < 3; i++) { - struct resource *irq_res; - int irqtag; + err = assign_irqs(pdev); + if (err) { + dev_err(&pdev->dev, "IRQ search failed\n"); + kbase_platform_device_remove(pdev); + return err; + } - irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, i); - if (!irq_res) { - dev_err(kbdev->dev, "No IRQ resource at index %d\n", i); - err = -ENOENT; - goto out_platform_irq; - } + err = registers_map(kbdev); + if (err) { + dev_err(&pdev->dev, "Register map failed\n"); + kbase_platform_device_remove(pdev); + return err; + 
} + kbdev->inited_subsys |= inited_registers_map; -#ifdef CONFIG_OF - if (!strcmp(irq_res->name, "JOB")) { - irqtag = JOB_IRQ_TAG; - } else if (!strcmp(irq_res->name, "MMU")) { - irqtag = MMU_IRQ_TAG; - } else if (!strcmp(irq_res->name, "GPU")) { - irqtag = GPU_IRQ_TAG; - } else { - dev_err(&pdev->dev, "Invalid irq res name: '%s'\n", - irq_res->name); - err = -EINVAL; - goto out_irq_name; - } -#else - irqtag = i; -#endif /* CONFIG_OF */ - kbdev->irqs[irqtag].irq = irq_res->start; - kbdev->irqs[irqtag].flags = (irq_res->flags & IRQF_TRIGGER_MASK); + err = power_control_init(pdev); + if (err) { + dev_err(&pdev->dev, "Power control initialization failed\n"); + kbase_platform_device_remove(pdev); + return err; } - /* the first memory resource is the physical address of the GPU - * registers */ - reg_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!reg_res) { - dev_err(kbdev->dev, "Invalid register resource\n"); - err = -ENOENT; - goto out_platform_mem; - } + kbdev->inited_subsys |= inited_power_control; - kbdev->reg_start = reg_res->start; - kbdev->reg_size = resource_size(reg_res); + err = kbase_backend_early_init(kbdev); + if (err) { + dev_err(kbdev->dev, "Early backend initialization failed\n"); + kbase_platform_device_remove(pdev); + return err; + } + kbdev->inited_subsys |= inited_backend_early; - err = kbase_common_reg_map(kbdev); - if (err) - goto out_reg_map; + scnprintf(kbdev->devname, DEVNAME_SIZE, "%s%d", kbase_drv_name, + kbase_dev_nr); -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \ - && defined(CONFIG_REGULATOR) - kbdev->regulator = regulator_get_optional(kbdev->dev, "mali"); - if (IS_ERR_OR_NULL(kbdev->regulator)) { - err = PTR_ERR(kbdev->regulator); + kbase_disjoint_init(kbdev); - kbdev->regulator = NULL; - if (err == -EPROBE_DEFER) - goto out_regulator; - dev_info(kbdev->dev, "Continuing without Mali regulator control\n"); - /* Allow probe to continue without regulator */ - } -#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */ + /* obtain min/max configured gpu frequencies */ + core_props = &(kbdev->gpu_props.props.core_props); + core_props->gpu_freq_khz_min = GPU_FREQ_KHZ_MIN; + core_props->gpu_freq_khz_max = GPU_FREQ_KHZ_MAX; -#ifdef CONFIG_MALI_PLATFORM_DEVICETREE - pm_runtime_enable(kbdev->dev); -#endif + kbdev->gpu_props.irq_throttle_time_us = DEFAULT_IRQ_THROTTLE_TIME_US; - kbdev->clock = clk_get(kbdev->dev, "clk_mali"); - if (IS_ERR_OR_NULL(kbdev->clock)) { - err = PTR_ERR(kbdev->clock); + err = kbase_device_init(kbdev); + if (err) { + dev_err(kbdev->dev, "Device initialization failed (%d)\n", err); + kbase_platform_device_remove(pdev); + return err; + } + kbdev->inited_subsys |= inited_device; - kbdev->clock = NULL; - if (err == -EPROBE_DEFER) - goto out_clock_prepare; - dev_info(kbdev->dev, "Continuing without Mali clock control\n"); - /* Allow probe to continue without clock. 
*/ - } else { - err = clk_prepare_enable(kbdev->clock); + if (kbdev->pm.callback_power_runtime_init) { + err = kbdev->pm.callback_power_runtime_init(kbdev); if (err) { dev_err(kbdev->dev, - "Failed to prepare and enable clock (%d)\n", err); - goto out_clock_prepare; + "Runtime PM initialization failed\n"); + kbase_platform_device_remove(pdev); + return err; } + kbdev->inited_subsys |= inited_pm_runtime_init; } -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)) && defined(CONFIG_OF) \ - && defined(CONFIG_PM_OPP) - /* Register the OPPs if they are available in device tree */ - if (dev_pm_opp_of_add_table(kbdev->dev) < 0) - dev_dbg(kbdev->dev, "OPP table not found\n"); -#endif + err = kbase_mem_init(kbdev); + if (err) { + dev_err(kbdev->dev, "Memory subsystem initialization failed\n"); + kbase_platform_device_remove(pdev); + return err; + } + kbdev->inited_subsys |= inited_mem; + gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; + gpu_id &= GPU_ID_VERSION_PRODUCT_ID; + gpu_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT; - err = kbase_common_device_init(kbdev); + kbase_device_coherency_init(kbdev, gpu_id); + + kbasep_protected_mode_init(kbdev); + + err = kbasep_js_devdata_init(kbdev); if (err) { - dev_err(kbdev->dev, "Failed kbase_common_device_init\n"); - goto out_common_init; + dev_err(kbdev->dev, "Job JS devdata initialization failed\n"); + kbase_platform_device_remove(pdev); + return err; } + kbdev->inited_subsys |= inited_js; - err = sysfs_create_group(&kbdev->dev->kobj, &kbase_attr_group); + err = kbase_tlstream_init(); if (err) { - dev_err(&pdev->dev, "Failed to create sysfs entries\n"); - goto out_sysfs; + dev_err(kbdev->dev, "Timeline stream initialization failed\n"); + kbase_platform_device_remove(pdev); + return err; } + kbdev->inited_subsys |= inited_tlstream; -#ifdef CONFIG_MALI_FPGA_BUS_LOGGER - err = bl_core_client_register(kbdev->devname, - kbase_logging_started_cb, - kbdev, &kbdev->buslogger, - THIS_MODULE, NULL); + err = kbase_backend_late_init(kbdev); if (err) { - dev_err(kbdev->dev, "Couldn't register bus log client\n"); - goto out_bl_core_register; + dev_err(kbdev->dev, "Late backend initialization failed\n"); + kbase_platform_device_remove(pdev); + return err; } + kbdev->inited_subsys |= inited_backend_late; - bl_core_set_threshold(kbdev->buslogger, 1024*1024*1024); -#endif - return 0; - -#ifdef CONFIG_MALI_FPGA_BUS_LOGGER -out_bl_core_register: - sysfs_remove_group(&kbdev->dev->kobj, &kbase_attr_group); -#endif - -out_sysfs: - kbase_common_device_remove(kbdev); -out_common_init: -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)) - dev_pm_opp_of_remove_table(kbdev->dev); -#endif - clk_disable_unprepare(kbdev->clock); -out_clock_prepare: - clk_put(kbdev->clock); -#ifdef CONFIG_MALI_PLATFORM_DEVICETREE - pm_runtime_disable(kbdev->dev); -#endif -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \ - && defined(CONFIG_REGULATOR) -out_regulator: - regulator_put(kbdev->regulator); -#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */ - kbase_common_reg_unmap(kbdev); -out_reg_map: -out_platform_mem: -#ifdef CONFIG_OF -out_irq_name: -#endif -out_platform_irq: -#ifdef CONFIG_MALI_NO_MALI - gpu_device_destroy(kbdev); -out_midg: -#endif /* CONFIG_MALI_NO_MALI */ - kbase_device_free(kbdev); -out: - return err; -} +#ifdef CONFIG_MALI_DEVFREQ + err = kbase_devfreq_init(kbdev); + if (err) { + dev_err(kbdev->dev, "Fevfreq initialization failed\n"); + kbase_platform_device_remove(pdev); + return err; + } + kbdev->inited_subsys |= inited_devfreq; +#endif /* 
CONFIG_MALI_DEVFREQ */ -static int kbase_common_device_remove(struct kbase_device *kbdev) -{ - kbase_debug_job_fault_dev_term(kbdev); - kbase_ipa_term(kbdev->ipa_ctx); - kbase_vinstr_term(kbdev->vinstr_ctx); - sysfs_remove_group(&kbdev->dev->kobj, &kbase_attr_group); + kbdev->vinstr_ctx = kbase_vinstr_init(kbdev); + if (!kbdev->vinstr_ctx) { + dev_err(kbdev->dev, + "Virtual instrumentation initialization failed\n"); + kbase_platform_device_remove(pdev); + return -EINVAL; + } + kbdev->inited_subsys |= inited_vinstr; -#ifdef CONFIG_MALI_FPGA_BUS_LOGGER - if (kbdev->buslogger) - bl_core_client_unregister(kbdev->buslogger); -#endif +#ifndef CONFIG_MALI_PRFCNT_SET_SECONDARY + kbdev->ipa_ctx = kbase_ipa_init(kbdev); + if (!kbdev->ipa_ctx) { + dev_err(kbdev->dev, "IPA initialization failed\n"); + kbase_platform_device_remove(pdev); + return -EINVAL; + } -#ifdef CONFIG_DEBUG_FS - debugfs_remove_recursive(kbdev->mali_debugfs_directory); -#endif -#ifdef CONFIG_MALI_DEVFREQ - kbase_devfreq_term(kbdev); -#endif + kbdev->inited_subsys |= inited_ipa; +#endif /* CONFIG_MALI_PRFCNT_SET_SECONDARY */ - kbase_backend_late_term(kbdev); + err = kbase_debug_job_fault_dev_init(kbdev); + if (err) { + dev_err(kbdev->dev, "Job fault debug initialization failed\n"); + kbase_platform_device_remove(pdev); + return err; + } + kbdev->inited_subsys |= inited_job_fault; - if (kbdev->pm.callback_power_runtime_term) - kbdev->pm.callback_power_runtime_term(kbdev); -#ifdef CONFIG_MALI_PLATFORM_DEVICETREE - pm_runtime_disable(kbdev->dev); -#endif + err = kbase_device_debugfs_init(kbdev); + if (err) { + dev_err(kbdev->dev, "DebugFS initialization failed"); + kbase_platform_device_remove(pdev); + return err; + } + kbdev->inited_subsys |= inited_debugfs; -#ifdef CONFIG_MALI_MIPE_ENABLED - kbase_tlstream_term(); -#endif /* CONFIG_MALI_MIPE_ENABLED */ + /* initialize the kctx list */ + mutex_init(&kbdev->kctx_list_lock); + INIT_LIST_HEAD(&kbdev->kctx_list); - kbasep_js_devdata_halt(kbdev); - kbase_mem_halt(kbdev); + kbdev->mdev.minor = MISC_DYNAMIC_MINOR; + kbdev->mdev.name = kbdev->devname; + kbdev->mdev.fops = &kbase_fops; + kbdev->mdev.parent = get_device(kbdev->dev); + kbdev->inited_subsys |= inited_get_device; - kbasep_js_devdata_term(kbdev); - kbase_mem_term(kbdev); - kbase_backend_early_term(kbdev); + err = misc_register(&kbdev->mdev); + if (err) { + dev_err(kbdev->dev, "Misc device registration failed for %s\n", + kbdev->devname); + kbase_platform_device_remove(pdev); + return err; + } + kbdev->inited_subsys |= inited_misc_register; - { - const struct list_head *dev_list = kbase_dev_list_get(); + dev_list = kbase_dev_list_get(); + list_add(&kbdev->entry, &kbase_dev_list); + kbase_dev_list_put(dev_list); + kbdev->inited_subsys |= inited_dev_list; - list_del(&kbdev->entry); - kbase_dev_list_put(dev_list); - } - misc_deregister(&kbdev->mdev); - put_device(kbdev->dev); - kbase_common_reg_unmap(kbdev); - kbase_device_term(kbdev); - if (kbdev->clock) { - clk_disable_unprepare(kbdev->clock); - clk_put(kbdev->clock); - kbdev->clock = NULL; + err = sysfs_create_group(&kbdev->dev->kobj, &kbase_attr_group); + if (err) { + dev_err(&pdev->dev, "SysFS group creation failed\n"); + kbase_platform_device_remove(pdev); + return err; } -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \ - && defined(CONFIG_REGULATOR) - regulator_put(kbdev->regulator); -#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */ -#ifdef CONFIG_MALI_NO_MALI - gpu_device_destroy(kbdev); -#endif /* CONFIG_MALI_NO_MALI */ - kbase_device_free(kbdev); + 
kbdev->inited_subsys |= inited_sysfs_group; - return 0; -} +#ifdef CONFIG_MALI_FPGA_BUS_LOGGER + err = bl_core_client_register(kbdev->devname, + kbase_logging_started_cb, + kbdev, &kbdev->buslogger, + THIS_MODULE, NULL); + if (err == 0) { + kbdev->inited_subsys |= inited_buslogger; + bl_core_set_threshold(kbdev->buslogger, 1024*1024*1024); + } else { + dev_warn(kbdev->dev, "Bus log client registration failed\n"); + err = 0; + } +#endif -static int kbase_platform_device_remove(struct platform_device *pdev) -{ - struct kbase_device *kbdev = to_kbase_device(&pdev->dev); + dev_info(kbdev->dev, + "Probed as %s\n", dev_name(kbdev->mdev.this_device)); - if (!kbdev) - return -ENODEV; + kbase_dev_nr++; - return kbase_common_device_remove(kbdev); + return err; } /** Suspend callback from the OS. @@ -4008,6 +3834,7 @@ MODULE_DEVICE_TABLE(of, kbase_dt_ids); static struct platform_driver kbase_platform_driver = { .probe = kbase_platform_device_probe, .remove = kbase_platform_device_remove, + .shutdown = kbase_platform_device_shutdown, .driver = { .name = kbase_drv_name, .owner = THIS_MODULE, @@ -4038,19 +3865,15 @@ static int __init kbase_driver_init(void) if (ret) return ret; -#ifndef CONFIG_MACH_MANTA #ifdef CONFIG_MALI_PLATFORM_FAKE ret = kbase_platform_fake_register(); if (ret) return ret; -#endif #endif ret = platform_driver_register(&kbase_platform_driver); -#ifndef CONFIG_MACH_MANTA #ifdef CONFIG_MALI_PLATFORM_FAKE if (ret) kbase_platform_fake_unregister(); -#endif #endif return ret; } @@ -4058,11 +3881,9 @@ static int __init kbase_driver_init(void) static void __exit kbase_driver_exit(void) { platform_driver_unregister(&kbase_platform_driver); -#ifndef CONFIG_MACH_MANTA #ifdef CONFIG_MALI_PLATFORM_FAKE kbase_platform_fake_unregister(); #endif -#endif } module_init(kbase_driver_init); @@ -4091,7 +3912,6 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(mali_page_fault_insert_pages); EXPORT_TRACEPOINT_SYMBOL_GPL(mali_mmu_as_in_use); EXPORT_TRACEPOINT_SYMBOL_GPL(mali_mmu_as_released); EXPORT_TRACEPOINT_SYMBOL_GPL(mali_total_alloc_pages_change); -EXPORT_TRACEPOINT_SYMBOL_GPL(mali_sw_counter); void kbase_trace_mali_pm_status(u32 event, u64 value) { diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_debug_job_fault.c b/drivers/gpu/arm/midgard_for_linux/mali_kbase_debug_job_fault.c index f3e426f9539b..83c5c37942bd 100644 --- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_debug_job_fault.c +++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_debug_job_fault.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -15,51 +15,71 @@ -#include "mali_kbase_debug_job_fault.h" +#include +#include #ifdef CONFIG_DEBUG_FS -static bool kbase_is_job_fault_event_pending(struct list_head *event_list) +static bool kbase_is_job_fault_event_pending(struct kbase_device *kbdev) { - bool ret; + struct list_head *event_list = &kbdev->job_fault_event_list; + unsigned long flags; + bool ret; - ret = (!list_empty(event_list)); + spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); + ret = !list_empty(event_list); + spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); return ret; } -static bool kbase_ctx_has_no_event_pending( - struct kbase_context *kctx, struct list_head *event_list) +static bool kbase_ctx_has_no_event_pending(struct kbase_context *kctx) { + struct kbase_device *kbdev = kctx->kbdev; + struct list_head *event_list = &kctx->kbdev->job_fault_event_list; struct base_job_fault_event *event; + unsigned long flags; - if (list_empty(event_list)) + spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); + if (list_empty(event_list)) { + spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); return true; + } list_for_each_entry(event, event_list, head) { - if (event->katom->kctx == kctx) + if (event->katom->kctx == kctx) { + spin_unlock_irqrestore(&kbdev->job_fault_event_lock, + flags); return false; + } } - return false; + spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); + return true; } /* wait until the fault happen and copy the event */ static int kbase_job_fault_event_wait(struct kbase_device *kbdev, - struct list_head *event_list, struct base_job_fault_event *event) { + struct list_head *event_list = &kbdev->job_fault_event_list; struct base_job_fault_event *event_in; + unsigned long flags; + spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); if (list_empty(event_list)) { + spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); if (wait_event_interruptible(kbdev->job_fault_wq, - kbase_is_job_fault_event_pending(event_list))) + kbase_is_job_fault_event_pending(kbdev))) return -ERESTARTSYS; + spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); } event_in = list_entry(event_list->next, struct base_job_fault_event, head); - event->event_code = event_in->event_code; event->katom = event_in->katom; + + spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); + return 0; } @@ -102,12 +122,16 @@ static void kbase_job_fault_resume_event_cleanup(struct kbase_context *kctx) static void kbase_job_fault_event_cleanup(struct kbase_device *kbdev) { struct list_head *event_list = &kbdev->job_fault_event_list; + unsigned long flags; + spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); while (!list_empty(event_list)) { - kbase_job_fault_event_dequeue(kbdev, event_list); + spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); wake_up(&kbdev->job_fault_resume_wq); + spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); } + spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); } static void kbase_job_fault_resume_worker(struct work_struct *data) @@ -129,8 +153,7 @@ static void kbase_job_fault_resume_worker(struct work_struct *data) * atoms belong to the same context. 
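Editorial note: the debug_job_fault changes above put every access to kbdev->job_fault_event_list behind the new job_fault_event_lock spinlock. In particular, the emptiness check used as the wait_event_interruptible() predicate now takes the lock itself, so the waiter must drop the lock before sleeping and re-take it afterwards. A minimal, self-contained sketch of the same single-consumer producer/consumer shape, with hypothetical names:

#include <linux/errno.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/wait.h>

struct demo_queue {
	struct list_head events;
	spinlock_t lock;
	wait_queue_head_t wq;
};

struct demo_event {
	struct list_head head;
	int code;
};

/* Predicate helper: safe to call from wait_event_interruptible() because
 * it takes the lock itself, like kbase_is_job_fault_event_pending(). */
static bool demo_event_pending(struct demo_queue *q)
{
	unsigned long flags;
	bool ret;

	spin_lock_irqsave(&q->lock, flags);
	ret = !list_empty(&q->events);
	spin_unlock_irqrestore(&q->lock, flags);
	return ret;
}

/* Producer side, e.g. a fault report coming from the job done path. */
static void demo_event_post(struct demo_queue *q, struct demo_event *ev)
{
	unsigned long flags;

	spin_lock_irqsave(&q->lock, flags);
	list_add_tail(&ev->head, &q->events);
	spin_unlock_irqrestore(&q->lock, flags);
	wake_up_interruptible(&q->wq);
}

/* Consumer side: sleep with the lock dropped, then re-take it to peek.
 * A single consumer is assumed (as in the driver), so the list cannot
 * drain between the wakeup and re-taking the lock. */
static int demo_event_wait(struct demo_queue *q, struct demo_event *out)
{
	struct demo_event *ev;
	unsigned long flags;

	spin_lock_irqsave(&q->lock, flags);
	if (list_empty(&q->events)) {
		spin_unlock_irqrestore(&q->lock, flags);
		if (wait_event_interruptible(q->wq, demo_event_pending(q)))
			return -ERESTARTSYS;
		spin_lock_irqsave(&q->lock, flags);
	}
	ev = list_first_entry(&q->events, struct demo_event, head);
	*out = *ev;
	spin_unlock_irqrestore(&q->lock, flags);
	return 0;
}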
*/ wait_event(kctx->kbdev->job_fault_resume_wq, - kbase_ctx_has_no_event_pending(kctx, - &kctx->kbdev->job_fault_event_list)); + kbase_ctx_has_no_event_pending(kctx)); atomic_set(&kctx->job_fault_count, 0); kbase_jd_done_worker(&katom->work); @@ -166,9 +189,12 @@ static void kbase_job_fault_event_post(struct kbase_device *kbdev, struct kbase_jd_atom *katom, u32 completion_code) { struct base_job_fault_event *event; + unsigned long flags; + spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); event = kbase_job_fault_event_queue(&kbdev->job_fault_event_list, katom, completion_code); + spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); wake_up_interruptible(&kbdev->job_fault_wq); @@ -293,9 +319,10 @@ static void *debug_job_fault_start(struct seq_file *m, loff_t *pos) */ if (*pos == 0) { event = kmalloc(sizeof(*event), GFP_KERNEL); + if (!event) + return NULL; event->reg_offset = 0; - if (kbase_job_fault_event_wait(kbdev, - &kbdev->job_fault_event_list, event)) { + if (kbase_job_fault_event_wait(kbdev, event)) { kfree(event); return NULL; } @@ -329,11 +356,15 @@ static void debug_job_fault_stop(struct seq_file *m, void *v) dev_info(kbdev->dev, "debug job fault seq stop stage 1"); } else { + unsigned long flags; + + spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); if (!list_empty(&kbdev->job_fault_event_list)) { kbase_job_fault_event_dequeue(kbdev, &kbdev->job_fault_event_list); wake_up(&kbdev->job_fault_resume_wq); } + spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); dev_info(kbdev->dev, "debug job fault seq stop stage 2"); } @@ -404,6 +435,7 @@ int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev) init_waitqueue_head(&(kbdev->job_fault_wq)); init_waitqueue_head(&(kbdev->job_fault_resume_wq)); + spin_lock_init(&kbdev->job_fault_event_lock); kbdev->job_fault_resume_workq = alloc_workqueue( "kbase_job_fault_resume_work_queue", WQ_MEM_RECLAIM, 1); diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_debug_job_fault.h b/drivers/gpu/arm/midgard_for_linux/mali_kbase_debug_job_fault.h index 0930f905e4ef..a2bf8983c37c 100644 --- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_debug_job_fault.h +++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_debug_job_fault.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -18,7 +18,6 @@ #ifndef _KBASE_DEBUG_JOB_FAULT_H #define _KBASE_DEBUG_JOB_FAULT_H -#include #include #include diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_debug_mem_view.c b/drivers/gpu/arm/midgard_for_linux/mali_kbase_debug_mem_view.c index 42d1d832c0a3..a98355e33d07 100644 --- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_debug_mem_view.c +++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_debug_mem_view.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2013-2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -125,6 +125,8 @@ static int debug_mem_show(struct seq_file *m, void *v) page = pfn_to_page(PFN_DOWN(map->alloc->pages[data->offset])); mapping = vmap(&page, 1, VM_MAP, prot); + if (!mapping) + goto out; for (i = 0; i < PAGE_SIZE; i += 4*sizeof(*mapping)) { seq_printf(m, "%016llx:", i + ((map->start_pfn + @@ -160,11 +162,15 @@ static int debug_mem_open(struct inode *i, struct file *file) int ret; ret = seq_open(file, &ops); - if (ret) return ret; mem_data = kmalloc(sizeof(*mem_data), GFP_KERNEL); + if (!mem_data) { + ret = -ENOMEM; + goto out; + } + mem_data->kctx = kctx; INIT_LIST_HEAD(&mem_data->mapping_list); @@ -184,6 +190,11 @@ static int debug_mem_open(struct inode *i, struct file *file) continue; mapping = kmalloc(sizeof(*mapping), GFP_KERNEL); + if (!mapping) { + ret = -ENOMEM; + kbase_gpu_vm_unlock(kctx); + goto out; + } mapping->alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); mapping->start_pfn = reg->start_pfn; @@ -197,6 +208,23 @@ static int debug_mem_open(struct inode *i, struct file *file) ((struct seq_file *)file->private_data)->private = mem_data; return 0; + +out: + if (mem_data) { + while (!list_empty(&mem_data->mapping_list)) { + struct debug_mem_mapping *mapping; + + mapping = list_first_entry(&mem_data->mapping_list, + struct debug_mem_mapping, node); + kbase_mem_phy_alloc_put(mapping->alloc); + list_del(&mapping->node); + kfree(mapping); + } + fput(kctx_file); + kfree(mem_data); + } + seq_release(i, file); + return ret; } static int debug_mem_release(struct inode *inode, struct file *file) diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_defs.h b/drivers/gpu/arm/midgard_for_linux/mali_kbase_defs.h index 0fc5ff95234e..4bb8c2c7aec2 100644 --- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_defs.h +++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_defs.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -33,7 +33,7 @@ #include #include #include -#include +#include #include #include @@ -53,6 +53,8 @@ #include "sync.h" #endif /* CONFIG_SYNC */ +#include "mali_kbase_dma_fence.h" + #ifdef CONFIG_DEBUG_FS #include #endif /* CONFIG_DEBUG_FS */ @@ -175,18 +177,18 @@ #define KBASE_KATOM_FLAG_BEEN_HARD_STOPPED (1<<4) /** Atom has caused us to enter disjoint state */ #define KBASE_KATOM_FLAG_IN_DISJOINT (1<<5) -/* Atom has fail dependency on same-slot dependency */ -#define KBASE_KATOM_FLAG_FAIL_PREV (1<<6) /* Atom blocked on cross-slot dependency */ #define KBASE_KATOM_FLAG_X_DEP_BLOCKED (1<<7) /* Atom has fail dependency on cross-slot dependency */ #define KBASE_KATOM_FLAG_FAIL_BLOCKER (1<<8) -/* Atom has been submitted to JSCTX ringbuffers */ -#define KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED (1<<9) +/* Atom is currently in the list of atoms blocked on cross-slot dependencies */ +#define KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST (1<<9) /* Atom is currently holding a context reference */ #define KBASE_KATOM_FLAG_HOLDING_CTX_REF (1<<10) -/* Atom requires GPU to be in secure mode */ -#define KBASE_KATOM_FLAG_SECURE (1<<11) +/* Atom requires GPU to be in protected mode */ +#define KBASE_KATOM_FLAG_PROTECTED (1<<11) +/* Atom has been stored in runnable_tree */ +#define KBASE_KATOM_FLAG_JSCTX_IN_TREE (1<<12) /* SW related flags about types of JS_COMMAND action * NOTE: These must be masked off by JS_COMMAND_MASK */ @@ -233,11 +235,11 @@ struct kbase_jd_atom_dependency { * * @return readonly reference to dependent ATOM. */ -static inline const struct kbase_jd_atom *const kbase_jd_katom_dep_atom(const struct kbase_jd_atom_dependency *dep) +static inline const struct kbase_jd_atom * kbase_jd_katom_dep_atom(const struct kbase_jd_atom_dependency *dep) { LOCAL_ASSERT(dep != NULL); - return (const struct kbase_jd_atom * const)(dep->atom); + return (const struct kbase_jd_atom *)(dep->atom); } /** @@ -248,7 +250,7 @@ static inline const struct kbase_jd_atom *const kbase_jd_katom_dep_atom(const st * * @return A dependency type value. */ -static inline const u8 kbase_jd_katom_dep_type(const struct kbase_jd_atom_dependency *dep) +static inline u8 kbase_jd_katom_dep_type(const struct kbase_jd_atom_dependency *dep) { LOCAL_ASSERT(dep != NULL); @@ -299,13 +301,15 @@ enum kbase_atom_gpu_rb_state { KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB, /* Atom is in slot ringbuffer but is blocked on a previous atom */ KBASE_ATOM_GPU_RB_WAITING_BLOCKED, + /* Atom is in slot ringbuffer but is waiting for proected mode exit */ + KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_EXIT, /* Atom is in slot ringbuffer but is waiting for cores to become * available */ KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE, /* Atom is in slot ringbuffer but is blocked on affinity */ KBASE_ATOM_GPU_RB_WAITING_AFFINITY, - /* Atom is in slot ringbuffer but is waiting for secure mode switch */ - KBASE_ATOM_GPU_RB_WAITING_SECURE_MODE, + /* Atom is in slot ringbuffer but is waiting for protected mode entry */ + KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_ENTRY, /* Atom is in slot ringbuffer and ready to run */ KBASE_ATOM_GPU_RB_READY, /* Atom is in slot ringbuffer and has been submitted to the GPU */ @@ -315,6 +319,23 @@ enum kbase_atom_gpu_rb_state { KBASE_ATOM_GPU_RB_RETURN_TO_JS }; +enum kbase_atom_exit_protected_state { + /* + * Starting state: + * Check if a transition out of protected mode is required. 
+ */ + KBASE_ATOM_EXIT_PROTECTED_CHECK, + /* Wait for the L2 to become idle in preparation for the reset. */ + KBASE_ATOM_EXIT_PROTECTED_IDLE_L2, + /* Issue the protected reset. */ + KBASE_ATOM_EXIT_PROTECTED_RESET, + /* + * End state; + * Wait for the reset to complete. + */ + KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT, +}; + struct kbase_ext_res { u64 gpu_address; struct kbase_mem_phy_alloc *alloc; @@ -331,6 +352,13 @@ struct kbase_jd_atom { struct list_head dep_head[2]; struct list_head dep_item[2]; const struct kbase_jd_atom_dependency dep[2]; + /* List head used during job dispatch job_done processing - as + * dependencies may not be entirely resolved at this point, we need to + * use a separate list head. */ + struct list_head jd_item; + /* true if atom's jd_item is currently on a list. Prevents atom being + * processed twice. */ + bool in_jd_list; u16 nr_extres; struct kbase_ext_res *extres; @@ -348,6 +376,59 @@ struct kbase_jd_atom { struct sync_fence *fence; struct sync_fence_waiter sync_waiter; #endif /* CONFIG_SYNC */ +#ifdef CONFIG_MALI_DMA_FENCE + struct { + /* This points to the dma-buf fence for this atom. If this is + * NULL then there is no fence for this atom and the other + * fields related to dma_fence may have invalid data. + * + * The context and seqno fields contain the details for this + * fence. + * + * This fence is signaled when the katom is completed, + * regardless of the event_code of the katom (signal also on + * failure). + */ + struct fence *fence; + /* The dma-buf fence context number for this atom. A unique + * context number is allocated to each katom in the context on + * context creation. + */ + unsigned int context; + /* The dma-buf fence sequence number for this atom. This is + * increased every time this katom uses dma-buf fence. + */ + atomic_t seqno; + /* This contains a list of all callbacks set up to wait on + * other fences. This atom must be held back from JS until all + * these callbacks have been called and dep_count have reached + * 0. The initial value of dep_count must be equal to the + * number of callbacks on this list. + * + * This list is protected by jctx.lock. Callbacks are added to + * this list when the atom is built and the wait are set up. + * All the callbacks then stay on the list until all callbacks + * have been called and the atom is queued, or cancelled, and + * then all callbacks are taken off the list and freed. + */ + struct list_head callbacks; + /* Atomic counter of number of outstandind dma-buf fence + * dependencies for this atom. When dep_count reaches 0 the + * atom may be queued. + * + * The special value "-1" may only be set after the count + * reaches 0, while holding jctx.lock. This indicates that the + * atom has been handled, either queued in JS or cancelled. + * + * If anyone but the dma-fence worker sets this to -1 they must + * ensure that any potentially queued worker must have + * completed before allowing the atom to be marked as unused. + * This can be done by flushing the fence work queue: + * kctx->dma_fence.wq. 
+ */ + atomic_t dep_count; + } dma_fence; +#endif /* CONFIG_MALI_DMA_FENCE */ /* Note: refer to kbasep_js_atom_retained_state, which will take a copy of some of the following members */ enum base_jd_event_code event_code; @@ -383,6 +464,11 @@ struct kbase_jd_atom { atomic_t blocked; + /* Pointer to atom that this atom has same-slot dependency on */ + struct kbase_jd_atom *pre_dep; + /* Pointer to atom that has same-slot dependency on this atom */ + struct kbase_jd_atom *post_dep; + /* Pointer to atom that this atom has cross-slot dependency on */ struct kbase_jd_atom *x_pre_dep; /* Pointer to atom that has cross-slot dependency on this atom */ @@ -396,11 +482,32 @@ struct kbase_jd_atom { #ifdef CONFIG_DEBUG_FS struct base_job_fault_event fault_event; #endif + + /* List head used for two different purposes: + * 1. Overflow list for JS ring buffers. If an atom is ready to run, + * but there is no room in the JS ring buffer, then the atom is put + * on the ring buffer's overflow list using this list node. + * 2. List of waiting soft jobs. + */ + struct list_head queue; + + struct kbase_va_region *jit_addr_reg; + + /* If non-zero, this indicates that the atom will fail with the set + * event_code when the atom is processed. */ + enum base_jd_event_code will_fail_event_code; + + enum kbase_atom_exit_protected_state exit_protected_state; + + struct rb_node runnable_tree_node; + + /* 'Age' of atom relative to other atoms in the context. */ + u32 age; }; -static inline bool kbase_jd_katom_is_secure(const struct kbase_jd_atom *katom) +static inline bool kbase_jd_katom_is_protected(const struct kbase_jd_atom *katom) { - return (bool)(katom->atom_flags & KBASE_KATOM_FLAG_SECURE); + return (bool)(katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED); } /* @@ -476,6 +583,7 @@ typedef u32 kbase_as_poke_state; struct kbase_mmu_setup { u64 transtab; u64 memattr; + u64 transcfg; }; /** @@ -494,6 +602,7 @@ struct kbase_as { enum kbase_mmu_fault_type fault_type; u32 fault_status; u64 fault_addr; + u64 fault_extra_addr; struct mutex transaction_mutex; struct kbase_mmu_setup current_setup; @@ -720,27 +829,36 @@ struct kbase_pm_device_data { }; /** - * struct kbase_secure_ops - Platform specific functions for GPU secure mode - * operations - * @secure_mode_enable: Callback to enable secure mode on the GPU - * @secure_mode_disable: Callback to disable secure mode on the GPU + * struct kbase_protected_ops - Platform specific functions for GPU protected + * mode operations + * @protected_mode_enter: Callback to enter protected mode on the GPU + * @protected_mode_reset: Callback to reset the GPU and exit protected mode. + * @protected_mode_supported: Callback to check if protected mode is supported. 
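[Editor's note, not part of the patch] The long comment above fixes a contract for dma_fence.dep_count: it starts at the number of registered fence callbacks, each signalled callback decrements it, the atom may only be queued once it reaches zero, and the special value -1 (set while holding jctx.lock) marks the atom as already handled. A toy illustration of one way to honour that contract is sketched below; toy_atom, toy_fence_callback and atom_deps_done are hypothetical names, and the real driver defers the final step to a worker on kctx->dma_fence.wq rather than doing it directly in the callback.

#include <linux/atomic.h>
#include <linux/mutex.h>

struct toy_atom {
        struct mutex *lock;     /* stands in for kctx->jctx.lock */
        atomic_t dep_count;     /* outstanding fence callbacks; -1 = handled */
};

static void atom_deps_done(struct toy_atom *atom)
{
        /* Queue (or cancel) the atom here. */
}

/* Called once per signalled fence callback. */
static void toy_fence_callback(struct toy_atom *atom)
{
        if (atomic_dec_and_test(&atom->dep_count)) {
                /*
                 * Last outstanding dependency: mark the atom as handled
                 * under the lock before anything else can observe
                 * dep_count == 0, then hand it over.
                 */
                mutex_lock(atom->lock);
                atomic_set(&atom->dep_count, -1);
                atom_deps_done(atom);
                mutex_unlock(atom->lock);
        }
}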
*/ -struct kbase_secure_ops { +struct kbase_protected_ops { + /** + * protected_mode_enter() - Enter protected mode on the GPU + * @kbdev: The kbase device + * + * Return: 0 on success, non-zero on error + */ + int (*protected_mode_enter)(struct kbase_device *kbdev); + /** - * secure_mode_enable() - Enable secure mode on the GPU + * protected_mode_reset() - Reset the GPU and exit protected mode * @kbdev: The kbase device * * Return: 0 on success, non-zero on error */ - int (*secure_mode_enable)(struct kbase_device *kbdev); + int (*protected_mode_reset)(struct kbase_device *kbdev); /** - * secure_mode_disable() - Disable secure mode on the GPU + * protected_mode_supported() - Check if protected mode is supported * @kbdev: The kbase device * * Return: 0 on success, non-zero on error */ - int (*secure_mode_disable)(struct kbase_device *kbdev); + bool (*protected_mode_supported)(struct kbase_device *kbdev); }; @@ -787,13 +905,13 @@ struct kbase_device { u64 reg_start; size_t reg_size; void __iomem *reg; + struct { int irq; int flags; } irqs[3]; -#ifdef CONFIG_HAVE_CLK + struct clk *clock; -#endif #ifdef CONFIG_REGULATOR struct regulator *regulator; #endif @@ -807,7 +925,7 @@ struct kbase_device { atomic_t serving_gpu_irq; atomic_t serving_mmu_irq; spinlock_t reg_op_lock; -#endif /* CONFIG_MALI_NO_MALI */ +#endif /* CONFIG_MALI_NO_MALI */ struct kbase_pm_device_data pm; struct kbasep_js_device_data js_data; @@ -879,16 +997,13 @@ struct kbase_device { s8 nr_user_address_spaces; /**< Number of address spaces available to user contexts */ /* Structure used for instrumentation and HW counters dumping */ - struct { + struct kbase_hwcnt { /* The lock should be used when accessing any of the following members */ spinlock_t lock; struct kbase_context *kctx; u64 addr; - struct kbase_context *suspended_kctx; - struct kbase_uk_hwcnt_setup suspended_state; - struct kbase_instr_backend backend; } hwcnt; @@ -904,30 +1019,6 @@ struct kbase_device { struct kbase_trace *trace_rbuf; #endif - /* This is used to override the current job scheduler values for - * JS_SCHEDULING_PERIOD_NS - * JS_SOFT_STOP_TICKS - * JS_SOFT_STOP_TICKS_CL - * JS_HARD_STOP_TICKS_SS - * JS_HARD_STOP_TICKS_CL - * JS_HARD_STOP_TICKS_DUMPING - * JS_RESET_TICKS_SS - * JS_RESET_TICKS_CL - * JS_RESET_TICKS_DUMPING. - * - * These values are set via the js_timeouts sysfs file. 
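[Editor's note, not part of the patch] With the rename from kbase_secure_ops to kbase_protected_ops the platform glue now provides three callbacks (enter, reset, supported), and atoms leaving protected mode step through the new KBASE_ATOM_EXIT_PROTECTED_CHECK, IDLE_L2, RESET and RESET_WAIT states using the reset callback. The sketch below only shows the shape such a platform implementation could take; the example_* names are hypothetical, the bodies are placeholders (real platforms usually talk to secure firmware at these points), and it assumes the kbase headers patched above are in scope.

static int example_protected_mode_enter(struct kbase_device *kbdev)
{
        /* Ask secure firmware to put the GPU into protected mode. */
        return 0;
}

static int example_protected_mode_reset(struct kbase_device *kbdev)
{
        /* Reset the GPU; once complete it is back in normal mode. */
        return 0;
}

static bool example_protected_mode_supported(struct kbase_device *kbdev)
{
        return true;
}

static struct kbase_protected_ops example_protected_ops = {
        .protected_mode_enter     = example_protected_mode_enter,
        .protected_mode_reset     = example_protected_mode_reset,
        .protected_mode_supported = example_protected_mode_supported,
};

The platform configuration would then point the device's protected_ops pointer (added further below in this patch) at such a table during probe.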
- */ - u32 js_scheduling_period_ns; - int js_soft_stop_ticks; - int js_soft_stop_ticks_cl; - int js_hard_stop_ticks_ss; - int js_hard_stop_ticks_cl; - int js_hard_stop_ticks_dumping; - int js_reset_ticks_ss; - int js_reset_ticks_cl; - int js_reset_ticks_dumping; - bool js_timeouts_updated; - u32 reset_timeout_ms; struct mutex cacheclean_lock; @@ -945,8 +1036,12 @@ struct kbase_device { unsigned long current_freq; unsigned long current_voltage; #ifdef CONFIG_DEVFREQ_THERMAL +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) + struct devfreq_cooling_device *devfreq_cooling; +#else struct thermal_cooling_device *devfreq_cooling; #endif +#endif #endif struct kbase_ipa_context *ipa_ctx; @@ -967,11 +1062,17 @@ struct kbase_device { /* Root directory for per context entry */ struct dentry *debugfs_ctx_directory; +#ifdef CONFIG_MALI_DEBUG + /* bit for each as, set if there is new data to report */ + u64 debugfs_as_read_bitmap; +#endif /* CONFIG_MALI_DEBUG */ + /* failed job dump, used for separate debug process */ wait_queue_head_t job_fault_wq; wait_queue_head_t job_fault_resume_wq; struct workqueue_struct *job_fault_resume_workq; struct list_head job_fault_event_list; + spinlock_t job_fault_event_lock; struct kbase_context *kctx_fault; #if !MALI_CUSTOMER_RELEASE @@ -1017,24 +1118,41 @@ struct kbase_device { /* defaults for new context created for this device */ +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) + bool infinite_cache_active_default; +#else u32 infinite_cache_active_default; +#endif size_t mem_pool_max_size_default; /* system coherency mode */ u32 system_coherency; + /* Flag to track when cci snoops have been enabled on the interface */ + bool cci_snoop_enabled; + + /* SMC function IDs to call into Trusted firmware to enable/disable + * cache snooping. Value of 0 indicates that they are not used + */ + u32 snoop_enable_smc; + u32 snoop_disable_smc; - /* Secure operations */ - struct kbase_secure_ops *secure_ops; + /* Protected operations */ + struct kbase_protected_ops *protected_ops; /* - * true when GPU is put into secure mode + * true when GPU is put into protected mode */ - bool secure_mode; + bool protected_mode; /* - * true if secure mode is supported + * true when GPU is transitioning into or out of protected mode */ - bool secure_mode_support; + bool protected_mode_transition; + + /* + * true if protected mode is supported + */ + bool protected_mode_support; #ifdef CONFIG_MALI_DEBUG @@ -1050,47 +1168,27 @@ struct kbase_device { #endif /* Boolean indicating if an IRQ flush during reset is in progress. */ bool irq_reset_flush; -}; -/* JSCTX ringbuffer size must always be a power of 2 */ -#define JSCTX_RB_SIZE 256 -#define JSCTX_RB_MASK (JSCTX_RB_SIZE-1) - -/** - * struct jsctx_rb_entry - Entry in &struct jsctx_rb ring buffer - * @atom_id: Atom ID - */ -struct jsctx_rb_entry { - u16 atom_id; + /* list of inited sub systems. Used during terminate/error recovery */ + u32 inited_subsys; }; /** - * struct jsctx_rb - JS context atom ring buffer - * @entries: Array of size %JSCTX_RB_SIZE which holds the &struct - * kbase_jd_atom pointers which make up the contents of the ring - * buffer. - * @read_idx: Index into @entries. Indicates the next entry in @entries to - * read, and is incremented when pulling an atom, and decremented - * when unpulling. - * HW access lock must be held when accessing. - * @write_idx: Index into @entries. Indicates the next entry to use when - * adding atoms into the ring buffer, and is incremented when - * adding a new atom. 
- * jctx->lock must be held when accessing. - * @running_idx: Index into @entries. Indicates the last valid entry, and is - * incremented when remving atoms from the ring buffer. - * HW access lock must be held when accessing. + * struct jsctx_queue - JS context atom queue + * @runnable_tree: Root of RB-tree containing currently runnable atoms on this + * job slot. + * @x_dep_head: Head item of the linked list of atoms blocked on cross-slot + * dependencies. Atoms on this list will be moved to the + * runnable_tree when the blocking atom completes. * - * &struct jsctx_rb is a ring buffer of &struct kbase_jd_atom. + * runpool_irq.lock must be held when accessing this structure. */ -struct jsctx_rb { - struct jsctx_rb_entry entries[JSCTX_RB_SIZE]; - - u16 read_idx; /* HW access lock must be held when accessing */ - u16 write_idx; /* jctx->lock must be held when accessing */ - u16 running_idx; /* HW access lock must be held when accessing */ +struct jsctx_queue { + struct rb_root runnable_tree; + struct list_head x_dep_head; }; + #define KBASE_API_VERSION(major, minor) ((((major) & 0xFFF) << 20) | \ (((minor) & 0xFFF) << 8) | \ ((0 & 0xFF) << 0)) @@ -1102,10 +1200,12 @@ struct kbase_context { unsigned long api_version; phys_addr_t pgd; struct list_head event_list; + struct list_head event_coalesce_list; struct mutex event_mutex; atomic_t event_closed; struct workqueue_struct *event_workq; atomic_t event_count; + int event_coalesce_count; bool is_compat; @@ -1116,6 +1216,7 @@ struct kbase_context { struct page *aliasing_sink_page; + struct mutex mmu_lock; struct mutex reg_lock; /* To be converted to a rwlock? */ struct rb_root reg_rbtree; /* Red-Black tree of GPU regions (live regions) */ @@ -1132,10 +1233,21 @@ struct kbase_context { struct kbase_mem_pool mem_pool; + struct shrinker reclaim; + struct list_head evict_list; + struct mutex evict_lock; + struct list_head waiting_soft_jobs; + spinlock_t waiting_soft_jobs_lock; #ifdef CONFIG_KDS struct list_head waiting_kds_resource; #endif +#ifdef CONFIG_MALI_DMA_FENCE + struct { + struct list_head waiting_resource; + struct workqueue_struct *wq; + } dma_fence; +#endif /* CONFIG_MALI_DMA_FENCE */ /** This is effectively part of the Run Pool, because it only has a valid * setting (!=KBASEP_AS_NR_INVALID) whilst the context is scheduled in * @@ -1157,6 +1269,8 @@ struct kbase_context { * All other flags must be added there */ spinlock_t mm_update_lock; struct mm_struct *process_mm; + /* End of the SAME_VA zone */ + u64 same_va_end; #ifdef CONFIG_MALI_TRACE_TIMELINE struct kbase_trace_kctx_timeline timeline; @@ -1182,7 +1296,7 @@ struct kbase_context { #endif /* CONFIG_DEBUG_FS */ - struct jsctx_rb jsctx_rb + struct jsctx_queue jsctx_queue [KBASE_JS_ATOM_SCHED_PRIO_COUNT][BASE_JM_MAX_NR_SLOTS]; /* Number of atoms currently pulled from this context */ @@ -1193,13 +1307,14 @@ struct kbase_context { bool pulled; /* true if infinite cache is to be enabled for new allocations. Existing * allocations will not change. 
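[Editor's note, not part of the patch] The per-slot ring buffer is gone: struct jsctx_queue keeps runnable atoms in an RB-tree and parks atoms blocked on cross-slot dependencies on x_dep_head until the blocking atom completes. The toy code below shows the basic insert/pull operations on such a tree; keying strictly by the new per-atom age counter is an assumption of this sketch (each queue is already per priority and per slot), and all toy_* names are hypothetical.

#include <linux/list.h>
#include <linux/rbtree.h>
#include <linux/types.h>

struct toy_atom {
        u32 age;
        struct rb_node runnable_tree_node;
        struct list_head queue;         /* sits on x_dep_head while blocked */
};

static void toy_queue_runnable(struct rb_root *root, struct toy_atom *atom)
{
        struct rb_node **new = &root->rb_node, *parent = NULL;

        while (*new) {
                struct toy_atom *entry = rb_entry(*new, struct toy_atom,
                                                  runnable_tree_node);

                parent = *new;
                if (atom->age < entry->age)
                        new = &(*new)->rb_left;
                else
                        new = &(*new)->rb_right;
        }
        rb_link_node(&atom->runnable_tree_node, parent, new);
        rb_insert_color(&atom->runnable_tree_node, root);
}

static struct toy_atom *toy_pull_oldest(struct rb_root *root)
{
        struct rb_node *node = rb_first(root);

        if (!node)
                return NULL;
        rb_erase(node, root);
        return rb_entry(node, struct toy_atom, runnable_tree_node);
}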
bool stored as a u32 per Linux API */ +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) + bool infinite_cache_active; +#else u32 infinite_cache_active; +#endif /* Bitmask of slots that can be pulled from */ u32 slots_pullable; - /* true if address space assignment is pending */ - bool as_pending; - /* Backend specific data */ struct kbase_context_backend backend; @@ -1220,6 +1335,52 @@ struct kbase_context { /* true if context is counted in kbdev->js_data.nr_contexts_runnable */ bool ctx_runnable_ref; + + /* Waiting soft-jobs will fail when this timer expires */ + struct timer_list soft_job_timeout; + + /* JIT allocation management */ + struct kbase_va_region *jit_alloc[256]; + struct list_head jit_active_head; + struct list_head jit_pool_head; + struct list_head jit_destroy_head; + struct mutex jit_lock; + struct work_struct jit_work; + + /* External sticky resource management */ + struct list_head ext_res_meta_head; + + /* Used to record that a drain was requested from atomic context */ + atomic_t drain_pending; + + /* Current age count, used to determine age for newly submitted atoms */ + u32 age_count; +}; + +/** + * struct kbase_ctx_ext_res_meta - Structure which binds an external resource + * to a @kbase_context. + * @ext_res_node: List head for adding the metadata to a + * @kbase_context. + * @alloc: The physical memory allocation structure + * which is mapped. + * @gpu_addr: The GPU virtual address the resource is + * mapped to. + * + * External resources can be mapped into multiple contexts as well as the same + * context multiple times. + * As kbase_va_region itself isn't refcounted we can't attach our extra + * information to it as it could be removed under our feet leaving external + * resources pinned. + * This metadata structure binds a single external resource to a single + * context, ensuring that per context mapping is tracked separately so it can + * be overridden when needed and abuses by the application (freeing the resource + * multiple times) don't effect the refcount of the physical allocation. + */ +struct kbase_ctx_ext_res_meta { + struct list_head ext_res_node; + struct kbase_mem_phy_alloc *alloc; + u64 gpu_addr; }; enum kbase_reg_access_type { @@ -1249,7 +1410,7 @@ static inline bool kbase_device_is_cpu_coherent(struct kbase_device *kbdev) } /* Conversion helpers for setting up high resolution timers */ -#define HR_TIMER_DELAY_MSEC(x) (ns_to_ktime((x)*1000000U)) +#define HR_TIMER_DELAY_MSEC(x) (ns_to_ktime(((u64)(x))*1000000U)) #define HR_TIMER_DELAY_NSEC(x) (ns_to_ktime(x)) /* Maximum number of loops polling the GPU for a cache flush before we assume it must have completed */ @@ -1260,4 +1421,29 @@ static inline bool kbase_device_is_cpu_coherent(struct kbase_device *kbdev) /* Maximum number of times a job can be replayed */ #define BASEP_JD_REPLAY_LIMIT 15 +/* JobDescriptorHeader - taken from the architecture specifications, the layout + * is currently identical for all GPU archs. 
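[Editor's note, not part of the patch] kbase_ctx_ext_res_meta exists because kbase_va_region is not refcounted: each context keeps its own list of external resources it has pinned, so repeated map/unmap requests from user space only touch the per-context entry and the underlying physical allocation's refcount stays balanced. The real management code for this lives in mali_kbase_mem.c; the lookup below is a simplified, hypothetical stand-in that only shows how the list added to kbase_context above would be searched, and it assumes the kbase headers patched here are in scope.

static struct kbase_ctx_ext_res_meta *toy_ext_res_find(
                struct kbase_context *kctx, u64 gpu_addr)
{
        struct kbase_ctx_ext_res_meta *meta;

        /* Caller is expected to hold the context's region lock. */
        list_for_each_entry(meta, &kctx->ext_res_meta_head, ext_res_node) {
                if (meta->gpu_addr == gpu_addr)
                        return meta;
        }
        return NULL;
}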
*/ +struct job_descriptor_header { + u32 exception_status; + u32 first_incomplete_task; + u64 fault_pointer; + u8 job_descriptor_size : 1; + u8 job_type : 7; + u8 job_barrier : 1; + u8 _reserved_01 : 1; + u8 _reserved_1 : 1; + u8 _reserved_02 : 1; + u8 _reserved_03 : 1; + u8 _reserved_2 : 1; + u8 _reserved_04 : 1; + u8 _reserved_05 : 1; + u16 job_index; + u16 job_dependency_index_1; + u16 job_dependency_index_2; + union { + u64 _64; + u32 _32; + } next_job; +}; + #endif /* _KBASE_DEFS_H_ */ diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_device.c b/drivers/gpu/arm/midgard_for_linux/mali_kbase_device.c index c22e099db6f9..62ab0caf9858 100644 --- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_device.c +++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_device.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -30,6 +30,7 @@ #include #include +#include #include #include @@ -145,8 +146,32 @@ static void kbase_device_all_as_term(struct kbase_device *kbdev) int kbase_device_init(struct kbase_device * const kbdev) { int i, err; +#ifdef CONFIG_ARM64 + struct device_node *np = NULL; +#endif /* CONFIG_ARM64 */ spin_lock_init(&kbdev->mmu_mask_change); +#ifdef CONFIG_ARM64 + kbdev->cci_snoop_enabled = false; + np = kbdev->dev->of_node; + if (np != NULL) { + if (of_property_read_u32(np, "snoop_enable_smc", + &kbdev->snoop_enable_smc)) + kbdev->snoop_enable_smc = 0; + if (of_property_read_u32(np, "snoop_disable_smc", + &kbdev->snoop_disable_smc)) + kbdev->snoop_disable_smc = 0; + /* Either both or none of the calls should be provided. */ + if (!((kbdev->snoop_disable_smc == 0 + && kbdev->snoop_enable_smc == 0) + || (kbdev->snoop_disable_smc != 0 + && kbdev->snoop_enable_smc != 0))) { + WARN_ON(1); + err = -EINVAL; + goto fail; + } + } +#endif /* CONFIG_ARM64 */ /* Get the list of workarounds for issues on the current HW * (identified by the GPU_ID register) */ @@ -220,7 +245,11 @@ int kbase_device_init(struct kbase_device * const kbdev) kbdev->reset_timeout_ms = DEFAULT_RESET_TIMEOUT_MS; +#ifdef CONFIG_MALI_GPU_MMU_AARCH64 + kbdev->mmu_mode = kbase_mmu_mode_get_aarch64(); +#else kbdev->mmu_mode = kbase_mmu_mode_get_lpae(); +#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */ #ifdef CONFIG_MALI_DEBUG init_waitqueue_head(&kbdev->driver_inactive_wait); diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_event.c b/drivers/gpu/arm/midgard_for_linux/mali_kbase_event.c index 0d0c5258aaa4..bf8c304610eb 100644 --- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_event.c +++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_event.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -19,10 +19,7 @@ #include #include - -#if defined(CONFIG_MALI_MIPE_ENABLED) #include -#endif static struct base_jd_udata kbase_event_process(struct kbase_context *kctx, struct kbase_jd_atom *katom) { @@ -38,10 +35,8 @@ static struct base_jd_udata kbase_event_process(struct kbase_context *kctx, stru KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, atomic_sub_return(1, &kctx->timeline.jd_atoms_in_flight)); -#if defined(CONFIG_MALI_MIPE_ENABLED) kbase_tlstream_tl_nret_atom_ctx(katom, kctx); kbase_tlstream_tl_del_atom(katom); -#endif katom->status = KBASE_JD_ATOM_STATE_UNUSED; @@ -147,6 +142,29 @@ static void kbase_event_process_noreport(struct kbase_context *kctx, } } +/** + * kbase_event_coalesce - Move pending events to the main event list + * @kctx: Context pointer + * + * kctx->event_list and kctx->event_coalesce_count must be protected + * by a lock unless this is the last thread using them + * (and we're about to terminate the lock). + * + * Return: The number of pending events moved to the main event list + */ +static int kbase_event_coalesce(struct kbase_context *kctx) +{ + const int event_count = kctx->event_coalesce_count; + + /* Join the list of pending events onto the tail of the main list + and reset it */ + list_splice_tail_init(&kctx->event_coalesce_list, &kctx->event_list); + kctx->event_coalesce_count = 0; + + /* Return the number of events moved */ + return event_count; +} + void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *atom) { if (atom->core_req & BASE_JD_REQ_EVENT_ONLY_ON_FAILURE) { @@ -163,12 +181,24 @@ void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *atom) return; } - mutex_lock(&ctx->event_mutex); - atomic_inc(&ctx->event_count); - list_add_tail(&atom->dep_item[0], &ctx->event_list); - mutex_unlock(&ctx->event_mutex); + if (atom->core_req & BASE_JD_REQ_EVENT_COALESCE) { + /* Don't report the event until other event(s) have completed */ + mutex_lock(&ctx->event_mutex); + list_add_tail(&atom->dep_item[0], &ctx->event_coalesce_list); + ++ctx->event_coalesce_count; + mutex_unlock(&ctx->event_mutex); + } else { + /* Report the event and any pending events now */ + int event_count = 1; - kbase_event_wakeup(ctx); + mutex_lock(&ctx->event_mutex); + event_count += kbase_event_coalesce(ctx); + list_add_tail(&atom->dep_item[0], &ctx->event_list); + atomic_add(event_count, &ctx->event_count); + mutex_unlock(&ctx->event_mutex); + + kbase_event_wakeup(ctx); + } } KBASE_EXPORT_TEST_API(kbase_event_post); @@ -185,8 +215,10 @@ int kbase_event_init(struct kbase_context *kctx) KBASE_DEBUG_ASSERT(kctx); INIT_LIST_HEAD(&kctx->event_list); + INIT_LIST_HEAD(&kctx->event_coalesce_list); mutex_init(&kctx->event_mutex); atomic_set(&kctx->event_count, 0); + kctx->event_coalesce_count = 0; atomic_set(&kctx->event_closed, false); kctx->event_workq = alloc_workqueue("kbase_event", WQ_MEM_RECLAIM, 1); @@ -200,6 +232,8 @@ KBASE_EXPORT_TEST_API(kbase_event_init); void kbase_event_cleanup(struct kbase_context *kctx) { + int event_count; + KBASE_DEBUG_ASSERT(kctx); KBASE_DEBUG_ASSERT(kctx->event_workq); @@ -212,6 +246,9 @@ void kbase_event_cleanup(struct kbase_context *kctx) * Note: use of kctx->event_list without a lock is safe because this must be the last * thread using it (because we're about to terminate the lock) */ + event_count = kbase_event_coalesce(kctx); + atomic_add(event_count, &kctx->event_count); + 
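[Editor's note, not part of the patch] Event coalescing above works in two stages: atoms flagged BASE_JD_REQ_EVENT_COALESCE are parked on event_coalesce_list without waking the reader, and the next non-coalesced event (or context teardown) folds the whole pending list into event_list and bumps event_count by the number moved. The splice itself is O(1); the standalone fragment below simply illustrates the list_splice_tail_init() idiom, with flush_pending() and both list names being hypothetical.

#include <linux/list.h>

static LIST_HEAD(main_list);
static LIST_HEAD(pending_list);

static int flush_pending(int pending_count)
{
        /*
         * O(1): relink the entire pending list onto the tail of main_list,
         * preserving order, and reinitialise pending_list as empty.
         */
        list_splice_tail_init(&pending_list, &main_list);
        return pending_count;   /* caller adds this to its event counter */
}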
while (!list_empty(&kctx->event_list)) { struct base_jd_event_v2 event; diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_gator_api.c b/drivers/gpu/arm/midgard_for_linux/mali_kbase_gator_api.c index a2174b24ac3c..4af3e4815e95 100644 --- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_gator_api.c +++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_gator_api.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -20,7 +20,6 @@ #include "mali_kbase_mem_linux.h" #include "mali_kbase_gator_api.h" #include "mali_kbase_gator_hwcnt_names.h" -#include "mali_kbase_instr.h" #define MALI_MAX_CORES_PER_GROUP 4 #define MALI_MAX_NUM_BLOCKS_PER_GROUP 8 @@ -28,18 +27,23 @@ #define MALI_BYTES_PER_COUNTER 4 struct kbase_gator_hwcnt_handles { - struct kbase_device *kbdev; - struct kbase_context *kctx; - u64 hwcnt_gpu_va; - void *hwcnt_cpu_va; - struct kbase_vmap_struct hwcnt_map; + struct kbase_device *kbdev; + struct kbase_vinstr_client *vinstr_cli; + void *vinstr_buffer; + struct work_struct dump_work; + int dump_complete; + spinlock_t dump_lock; }; +static void dump_worker(struct work_struct *work); + const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_counters) { - uint32_t gpu_id; const char * const *hardware_counters; struct kbase_device *kbdev; + uint32_t gpu_id; + uint32_t product_id; + uint32_t count; if (!total_counters) return NULL; @@ -50,58 +54,78 @@ const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_counters) return NULL; gpu_id = kbdev->gpu_props.props.core_props.product_id; - - switch (gpu_id) { - /* If we are using a Mali-T60x device */ - case GPU_ID_PI_T60X: - hardware_counters = hardware_counters_mali_t60x; - *total_counters = ARRAY_SIZE(hardware_counters_mali_t60x); - break; - /* If we are using a Mali-T62x device */ - case GPU_ID_PI_T62X: - hardware_counters = hardware_counters_mali_t62x; - *total_counters = ARRAY_SIZE(hardware_counters_mali_t62x); - break; - /* If we are using a Mali-T72x device */ - case GPU_ID_PI_T72X: - hardware_counters = hardware_counters_mali_t72x; - *total_counters = ARRAY_SIZE(hardware_counters_mali_t72x); - break; - /* If we are using a Mali-T76x device */ - case GPU_ID_PI_T76X: - hardware_counters = hardware_counters_mali_t76x; - *total_counters = ARRAY_SIZE(hardware_counters_mali_t76x); - break; - /* If we are using a Mali-T82x device */ - case GPU_ID_PI_T82X: - hardware_counters = hardware_counters_mali_t82x; - *total_counters = ARRAY_SIZE(hardware_counters_mali_t82x); - break; - /* If we are using a Mali-T83x device */ - case GPU_ID_PI_T83X: - hardware_counters = hardware_counters_mali_t83x; - *total_counters = ARRAY_SIZE(hardware_counters_mali_t83x); - break; - /* If we are using a Mali-T86x device */ - case GPU_ID_PI_T86X: - hardware_counters = hardware_counters_mali_t86x; - *total_counters = ARRAY_SIZE(hardware_counters_mali_t86x); - break; - /* If we are using a Mali-T88x device */ - case GPU_ID_PI_TFRX: - hardware_counters = hardware_counters_mali_t88x; - *total_counters = ARRAY_SIZE(hardware_counters_mali_t88x); - break; - default: - hardware_counters = NULL; - *total_counters = 0; - dev_err(kbdev->dev, "Unrecognized gpu ID: %u\n", gpu_id); - break; + product_id = gpu_id & GPU_ID_VERSION_PRODUCT_ID; + product_id >>= GPU_ID_VERSION_PRODUCT_ID_SHIFT; + + if 
(GPU_ID_IS_NEW_FORMAT(product_id)) { + switch (gpu_id & GPU_ID2_PRODUCT_MODEL) { + case GPU_ID2_PRODUCT_TMIX: + hardware_counters = hardware_counters_mali_tMIx; + count = ARRAY_SIZE(hardware_counters_mali_tMIx); + break; + default: + hardware_counters = NULL; + count = 0; + dev_err(kbdev->dev, "Unrecognized gpu ID: %u\n", + gpu_id); + break; + } + } else { + switch (gpu_id) { + /* If we are using a Mali-T60x device */ + case GPU_ID_PI_T60X: + hardware_counters = hardware_counters_mali_t60x; + count = ARRAY_SIZE(hardware_counters_mali_t60x); + break; + /* If we are using a Mali-T62x device */ + case GPU_ID_PI_T62X: + hardware_counters = hardware_counters_mali_t62x; + count = ARRAY_SIZE(hardware_counters_mali_t62x); + break; + /* If we are using a Mali-T72x device */ + case GPU_ID_PI_T72X: + hardware_counters = hardware_counters_mali_t72x; + count = ARRAY_SIZE(hardware_counters_mali_t72x); + break; + /* If we are using a Mali-T76x device */ + case GPU_ID_PI_T76X: + hardware_counters = hardware_counters_mali_t76x; + count = ARRAY_SIZE(hardware_counters_mali_t76x); + break; + /* If we are using a Mali-T82x device */ + case GPU_ID_PI_T82X: + hardware_counters = hardware_counters_mali_t82x; + count = ARRAY_SIZE(hardware_counters_mali_t82x); + break; + /* If we are using a Mali-T83x device */ + case GPU_ID_PI_T83X: + hardware_counters = hardware_counters_mali_t83x; + count = ARRAY_SIZE(hardware_counters_mali_t83x); + break; + /* If we are using a Mali-T86x device */ + case GPU_ID_PI_T86X: + hardware_counters = hardware_counters_mali_t86x; + count = ARRAY_SIZE(hardware_counters_mali_t86x); + break; + /* If we are using a Mali-T88x device */ + case GPU_ID_PI_TFRX: + hardware_counters = hardware_counters_mali_t88x; + count = ARRAY_SIZE(hardware_counters_mali_t88x); + break; + default: + hardware_counters = NULL; + count = 0; + dev_err(kbdev->dev, "Unrecognized gpu ID: %u\n", + gpu_id); + break; + } } /* Release the kbdev reference. */ kbase_release_device(kbdev); + *total_counters = count; + /* If we return a string array take a reference on the module (or fail). 
*/ if (hardware_counters && !try_module_get(THIS_MODULE)) return NULL; @@ -120,13 +144,8 @@ KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_term_names); struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcnt_info *in_out_info) { struct kbase_gator_hwcnt_handles *hand; - struct kbase_uk_hwcnt_setup setup; - int err; + struct kbase_uk_hwcnt_reader_setup setup; uint32_t dump_size = 0, i = 0; - struct kbase_va_region *reg; - u64 flags; - u64 nr_pages; - u16 va_alignment = 0; if (!in_out_info) return NULL; @@ -135,15 +154,19 @@ struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcn if (!hand) return NULL; + INIT_WORK(&hand->dump_work, dump_worker); + spin_lock_init(&hand->dump_lock); + /* Get the first device */ hand->kbdev = kbase_find_device(-1); if (!hand->kbdev) goto free_hand; - /* Create a kbase_context */ - hand->kctx = kbase_create_context(hand->kbdev, true); - if (!hand->kctx) + dump_size = kbase_vinstr_dump_size(hand->kbdev); + hand->vinstr_buffer = kzalloc(dump_size, GFP_KERNEL); + if (!hand->vinstr_buffer) goto release_device; + in_out_info->kernel_dump_buffer = hand->vinstr_buffer; in_out_info->nr_cores = hand->kbdev->gpu_props.num_cores; in_out_info->nr_core_groups = hand->kbdev->gpu_props.num_core_groups; @@ -160,7 +183,7 @@ struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcn in_out_info->nr_core_groups, GFP_KERNEL); if (!in_out_info->hwc_layout) - goto destroy_context; + goto free_vinstr_buffer; dump_size = in_out_info->nr_core_groups * MALI_MAX_NUM_BLOCKS_PER_GROUP * @@ -189,23 +212,23 @@ struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcn } /* If we are using any other device */ } else { - uint32_t nr_l2, nr_sc, j; + uint32_t nr_l2, nr_sc_bits, j; uint64_t core_mask; nr_l2 = hand->kbdev->gpu_props.props.l2_props.num_l2_slices; core_mask = hand->kbdev->gpu_props.props.coherency_info.group[0].core_mask; - nr_sc = hand->kbdev->gpu_props.props.coherency_info.group[0].num_cores; + nr_sc_bits = fls64(core_mask); /* The job manager and tiler sets of counters * are always present */ - in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) * (2 + nr_sc + nr_l2), GFP_KERNEL); + in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) * (2 + nr_sc_bits + nr_l2), GFP_KERNEL); if (!in_out_info->hwc_layout) - goto destroy_context; + goto free_vinstr_buffer; - dump_size = (2 + nr_sc + nr_l2) * MALI_COUNTERS_PER_BLOCK * MALI_BYTES_PER_COUNTER; + dump_size = (2 + nr_sc_bits + nr_l2) * MALI_COUNTERS_PER_BLOCK * MALI_BYTES_PER_COUNTER; in_out_info->hwc_layout[i++] = JM_BLOCK; in_out_info->hwc_layout[i++] = TILER_BLOCK; @@ -223,58 +246,32 @@ struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcn } in_out_info->nr_hwc_blocks = i; - in_out_info->size = dump_size; - flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_CPU_WR | BASE_MEM_PROT_GPU_WR; - nr_pages = PFN_UP(dump_size); - reg = kbase_mem_alloc(hand->kctx, nr_pages, nr_pages, 0, - &flags, &hand->hwcnt_gpu_va, &va_alignment); - if (!reg) - goto free_layout; - - hand->hwcnt_cpu_va = kbase_vmap(hand->kctx, hand->hwcnt_gpu_va, - dump_size, &hand->hwcnt_map); - - if (!hand->hwcnt_cpu_va) - goto free_buffer; - - in_out_info->kernel_dump_buffer = hand->hwcnt_cpu_va; - memset(in_out_info->kernel_dump_buffer, 0, nr_pages * PAGE_SIZE); - - /*setup.dump_buffer = (uintptr_t)in_out_info->kernel_dump_buffer;*/ - setup.dump_buffer = hand->hwcnt_gpu_va; setup.jm_bm = in_out_info->bitmask[0]; setup.tiler_bm = in_out_info->bitmask[1]; 
setup.shader_bm = in_out_info->bitmask[2]; setup.mmu_l2_bm = in_out_info->bitmask[3]; - - err = kbase_instr_hwcnt_enable(hand->kctx, &setup); - if (err) - goto free_unmap; - - kbase_instr_hwcnt_clear(hand->kctx); + hand->vinstr_cli = kbase_vinstr_hwcnt_kernel_setup(hand->kbdev->vinstr_ctx, + &setup, hand->vinstr_buffer); + if (!hand->vinstr_cli) { + dev_err(hand->kbdev->dev, "Failed to register gator with vinstr core"); + goto free_layout; + } return hand; -free_unmap: - kbase_vunmap(hand->kctx, &hand->hwcnt_map); - -free_buffer: - kbase_mem_free(hand->kctx, hand->hwcnt_gpu_va); - free_layout: kfree(in_out_info->hwc_layout); -destroy_context: - kbase_destroy_context(hand->kctx); +free_vinstr_buffer: + kfree(hand->vinstr_buffer); release_device: kbase_release_device(hand->kbdev); free_hand: kfree(hand); - return NULL; } KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_init); @@ -285,27 +282,39 @@ void kbase_gator_hwcnt_term(struct kbase_gator_hwcnt_info *in_out_info, struct k kfree(in_out_info->hwc_layout); if (opaque_handles) { - kbase_instr_hwcnt_disable(opaque_handles->kctx); - kbase_vunmap(opaque_handles->kctx, &opaque_handles->hwcnt_map); - kbase_mem_free(opaque_handles->kctx, opaque_handles->hwcnt_gpu_va); - kbase_destroy_context(opaque_handles->kctx); + cancel_work_sync(&opaque_handles->dump_work); + kbase_vinstr_detach_client(opaque_handles->vinstr_cli); + kfree(opaque_handles->vinstr_buffer); kbase_release_device(opaque_handles->kbdev); kfree(opaque_handles); } } KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_term); +static void dump_worker(struct work_struct *work) +{ + struct kbase_gator_hwcnt_handles *hand; + + hand = container_of(work, struct kbase_gator_hwcnt_handles, dump_work); + if (!kbase_vinstr_hwc_dump(hand->vinstr_cli, + BASE_HWCNT_READER_EVENT_MANUAL)) { + spin_lock_bh(&hand->dump_lock); + hand->dump_complete = 1; + spin_unlock_bh(&hand->dump_lock); + } else { + schedule_work(&hand->dump_work); + } +} + uint32_t kbase_gator_instr_hwcnt_dump_complete( struct kbase_gator_hwcnt_handles *opaque_handles, uint32_t * const success) { - bool ret_res, success_res; if (opaque_handles && success) { - ret_res = kbase_instr_hwcnt_dump_complete(opaque_handles->kctx, - &success_res); - *success = (uint32_t)success_res; - return (uint32_t)(ret_res != 0); + *success = opaque_handles->dump_complete; + opaque_handles->dump_complete = 0; + return *success; } return 0; } @@ -314,9 +323,7 @@ KBASE_EXPORT_SYMBOL(kbase_gator_instr_hwcnt_dump_complete); uint32_t kbase_gator_instr_hwcnt_dump_irq(struct kbase_gator_hwcnt_handles *opaque_handles) { if (opaque_handles) - return (kbase_instr_hwcnt_request_dump( - opaque_handles->kctx) == 0); - + schedule_work(&opaque_handles->dump_work); return 0; } KBASE_EXPORT_SYMBOL(kbase_gator_instr_hwcnt_dump_irq); diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_gator_hwcnt_names.h b/drivers/gpu/arm/midgard_for_linux/mali_kbase_gator_hwcnt_names.h index eb76f01b0fda..c247dd698e19 100644 --- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_gator_hwcnt_names.h +++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_gator_hwcnt_names.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. 
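[Editor's note, not part of the patch] The gator glue above no longer owns a kbase context or a GPU-mapped dump buffer; it registers as a kernel-side vinstr client with a kernel buffer and, because the dump call can sleep, defers it to a workqueue so the IRQ-context entry point only schedules work and the caller polls dump_complete later. A stripped-down sketch of that defer-to-worker pattern follows; the toy_* names are hypothetical, do_blocking_dump() merely stands in for the sleeping vinstr dump call, and the INIT_WORK()/spin_lock_init() calls done at init time are omitted.

#include <linux/spinlock.h>
#include <linux/workqueue.h>

struct toy_hwcnt {
        struct work_struct dump_work;
        spinlock_t dump_lock;
        int dump_complete;
};

static int do_blocking_dump(struct toy_hwcnt *h)
{
        return 0;       /* placeholder for the real, sleeping dump call */
}

static void toy_dump_worker(struct work_struct *work)
{
        struct toy_hwcnt *h = container_of(work, struct toy_hwcnt, dump_work);

        if (!do_blocking_dump(h)) {
                spin_lock_bh(&h->dump_lock);
                h->dump_complete = 1;
                spin_unlock_bh(&h->dump_lock);
        } else {
                /* Retry later rather than blocking the caller. */
                schedule_work(&h->dump_work);
        }
}

/* Safe to call from atomic context: just kick the worker. */
static void toy_dump_request(struct toy_hwcnt *h)
{
        schedule_work(&h->dump_work);
}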
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -2156,4 +2156,8 @@ static const char * const hardware_counters_mali_t88x[] = { "T88x_L2_REPLAY_FULL" }; +#include "mali_kbase_gator_hwcnt_names_tmix.h" + + + #endif diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_gator_hwcnt_names_thex.h b/drivers/gpu/arm/midgard_for_linux/mali_kbase_gator_hwcnt_names_thex.h new file mode 100644 index 000000000000..bcceef4fc9bc --- /dev/null +++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_gator_hwcnt_names_thex.h @@ -0,0 +1,291 @@ +/* + * + * (C) COPYRIGHT 2016 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +/* + * This header was autogenerated, it should not be edited. + */ + +#ifndef _KBASE_GATOR_HWCNT_NAMES_THEX_H_ +#define _KBASE_GATOR_HWCNT_NAMES_THEX_H_ + +static const char * const hardware_counters_mali_tHEx[] = { + /* Performance counters for the Job Manager */ + "", + "", + "", + "", + "THEx_MESSAGES_SENT", + "THEx_MESSAGES_RECEIVED", + "THEx_GPU_ACTIVE", + "THEx_IRQ_ACTIVE", + "THEx_JS0_JOBS", + "THEx_JS0_TASKS", + "THEx_JS0_ACTIVE", + "", + "THEx_JS0_WAIT_READ", + "THEx_JS0_WAIT_ISSUE", + "THEx_JS0_WAIT_DEPEND", + "THEx_JS0_WAIT_FINISH", + "THEx_JS1_JOBS", + "THEx_JS1_TASKS", + "THEx_JS1_ACTIVE", + "", + "THEx_JS1_WAIT_READ", + "THEx_JS1_WAIT_ISSUE", + "THEx_JS1_WAIT_DEPEND", + "THEx_JS1_WAIT_FINISH", + "THEx_JS2_JOBS", + "THEx_JS2_TASKS", + "THEx_JS2_ACTIVE", + "", + "THEx_JS2_WAIT_READ", + "THEx_JS2_WAIT_ISSUE", + "THEx_JS2_WAIT_DEPEND", + "THEx_JS2_WAIT_FINISH", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + + /* Performance counters for the Tiler */ + "", + "", + "", + "", + "THEx_TILER_ACTIVE", + "THEx_JOBS_PROCESSED", + "THEx_TRIANGLES", + "THEx_LINES", + "THEx_POINTS", + "THEx_FRONT_FACING", + "THEx_BACK_FACING", + "THEx_PRIM_VISIBLE", + "THEx_PRIM_CULLED", + "THEx_PRIM_CLIPPED", + "THEx_PRIM_SAT_CULLED", + "", + "", + "THEx_BUS_READ", + "", + "THEx_BUS_WRITE", + "THEx_LOADING_DESC", + "THEx_IDVS_POS_SHAD_REQ", + "THEx_IDVS_POS_SHAD_WAIT", + "THEx_IDVS_POS_SHAD_STALL", + "THEx_IDVS_POS_FIFO_FULL", + "THEx_PREFETCH_STALL", + "THEx_VCACHE_HIT", + "THEx_VCACHE_MISS", + "THEx_VCACHE_LINE_WAIT", + "THEx_VFETCH_POS_READ_WAIT", + "THEx_VFETCH_VERTEX_WAIT", + "THEx_VFETCH_STALL", + "THEx_PRIMASSY_STALL", + "THEx_BBOX_GEN_STALL", + "THEx_IDVS_VBU_HIT", + "THEx_IDVS_VBU_MISS", + "THEx_IDVS_VBU_LINE_DEALLOCATE", + "THEx_IDVS_VAR_SHAD_REQ", + "THEx_IDVS_VAR_SHAD_STALL", + "THEx_BINNER_STALL", + "THEx_ITER_STALL", + "THEx_COMPRESS_MISS", + "THEx_COMPRESS_STALL", + "THEx_PCACHE_HIT", + "THEx_PCACHE_MISS", + "THEx_PCACHE_MISS_STALL", + "THEx_PCACHE_EVICT_STALL", + "THEx_PMGR_PTR_WR_STALL", + "THEx_PMGR_PTR_RD_STALL", + "THEx_PMGR_CMD_WR_STALL", + "THEx_WRBUF_ACTIVE", + "THEx_WRBUF_HIT", + "THEx_WRBUF_MISS", + "THEx_WRBUF_NO_FREE_LINE_STALL", + "THEx_WRBUF_NO_AXI_ID_STALL", + "THEx_WRBUF_AXI_STALL", + 
"", + "", + "", + "THEx_UTLB_TRANS", + "THEx_UTLB_TRANS_HIT", + "THEx_UTLB_TRANS_STALL", + "THEx_UTLB_TRANS_MISS_DELAY", + "THEx_UTLB_MMU_REQ", + + /* Performance counters for the Shader Core */ + "", + "", + "", + "", + "THEx_FRAG_ACTIVE", + "THEx_FRAG_PRIMITIVES", + "THEx_FRAG_PRIM_RAST", + "THEx_FRAG_FPK_ACTIVE", + "THEx_FRAG_STARVING", + "THEx_FRAG_WARPS", + "THEx_FRAG_PARTIAL_WARPS", + "THEx_FRAG_QUADS_RAST", + "THEx_FRAG_QUADS_EZS_TEST", + "THEx_FRAG_QUADS_EZS_UPDATE", + "THEx_FRAG_QUADS_EZS_KILL", + "THEx_FRAG_LZS_TEST", + "THEx_FRAG_LZS_KILL", + "", + "THEx_FRAG_PTILES", + "THEx_FRAG_TRANS_ELIM", + "THEx_QUAD_FPK_KILLER", + "", + "THEx_COMPUTE_ACTIVE", + "THEx_COMPUTE_TASKS", + "THEx_COMPUTE_WARPS", + "THEx_COMPUTE_STARVING", + "THEx_EXEC_CORE_ACTIVE", + "THEx_EXEC_ACTIVE", + "THEx_EXEC_INSTR_COUNT", + "THEx_EXEC_INSTR_DIVERGED", + "THEx_EXEC_INSTR_STARVING", + "THEx_ARITH_INSTR_SINGLE_FMA", + "THEx_ARITH_INSTR_DOUBLE", + "THEx_ARITH_INSTR_MSG", + "THEx_ARITH_INSTR_MSG_ONLY", + "THEx_TEX_INSTR", + "THEx_TEX_INSTR_MIPMAP", + "THEx_TEX_INSTR_COMPRESSED", + "THEx_TEX_INSTR_3D", + "THEx_TEX_INSTR_TRILINEAR", + "THEx_TEX_COORD_ISSUE", + "THEx_TEX_COORD_STALL", + "THEx_TEX_STARVE_CACHE", + "THEx_TEX_STARVE_FILTER", + "THEx_LS_MEM_READ_FULL", + "THEx_LS_MEM_READ_SHORT", + "THEx_LS_MEM_WRITE_FULL", + "THEx_LS_MEM_WRITE_SHORT", + "THEx_LS_MEM_ATOMIC", + "THEx_VARY_INSTR", + "THEx_VARY_SLOT_32", + "THEx_VARY_SLOT_16", + "THEx_ATTR_INSTR", + "THEx_ARITH_INSTR_FP_MUL", + "THEx_BEATS_RD_FTC", + "THEx_BEATS_RD_FTC_EXT", + "THEx_BEATS_RD_LSC", + "THEx_BEATS_RD_LSC_EXT", + "THEx_BEATS_RD_TEX", + "THEx_BEATS_RD_TEX_EXT", + "THEx_BEATS_RD_OTHER", + "THEx_BEATS_WR_LSC", + "THEx_BEATS_WR_TIB", + "", + + /* Performance counters for the Memory System */ + "", + "", + "", + "", + "THEx_MMU_REQUESTS", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "THEx_L2_RD_MSG_IN", + "THEx_L2_RD_MSG_IN_STALL", + "THEx_L2_WR_MSG_IN", + "THEx_L2_WR_MSG_IN_STALL", + "THEx_L2_SNP_MSG_IN", + "THEx_L2_SNP_MSG_IN_STALL", + "THEx_L2_RD_MSG_OUT", + "THEx_L2_RD_MSG_OUT_STALL", + "THEx_L2_WR_MSG_OUT", + "THEx_L2_ANY_LOOKUP", + "THEx_L2_READ_LOOKUP", + "THEx_L2_WRITE_LOOKUP", + "THEx_L2_EXT_SNOOP_LOOKUP", + "THEx_L2_EXT_READ", + "THEx_L2_EXT_READ_NOSNP", + "THEx_L2_EXT_READ_UNIQUE", + "THEx_L2_EXT_READ_BEATS", + "THEx_L2_EXT_AR_STALL", + "THEx_L2_EXT_AR_CNT_Q1", + "THEx_L2_EXT_AR_CNT_Q2", + "THEx_L2_EXT_AR_CNT_Q3", + "THEx_L2_EXT_RRESP_0_127", + "THEx_L2_EXT_RRESP_128_191", + "THEx_L2_EXT_RRESP_192_255", + "THEx_L2_EXT_RRESP_256_319", + "THEx_L2_EXT_RRESP_320_383", + "THEx_L2_EXT_WRITE", + "THEx_L2_EXT_WRITE_NOSNP_FULL", + "THEx_L2_EXT_WRITE_NOSNP_PTL", + "THEx_L2_EXT_WRITE_SNP_FULL", + "THEx_L2_EXT_WRITE_SNP_PTL", + "THEx_L2_EXT_WRITE_BEATS", + "THEx_L2_EXT_W_STALL", + "THEx_L2_EXT_AW_CNT_Q1", + "THEx_L2_EXT_AW_CNT_Q2", + "THEx_L2_EXT_AW_CNT_Q3", + "THEx_L2_EXT_SNOOP", + "THEx_L2_EXT_SNOOP_STALL", + "THEx_L2_EXT_SNOOP_RESP_CLEAN", + "THEx_L2_EXT_SNOOP_RESP_DATA", + "THEx_L2_EXT_SNOOP_INTERNAL", + "", + "", + "", + "", + "", + "", + "", +}; + +#endif /* _KBASE_GATOR_HWCNT_NAMES_THEX_H_ */ diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_gpu_id.h b/drivers/gpu/arm/midgard_for_linux/mali_kbase_gpu_id.h index dc8af2d6e794..a962ecb3f9c6 100644 --- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_gpu_id.h +++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_gpu_id.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -77,8 +77,9 @@ /* Helper macro to create a complete GPU_ID (new format) */ #define GPU_ID2_MAKE(arch_major, arch_minor, arch_rev, product_major, \ version_major, version_minor, version_status) \ - (GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev) | \ - GPU_ID2_VERSION_MAKE(version_major, version_minor, \ + (GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, \ + product_major) | \ + GPU_ID2_VERSION_MAKE(version_major, version_minor, \ version_status)) /* Helper macro to create a partial GPU_ID (new format) that identifies @@ -94,6 +95,7 @@ (((product_id) << GPU_ID2_PRODUCT_MAJOR_SHIFT) & \ GPU_ID2_PRODUCT_MODEL) +#define GPU_ID2_PRODUCT_TMIX GPU_ID2_MODEL_MAKE(6, 0) /* Values for GPU_ID_VERSION_STATUS field for PRODUCT_ID GPU_ID_PI_T60X */ #define GPU_ID_S_15DEV0 0x1 diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_gpu_memory_debugfs.c b/drivers/gpu/arm/midgard_for_linux/mali_kbase_gpu_memory_debugfs.c index 82f4c36d509e..6df0a1cb1264 100644 --- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_gpu_memory_debugfs.c +++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_gpu_memory_debugfs.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -15,7 +15,7 @@ -#include +#include #ifdef CONFIG_DEBUG_FS /** Show callback for the @c gpu_memory debugfs file. diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_gpu_memory_debugfs.h b/drivers/gpu/arm/midgard_for_linux/mali_kbase_gpu_memory_debugfs.h index 3cf30a4e767e..7045693eb910 100644 --- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_gpu_memory_debugfs.h +++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_gpu_memory_debugfs.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2014, 2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -23,10 +23,9 @@ * */ -#ifndef _KBASE_GPU_MEMORY_H -#define _KBASE_GPU_MEMORY_H +#ifndef _KBASE_GPU_MEMORY_DEBUGFS_H +#define _KBASE_GPU_MEMORY_DEBUGFS_H -#include #include #include @@ -35,4 +34,4 @@ */ void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev); -#endif /*_KBASE_GPU_MEMORY_H*/ +#endif /*_KBASE_GPU_MEMORY_DEBUGFS_H*/ diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_gpuprops_types.h b/drivers/gpu/arm/midgard_for_linux/mali_kbase_gpuprops_types.h index 781375a9a97f..f42e91b6daa1 100644 --- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_gpuprops_types.h +++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_gpuprops_types.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_hw.c b/drivers/gpu/arm/midgard_for_linux/mali_kbase_hw.c index f2f93de9d2e8..de2461fb8de4 100644 --- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_hw.c +++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_hw.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -37,6 +37,16 @@ void kbase_hw_set_features_mask(struct kbase_device *kbdev) product_id = gpu_id & GPU_ID_VERSION_PRODUCT_ID; product_id >>= GPU_ID_VERSION_PRODUCT_ID_SHIFT; + if (GPU_ID_IS_NEW_FORMAT(product_id)) { + switch (gpu_id & GPU_ID2_PRODUCT_MODEL) { + case GPU_ID2_PRODUCT_TMIX: + features = base_hw_features_tMIx; + break; + default: + features = base_hw_features_generic; + break; + } + } else { switch (product_id) { case GPU_ID_PI_TFRX: /* FALLTHROUGH */ @@ -65,7 +75,7 @@ void kbase_hw_set_features_mask(struct kbase_device *kbdev) features = base_hw_features_generic; break; } - + } for (; *features != BASE_HW_FEATURE_END; features++) set_bit(*features, &kbdev->hw_features_mask[0]); @@ -84,6 +94,25 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev) impl_tech = kbdev->gpu_props.props.thread_props.impl_tech; if (impl_tech != IMPLEMENTATION_MODEL) { + if (GPU_ID_IS_NEW_FORMAT(product_id)) { + switch (gpu_id) { + case GPU_ID2_MAKE(6, 0, 10, 0, 0, 0, 1): + issues = base_hw_issues_tMIx_r0p0_05dev0; + break; + case GPU_ID2_MAKE(6, 0, 10, 0, 0, 0, 2): + issues = base_hw_issues_tMIx_r0p0; + break; + default: + if ((gpu_id & GPU_ID2_PRODUCT_MODEL) == + GPU_ID2_PRODUCT_TMIX) { + issues = base_hw_issues_tMIx_r0p0; + } else { + dev_err(kbdev->dev, + "Unknown GPU ID %x", gpu_id); + return -EINVAL; + } + } + } else { switch (gpu_id) { case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 0, GPU_ID_S_15DEV0): issues = base_hw_issues_t60x_r0p0_15dev0; @@ -174,11 +203,24 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev) issues = base_hw_issues_t82x_r1p0; break; default: - dev_err(kbdev->dev, "Unknown GPU ID %x", gpu_id); + dev_err(kbdev->dev, + "Unknown GPU ID %x", gpu_id); return -EINVAL; } + } } else { /* Software model */ + if (GPU_ID_IS_NEW_FORMAT(product_id)) { + switch (gpu_id & GPU_ID2_PRODUCT_MODEL) { + case GPU_ID2_PRODUCT_TMIX: + issues = base_hw_issues_model_tMIx; + break; + default: + dev_err(kbdev->dev, + "Unknown GPU ID %x", gpu_id); + return -EINVAL; + } + } else { switch (product_id) { case GPU_ID_PI_T60X: issues = base_hw_issues_model_t60x; @@ -209,6 +251,7 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev) gpu_id); return -EINVAL; } + } } dev_info(kbdev->dev, "GPU identified as 0x%04x r%dp%d status %d", (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, (gpu_id & GPU_ID_VERSION_MAJOR) >> GPU_ID_VERSION_MAJOR_SHIFT, (gpu_id & GPU_ID_VERSION_MINOR) >> GPU_ID_VERSION_MINOR_SHIFT, (gpu_id & GPU_ID_VERSION_STATUS) >> GPU_ID_VERSION_STATUS_SHIFT); diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_hwaccess_jm.h b/drivers/gpu/arm/midgard_for_linux/mali_kbase_hwaccess_jm.h index 2efa293088a1..abe66078029f 100644 --- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_hwaccess_jm.h +++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_hwaccess_jm.h @@ 
-1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -71,9 +71,7 @@ void kbase_backend_release_free_address_space(struct kbase_device *kbdev, * * kbase_gpu_next_job() will pull atoms from the active context. * - * Return: true if successful, false if ASID not assigned. If kctx->as_pending - * is true then ASID assignment will complete at some point in the - * future and will re-start scheduling, otherwise no ASIDs are available + * Return: true if successful, false if ASID not assigned. */ bool kbase_backend_use_ctx(struct kbase_device *kbdev, struct kbase_context *kctx, @@ -213,6 +211,15 @@ int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js); */ void kbase_backend_ctx_count_changed(struct kbase_device *kbdev); +/** + * kbase_backend_timeouts_changed() - Job Scheduler timeouts have changed. + * @kbdev: Device pointer + * + * Perform any required backend-specific actions (eg updating timeouts of + * currently running atoms). + */ +void kbase_backend_timeouts_changed(struct kbase_device *kbdev); + /** * kbase_backend_slot_free() - Return the number of jobs that can be currently * submitted to slot @js. @@ -319,6 +326,28 @@ bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev); * signalled to know when the reset has completed. */ void kbase_reset_gpu_locked(struct kbase_device *kbdev); + +/** + * kbase_reset_gpu_silent - Reset the GPU silently + * @kbdev: Device pointer + * + * Reset the GPU without trying to cancel jobs and don't emit messages into + * the kernel log while doing the reset. + * + * This function should be used in cases where we are doing a controlled reset + * of the GPU as part of normal processing (e.g. exiting protected mode) where + * the driver will have ensured the scheduler has been idled and all other + * users of the GPU (e.g. instrumentation) have been suspended. + */ +void kbase_reset_gpu_silent(struct kbase_device *kbdev); + +/** + * kbase_reset_gpu_active - Reports if the GPU is being reset + * @kbdev: Device pointer + * + * Return: True if the GPU is in the process of being reset. + */ +bool kbase_reset_gpu_active(struct kbase_device *kbdev); #endif /** diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_ipa.c b/drivers/gpu/arm/midgard_for_linux/mali_kbase_ipa.c index 6ac97eb7937c..c579d0a589f7 100644 --- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_ipa.c +++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_ipa.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved. 
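The two new declarations above, kbase_reset_gpu_silent() and kbase_reset_gpu_active(), only define the contract; the call sites live elsewhere in the backend. Purely as a hedged illustration of how a caller might use them, a sketch follows. It is not code from this patch, it assumes the driver headers resolve these declarations, and the example_* names are invented.

#include "mali_kbase.h"

/* Hypothetical caller, not from this patch: a controlled reset while leaving
 * protected mode. The prerequisites spelled out in the comment above
 * (scheduler idled, instrumentation suspended) are assumed to already hold. */
static void example_controlled_reset(struct kbase_device *kbdev)
{
	kbase_reset_gpu_silent(kbdev);
}

/* Hypothetical helper: hold off new submissions while a reset is in flight. */
static bool example_submission_allowed(struct kbase_device *kbdev)
{
	return !kbase_reset_gpu_active(kbdev);
}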
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -135,7 +135,7 @@ static void init_ipa_groups(struct kbase_ipa_context *ctx) memcpy(ctx->groups, ipa_groups_def, sizeof(ctx->groups)); } -#if defined(CONFIG_OF) && (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 6, 0)) +#if defined(CONFIG_OF) && (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)) static int update_ipa_groups_from_dt(struct kbase_ipa_context *ctx) { struct kbase_device *kbdev = ctx->kbdev; @@ -145,7 +145,7 @@ static int update_ipa_groups_from_dt(struct kbase_ipa_context *ctx) size_t i; int err; - np = of_find_node_by_name(kbdev->dev->of_node, "ipa-groups"); + np = of_get_child_by_name(kbdev->dev->of_node, "ipa-groups"); if (!np) return 0; diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_jd.c b/drivers/gpu/arm/midgard_for_linux/mali_kbase_jd.c index 1f9fbd9ee6d0..3e0a5892cc7a 100644 --- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_jd.c +++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_jd.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -25,20 +25,15 @@ #endif #include #include -#ifdef CONFIG_UMP -#include -#endif /* CONFIG_UMP */ #include #include #include -#include #include #include - -#if defined(CONFIG_MALI_MIPE_ENABLED) #include -#endif + +#include "mali_kbase_dma_fence.h" #define beenthere(kctx, f, a...) dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a) @@ -50,7 +45,7 @@ /* Return whether katom will run on the GPU or not. Currently only soft jobs and * dependency-only atoms do not run on the GPU */ #define IS_GPU_ATOM(katom) (!((katom->core_req & BASE_JD_REQ_SOFT_JOB) || \ - ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) == \ + ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) == \ BASE_JD_REQ_DEP))) /* * This is the kernel side of the API. Only entry points are: @@ -85,22 +80,23 @@ static int jd_run_atom(struct kbase_jd_atom *katom) KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED); - if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP) { + if ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP) { /* Dependency only atom */ katom->status = KBASE_JD_ATOM_STATE_COMPLETED; return 0; } else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) { /* Soft-job */ - if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) + if (katom->will_fail_event_code) { + katom->status = KBASE_JD_ATOM_STATE_COMPLETED; + return 0; + } + if ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_REPLAY) { if (!kbase_replay_process(katom)) katom->status = KBASE_JD_ATOM_STATE_COMPLETED; } else if (kbase_process_soft_job(katom) == 0) { kbase_finish_soft_job(katom); katom->status = KBASE_JD_ATOM_STATE_COMPLETED; - } else { - /* The job has not completed */ - list_add_tail(&katom->dep_item[0], &kctx->waiting_soft_jobs); } return 0; } @@ -110,6 +106,39 @@ static int jd_run_atom(struct kbase_jd_atom *katom) return kbasep_js_add_job(kctx, katom); } +#if defined(CONFIG_KDS) || defined(CONFIG_MALI_DMA_FENCE) +void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom) +{ + struct kbase_device *kbdev; + + KBASE_DEBUG_ASSERT(katom); + kbdev = katom->kctx->kbdev; + KBASE_DEBUG_ASSERT(kbdev); + + /* Check whether the atom's other dependencies were already met. 
If + * katom is a GPU atom then the job scheduler may be able to represent + * the dependencies, hence we may attempt to submit it before they are + * met. Other atoms must have had both dependencies resolved. + */ + if (IS_GPU_ATOM(katom) || + (!kbase_jd_katom_dep_atom(&katom->dep[0]) && + !kbase_jd_katom_dep_atom(&katom->dep[1]))) { + /* katom dep complete, attempt to run it */ + bool resched = false; + + resched = jd_run_atom(katom); + + if (katom->status == KBASE_JD_ATOM_STATE_COMPLETED) { + /* The atom has already finished */ + resched |= jd_done_nolock(katom, NULL); + } + + if (resched) + kbase_js_sched_all(kbdev); + } +} +#endif + #ifdef CONFIG_KDS /* Add the katom to the kds waiting list. @@ -142,44 +171,20 @@ static void kds_dep_clear(void *callback_parameter, void *callback_extra_paramet { struct kbase_jd_atom *katom; struct kbase_jd_context *ctx; - struct kbase_device *kbdev; katom = (struct kbase_jd_atom *)callback_parameter; KBASE_DEBUG_ASSERT(katom); + ctx = &katom->kctx->jctx; - kbdev = katom->kctx->kbdev; - KBASE_DEBUG_ASSERT(kbdev); + /* If KDS resource has already been satisfied (e.g. due to zapping) + * do nothing. + */ mutex_lock(&ctx->lock); - - /* KDS resource has already been satisfied (e.g. due to zapping) */ - if (katom->kds_dep_satisfied) - goto out; - - /* This atom's KDS dependency has now been met */ - katom->kds_dep_satisfied = true; - - /* Check whether the atom's other dependencies were already met. If - * katom is a GPU atom then the job scheduler may be able to represent - * the dependencies, hence we may attempt to submit it before they are - * met. Other atoms must have had both dependencies resolved */ - if (IS_GPU_ATOM(katom) || - (!kbase_jd_katom_dep_atom(&katom->dep[0]) && - !kbase_jd_katom_dep_atom(&katom->dep[1]))) { - /* katom dep complete, attempt to run it */ - bool resched = false; - - resched = jd_run_atom(katom); - - if (katom->status == KBASE_JD_ATOM_STATE_COMPLETED) { - /* The atom has already finished */ - resched |= jd_done_nolock(katom, NULL); - } - - if (resched) - kbase_js_sched_all(kbdev); + if (!katom->kds_dep_satisfied) { + katom->kds_dep_satisfied = true; + kbase_jd_dep_clear_locked(katom); } - out: mutex_unlock(&ctx->lock); } @@ -199,208 +204,6 @@ static void kbase_cancel_kds_wait_job(struct kbase_jd_atom *katom) } #endif /* CONFIG_KDS */ -static int kbase_jd_user_buf_map(struct kbase_context *kctx, - struct kbase_va_region *reg) -{ - long pinned_pages; - struct kbase_mem_phy_alloc *alloc; - struct page **pages; - phys_addr_t *pa; - long i; - int err = -ENOMEM; - unsigned long address; - struct task_struct *owner; - struct device *dev; - unsigned long offset; - unsigned long local_size; - - alloc = reg->gpu_alloc; - pa = kbase_get_gpu_phy_pages(reg); - address = alloc->imported.user_buf.address; - owner = alloc->imported.user_buf.owner; - - KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF); - - pages = alloc->imported.user_buf.pages; - - down_read(&owner->mm->mmap_sem); - pinned_pages = get_user_pages(owner, owner->mm, - address, - alloc->imported.user_buf.nr_pages, - reg->flags & KBASE_REG_GPU_WR, - 0, pages, NULL); - up_read(&owner->mm->mmap_sem); - - if (pinned_pages <= 0) - return pinned_pages; - - if (pinned_pages != alloc->imported.user_buf.nr_pages) { - for (i = 0; i < pinned_pages; i++) - put_page(pages[i]); - return -ENOMEM; - } - - dev = kctx->kbdev->dev; - offset = address & ~PAGE_MASK; - local_size = alloc->imported.user_buf.size; - - for (i = 0; i < pinned_pages; i++) { - dma_addr_t dma_addr; - 
unsigned long min; - - min = MIN(PAGE_SIZE - offset, local_size); - dma_addr = dma_map_page(dev, pages[i], - offset, min, - DMA_BIDIRECTIONAL); - if (dma_mapping_error(dev, dma_addr)) - goto unwind; - - alloc->imported.user_buf.dma_addrs[i] = dma_addr; - pa[i] = page_to_phys(pages[i]); - - local_size -= min; - offset = 0; - } - - alloc->nents = pinned_pages; - - err = kbase_mmu_insert_pages(kctx, reg->start_pfn, pa, - kbase_reg_current_backed_size(reg), - reg->flags); - if (err == 0) - return 0; - - alloc->nents = 0; - /* fall down */ -unwind: - while (i--) { - dma_unmap_page(kctx->kbdev->dev, - alloc->imported.user_buf.dma_addrs[i], - PAGE_SIZE, DMA_BIDIRECTIONAL); - put_page(pages[i]); - pages[i] = NULL; - } - - return err; -} - -static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, - struct kbase_mem_phy_alloc *alloc, bool writeable) -{ - long i; - struct page **pages; - unsigned long size = alloc->imported.user_buf.size; - - KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF); - pages = alloc->imported.user_buf.pages; - for (i = 0; i < alloc->imported.user_buf.nr_pages; i++) { - unsigned long local_size; - dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i]; - - local_size = MIN(size, PAGE_SIZE - (dma_addr & ~PAGE_MASK)); - dma_unmap_page(kctx->kbdev->dev, dma_addr, local_size, - DMA_BIDIRECTIONAL); - if (writeable) - set_page_dirty_lock(pages[i]); - put_page(pages[i]); - pages[i] = NULL; - - size -= local_size; - } - alloc->nents = 0; -} - -/* not to use sg_dma_len. */ -#define MALI_SG_DMA_LEN(sg) ((sg)->length) - -#ifdef CONFIG_DMA_SHARED_BUFFER -static int kbase_jd_umm_map(struct kbase_context *kctx, struct kbase_va_region *reg) -{ - struct sg_table *sgt; /* scatterlist_table */ - struct scatterlist *s; - int i; - phys_addr_t *pa; - int err; - size_t count = 0; - struct kbase_mem_phy_alloc *alloc; - - alloc = reg->gpu_alloc; - - KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM); - KBASE_DEBUG_ASSERT(NULL == alloc->imported.umm.sgt); - sgt = dma_buf_map_attachment(alloc->imported.umm.dma_attachment, DMA_BIDIRECTIONAL); - - if (IS_ERR_OR_NULL(sgt)) - return -EINVAL; - - /* save for later */ - alloc->imported.umm.sgt = sgt; - - pa = kbase_get_gpu_phy_pages(reg); - KBASE_DEBUG_ASSERT(pa); - - for_each_sg(sgt->sgl, s, sgt->nents, i) { - int j; - /* size_t pages = PFN_UP(sg_dma_len(s)); */ - size_t pages = PFN_UP(MALI_SG_DMA_LEN(s)); - - WARN_ONCE(MALI_SG_DMA_LEN(s) & (PAGE_SIZE-1), - "MALI_SG_DMA_LEN(s)=%u is not a multiple of PAGE_SIZE\n", - MALI_SG_DMA_LEN(s)); - /* - WARN_ONCE(sg_dma_len(s) & (PAGE_SIZE-1), - "sg_dma_len(s)=%u is not a multiple of PAGE_SIZE\n", - sg_dma_len(s)); - */ - - WARN_ONCE(sg_dma_address(s) & (PAGE_SIZE-1), - "sg_dma_address(s)=%llx is not aligned to PAGE_SIZE\n", - (unsigned long long) sg_dma_address(s)); - - for (j = 0; (j < pages) && (count < reg->nr_pages); j++, count++) - *pa++ = sg_dma_address(s) + (j << PAGE_SHIFT); - - WARN_ONCE(j < pages, - "sg list from dma_buf_map_attachment > dma_buf->size=%zu\n", - alloc->imported.umm.dma_buf->size); - } - - if (WARN_ONCE(count < reg->nr_pages, - "sg list from dma_buf_map_attachment < dma_buf->size=%zu, count : %zu, reg->nr_pages : %zu. 
\n", - alloc->imported.umm.dma_buf->size, - count, - reg->nr_pages)) { - err = -EINVAL; - goto out; - } - - /* Update nents as we now have pages to map */ - alloc->nents = count; - - err = kbase_mmu_insert_pages(kctx, reg->start_pfn, kbase_get_gpu_phy_pages(reg), kbase_reg_current_backed_size(reg), reg->flags | KBASE_REG_GPU_WR | KBASE_REG_GPU_RD); - -out: - if (err) { - dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment, alloc->imported.umm.sgt, DMA_BIDIRECTIONAL); - alloc->imported.umm.sgt = NULL; - } - - return err; -} - -static void kbase_jd_umm_unmap(struct kbase_context *kctx, struct kbase_mem_phy_alloc *alloc) -{ - KBASE_DEBUG_ASSERT(kctx); - KBASE_DEBUG_ASSERT(alloc); - KBASE_DEBUG_ASSERT(alloc->imported.umm.dma_attachment); - KBASE_DEBUG_ASSERT(alloc->imported.umm.sgt); - dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment, - alloc->imported.umm.sgt, DMA_BIDIRECTIONAL); - alloc->imported.umm.sgt = NULL; - alloc->nents = 0; -} -#endif /* CONFIG_DMA_SHARED_BUFFER */ - void kbase_jd_free_external_resources(struct kbase_jd_atom *katom) { #ifdef CONFIG_KDS @@ -420,6 +223,16 @@ void kbase_jd_free_external_resources(struct kbase_jd_atom *katom) kds_resource_set_release_sync(&katom->kds_rset); } #endif /* CONFIG_KDS */ + +#ifdef CONFIG_MALI_DMA_FENCE + /* Flush dma-fence workqueue to ensure that any callbacks that may have + * been queued are done before continuing. + * Any successfully completed atom would have had all it's callbacks + * completed before the atom was run, so only flush for failed atoms. + */ + if (katom->event_code != BASE_JD_EVENT_DONE) + flush_workqueue(katom->kctx->dma_fence.wq); +#endif /* CONFIG_MALI_DMA_FENCE */ } static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom) @@ -433,6 +246,10 @@ static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom) katom->kds_dep_satisfied = true; #endif /* CONFIG_KDS */ +#ifdef CONFIG_MALI_DMA_FENCE + kbase_dma_fence_signal(katom); +#endif /* CONFIG_MALI_DMA_FENCE */ + kbase_gpu_vm_lock(katom->kctx); /* only roll back if extres is non-NULL */ if (katom->extres) { @@ -441,56 +258,12 @@ static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom) res_no = katom->nr_extres; while (res_no-- > 0) { struct kbase_mem_phy_alloc *alloc = katom->extres[res_no].alloc; + struct kbase_va_region *reg; - switch (alloc->type) { -#ifdef CONFIG_DMA_SHARED_BUFFER - case KBASE_MEM_TYPE_IMPORTED_UMM: { - alloc->imported.umm.current_mapping_usage_count--; - - if (0 == alloc->imported.umm.current_mapping_usage_count) { - struct kbase_va_region *reg; - - reg = kbase_region_tracker_find_region_base_address( - katom->kctx, - katom->extres[res_no].gpu_address); - - if (reg && reg->gpu_alloc == alloc) - kbase_mmu_teardown_pages( - katom->kctx, - reg->start_pfn, - kbase_reg_current_backed_size(reg)); - - kbase_jd_umm_unmap(katom->kctx, alloc); - } - } - break; -#endif /* CONFIG_DMA_SHARED_BUFFER */ - case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { - alloc->imported.user_buf.current_mapping_usage_count--; - - if (0 == alloc->imported.user_buf.current_mapping_usage_count) { - struct kbase_va_region *reg; - - reg = kbase_region_tracker_find_region_base_address( - katom->kctx, - katom->extres[res_no].gpu_address); - - if (reg && reg->gpu_alloc == alloc) - kbase_mmu_teardown_pages( - katom->kctx, - reg->start_pfn, - kbase_reg_current_backed_size(reg)); - - kbase_jd_user_buf_unmap(katom->kctx, - alloc, - reg->flags & KBASE_REG_GPU_WR); - } - } - break; - default: - break; - } - 
kbase_mem_phy_alloc_put(katom->extres[res_no].alloc); + reg = kbase_region_tracker_find_region_base_address( + katom->kctx, + katom->extres[res_no].gpu_address); + kbase_unmap_external_resource(katom->kctx, reg, alloc); } kfree(katom->extres); katom->extres = NULL; @@ -498,24 +271,6 @@ static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom) kbase_gpu_vm_unlock(katom->kctx); } -#if (defined(CONFIG_KDS) && defined(CONFIG_UMP)) || defined(CONFIG_DMA_SHARED_BUFFER_USES_KDS) -static void add_kds_resource(struct kds_resource *kds_res, struct kds_resource **kds_resources, u32 *kds_res_count, unsigned long *kds_access_bitmap, bool exclusive) -{ - u32 i; - - for (i = 0; i < *kds_res_count; i++) { - /* Duplicate resource, ignore */ - if (kds_resources[i] == kds_res) - return; - } - - kds_resources[*kds_res_count] = kds_res; - if (exclusive) - set_bit(*kds_res_count, kds_access_bitmap); - (*kds_res_count)++; -} -#endif - /* * Set up external resources needed by this job. * @@ -531,6 +286,11 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st struct kds_resource **kds_resources = NULL; unsigned long *kds_access_bitmap = NULL; #endif /* CONFIG_KDS */ +#ifdef CONFIG_MALI_DMA_FENCE + struct kbase_dma_fence_resv_info info = { + .dma_fence_resv_count = 0, + }; +#endif struct base_external_resource *input_extres; KBASE_DEBUG_ASSERT(katom); @@ -566,27 +326,53 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st KBASE_DEBUG_ASSERT(0 != katom->nr_extres); kds_resources = kmalloc_array(katom->nr_extres, sizeof(struct kds_resource *), GFP_KERNEL); - if (NULL == kds_resources) { + if (!kds_resources) { err_ret_val = -ENOMEM; goto early_err_out; } KBASE_DEBUG_ASSERT(0 != katom->nr_extres); - kds_access_bitmap = kzalloc(sizeof(unsigned long) * ((katom->nr_extres + BITS_PER_LONG - 1) / BITS_PER_LONG), GFP_KERNEL); - - if (NULL == kds_access_bitmap) { + kds_access_bitmap = kcalloc(BITS_TO_LONGS(katom->nr_extres), + sizeof(unsigned long), + GFP_KERNEL); + if (!kds_access_bitmap) { err_ret_val = -ENOMEM; goto early_err_out; } #endif /* CONFIG_KDS */ +#ifdef CONFIG_MALI_DMA_FENCE + info.resv_objs = kmalloc_array(katom->nr_extres, + sizeof(struct reservation_object *), + GFP_KERNEL); + if (!info.resv_objs) { + err_ret_val = -ENOMEM; + goto early_err_out; + } + + info.dma_fence_excl_bitmap = kcalloc(BITS_TO_LONGS(katom->nr_extres), + sizeof(unsigned long), + GFP_KERNEL); + if (!info.dma_fence_excl_bitmap) { + err_ret_val = -ENOMEM; + goto early_err_out; + } +#endif /* CONFIG_MALI_DMA_FENCE */ + + /* Take the processes mmap lock */ + down_read(¤t->mm->mmap_sem); + /* need to keep the GPU VM locked while we set up UMM buffers */ kbase_gpu_vm_lock(katom->kctx); for (res_no = 0; res_no < katom->nr_extres; res_no++) { struct base_external_resource *res; struct kbase_va_region *reg; + struct kbase_mem_phy_alloc *alloc; + bool exclusive; res = &input_extres[res_no]; + exclusive = (res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE) + ? 
true : false; reg = kbase_region_tracker_find_region_enclosing_address( katom->kctx, res->ext_resource & ~BASE_EXT_RES_ACCESS_EXCLUSIVE); @@ -598,80 +384,32 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st if (!(katom->core_req & BASE_JD_REQ_SOFT_JOB) && (reg->flags & KBASE_REG_SECURE)) { - katom->atom_flags |= KBASE_KATOM_FLAG_SECURE; - if ((katom->core_req & BASE_JD_REQ_FS) == 0) { - WARN_RATELIMIT(1, "Secure non-fragment jobs not supported"); - goto failed_loop; - } + katom->atom_flags |= KBASE_KATOM_FLAG_PROTECTED; } - /* decide what needs to happen for this resource */ - switch (reg->gpu_alloc->type) { - case BASE_MEM_IMPORT_TYPE_USER_BUFFER: { - reg->gpu_alloc->imported.user_buf.current_mapping_usage_count++; - if (1 == reg->gpu_alloc->imported.user_buf.current_mapping_usage_count) { - /* use a local variable to not pollute - * err_ret_val with a potential success - * value as some other gotos depend on - * the default error code stored in - * err_ret_val */ - int tmp; - - tmp = kbase_jd_user_buf_map(katom->kctx, - reg); - if (0 != tmp) { - /* failed to map this buffer, - * roll back */ - err_ret_val = tmp; - reg->gpu_alloc->imported.user_buf.current_mapping_usage_count--; - goto failed_loop; - } - } - } - break; - case BASE_MEM_IMPORT_TYPE_UMP: { -#if defined(CONFIG_KDS) && defined(CONFIG_UMP) - struct kds_resource *kds_res; - - kds_res = ump_dd_kds_resource_get(reg->gpu_alloc->imported.ump_handle); - if (kds_res) - add_kds_resource(kds_res, kds_resources, &kds_res_count, - kds_access_bitmap, - res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE); -#endif /*defined(CONFIG_KDS) && defined(CONFIG_UMP) */ - break; - } -#ifdef CONFIG_DMA_SHARED_BUFFER - case BASE_MEM_IMPORT_TYPE_UMM: { -#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS - struct kds_resource *kds_res; - - kds_res = get_dma_buf_kds_resource(reg->gpu_alloc->imported.umm.dma_buf); - if (kds_res) - add_kds_resource(kds_res, kds_resources, &kds_res_count, kds_access_bitmap, res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE); -#endif - reg->gpu_alloc->imported.umm.current_mapping_usage_count++; - if (1 == reg->gpu_alloc->imported.umm.current_mapping_usage_count) { - /* use a local variable to not pollute err_ret_val - * with a potential success value as some other gotos depend - * on the default error code stored in err_ret_val */ - int tmp; - - tmp = kbase_jd_umm_map(katom->kctx, reg); - if (tmp) { - /* failed to map this buffer, roll back */ - err_ret_val = tmp; - reg->gpu_alloc->imported.umm.current_mapping_usage_count--; - goto failed_loop; - } - } - break; - } + alloc = kbase_map_external_resource(katom->kctx, reg, + current->mm +#ifdef CONFIG_KDS + , &kds_res_count, kds_resources, + kds_access_bitmap, exclusive #endif - default: + ); + if (!alloc) { + err_ret_val = -EINVAL; goto failed_loop; } +#ifdef CONFIG_MALI_DMA_FENCE + if (reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) { + struct reservation_object *resv; + + resv = reg->gpu_alloc->imported.umm.dma_buf->resv; + if (resv) + kbase_dma_fence_add_reservation(resv, &info, + exclusive); + } +#endif /* CONFIG_MALI_DMA_FENCE */ + /* finish with updating out array with the data we found */ /* NOTE: It is important that this is the last thing we do (or * at least not before the first write) as we overwrite elements @@ -679,12 +417,15 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st * until the last read for an element. 
* */ katom->extres[res_no].gpu_address = reg->start_pfn << PAGE_SHIFT; /* save the start_pfn (as an address, not pfn) to use fast lookup later */ - katom->extres[res_no].alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); + katom->extres[res_no].alloc = alloc; } /* successfully parsed the extres array */ /* drop the vm lock before we call into kds */ kbase_gpu_vm_unlock(katom->kctx); + /* Release the processes mmap lock */ + up_read(¤t->mm->mmap_sem); + #ifdef CONFIG_KDS if (kds_res_count) { int wait_failed; @@ -709,46 +450,63 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st kfree(kds_access_bitmap); #endif /* CONFIG_KDS */ +#ifdef CONFIG_MALI_DMA_FENCE + if (info.dma_fence_resv_count) { + int ret; + + ret = kbase_dma_fence_wait(katom, &info); + if (ret < 0) + goto failed_dma_fence_setup; + } + + kfree(info.resv_objs); + kfree(info.dma_fence_excl_bitmap); +#endif /* CONFIG_MALI_DMA_FENCE */ + /* all done OK */ return 0; /* error handling section */ +#ifdef CONFIG_MALI_DMA_FENCE +failed_dma_fence_setup: #ifdef CONFIG_KDS - failed_kds_setup: + /* If we are here, dma_fence setup failed but KDS didn't. + * Revert KDS setup if any. + */ + if (kds_res_count) { + mutex_unlock(&katom->kctx->jctx.lock); + kds_resource_set_release_sync(&katom->kds_rset); + mutex_lock(&katom->kctx->jctx.lock); + + kbase_jd_kds_waiters_remove(katom); + katom->kds_dep_satisfied = true; + } +#endif /* CONFIG_KDS */ +#endif /* CONFIG_MALI_DMA_FENCE */ +#ifdef CONFIG_KDS +failed_kds_setup: +#endif +#if defined(CONFIG_KDS) || defined(CONFIG_MALI_DMA_FENCE) + /* Lock the processes mmap lock */ + down_read(¤t->mm->mmap_sem); /* lock before we unmap */ kbase_gpu_vm_lock(katom->kctx); -#endif /* CONFIG_KDS */ +#endif failed_loop: /* undo the loop work */ while (res_no-- > 0) { struct kbase_mem_phy_alloc *alloc = katom->extres[res_no].alloc; -#ifdef CONFIG_DMA_SHARED_BUFFER - if (alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) { - alloc->imported.umm.current_mapping_usage_count--; - - if (0 == alloc->imported.umm.current_mapping_usage_count) { - struct kbase_va_region *reg; - - reg = kbase_region_tracker_find_region_base_address( - katom->kctx, - katom->extres[res_no].gpu_address); - if (reg && reg->gpu_alloc == alloc) - kbase_mmu_teardown_pages(katom->kctx, - reg->start_pfn, - kbase_reg_current_backed_size(reg)); - - kbase_jd_umm_unmap(katom->kctx, alloc); - } - } -#endif /* CONFIG_DMA_SHARED_BUFFER */ - kbase_mem_phy_alloc_put(alloc); + kbase_unmap_external_resource(katom->kctx, NULL, alloc); } kbase_gpu_vm_unlock(katom->kctx); + /* Release the processes mmap lock */ + up_read(¤t->mm->mmap_sem); + early_err_out: kfree(katom->extres); katom->extres = NULL; @@ -756,35 +514,33 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st kfree(kds_resources); kfree(kds_access_bitmap); #endif /* CONFIG_KDS */ +#ifdef CONFIG_MALI_DMA_FENCE + kfree(info.resv_objs); + kfree(info.dma_fence_excl_bitmap); +#endif return err_ret_val; } static inline void jd_resolve_dep(struct list_head *out_list, struct kbase_jd_atom *katom, - u8 d, - bool ctx_is_dying) + u8 d, bool ctx_is_dying) { u8 other_d = !d; while (!list_empty(&katom->dep_head[d])) { struct kbase_jd_atom *dep_atom; + struct kbase_jd_atom *other_dep_atom; u8 dep_type; dep_atom = list_entry(katom->dep_head[d].next, struct kbase_jd_atom, dep_item[d]); - list_del(katom->dep_head[d].next); dep_type = kbase_jd_katom_dep_type(&dep_atom->dep[d]); kbase_jd_katom_dep_clear(&dep_atom->dep[d]); if (katom->event_code != BASE_JD_EVENT_DONE && - 
(dep_type != BASE_JD_DEP_TYPE_ORDER || ctx_is_dying)) { - /* Atom failed, so remove the other dependencies and immediately fail the atom */ - if (kbase_jd_katom_dep_atom(&dep_atom->dep[other_d])) { - list_del(&dep_atom->dep_item[other_d]); - kbase_jd_katom_dep_clear(&dep_atom->dep[other_d]); - } + (dep_type != BASE_JD_DEP_TYPE_ORDER)) { #ifdef CONFIG_KDS if (!dep_atom->kds_dep_satisfied) { /* Just set kds_dep_satisfied to true. If the callback happens after this then it will early out and @@ -794,17 +550,67 @@ static inline void jd_resolve_dep(struct list_head *out_list, } #endif +#ifdef CONFIG_MALI_DMA_FENCE + kbase_dma_fence_cancel_callbacks(dep_atom); +#endif + dep_atom->event_code = katom->event_code; KBASE_DEBUG_ASSERT(dep_atom->status != KBASE_JD_ATOM_STATE_UNUSED); - dep_atom->status = KBASE_JD_ATOM_STATE_COMPLETED; - list_add_tail(&dep_atom->dep_item[0], out_list); - } else if (!kbase_jd_katom_dep_atom(&dep_atom->dep[other_d])) { + if ((dep_atom->core_req & BASE_JD_REQ_SOFT_REPLAY) + != BASE_JD_REQ_SOFT_REPLAY) { + dep_atom->will_fail_event_code = + dep_atom->event_code; + } else { + dep_atom->status = + KBASE_JD_ATOM_STATE_COMPLETED; + } + } + other_dep_atom = (struct kbase_jd_atom *) + kbase_jd_katom_dep_atom(&dep_atom->dep[other_d]); + + if (!dep_atom->in_jd_list && (!other_dep_atom || + (IS_GPU_ATOM(dep_atom) && !ctx_is_dying && + !dep_atom->will_fail_event_code && + !other_dep_atom->will_fail_event_code))) { + bool dep_satisfied = true; +#ifdef CONFIG_MALI_DMA_FENCE + int dep_count; + + dep_count = atomic_read(&dep_atom->dma_fence.dep_count); + if (likely(dep_count == -1)) { + dep_satisfied = true; + } else if (dep_count == 0) { + /* + * All fences for this atom has signaled, but + * the worker that will queue the atom has not + * yet run. + * + * Mark the atom as handled by setting + * dep_count to -1 so that the worker doesn't + * queue the atom again. + */ + atomic_set(&dep_atom->dma_fence.dep_count, -1); + /* + * Remove the atom from the list of dma-fence + * waiting atoms. 
+ */ + kbase_dma_fence_waiters_remove(dep_atom); + dep_satisfied = true; + } else { + dep_satisfied = false; + } +#endif /* CONFIG_MALI_DMA_FENCE */ + #ifdef CONFIG_KDS - if (dep_atom->kds_dep_satisfied) + dep_satisfied = dep_satisfied && dep_atom->kds_dep_satisfied; #endif - list_add_tail(&dep_atom->dep_item[0], out_list); + + if (dep_satisfied) { + dep_atom->in_jd_list = true; + list_add_tail(&dep_atom->jd_item, out_list); + } } } } @@ -847,7 +653,7 @@ static void jd_check_force_failure(struct kbase_jd_atom *katom) kbase_jd_katom_dep_atom(&kctx->jctx.atoms[i].dep[1]) == katom) { struct kbase_jd_atom *dep_atom = &kctx->jctx.atoms[i]; - if ((dep_atom->core_req & BASEP_JD_REQ_ATOM_TYPE) == + if ((dep_atom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_REPLAY && (dep_atom->core_req & kbdev->force_replay_core_req) == kbdev->force_replay_core_req) { @@ -859,6 +665,36 @@ static void jd_check_force_failure(struct kbase_jd_atom *katom) } #endif +static void jd_try_submitting_deps(struct list_head *out_list, + struct kbase_jd_atom *node) +{ + int i; + + for (i = 0; i < 2; i++) { + struct list_head *pos; + + list_for_each(pos, &node->dep_head[i]) { + struct kbase_jd_atom *dep_atom = list_entry(pos, + struct kbase_jd_atom, dep_item[i]); + + if (IS_GPU_ATOM(dep_atom) && !dep_atom->in_jd_list) { + /*Check if atom deps look sane*/ + bool dep0_valid = !dep_atom->dep[0].atom || + (dep_atom->dep[0].atom->status + >= KBASE_JD_ATOM_STATE_IN_JS); + bool dep1_valid = !dep_atom->dep[1].atom || + (dep_atom->dep[1].atom->status + >= KBASE_JD_ATOM_STATE_IN_JS); + + if (dep0_valid && dep1_valid) { + dep_atom->in_jd_list = true; + list_add(&dep_atom->jd_item, out_list); + } + } + } + } +} + /* * Perform the necessary handling of an atom that has finished running * on the GPU. 
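The dma_fence.dep_count handling above encodes three states in one counter. The stand-alone sketch below models only that protocol; the example_atom type, field name and test harness are inventions for illustration, and only the -1 / 0 / >0 semantics come from the hunk.

#include <stdio.h>

/*
 * Counter values as described in the hunk above:
 *   > 0   still waiting on that many dma-fences
 *   == 0  every fence has signalled, but the worker that queues the atom has
 *         not run yet
 *   == -1 the atom is (or will be) handled through the normal JD path
 */
struct example_atom {
	int dep_count;
};

/* Models the check in jd_resolve_dep(): returns 1 when the fence side no
 * longer blocks the atom, claiming it (dep_count = -1) if the worker has not
 * queued it yet so it cannot be queued twice. */
static int example_fence_deps_satisfied(struct example_atom *katom)
{
	if (katom->dep_count == -1)
		return 1;
	if (katom->dep_count == 0) {
		katom->dep_count = -1;
		return 1;
	}
	return 0;
}

int main(void)
{
	struct example_atom waiting = { .dep_count = 2 };
	struct example_atom signalled = { .dep_count = 0 };

	printf("waiting:   %d\n", example_fence_deps_satisfied(&waiting));      /* 0 */
	printf("signalled: %d (dep_count now %d)\n",
	       example_fence_deps_satisfied(&signalled), signalled.dep_count); /* 1, -1 */
	return 0;
}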
@@ -873,7 +709,6 @@ bool jd_done_nolock(struct kbase_jd_atom *katom, { struct kbase_context *kctx = katom->kctx; struct kbase_device *kbdev = kctx->kbdev; - struct kbasep_js_kctx_info *js_kctx_info = &kctx->jctx.sched_info; struct list_head completed_jobs; struct list_head runnable_jobs; bool need_to_try_schedule_context = false; @@ -888,7 +723,6 @@ bool jd_done_nolock(struct kbase_jd_atom *katom, jd_check_force_failure(katom); #endif - /* This is needed in case an atom is failed due to being invalid, this * can happen *before* the jobs that the atom depends on have completed */ for (i = 0; i < 2; i++) { @@ -915,17 +749,16 @@ bool jd_done_nolock(struct kbase_jd_atom *katom, } katom->status = KBASE_JD_ATOM_STATE_COMPLETED; - list_add_tail(&katom->dep_item[0], &completed_jobs); + list_add_tail(&katom->jd_item, &completed_jobs); while (!list_empty(&completed_jobs)) { - katom = list_entry(completed_jobs.prev, struct kbase_jd_atom, dep_item[0]); + katom = list_entry(completed_jobs.prev, struct kbase_jd_atom, jd_item); list_del(completed_jobs.prev); - KBASE_DEBUG_ASSERT(katom->status == KBASE_JD_ATOM_STATE_COMPLETED); for (i = 0; i < 2; i++) jd_resolve_dep(&runnable_jobs, katom, i, - js_kctx_info->ctx.is_dying); + kctx->jctx.sched_info.ctx.is_dying); if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) kbase_jd_post_external_resources(katom); @@ -934,35 +767,47 @@ bool jd_done_nolock(struct kbase_jd_atom *katom, struct kbase_jd_atom *node; node = list_entry(runnable_jobs.next, - struct kbase_jd_atom, dep_item[0]); - + struct kbase_jd_atom, jd_item); list_del(runnable_jobs.next); + node->in_jd_list = false; KBASE_DEBUG_ASSERT(node->status != KBASE_JD_ATOM_STATE_UNUSED); - if (node->status != KBASE_JD_ATOM_STATE_COMPLETED) { + if (node->status != KBASE_JD_ATOM_STATE_COMPLETED && + !kctx->jctx.sched_info.ctx.is_dying) { need_to_try_schedule_context |= jd_run_atom(node); } else { node->event_code = katom->event_code; - if ((node->core_req & BASEP_JD_REQ_ATOM_TYPE) - == BASE_JD_REQ_SOFT_REPLAY) { + if ((node->core_req & + BASE_JD_REQ_SOFT_JOB_TYPE) == + BASE_JD_REQ_SOFT_REPLAY) { if (kbase_replay_process(node)) /* Don't complete this atom */ continue; } else if (node->core_req & BASE_JD_REQ_SOFT_JOB) { - /* If this is a fence wait then remove it from the list of sync waiters. */ + /* If this is a fence wait soft job + * then remove it from the list of sync + * waiters. 
+ */ if (BASE_JD_REQ_SOFT_FENCE_WAIT == node->core_req) - list_del(&node->dep_item[0]); + kbasep_remove_waiting_soft_job(node); kbase_finish_soft_job(node); } node->status = KBASE_JD_ATOM_STATE_COMPLETED; } - if (node->status == KBASE_JD_ATOM_STATE_COMPLETED) - list_add_tail(&node->dep_item[0], &completed_jobs); + if (node->status == KBASE_JD_ATOM_STATE_COMPLETED) { + list_add_tail(&node->jd_item, &completed_jobs); + } else if (node->status == KBASE_JD_ATOM_STATE_IN_JS && + !node->will_fail_event_code) { + /* Node successfully submitted, try submitting + * dependencies as they may now be representable + * in JS */ + jd_try_submitting_deps(&runnable_jobs, node); + } } /* Register a completed job as a disjoint event when the GPU @@ -970,7 +815,7 @@ bool jd_done_nolock(struct kbase_jd_atom *katom, */ kbase_disjoint_event_potential(kctx->kbdev); if (completed_jobs_ctx) - list_add_tail(&katom->dep_item[0], completed_jobs_ctx); + list_add_tail(&katom->jd_item, completed_jobs_ctx); else kbase_event_post(kctx, katom); @@ -1042,23 +887,19 @@ static const char *kbasep_map_core_reqs_to_string(base_jd_core_req core_req) } #endif -bool jd_submit_atom(struct kbase_context *kctx, - const struct base_jd_atom_v2 *user_atom, - struct kbase_jd_atom *katom) +bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *user_atom, struct kbase_jd_atom *katom) { struct kbase_jd_context *jctx = &kctx->jctx; - base_jd_core_req core_req; int queued = 0; int i; int sched_prio; bool ret; + bool will_fail = false; /* Update the TOTAL number of jobs. This includes those not tracked by * the scheduler: 'not ready to run' and 'dependency-only' jobs. */ jctx->job_nr++; - core_req = user_atom->core_req; - katom->start_timestamp.tv64 = 0; katom->time_spent_us = 0; katom->udata = user_atom->udata; @@ -1069,18 +910,28 @@ bool jd_submit_atom(struct kbase_context *kctx, katom->affinity = 0; katom->jc = user_atom->jc; katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED; - katom->core_req = core_req; + katom->core_req = user_atom->core_req; katom->atom_flags = 0; katom->retry_count = 0; katom->need_cache_flush_cores_retained = 0; + katom->pre_dep = NULL; + katom->post_dep = NULL; katom->x_pre_dep = NULL; katom->x_post_dep = NULL; + katom->will_fail_event_code = BASE_JD_EVENT_NOT_STARTED; + katom->exit_protected_state = KBASE_ATOM_EXIT_PROTECTED_CHECK; + katom->age = kctx->age_count++; + + INIT_LIST_HEAD(&katom->jd_item); #ifdef CONFIG_KDS /* Start by assuming that the KDS dependencies are satisfied, * kbase_jd_pre_external_resources will correct this if there are dependencies */ katom->kds_dep_satisfied = true; katom->kds_rset = NULL; #endif /* CONFIG_KDS */ +#ifdef CONFIG_MALI_DMA_FENCE + atomic_set(&katom->dma_fence.dep_count, -1); +#endif /* Don't do anything if there is a mess up with dependencies. This is done in a separate cycle to check both the dependencies at ones, otherwise @@ -1096,7 +947,7 @@ bool jd_submit_atom(struct kbase_context *kctx, dep_atom_type != BASE_JD_DEP_TYPE_DATA) { katom->event_code = BASE_JD_EVENT_JOB_CONFIG_FAULT; katom->status = KBASE_JD_ATOM_STATE_COMPLETED; -#if defined(CONFIG_MALI_MIPE_ENABLED) + /* Wrong dependency setup. Atom will be sent * back to user space. Do not record any * dependencies. 
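The jd_done_nolock() loop above drains two lists: completing one atom moves its now-runnable dependents onto runnable_jobs, and any dependent that finishes without touching the GPU is fed straight back into completed_jobs. The toy stand-alone model below shows only that drain pattern; the node structure, fixed-size stack and dependency encoding are simplifications invented for the example.

#include <stdio.h>

#define NNODES 4

struct node {
	int done;
	int ndeps;              /* unresolved dependency count */
	int dependents[NNODES]; /* indices of nodes depending on this one */
	int ndependents;
};

static void complete(struct node *nodes, int idx)
{
	int completed[NNODES], ncompleted = 0;

	completed[ncompleted++] = idx;

	while (ncompleted) {
		struct node *n = &nodes[completed[--ncompleted]];
		int i;

		n->done = 1;
		for (i = 0; i < n->ndependents; i++) {
			struct node *dep = &nodes[n->dependents[i]];

			if (--dep->ndeps == 0) {
				/* Now runnable; in this toy model it finishes
				 * immediately and is pushed back, like an atom
				 * that completes without reaching the GPU. */
				completed[ncompleted++] = n->dependents[i];
			}
		}
	}
}

int main(void)
{
	struct node nodes[NNODES] = {
		[0] = { .ndependents = 2, .dependents = { 1, 2 } },
		[1] = { .ndeps = 1, .ndependents = 1, .dependents = { 3 } },
		[2] = { .ndeps = 1 },
		[3] = { .ndeps = 1 },
	};
	int i;

	complete(nodes, 0);
	for (i = 0; i < NNODES; i++)
		printf("node %d done=%d\n", i, nodes[i].done);
	return 0;
}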
*/ @@ -1105,7 +956,7 @@ bool jd_submit_atom(struct kbase_context *kctx, kbase_jd_atom_id(kctx, katom)); kbase_tlstream_tl_ret_atom_ctx( katom, kctx); -#endif + ret = jd_done_nolock(katom, NULL); goto out; } @@ -1138,16 +989,10 @@ bool jd_submit_atom(struct kbase_context *kctx, continue; } - if (i == 1 && kbase_jd_katom_dep_atom(&katom->dep[0])) { - /* Remove the previous dependency */ - list_del(&katom->dep_item[0]); - kbase_jd_katom_dep_clear(&katom->dep[0]); - } - /* Atom has completed, propagate the error code if any */ katom->event_code = dep_atom->event_code; katom->status = KBASE_JD_ATOM_STATE_QUEUED; -#if defined(CONFIG_MALI_MIPE_ENABLED) + /* This atom is going through soft replay or * will be sent back to user space. Do not record any * dependencies. */ @@ -1155,17 +1000,16 @@ bool jd_submit_atom(struct kbase_context *kctx, katom, kbase_jd_atom_id(kctx, katom)); kbase_tlstream_tl_ret_atom_ctx(katom, kctx); -#endif - if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) + + if ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_REPLAY) { if (kbase_replay_process(katom)) { ret = false; goto out; } } - ret = jd_done_nolock(katom, NULL); + will_fail = true; - goto out; } else { /* Atom is in progress, add this atom to the list */ list_add_tail(&katom->dep_item[i], &dep_atom->dep_head[i]); @@ -1174,12 +1018,25 @@ bool jd_submit_atom(struct kbase_context *kctx, } } - /* These must occur after the above loop to ensure that an atom that - * depends on a previous atom with the same number behaves as expected */ - katom->event_code = BASE_JD_EVENT_DONE; - katom->status = KBASE_JD_ATOM_STATE_QUEUED; + if (will_fail) { + if (!queued) { + ret = jd_done_nolock(katom, NULL); + + goto out; + } else { + katom->will_fail_event_code = katom->event_code; + ret = false; + + goto out; + } + } else { + /* These must occur after the above loop to ensure that an atom + * that depends on a previous atom with the same number behaves + * as expected */ + katom->event_code = BASE_JD_EVENT_DONE; + katom->status = KBASE_JD_ATOM_STATE_QUEUED; + } -#if defined(CONFIG_MALI_MIPE_ENABLED) /* Create a new atom recording all dependencies it was set up with. */ kbase_tlstream_tl_new_atom( katom, @@ -1187,15 +1044,26 @@ bool jd_submit_atom(struct kbase_context *kctx, kbase_tlstream_tl_ret_atom_ctx(katom, kctx); for (i = 0; i < 2; i++) if (BASE_JD_DEP_TYPE_INVALID != kbase_jd_katom_dep_type( - &katom->dep[i])) + &katom->dep[i])) { kbase_tlstream_tl_dep_atom_atom( (void *)kbase_jd_katom_dep_atom( &katom->dep[i]), (void *)katom); -#endif + } else if (BASE_JD_DEP_TYPE_INVALID != + user_atom->pre_dep[i].dependency_type) { + /* Resolved dependency. 
*/ + int dep_atom_number = + user_atom->pre_dep[i].atom_id; + struct kbase_jd_atom *dep_atom = + &jctx->atoms[dep_atom_number]; + + kbase_tlstream_tl_rdep_atom_atom( + (void *)dep_atom, + (void *)katom); + } /* Reject atoms with job chain = NULL, as these cause issues with soft-stop */ - if (!katom->jc && (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) { + if (!katom->jc && (katom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) { dev_warn(kctx->kbdev->dev, "Rejecting atom with jc = NULL"); katom->event_code = BASE_JD_EVENT_JOB_INVALID; ret = jd_done_nolock(katom, NULL); @@ -1213,6 +1081,17 @@ bool jd_submit_atom(struct kbase_context *kctx, goto out; } + /* Reject atoms with invalid core requirements */ + if ((katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) && + (katom->core_req & BASE_JD_REQ_EVENT_COALESCE)) { + dev_warn(kctx->kbdev->dev, + "Rejecting atom with invalid core requirements"); + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + katom->core_req &= ~BASE_JD_REQ_EVENT_COALESCE; + ret = jd_done_nolock(katom, NULL); + goto out; + } + /* For invalid priority, be most lenient and choose the default */ sched_prio = kbasep_js_atom_prio_to_sched_prio(user_atom->prio); if (sched_prio == KBASE_JS_ATOM_SCHED_PRIO_INVALID) @@ -1269,7 +1148,15 @@ bool jd_submit_atom(struct kbase_context *kctx, } #endif /* CONFIG_KDS */ - if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) + +#ifdef CONFIG_MALI_DMA_FENCE + if (atomic_read(&katom->dma_fence.dep_count) != -1) { + ret = false; + goto out; + } +#endif /* CONFIG_MALI_DMA_FENCE */ + + if ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_REPLAY) { if (kbase_replay_process(katom)) ret = false; @@ -1283,10 +1170,9 @@ bool jd_submit_atom(struct kbase_context *kctx, ret = jd_done_nolock(katom, NULL); goto out; } - /* The job has not yet completed */ - list_add_tail(&katom->dep_item[0], &kctx->waiting_soft_jobs); + ret = false; - } else if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) { + } else if ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) { katom->status = KBASE_JD_ATOM_STATE_IN_JS; ret = kbasep_js_add_job(kctx, katom); /* If job was cancelled then resolve immediately */ @@ -1372,7 +1258,7 @@ int kbase_jd_submit(struct kbase_context *kctx, user_atom.udata = user_atom_v6.udata; user_atom.extres_list = user_atom_v6.extres_list; user_atom.nr_extres = user_atom_v6.nr_extres; - user_atom.core_req = user_atom_v6.core_req; + user_atom.core_req = (u32)(user_atom_v6.core_req & 0x7fff); /* atom number 0 is used for no dependency atoms */ if (!user_atom_v6.pre_dep[0]) @@ -1404,6 +1290,12 @@ int kbase_jd_submit(struct kbase_context *kctx, } #endif /* BASE_LEGACY_UK6_SUPPORT */ +#ifdef BASE_LEGACY_UK10_2_SUPPORT + if (KBASE_API_VERSION(10, 3) > kctx->api_version) + user_atom.core_req = (u32)(user_atom.compat_core_req + & 0x7fff); +#endif /* BASE_LEGACY_UK10_2_SUPPORT */ + user_addr = (void __user *)((uintptr_t) user_addr + submit_data->stride); mutex_lock(&jctx->lock); @@ -1478,7 +1370,6 @@ void kbase_jd_done_worker(struct work_struct *data) struct kbase_jd_context *jctx; struct kbase_context *kctx; struct kbasep_js_kctx_info *js_kctx_info; - union kbasep_js_policy *js_policy; struct kbase_device *kbdev; struct kbasep_js_device_data *js_devdata; u64 cache_jc = katom->jc; @@ -1497,7 +1388,6 @@ void kbase_jd_done_worker(struct work_struct *data) kbdev = kctx->kbdev; js_kctx_info = &kctx->jctx.sched_info; js_devdata = &kbdev->js_data; - js_policy = &kbdev->js_data.policy; KBASE_TRACE_ADD(kbdev, 
JD_DONE_WORKER, kctx, katom, katom->jc, 0); @@ -1522,7 +1412,6 @@ void kbase_jd_done_worker(struct work_struct *data) mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); mutex_unlock(&js_devdata->queue_mutex); - mutex_unlock(&jctx->lock); spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); @@ -1530,6 +1419,7 @@ void kbase_jd_done_worker(struct work_struct *data) kbase_js_unpull(kctx, katom); spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + mutex_unlock(&jctx->lock); return; } @@ -1639,7 +1529,7 @@ void kbase_jd_done_worker(struct work_struct *data) while (!list_empty(&kctx->completed_jobs)) { struct kbase_jd_atom *atom = list_entry( kctx->completed_jobs.next, - struct kbase_jd_atom, dep_item[0]); + struct kbase_jd_atom, jd_item); list_del(kctx->completed_jobs.next); kbase_event_post(kctx, atom); @@ -1714,51 +1604,6 @@ static void jd_cancel_worker(struct work_struct *data) kbase_js_sched_all(kbdev); } -/** - * jd_evict_worker - Work queue job evict function - * @data: a &struct work_struct - * - * Only called as part of evicting failed jobs. This is only called on jobs that - * were never submitted to HW Access. Jobs that were submitted are handled - * through kbase_jd_done_worker(). - * Operates serially with the kbase_jd_done_worker() on the work queue. - * - * We don't need to release most of the resources that would occur on - * kbase_jd_done() or kbase_jd_done_worker(), because the atoms here must not be - * running (by virtue of having not been submitted to HW Access). - */ -static void jd_evict_worker(struct work_struct *data) -{ - struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, - work); - struct kbase_jd_context *jctx; - struct kbase_context *kctx; - struct kbasep_js_kctx_info *js_kctx_info; - struct kbase_device *kbdev; - - /* Soft jobs should never reach this function */ - KBASE_DEBUG_ASSERT((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0); - - kctx = katom->kctx; - kbdev = kctx->kbdev; - jctx = &kctx->jctx; - js_kctx_info = &kctx->jctx.sched_info; - - KBASE_TRACE_ADD(kbdev, JD_CANCEL_WORKER, kctx, katom, katom->jc, 0); - - /* Scheduler: Remove the job from the system */ - mutex_lock(&js_kctx_info->ctx.jsctx_mutex); - kbasep_js_remove_cancelled_job(kbdev, kctx, katom); - mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); - - mutex_lock(&jctx->lock); - jd_done_nolock(katom, NULL); - /* katom may have been freed now, do not use! 
*/ - mutex_unlock(&jctx->lock); - - kbase_js_sched_all(kbdev); -} - /** * kbase_jd_done - Complete a job that has been removed from the Hardware * @katom: atom which has been completed @@ -1802,7 +1647,8 @@ void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr, #ifdef CONFIG_DEBUG_FS /* a failed job happened and is waiting for dumping*/ - if (kbase_debug_job_fault_process(katom, katom->event_code)) + if (!katom->will_fail_event_code && + kbase_debug_job_fault_process(katom, katom->event_code)) return; #endif @@ -1840,30 +1686,6 @@ void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom) queue_work(kctx->jctx.job_done_wq, &katom->work); } -void kbase_jd_evict(struct kbase_device *kbdev, struct kbase_jd_atom *katom) -{ - struct kbase_context *kctx; - struct kbasep_js_kctx_info *js_kctx_info; - - KBASE_DEBUG_ASSERT(NULL != kbdev); - KBASE_DEBUG_ASSERT(NULL != katom); - kctx = katom->kctx; - KBASE_DEBUG_ASSERT(NULL != kctx); - - js_kctx_info = &kctx->jctx.sched_info; - - KBASE_TRACE_ADD(kbdev, JD_CANCEL, kctx, katom, katom->jc, 0); - - /* This should only be done from a context that is currently scheduled - */ - KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled); - - WARN_ON(work_pending(&katom->work)); - - KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work)); - INIT_WORK(&katom->work, jd_evict_worker); - queue_work(kctx->jctx.job_done_wq, &katom->work); -} void kbase_jd_zap_context(struct kbase_context *kctx) { @@ -1886,8 +1708,9 @@ void kbase_jd_zap_context(struct kbase_context *kctx) * queued outside the job scheduler. */ + del_timer_sync(&kctx->soft_job_timeout); list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) { - katom = list_entry(entry, struct kbase_jd_atom, dep_item[0]); + katom = list_entry(entry, struct kbase_jd_atom, queue); kbase_cancel_soft_job(katom); } @@ -1910,8 +1733,19 @@ void kbase_jd_zap_context(struct kbase_context *kctx) } #endif +#ifdef CONFIG_MALI_DMA_FENCE + kbase_dma_fence_cancel_all_atoms(kctx); +#endif + mutex_unlock(&kctx->jctx.lock); +#ifdef CONFIG_MALI_DMA_FENCE + /* Flush dma-fence workqueue to ensure that any callbacks that may have + * been queued are done before continuing. + */ + flush_workqueue(kctx->dma_fence.wq); +#endif + kbase_jm_wait_for_zero_jobs(kctx); } @@ -1927,7 +1761,8 @@ int kbase_jd_init(struct kbase_context *kctx) KBASE_DEBUG_ASSERT(kctx); - kctx->jctx.job_done_wq = alloc_workqueue("mali_jd", 0, 1); + kctx->jctx.job_done_wq = alloc_workqueue("mali_jd", + WQ_HIGHPRI | WQ_UNBOUND, 1); if (NULL == kctx->jctx.job_done_wq) { mali_err = -ENOMEM; goto out1; @@ -1942,6 +1777,12 @@ int kbase_jd_init(struct kbase_context *kctx) /* Catch userspace attempting to use an atom which doesn't exist as a pre-dependency */ kctx->jctx.atoms[i].event_code = BASE_JD_EVENT_JOB_INVALID; kctx->jctx.atoms[i].status = KBASE_JD_ATOM_STATE_UNUSED; + +#ifdef CONFIG_MALI_DMA_FENCE + kctx->jctx.atoms[i].dma_fence.context = fence_context_alloc(1); + atomic_set(&kctx->jctx.atoms[i].dma_fence.seqno, 0); + INIT_LIST_HEAD(&kctx->jctx.atoms[i].dma_fence.callbacks); +#endif } mutex_init(&kctx->jctx.lock); diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_js.c b/drivers/gpu/arm/midgard_for_linux/mali_kbase_js.c index 78761e69d04d..ac6c3ce333ed 100644 --- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_js.c +++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_js.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -25,9 +25,7 @@ #if defined(CONFIG_MALI_GATOR_SUPPORT) #include #endif -#if defined(CONFIG_MALI_MIPE_ENABLED) #include -#endif #include #include @@ -79,13 +77,6 @@ static int kbase_js_get_slot(struct kbase_device *kbdev, static void kbase_js_foreach_ctx_job(struct kbase_context *kctx, kbasep_js_policy_ctx_job_cb callback); -static bool kbase_js_evict_atom(struct kbase_context *kctx, - struct kbase_jd_atom *katom_evict, - struct kbase_jd_atom *start_katom, - struct kbase_jd_atom *head_katom, - struct list_head *evict_list, - struct jsctx_rb *rb, int idx); - /* Helper for trace subcodes */ #if KBASE_TRACE_ENABLE static int kbasep_js_trace_get_refcnt(struct kbase_device *kbdev, @@ -239,26 +230,6 @@ bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev, return result; } -/** - * jsctx_rb_is_empty_prio(): - Check if ring buffer is empty - * @kctx: Pointer to kbase context with ring buffer. - * @js: Job slot id to check. - * @prio: Priority to check. - * - * Caller must hold runpool_irq.lock - * - * Return: true if the ring buffer is empty, false otherwise. - */ -static inline bool -jsctx_rb_is_empty_prio(struct kbase_context *kctx, int js, int prio) -{ - struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js]; - - lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); - - return rb->running_idx == rb->write_idx; -} - /** * jsctx_rb_none_to_pull_prio(): - Check if there are no pullable atoms * @kctx: Pointer to kbase context with ring buffer. @@ -270,18 +241,16 @@ jsctx_rb_is_empty_prio(struct kbase_context *kctx, int js, int prio) * ring buffer to be full (with running atoms) when this functions returns * true. * - * Caller must hold runpool_irq.lock - * * Return: true if there are no atoms to pull, false otherwise. */ static inline bool jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, int js, int prio) { - struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js]; + struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); - return rb->read_idx == rb->write_idx; + return RB_EMPTY_ROOT(&rb->runnable_tree); } /** @@ -311,115 +280,68 @@ jsctx_rb_none_to_pull(struct kbase_context *kctx, int js) } /** - * jsctx_rb_compact_prio(): - Compact a ring buffer - * @kctx: Pointer to kbase context with ring buffer. - * @js: Job slot id to compact. - * @prio: Priority id to compact. - */ -static inline void -jsctx_rb_compact_prio(struct kbase_context *kctx, int js, int prio) -{ - struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js]; - u16 compact_idx = rb->write_idx - 1; - u16 end_idx = rb->running_idx - 1; - u16 i; - - lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); - lockdep_assert_held(&kctx->jctx.lock); - - for (i = compact_idx; i != end_idx; i--) { - if (rb->entries[i & JSCTX_RB_MASK].atom_id != - KBASEP_ATOM_ID_INVALID) { - WARN_ON(compact_idx < rb->running_idx); - rb->entries[compact_idx & JSCTX_RB_MASK].atom_id = - rb->entries[i & JSCTX_RB_MASK].atom_id; - - compact_idx--; - } - if (rb->read_idx == i) - rb->read_idx = compact_idx + 1; - } - - rb->running_idx = compact_idx + 1; -} - -/** - * jsctx_rb_compact(): - Compact all priority ring buffers - * @kctx: Pointer to kbase context with ring buffer. - * @js: Job slot id to compact. 
- */ -static inline void -jsctx_rb_compact(struct kbase_context *kctx, int js) -{ - int prio; - - for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) - jsctx_rb_compact_prio(kctx, js, prio); -} - -/** - * jsctx_rb_foreach_prio(): - Execute callback for each entry in ring buffer - * @kctx: Pointer to kbase context with ring buffer. + * jsctx_queue_foreach_prio(): - Execute callback for each entry in the queue. + * @kctx: Pointer to kbase context with the queue. * @js: Job slot id to iterate. * @prio: Priority id to iterate. * @callback: Function pointer to callback. * - * Iterate over a ring buffer and invoke @callback for each entry in buffer, and - * remove the entry from the buffer. + * Iterate over a queue and invoke @callback for each entry in the queue, and + * remove the entry from the queue. * - * If entries are added to the ring buffer while this is running those entries - * may, or may not be covered. To ensure that all entries in the buffer have - * been enumerated when this function returns jsctx->lock must be held when - * calling this function. + * If entries are added to the queue while this is running those entries may, or + * may not be covered. To ensure that all entries in the buffer have been + * enumerated when this function returns jsctx->lock must be held when calling + * this function. * * The HW access lock, js_data.runpool_irq.lock, must always be held when * calling this function. */ static void -jsctx_rb_foreach_prio(struct kbase_context *kctx, int js, int prio, +jsctx_queue_foreach_prio(struct kbase_context *kctx, int js, int prio, kbasep_js_policy_ctx_job_cb callback) { - struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js]; - struct kbase_jd_atom *katom; - u16 write_idx = ACCESS_ONCE(rb->write_idx); + struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js]; lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); - /* There must be no jobs currently in HW access */ - WARN_ON(rb->read_idx != rb->running_idx); + while (!RB_EMPTY_ROOT(&queue->runnable_tree)) { + struct rb_node *node = rb_first(&queue->runnable_tree); + struct kbase_jd_atom *entry = rb_entry(node, + struct kbase_jd_atom, runnable_tree_node); - /* Invoke callback on all kbase_jd_atoms in the ring buffer, and - * removes them from the buffer */ - while (rb->read_idx != write_idx) { - int id = rb->entries[rb->read_idx & JSCTX_RB_MASK].atom_id; + rb_erase(node, &queue->runnable_tree); + callback(kctx->kbdev, entry); + } - katom = kbase_jd_atom_from_id(kctx, id); + while (!list_empty(&queue->x_dep_head)) { + struct kbase_jd_atom *entry = list_entry(queue->x_dep_head.next, + struct kbase_jd_atom, queue); - rb->read_idx++; - rb->running_idx++; + list_del(queue->x_dep_head.next); - callback(kctx->kbdev, katom); + callback(kctx->kbdev, entry); } } /** - * jsctx_rb_foreach(): - Execute callback for each entry in all priority rb - * @kctx: Pointer to kbase context with ring buffer. + * jsctx_queue_foreach(): - Execute callback for each entry in every queue + * @kctx: Pointer to kbase context with queue. * @js: Job slot id to iterate. * @callback: Function pointer to callback. * * Iterate over all the different priorities, and for each call - * jsctx_rb_foreach_prio() to iterate over the ring buffer and invoke @callback - * for each entry in buffer, and remove the entry from the buffer. + * jsctx_queue_foreach_prio() to iterate over the queue and invoke @callback + * for each entry, and remove the entry from the queue. 
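jsctx_queue_foreach_prio() above replaces the old ring-buffer walk with "take the leftmost node, erase it, invoke the callback" until the tree is empty, plus a plain list_del() walk for the x_dep list. Below is a compact sketch of the rbtree half of that pattern using the stock <linux/rbtree.h> API (kernel-tree only); example_item and visit are stand-ins for kbase_jd_atom and the ctx-job callback.

#include <linux/rbtree.h>

struct example_item {
	struct rb_node node;
	int id;
};

/* Drain a tree in key order, detaching each node before handing it to the
 * callback, exactly as the runnable_tree loop above does. */
static void example_drain(struct rb_root *root,
			  void (*visit)(struct example_item *item))
{
	while (!RB_EMPTY_ROOT(root)) {
		struct rb_node *n = rb_first(root);   /* leftmost == smallest key */
		struct example_item *item =
			rb_entry(n, struct example_item, node);

		rb_erase(n, root);                    /* remove before visiting */
		visit(item);
	}
}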
*/ static inline void -jsctx_rb_foreach(struct kbase_context *kctx, int js, +jsctx_queue_foreach(struct kbase_context *kctx, int js, kbasep_js_policy_ctx_job_cb callback) { int prio; for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) - jsctx_rb_foreach_prio(kctx, js, prio, callback); + jsctx_queue_foreach_prio(kctx, js, prio, callback); } /** @@ -436,16 +358,16 @@ jsctx_rb_foreach(struct kbase_context *kctx, int js, static inline struct kbase_jd_atom * jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio) { - struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js]; - int id; + struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; + struct rb_node *node; lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); - if (jsctx_rb_none_to_pull_prio(kctx, js, prio)) + node = rb_first(&rb->runnable_tree); + if (!node) return NULL; - id = rb->entries[rb->read_idx & JSCTX_RB_MASK].atom_id; - return kbase_jd_atom_from_id(kctx, id); + return rb_entry(node, struct kbase_jd_atom, runnable_tree_node); } /** @@ -457,6 +379,8 @@ jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio) * KBASE_JS_ATOM_SCHED_PRIO_HIGH, for the specified @js and @prio and return a * pointer to the next atom, unless all the priority's ring buffers are empty. * + * Caller must hold the runpool_irq.lock. + * * Return: Pointer to next atom in buffer, or NULL if there is no atom. */ static inline struct kbase_jd_atom * @@ -464,6 +388,8 @@ jsctx_rb_peek(struct kbase_context *kctx, int js) { int prio; + lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); + for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { struct kbase_jd_atom *katom; @@ -475,35 +401,6 @@ jsctx_rb_peek(struct kbase_context *kctx, int js) return NULL; } -/** - * jsctx_rb_peek_last(): - Check a ring buffer and get the last atom - * @kctx: Pointer to kbase context with ring buffer. - * @js: Job slot id to check. - * @prio: Priority id to check. - * - * Check the ring buffer for the specified @js and @prio and return a - * pointer to the last atom, unless all the priority's ring buffers are empty. - * - * The last atom is the atom that was added using jsctx_rb_add() most recently. - * - * Return: Pointer to last atom in buffer, or NULL if there is no atom. - */ -static inline struct kbase_jd_atom * -jsctx_rb_peek_last(struct kbase_context *kctx, int js, int prio) -{ - struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js]; - int id; - - lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); - lockdep_assert_held(&kctx->jctx.lock); - - if (jsctx_rb_is_empty_prio(kctx, js, prio)) - return NULL; - - id = rb->entries[(rb->write_idx - 1) & JSCTX_RB_MASK].atom_id; - return kbase_jd_atom_from_id(kctx, id); -} - /** * jsctx_rb_pull(): - Mark atom in list as running * @kctx: Pointer to kbase context with ring buffer. @@ -518,158 +415,71 @@ jsctx_rb_pull(struct kbase_context *kctx, struct kbase_jd_atom *katom) { int prio = katom->sched_priority; int js = katom->slot_nr; - struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js]; + struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); /* Atoms must be pulled in the correct order. */ WARN_ON(katom != jsctx_rb_peek_prio(kctx, js, prio)); - rb->read_idx++; + rb_erase(&katom->runnable_tree_node, &rb->runnable_tree); } -/** - * jsctx_rb_unpull(): - Undo marking of atom in list as running - * @kctx: Pointer to kbase context with ring buffer. - * @katom: Pointer to katom to unpull. 
- * - * Undo jsctx_rb_pull() and put @katom back in the queue. - * - * jsctx_rb_unpull() must be called on atoms in the same order the atoms were - * pulled. - */ -static inline void -jsctx_rb_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom) -{ - int prio = katom->sched_priority; - int js = katom->slot_nr; - struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js]; +#define LESS_THAN_WRAP(a, b) ((s32)(a - b) < 0) - lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); - - /* Atoms must be unpulled in correct order. */ - WARN_ON(rb->entries[(rb->read_idx - 1) & JSCTX_RB_MASK].atom_id != - kbase_jd_atom_id(kctx, katom)); - - rb->read_idx--; -} - -/** - * jsctx_rb_add(): - Add atom to ring buffer - * @kctx: Pointer to kbase context with ring buffer. - * @katom: Pointer to katom to add. - * - * Add @katom to the ring buffer determined by the atom's priority and job slot - * number. - * - * If the ring buffer is full -EBUSY will be returned. - * - * Return: On success 0 is returned, on failure a negative error code. - */ -static int -jsctx_rb_add_atom(struct kbase_context *kctx, struct kbase_jd_atom *katom) +static void +jsctx_tree_add(struct kbase_context *kctx, struct kbase_jd_atom *katom) { int prio = katom->sched_priority; int js = katom->slot_nr; - struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js]; + struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js]; + struct rb_node **new = &(queue->runnable_tree.rb_node), *parent = NULL; - lockdep_assert_held(&kctx->jctx.lock); + lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); - /* Check if the ring buffer is full */ - if ((rb->write_idx - rb->running_idx) >= JSCTX_RB_SIZE) - return -EBUSY; + while (*new) { + struct kbase_jd_atom *entry = container_of(*new, + struct kbase_jd_atom, runnable_tree_node); - rb->entries[rb->write_idx & JSCTX_RB_MASK].atom_id = - kbase_jd_atom_id(kctx, katom); - rb->write_idx++; + parent = *new; + if (LESS_THAN_WRAP(katom->age, entry->age)) + new = &((*new)->rb_left); + else + new = &((*new)->rb_right); + } - return 0; + /* Add new node and rebalance tree. */ + rb_link_node(&katom->runnable_tree_node, parent, new); + rb_insert_color(&katom->runnable_tree_node, &queue->runnable_tree); } /** - * jsctx_rb_remove(): - Remove atom from ring buffer + * jsctx_rb_unpull(): - Undo marking of atom in list as running * @kctx: Pointer to kbase context with ring buffer. - * @katom: Pointer to katom to remove. + * @katom: Pointer to katom to unpull. * - * Remove @katom from the ring buffer. + * Undo jsctx_rb_pull() and put @katom back in the queue. * - * @katom must have been pulled from the buffer earlier by jsctx_rb_pull(), and - * atoms must be removed in the same order they were pulled from the ring - * buffer. + * jsctx_rb_unpull() must be called on atoms in the same order the atoms were + * pulled. */ static inline void -jsctx_rb_remove(struct kbase_context *kctx, struct kbase_jd_atom *katom) +jsctx_rb_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom) { - int prio = katom->sched_priority; - int js = katom->slot_nr; - struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js]; - lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); - /* Atoms must be completed in order. */ - WARN_ON(rb->entries[rb->running_idx & JSCTX_RB_MASK].atom_id != - kbase_jd_atom_id(kctx, katom)); - - rb->running_idx++; + jsctx_tree_add(kctx, katom); } -/** - * jsctx_rb_evict(): - Evict atom, and dependents, from ring buffer - * @kctx: Pointer to kbase context with ring buffer. 
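LESS_THAN_WRAP() above orders atoms in the runnable tree by their age counter while tolerating wrap-around: the unsigned subtraction is reinterpreted as signed, so an atom submitted just before the counter wrapped still sorts ahead of one submitted just after. A standalone illustration (less_than_wrap() is a hypothetical userspace rewrite, assuming the age field behaves like a u32):

    #include <stdint.h>
    #include <stdio.h>

    /* Same trick as LESS_THAN_WRAP(): a is "older" than b when the
     * signed view of (a - b) is negative, even across a wrap. */
    static int less_than_wrap(uint32_t a, uint32_t b)
    {
            return (int32_t)(a - b) < 0;
    }

    int main(void)
    {
            /* Plain case: age 5 precedes age 10. */
            printf("%d\n", less_than_wrap(5, 10));          /* prints 1 */
            /* Wrapped case: 0xFFFFFFF0 was assigned before the counter
             * wrapped around to 3, so it still precedes it. */
            printf("%d\n", less_than_wrap(0xFFFFFFF0u, 3)); /* prints 1 */
            /* The reverse comparison is false. */
            printf("%d\n", less_than_wrap(3, 0xFFFFFFF0u)); /* prints 0 */
            return 0;
    }

rb_link_node()/rb_insert_color() in jsctx_tree_add() then keep the tree balanced around that ordering.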
- * @start_katom: Pointer to the first katom to evict. - * @head_katom: Pointer to head katom. - * @evict_list: Pointer to head of list where evicted atoms are added. - * - * Iterate over the ring buffer starting at @start_katom and evict @start_atom - * and dependent atoms in ring buffer. - * - * @evict_list and @head_katom is passed on to kbase_js_evict_atom() which will - * examine the atom dependencies. - * - * jsctx_rb_evict() is only called by kbase_js_evict_deps(). - */ -static void -jsctx_rb_evict(struct kbase_context *kctx, - struct kbase_jd_atom *start_katom, - struct kbase_jd_atom *head_katom, - struct list_head *evict_list) -{ - int prio = start_katom->sched_priority; - int js = start_katom->slot_nr; - struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js]; - bool atom_in_rb = false; - u16 i, start_idx; - - lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); - lockdep_assert_held(&kctx->jctx.lock); - - for (i = rb->running_idx; i != rb->write_idx; i++) { - if (rb->entries[i & JSCTX_RB_MASK].atom_id == - kbase_jd_atom_id(kctx, start_katom)) { - start_idx = i; - atom_in_rb = true; - break; - } - } - - /* start_katom must still be in ring buffer. */ - if (i == rb->write_idx || !atom_in_rb) - return; - - /* Evict all dependencies on same slot. */ - for (i = start_idx; i != rb->write_idx; i++) { - u8 katom_evict; - - katom_evict = rb->entries[i & JSCTX_RB_MASK].atom_id; - if (katom_evict != KBASEP_ATOM_ID_INVALID) { - if (!kbase_js_evict_atom(kctx, - &kctx->jctx.atoms[katom_evict], - start_katom, head_katom, - evict_list, rb, i)) - break; - } - } -} +static bool kbase_js_ctx_pullable(struct kbase_context *kctx, + int js, + bool is_scheduled); +static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev, + struct kbase_context *kctx, + int js); +static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev, + struct kbase_context *kctx, + int js); /* * Functions private to KBase ('Protected' functions) @@ -742,6 +552,7 @@ int kbasep_js_devdata_init(struct kbase_device * const kbdev) DEFAULT_JS_CFS_CTX_RUNTIME_INIT_SLICES; jsdd->cfs_ctx_runtime_min_slices = DEFAULT_JS_CFS_CTX_RUNTIME_MIN_SLICES; + atomic_set(&jsdd->soft_job_timeout_ms, DEFAULT_JS_SOFT_JOB_TIMEOUT); dev_dbg(kbdev->dev, "JS Config Attribs: "); dev_dbg(kbdev->dev, "\tscheduling_period_ns:%u", @@ -768,6 +579,8 @@ int kbasep_js_devdata_init(struct kbase_device * const kbdev) jsdd->cfs_ctx_runtime_init_slices); dev_dbg(kbdev->dev, "\tcfs_ctx_runtime_min_slices:%u", jsdd->cfs_ctx_runtime_min_slices); + dev_dbg(kbdev->dev, "\tsoft_job_timeout:%i", + atomic_read(&jsdd->soft_job_timeout_ms)); if (!(jsdd->soft_stop_ticks < jsdd->hard_stop_ticks_ss && jsdd->hard_stop_ticks_ss < jsdd->gpu_reset_ticks_ss && @@ -872,7 +685,7 @@ int kbasep_js_kctx_init(struct kbase_context * const kctx) struct kbase_device *kbdev; struct kbasep_js_kctx_info *js_kctx_info; int err; - int i; + int i, j; KBASE_DEBUG_ASSERT(kctx != NULL); @@ -912,6 +725,13 @@ int kbasep_js_kctx_init(struct kbase_context * const kctx) if (js_kctx_info->init_status != JS_KCTX_INIT_ALL) return -EINVAL; + for (i = 0; i < KBASE_JS_ATOM_SCHED_PRIO_COUNT; i++) { + for (j = 0; j < BASE_JM_MAX_NR_SLOTS; j++) { + INIT_LIST_HEAD(&kctx->jsctx_queue[i][j].x_dep_head); + kctx->jsctx_queue[i][j].runnable_tree = RB_ROOT; + } + } + return 0; } @@ -966,30 +786,25 @@ void kbasep_js_kctx_term(struct kbase_context *kctx) } /** - * kbase_js_ctx_list_add_pullable - Add context to the tail of the per-slot - * pullable context queue + * 
kbase_js_ctx_list_add_pullable_nolock - Variant of + * kbase_jd_ctx_list_add_pullable() + * where the caller must hold + * runpool_irq.lock * @kbdev: Device pointer * @kctx: Context to add to queue * @js: Job slot to use * - * If the context is on either the pullable or unpullable queues, then it is - * removed before being added to the tail. - * - * This function should be used when queueing a context for the first time, or - * re-queueing a context that has been pulled from. - * - * Caller must hold kbasep_jd_device_data.queue_mutex + * Caller must hold runpool_irq.lock * * Return: true if caller should call kbase_backend_ctx_count_changed() */ -static bool kbase_js_ctx_list_add_pullable(struct kbase_device *kbdev, +static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev, struct kbase_context *kctx, int js) { bool ret = false; - lockdep_assert_held(&kbdev->js_data.queue_mutex); - lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex); + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js])) list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); @@ -1012,30 +827,24 @@ static bool kbase_js_ctx_list_add_pullable(struct kbase_device *kbdev, } /** - * kbase_js_ctx_list_add_pullable_head - Add context to the head of the - * per-slot pullable context queue + * kbase_js_ctx_list_add_pullable_head_nolock - Variant of + * kbase_js_ctx_list_add_pullable_head() + * where the caller must hold + * runpool_irq.lock * @kbdev: Device pointer * @kctx: Context to add to queue * @js: Job slot to use * - * If the context is on either the pullable or unpullable queues, then it is - * removed before being added to the head. - * - * This function should be used when a context has been scheduled, but no jobs - * can currently be pulled from it. - * - * Caller must hold kbasep_jd_device_data.queue_mutex + * Caller must hold runpool_irq.lock * * Return: true if caller should call kbase_backend_ctx_count_changed() */ -static bool kbase_js_ctx_list_add_pullable_head(struct kbase_device *kbdev, - struct kbase_context *kctx, - int js) +static bool kbase_js_ctx_list_add_pullable_head_nolock( + struct kbase_device *kbdev, struct kbase_context *kctx, int js) { bool ret = false; - lockdep_assert_held(&kbdev->js_data.queue_mutex); - lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex); + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js])) list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); @@ -1058,8 +867,37 @@ static bool kbase_js_ctx_list_add_pullable_head(struct kbase_device *kbdev, } /** - * kbase_js_ctx_list_add_unpullable - Add context to the tail of the per-slot - * unpullable context queue + * kbase_js_ctx_list_add_pullable_head - Add context to the head of the + * per-slot pullable context queue + * @kbdev: Device pointer + * @kctx: Context to add to queue + * @js: Job slot to use + * + * If the context is on either the pullable or unpullable queues, then it is + * removed before being added to the head. + * + * This function should be used when a context has been scheduled, but no jobs + * can currently be pulled from it. 
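The renames in these hunks all follow one pattern: the former functions become *_nolock variants that only assert the new locking rule (runpool_irq.lock rather than queue_mutex/jsctx_mutex), and thin wrappers take the spinlock for callers that do not already hold it. A minimal sketch of that split, with hypothetical names (example_op/example_op_nolock); only the lock member is taken from the driver:

    /* Caller already holds runpool_irq.lock. */
    static bool example_op_nolock(struct kbase_device *kbdev)
    {
            lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
            /* ... manipulate the per-slot context lists ... */
            return true;
    }

    /* Convenience wrapper for callers outside the IRQ lock. */
    static bool example_op(struct kbase_device *kbdev)
    {
            unsigned long flags;
            bool ret;

            spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags);
            ret = example_op_nolock(kbdev);
            spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags);

            return ret;
    }

kbase_js_ctx_list_add_pullable_head()/_nolock() and kbase_js_ctx_list_pop_head()/_nolock() below are concrete instances of this split.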
+ * + * Return: true if caller should call kbase_backend_ctx_count_changed() + */ +static bool kbase_js_ctx_list_add_pullable_head(struct kbase_device *kbdev, + struct kbase_context *kctx, + int js) +{ + bool ret; + unsigned long flags; + + spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags); + ret = kbase_js_ctx_list_add_pullable_head_nolock(kbdev, kctx, js); + spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags); + + return ret; +} + +/** + * kbase_js_ctx_list_add_unpullable_nolock - Add context to the tail of the + * per-slot unpullable context queue * @kbdev: Device pointer * @kctx: Context to add to queue * @js: Job slot to use @@ -1070,18 +908,17 @@ static bool kbase_js_ctx_list_add_pullable_head(struct kbase_device *kbdev, * This function should be used when a context has been pulled from, and there * are no jobs remaining on the specified slot. * - * Caller must hold kbasep_jd_device_data.queue_mutex + * Caller must hold runpool_irq.lock * * Return: true if caller should call kbase_backend_ctx_count_changed() */ -static bool kbase_js_ctx_list_add_unpullable(struct kbase_device *kbdev, +static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev, struct kbase_context *kctx, int js) { bool ret = false; - lockdep_assert_held(&kbdev->js_data.queue_mutex); - lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex); + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js], &kbdev->js_data.ctx_list_unpullable[js]); @@ -1101,8 +938,8 @@ static bool kbase_js_ctx_list_add_unpullable(struct kbase_device *kbdev, } /** - * kbase_js_ctx_list_remove - Remove context from the per-slot pullable or - * unpullable context queues + * kbase_js_ctx_list_remove_nolock - Remove context from the per-slot pullable + * or unpullable context queues * @kbdev: Device pointer * @kctx: Context to remove from queue * @js: Job slot to use @@ -1112,18 +949,17 @@ static bool kbase_js_ctx_list_add_unpullable(struct kbase_device *kbdev, * This function should be used when a context has no jobs on the GPU, and no * jobs remaining for the specified slot. * - * Caller must hold kbasep_jd_device_data.queue_mutex + * Caller must hold runpool_irq.lock * * Return: true if caller should call kbase_backend_ctx_count_changed() */ -static bool kbase_js_ctx_list_remove(struct kbase_device *kbdev, +static bool kbase_js_ctx_list_remove_nolock(struct kbase_device *kbdev, struct kbase_context *kctx, int js) { bool ret = false; - lockdep_assert_held(&kbdev->js_data.queue_mutex); - lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex); + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); WARN_ON(list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js])); @@ -1144,23 +980,24 @@ static bool kbase_js_ctx_list_remove(struct kbase_device *kbdev, } /** - * kbase_js_ctx_list_pop_head - Pop the head context off the per-slot pullable - * queue. + * kbase_js_ctx_list_pop_head_nolock - Variant of kbase_js_ctx_list_pop_head() + * where the caller must hold + * runpool_irq.lock * @kbdev: Device pointer * @js: Job slot to use * - * Caller must hold kbasep_jd_device_data::queue_mutex + * Caller must hold runpool_irq.lock * * Return: Context to use for specified slot. 
* NULL if no contexts present for specified slot */ -static struct kbase_context *kbase_js_ctx_list_pop_head( +static struct kbase_context *kbase_js_ctx_list_pop_head_nolock( struct kbase_device *kbdev, int js) { struct kbase_context *kctx; - lockdep_assert_held(&kbdev->js_data.queue_mutex); + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); if (list_empty(&kbdev->js_data.ctx_list_pullable[js])) return NULL; @@ -1174,6 +1011,28 @@ static struct kbase_context *kbase_js_ctx_list_pop_head( return kctx; } +/** + * kbase_js_ctx_list_pop_head - Pop the head context off the per-slot pullable + * queue. + * @kbdev: Device pointer + * @js: Job slot to use + * + * Return: Context to use for specified slot. + * NULL if no contexts present for specified slot + */ +static struct kbase_context *kbase_js_ctx_list_pop_head( + struct kbase_device *kbdev, int js) +{ + struct kbase_context *kctx; + unsigned long flags; + + spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags); + kctx = kbase_js_ctx_list_pop_head_nolock(kbdev, js); + spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags); + + return kctx; +} + /** * kbase_js_ctx_pullable - Return if a context can be pulled from on the * specified slot @@ -1207,7 +1066,8 @@ static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js, return false; /* next atom blocked */ if (katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) { if (katom->x_pre_dep->gpu_rb_state == - KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB || + katom->x_pre_dep->will_fail_event_code) return false; if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) && kbase_backend_nr_atoms_on_slot(kctx->kbdev, js)) @@ -1236,7 +1096,7 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, /* Dependent atom must already have been submitted */ if (!(dep_atom->atom_flags & - KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED)) { + KBASE_KATOM_FLAG_JSCTX_IN_TREE)) { ret = false; break; } @@ -1255,6 +1115,12 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, ret = false; break; } + /* Each dependee atom can only have one + * same-slot dependency */ + if (dep_atom->post_dep) { + ret = false; + break; + } has_dep = true; } else { /* Only one cross-slot dependency can be @@ -1294,21 +1160,6 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, has_x_dep = true; } - if (kbase_jd_katom_dep_type(&katom->dep[i]) == - BASE_JD_DEP_TYPE_DATA && - js == dep_js) { - struct kbase_jd_atom *last_atom = - jsctx_rb_peek_last(kctx, js, - prio); - - /* Last atom on slot must be pre-dep for this - * atom */ - if (last_atom != dep_atom) { - ret = false; - break; - } - } - /* Dependency can be represented in ringbuffers */ } } @@ -1342,9 +1193,10 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, } if ((kbase_jd_katom_dep_type(&katom->dep[i]) == BASE_JD_DEP_TYPE_DATA) && - (js == dep_js)) - katom->atom_flags |= - KBASE_KATOM_FLAG_FAIL_PREV; + (js == dep_js)) { + katom->pre_dep = dep_atom; + dep_atom->post_dep = katom; + } list_del(&katom->dep_item[i]); kbase_jd_katom_dep_clear(&katom->dep[i]); @@ -1410,17 +1262,7 @@ bool kbasep_js_add_job(struct kbase_context *kctx, KBASE_TIMELINE_ATOM_READY(kctx, kbase_jd_atom_id(kctx, atom)); - if (kbase_js_dep_resolved_submit(kctx, atom, &enqueue_required) != 0) { - /* Ringbuffer was full (should be impossible) - fail the job */ - --(js_kctx_info->ctx.nr_jobs); - - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); - mutex_unlock(&js_devdata->runpool_mutex); - - atom->event_code = 
BASE_JD_EVENT_JOB_CANCELLED; - - goto out_unlock; - } + enqueue_required = kbase_js_dep_resolved_submit(kctx, atom); KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_ADD_JOB, kctx, atom, atom->jc, kbasep_js_trace_get_refcnt_nolock(kbdev, kctx)); @@ -1430,11 +1272,11 @@ bool kbasep_js_add_job(struct kbase_context *kctx, if (enqueue_required) { if (kbase_js_ctx_pullable(kctx, atom->slot_nr, false)) - timer_sync = kbase_js_ctx_list_add_pullable(kbdev, kctx, - atom->slot_nr); + timer_sync = kbase_js_ctx_list_add_pullable_nolock( + kbdev, kctx, atom->slot_nr); else - timer_sync = kbase_js_ctx_list_add_unpullable(kbdev, - kctx, atom->slot_nr); + timer_sync = kbase_js_ctx_list_add_unpullable_nolock( + kbdev, kctx, atom->slot_nr); } /* If this context is active and the atom is the first on its slot, * kick the job manager to attempt to fast-start the atom */ @@ -1762,9 +1604,7 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( #if defined(CONFIG_MALI_GATOR_SUPPORT) kbase_trace_mali_mmu_as_released(kctx->as_nr); #endif -#if defined(CONFIG_MALI_MIPE_ENABLED) kbase_tlstream_tl_nret_as_ctx(&kbdev->as[kctx->as_nr], kctx); -#endif kbase_backend_release_ctx_irq(kbdev, kctx); @@ -1817,7 +1657,8 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( wake_up(&js_kctx_info->ctx.is_scheduled_wait); /* Queue an action to occur after we've dropped the lock */ - release_result |= KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED; + release_result |= KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED | + KBASEP_JS_RELEASE_RESULT_SCHED_ALL; } else { kbasep_js_run_jobs_after_ctx_and_atom_release(kbdev, kctx, katom_retained_state, runpool_ctx_attr_change); @@ -1875,14 +1716,12 @@ void kbasep_js_runpool_release_ctx_and_katom_retained_state( { struct kbasep_js_device_data *js_devdata; struct kbasep_js_kctx_info *js_kctx_info; - base_jd_event_code event_code; kbasep_js_release_result release_result; KBASE_DEBUG_ASSERT(kbdev != NULL); KBASE_DEBUG_ASSERT(kctx != NULL); js_kctx_info = &kctx->jctx.sched_info; js_devdata = &kbdev->js_data; - event_code = katom_retained_state->event_code; mutex_lock(&js_devdata->queue_mutex); mutex_lock(&js_kctx_info->ctx.jsctx_mutex); @@ -1954,77 +1793,11 @@ static void kbasep_js_runpool_release_ctx_no_schedule( */ } -/** - * kbase_js_set_timeouts - update all JS timeouts with user specified data - * @kbdev: Device pointer - * - * Timeouts are specified through the 'js_timeouts' sysfs file. If a timeout is - * set to a positive number then that becomes the new value used, if a timeout - * is negative then the default is set. 
- */ -static void kbase_js_set_timeouts(struct kbase_device *kbdev) +void kbase_js_set_timeouts(struct kbase_device *kbdev) { - struct kbasep_js_device_data *js_data = &kbdev->js_data; - - if (kbdev->js_scheduling_period_ns < 0) - js_data->scheduling_period_ns = DEFAULT_JS_SCHEDULING_PERIOD_NS; - else if (kbdev->js_scheduling_period_ns > 0) - js_data->scheduling_period_ns = kbdev->js_scheduling_period_ns; - - if (kbdev->js_soft_stop_ticks < 0) - js_data->soft_stop_ticks = DEFAULT_JS_SOFT_STOP_TICKS; - else if (kbdev->js_soft_stop_ticks > 0) - js_data->soft_stop_ticks = kbdev->js_soft_stop_ticks; - - if (kbdev->js_soft_stop_ticks_cl < 0) - js_data->soft_stop_ticks_cl = DEFAULT_JS_SOFT_STOP_TICKS_CL; - else if (kbdev->js_soft_stop_ticks_cl > 0) - js_data->soft_stop_ticks_cl = kbdev->js_soft_stop_ticks_cl; - - if (kbdev->js_hard_stop_ticks_ss < 0) { - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408)) - js_data->hard_stop_ticks_ss = - DEFAULT_JS_HARD_STOP_TICKS_SS_8408; - else - js_data->hard_stop_ticks_ss = - DEFAULT_JS_HARD_STOP_TICKS_SS; - } else if (kbdev->js_hard_stop_ticks_ss > 0) { - js_data->hard_stop_ticks_ss = kbdev->js_hard_stop_ticks_ss; - } - - if (kbdev->js_hard_stop_ticks_cl < 0) - js_data->hard_stop_ticks_cl = DEFAULT_JS_HARD_STOP_TICKS_CL; - else if (kbdev->js_hard_stop_ticks_cl > 0) - js_data->hard_stop_ticks_cl = kbdev->js_hard_stop_ticks_cl; - - if (kbdev->js_hard_stop_ticks_dumping < 0) - js_data->hard_stop_ticks_dumping = - DEFAULT_JS_HARD_STOP_TICKS_DUMPING; - else if (kbdev->js_hard_stop_ticks_dumping > 0) - js_data->hard_stop_ticks_dumping = - kbdev->js_hard_stop_ticks_dumping; - - if (kbdev->js_reset_ticks_ss < 0) { - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408)) - js_data->gpu_reset_ticks_ss = - DEFAULT_JS_RESET_TICKS_SS_8408; - else - js_data->gpu_reset_ticks_ss = DEFAULT_JS_RESET_TICKS_SS; - } else if (kbdev->js_reset_ticks_ss > 0) { - js_data->gpu_reset_ticks_ss = kbdev->js_reset_ticks_ss; - } - - if (kbdev->js_reset_ticks_cl < 0) - js_data->gpu_reset_ticks_cl = DEFAULT_JS_RESET_TICKS_CL; - else if (kbdev->js_reset_ticks_cl > 0) - js_data->gpu_reset_ticks_cl = kbdev->js_reset_ticks_cl; + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); - if (kbdev->js_reset_ticks_dumping < 0) - js_data->gpu_reset_ticks_dumping = - DEFAULT_JS_RESET_TICKS_DUMPING; - else if (kbdev->js_reset_ticks_dumping > 0) - js_data->gpu_reset_ticks_dumping = - kbdev->js_reset_ticks_dumping; + kbase_backend_timeouts_changed(kbdev); } static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, @@ -2071,16 +1844,6 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, 0u, kbasep_js_trace_get_refcnt(kbdev, kctx)); - if (js_devdata->nr_user_contexts_running == 0 && - kbdev->js_timeouts_updated) { - /* Only when there are no other contexts submitting jobs: - * Latch in run-time job scheduler timeouts that were set - * through js_timeouts sysfs file */ - kbase_js_set_timeouts(kbdev); - - kbdev->js_timeouts_updated = false; - } - js_kctx_info->ctx.is_scheduled = true; mutex_lock(&new_address_space->transaction_mutex); @@ -2090,13 +1853,10 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, if (!kbase_backend_use_ctx(kbdev, kctx, as_nr)) { spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); mutex_unlock(&new_address_space->transaction_mutex); - /* If address space is not pending, then kbase_backend_use_ctx() - * failed. 
Roll back the transaction so far and return */ - if (!kctx->as_pending) { - js_kctx_info->ctx.is_scheduled = false; + /* Roll back the transaction so far and return */ + js_kctx_info->ctx.is_scheduled = false; - kbase_backend_release_free_address_space(kbdev, as_nr); - } + kbase_backend_release_free_address_space(kbdev, as_nr); mutex_unlock(&js_devdata->runpool_mutex); @@ -2109,9 +1869,7 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, #if defined(CONFIG_MALI_GATOR_SUPPORT) kbase_trace_mali_mmu_as_in_use(kctx->as_nr); #endif -#if defined(CONFIG_MALI_MIPE_ENABLED) kbase_tlstream_tl_ret_as_ctx(&kbdev->as[kctx->as_nr], kctx); -#endif /* Cause any future waiter-on-termination to wait until the context is * descheduled */ @@ -2169,11 +1927,6 @@ static bool kbase_js_use_ctx(struct kbase_device *kbdev, unsigned long flags; spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); - if (kctx->as_pending) { - /* Context waiting for AS to be assigned */ - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); - return false; - } if (kbase_backend_use_ctx_sched(kbdev, kctx)) { /* Context already has ASID - mark as active */ kbdev->hwaccess.active_kctx = kctx; @@ -2212,7 +1965,7 @@ void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, is_scheduled = js_kctx_info->ctx.is_scheduled; if (!is_scheduled) { /* Add the context to the pullable list */ - if (kbase_js_ctx_list_add_pullable(kbdev, kctx, 0)) + if (kbase_js_ctx_list_add_pullable_head(kbdev, kctx, 0)) kbase_js_sync_timers(kbdev); /* Fast-starting requires the jsctx_mutex to be dropped, @@ -2240,7 +1993,6 @@ void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev, struct kbase_context *kctx) { struct kbasep_js_kctx_info *js_kctx_info; - bool pending; KBASE_DEBUG_ASSERT(kctx != NULL); js_kctx_info = &kctx->jctx.sched_info; @@ -2248,13 +2000,10 @@ void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev, /* We don't need to use the address space anymore */ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); js_kctx_info->ctx.flags &= (~KBASE_CTX_FLAG_PRIVILEGED); - pending = kctx->as_pending; mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); - /* Release the context - it will be scheduled out if there is no - * pending job */ - if (!pending) - kbasep_js_runpool_release_ctx(kbdev, kctx); + /* Release the context - it will be scheduled out */ + kbasep_js_runpool_release_ctx(kbdev, kctx); kbase_js_sched_all(kbdev); } @@ -2293,8 +2042,10 @@ void kbasep_js_suspend(struct kbase_device *kbdev) * the instrumented context. 
It'll be suspended by * disabling instrumentation */ if (kctx->jctx.sched_info.ctx.flags & - KBASE_CTX_FLAG_PRIVILEGED) - KBASE_DEBUG_ASSERT(++nr_privileged_ctx == 1); + KBASE_CTX_FLAG_PRIVILEGED) { + ++nr_privileged_ctx; + WARN_ON(nr_privileged_ctx != 1); + } } } CSTD_UNUSED(nr_privileged_ctx); @@ -2345,7 +2096,8 @@ void kbasep_js_resume(struct kbase_device *kbdev) if (!js_kctx_info->ctx.is_scheduled && kbase_js_ctx_pullable(kctx, js, false)) - timer_sync = kbase_js_ctx_list_add_pullable( + timer_sync = + kbase_js_ctx_list_add_pullable_nolock( kbdev, kctx, js); spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, @@ -2397,37 +2149,118 @@ static int kbase_js_get_slot(struct kbase_device *kbdev, return 1; } -int kbase_js_dep_resolved_submit(struct kbase_context *kctx, - struct kbase_jd_atom *katom, - bool *enqueue_required) +bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, + struct kbase_jd_atom *katom) { + bool enqueue_required; + katom->slot_nr = kbase_js_get_slot(kctx->kbdev, katom); lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kctx->jctx.lock); /* If slot will transition from unpullable to pullable then add to * pullable list */ if (jsctx_rb_none_to_pull(kctx, katom->slot_nr)) { - *enqueue_required = true; + enqueue_required = true; } else { - *enqueue_required = false; + enqueue_required = false; } /* Check if there are lower priority jobs to soft stop */ kbase_job_slot_ctx_priority_check_locked(kctx, katom); - /* Add atom to ring buffer. */ - if (unlikely(jsctx_rb_add_atom(kctx, katom))) { - /* The ring buffer is full. This should be impossible as the - * job dispatcher can not submit enough atoms to exceed the - * ring buffer size. Fail the job. - */ - WARN(1, "Job submit while JSCTX ringbuffer already full\n"); - return -EINVAL; + if ((katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) || + (katom->pre_dep && (katom->pre_dep->atom_flags & + KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST))) { + int prio = katom->sched_priority; + int js = katom->slot_nr; + struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js]; + + list_add_tail(&katom->queue, &queue->x_dep_head); + katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST; + enqueue_required = false; + } else { + /* Add atom to ring buffer. */ + jsctx_tree_add(kctx, katom); + katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_TREE; } - katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED; + return enqueue_required; +} - return 0; +/** + * kbase_js_move_to_tree - Move atom (and any dependent atoms) to the + * runnable_tree, ready for execution + * @katom: Atom to submit + * + * It is assumed that @katom does not have KBASE_KATOM_FLAG_X_DEP_BLOCKED set, + * but is still present in the x_dep list. If @katom has a same-slot dependent + * atom then that atom (and any dependents) will also be moved. + */ +static void kbase_js_move_to_tree(struct kbase_jd_atom *katom) +{ + lockdep_assert_held(&katom->kctx->kbdev->js_data.runpool_irq.lock); + + while (katom) { + WARN_ON(!(katom->atom_flags & + KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST)); + + if (!(katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED)) { + list_del(&katom->queue); + katom->atom_flags &= + ~KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST; + jsctx_tree_add(katom->kctx, katom); + katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_TREE; + } else { + break; + } + + katom = katom->post_dep; + } +} + + +/** + * kbase_js_evict_deps - Evict dependencies of a failed atom. + * @kctx: Context pointer + * @katom: Pointer to the atom that has failed. 
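In the reworked submission path an atom now lands in one of two places, tracked by KBASE_KATOM_FLAG_JSCTX_IN_TREE and KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST. The routing condition used by kbase_js_dep_resolved_submit() above, restated as a hypothetical predicate for readability (the helper does not exist in the driver; the types and flags are the driver's own):

    static bool atom_must_wait_in_x_dep_list(const struct kbase_jd_atom *katom)
    {
            /* Either the atom itself is blocked on a cross-slot
             * dependency, or its same-slot pre-dependency is still
             * parked in the x_dep list ahead of it. */
            return (katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) ||
                   (katom->pre_dep &&
                    (katom->pre_dep->atom_flags &
                     KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST));
    }

Atoms for which this is false go straight into the runnable tree via jsctx_tree_add() and become visible to jsctx_rb_peek(); the others sit on x_dep_head until kbase_js_move_to_tree() migrates them.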
+ * @js: The job slot the katom was run on. + * @prio: Priority of the katom. + * + * Remove all post dependencies of an atom from the context ringbuffers. + * + * The original atom's event_code will be propogated to all dependent atoms. + * + * Context: Caller must hold the HW access lock + */ +static void kbase_js_evict_deps(struct kbase_context *kctx, + struct kbase_jd_atom *katom, int js, int prio) +{ + struct kbase_jd_atom *x_dep = katom->x_post_dep; + struct kbase_jd_atom *next_katom = katom->post_dep; + + lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); + + if (next_katom) { + KBASE_DEBUG_ASSERT(next_katom->status != + KBASE_JD_ATOM_STATE_HW_COMPLETED); + next_katom->will_fail_event_code = katom->event_code; + + } + + /* Has cross slot depenency. */ + if (x_dep && (x_dep->atom_flags & (KBASE_KATOM_FLAG_JSCTX_IN_TREE | + KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST))) { + /* Remove dependency.*/ + x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED; + + /* Fail if it had a data dependency. */ + if (x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) { + x_dep->will_fail_event_code = katom->event_code; + } + if (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST) + kbase_js_move_to_tree(x_dep); + } } struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js) @@ -2456,8 +2289,7 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js) /* Due to ordering restrictions when unpulling atoms on failure, we do * not allow multiple runs of fail-dep atoms from the same context to be * present on the same slot */ - if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_PREV) && - atomic_read(&kctx->atoms_pulled_slot[js])) { + if (katom->pre_dep && atomic_read(&kctx->atoms_pulled_slot[js])) { struct kbase_jd_atom *prev_atom = kbase_backend_inspect_tail(kctx->kbdev, js); @@ -2467,7 +2299,8 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js) if (katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) { if (katom->x_pre_dep->gpu_rb_state == - KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB || + katom->x_pre_dep->will_fail_event_code) return NULL; if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) && kbase_backend_nr_atoms_on_slot(kctx->kbdev, js)) @@ -2510,6 +2343,8 @@ static void js_return_worker(struct work_struct *data) u64 affinity = katom->affinity; enum kbase_atom_coreref_state coreref_state = katom->coreref_state; + kbase_tlstream_aux_job_softstop_ex(katom); + kbase_backend_complete_wq(kbdev, katom); if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) @@ -2529,13 +2364,14 @@ static void js_return_worker(struct work_struct *data) if (!atomic_read(&kctx->atoms_pulled_slot[js]) && jsctx_rb_none_to_pull(kctx, js)) - timer_sync |= kbase_js_ctx_list_remove(kbdev, kctx, js); + timer_sync |= kbase_js_ctx_list_remove_nolock(kbdev, kctx, js); if (!atomic_read(&kctx->atoms_pulled)) { if (!kctx->slots_pullable) { WARN_ON(!kctx->ctx_runnable_ref); kctx->ctx_runnable_ref = false; atomic_dec(&kbdev->js_data.nr_contexts_runnable); + timer_sync = true; } if (kctx->as_nr != KBASEP_AS_NR_INVALID && @@ -2549,7 +2385,7 @@ static void js_return_worker(struct work_struct *data) for (slot = 0; slot < num_slots; slot++) { if (kbase_js_ctx_pullable(kctx, slot, true)) timer_sync |= - kbase_js_ctx_list_add_pullable( + kbase_js_ctx_list_add_pullable_nolock( kbdev, kctx, slot); } } @@ -2601,112 +2437,6 @@ void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom) queue_work(kctx->jctx.job_done_wq, &katom->work); } -static bool 
kbase_js_evict_atom(struct kbase_context *kctx, - struct kbase_jd_atom *katom_evict, - struct kbase_jd_atom *start_katom, - struct kbase_jd_atom *head_katom, - struct list_head *evict_list, - struct jsctx_rb *rb, int idx) -{ - struct kbase_jd_atom *x_dep = katom_evict->x_post_dep; - - if (!(katom_evict->atom_flags & KBASE_KATOM_FLAG_FAIL_PREV) && - katom_evict != start_katom) - return false; - - if (katom_evict->gpu_rb_state != KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { - WARN_ON(katom_evict->event_code != head_katom->event_code); - - return false; - } - - if (katom_evict->status == KBASE_JD_ATOM_STATE_HW_COMPLETED && - katom_evict != head_katom) - return false; - - /* Evict cross dependency if present */ - if (x_dep && (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED) - && (x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER)) - list_add_tail(&x_dep->dep_item[0], evict_list); - - /* If cross dependency is present and does not have a data dependency - * then unblock */ - if (x_dep && (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED) - && !(x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER)) - x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED; - - if (katom_evict != head_katom) { - rb->entries[idx & JSCTX_RB_MASK].atom_id = - KBASEP_ATOM_ID_INVALID; - - katom_evict->event_code = head_katom->event_code; - katom_evict->atom_flags &= - ~KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED; - - if (katom_evict->atom_flags & KBASE_KATOM_FLAG_HOLDING_CTX_REF) - kbase_jd_done(katom_evict, katom_evict->slot_nr, NULL, - 0); - else - kbase_jd_evict(kctx->kbdev, katom_evict); - } - - return true; -} - -/** - * kbase_js_evict_deps - Evict dependencies - * @kctx: Context pointer - * @head_katom: Pointer to the atom to evict - * - * Remove all post dependencies of an atom from the context ringbuffers. - * - * The original atom's event_code will be propogated to all dependent atoms. 
- * - * Context: Caller must hold both jctx and HW access locks - */ -static void kbase_js_evict_deps(struct kbase_context *kctx, - struct kbase_jd_atom *head_katom) -{ - struct list_head evict_list; - - lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex); - lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); - - INIT_LIST_HEAD(&evict_list); - - list_add_tail(&head_katom->dep_item[0], &evict_list); - - while (!list_empty(&evict_list)) { - struct kbase_jd_atom *start_katom; - - start_katom = list_entry(evict_list.prev, struct kbase_jd_atom, - dep_item[0]); - list_del(evict_list.prev); - - jsctx_rb_evict(kctx, start_katom, head_katom, &evict_list); - } -} - -/** - * kbase_js_compact - Compact JSCTX ringbuffers - * @kctx: Context pointer - * - * Compact the JSCTX ringbuffers, removing any NULL entries - * - * Context: Caller must hold both jctx and HW access locks - */ -static void kbase_js_compact(struct kbase_context *kctx) -{ - struct kbase_device *kbdev = kctx->kbdev; - int js; - - lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex); - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); - - for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) - jsctx_rb_compact(kctx, js); -} - bool kbase_js_complete_atom_wq(struct kbase_context *kctx, struct kbase_jd_atom *katom) { @@ -2729,12 +2459,7 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, mutex_lock(&js_devdata->runpool_mutex); spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); - if (katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED) { - if (katom->event_code != BASE_JD_EVENT_DONE) - kbase_js_evict_deps(kctx, katom); - - jsctx_rb_remove(kctx, katom); - + if (katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE) { context_idle = !atomic_dec_return(&kctx->atoms_pulled); atomic_dec(&kctx->atoms_pulled_slot[atom_slot]); @@ -2743,16 +2468,18 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, WARN_ON(!kctx->ctx_runnable_ref); kctx->ctx_runnable_ref = false; atomic_dec(&kbdev->js_data.nr_contexts_runnable); + timer_sync = true; } - - if (katom->event_code != BASE_JD_EVENT_DONE) - kbase_js_compact(kctx); } + WARN_ON(!(katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE)); if (!atomic_read(&kctx->atoms_pulled_slot[atom_slot]) && - jsctx_rb_none_to_pull(kctx, atom_slot)) - timer_sync |= kbase_js_ctx_list_remove(kctx->kbdev, kctx, - atom_slot); + jsctx_rb_none_to_pull(kctx, atom_slot)) { + if (!list_empty( + &kctx->jctx.sched_info.ctx.ctx_list_entry[atom_slot])) + timer_sync |= kbase_js_ctx_list_remove_nolock( + kctx->kbdev, kctx, atom_slot); + } /* * If submission is disabled on this context (most likely due to an @@ -2768,7 +2495,8 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { if (kbase_js_ctx_pullable(kctx, js, true)) - timer_sync |= kbase_js_ctx_list_add_pullable( + timer_sync |= + kbase_js_ctx_list_add_pullable_nolock( kbdev, kctx, js); } } else if (katom->x_post_dep && @@ -2777,7 +2505,8 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { if (kbase_js_ctx_pullable(kctx, js, true)) - timer_sync |= kbase_js_ctx_list_add_pullable( + timer_sync |= + kbase_js_ctx_list_add_pullable_nolock( kbdev, kctx, js); } } @@ -2802,32 +2531,29 @@ void kbase_js_complete_atom(struct kbase_jd_atom *katom, ktime_t *end_timestamp) struct kbase_device *kbdev; struct kbase_context *kctx = katom->kctx; union kbasep_js_policy *js_policy; - struct kbasep_js_device_data 
*js_devdata; + struct kbase_jd_atom *x_dep = katom->x_post_dep; kbdev = kctx->kbdev; js_policy = &kbdev->js_data.policy; - js_devdata = &kbdev->js_data; lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); + if (katom->will_fail_event_code) + katom->event_code = katom->will_fail_event_code; + katom->status = KBASE_JD_ATOM_STATE_HW_COMPLETED; + if (katom->event_code != BASE_JD_EVENT_DONE) { + kbase_js_evict_deps(kctx, katom, katom->slot_nr, + katom->sched_priority); + } + #if defined(CONFIG_MALI_GATOR_SUPPORT) kbase_trace_mali_job_slots_event(GATOR_MAKE_EVENT(GATOR_JOB_SLOT_STOP, katom->slot_nr), NULL, 0); #endif -#if defined(CONFIG_MALI_MIPE_ENABLED) - kbase_tlstream_tl_nret_atom_lpu( - katom, - &kbdev->gpu_props.props.raw_props.js_features[ - katom->slot_nr]); - kbase_tlstream_tl_nret_atom_as(katom, &kbdev->as[kctx->as_nr]); - kbase_tlstream_tl_nret_ctx_lpu( - kctx, - &kbdev->gpu_props.props.raw_props.js_features[ - katom->slot_nr]); -#endif + /* Calculate the job's time used */ if (end_timestamp != NULL) { /* Only calculating it for jobs that really run on the HW (e.g. @@ -2851,21 +2577,26 @@ void kbase_js_complete_atom(struct kbase_jd_atom *katom, ktime_t *end_timestamp) kbase_jd_done(katom, katom->slot_nr, end_timestamp, 0); /* Unblock cross dependency if present */ - if (katom->x_post_dep && (katom->event_code == BASE_JD_EVENT_DONE || - !(katom->x_post_dep->atom_flags & - KBASE_KATOM_FLAG_FAIL_BLOCKER))) - katom->x_post_dep->atom_flags &= - ~KBASE_KATOM_FLAG_X_DEP_BLOCKED; + if (x_dep && (katom->event_code == BASE_JD_EVENT_DONE || + !(x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER)) && + (x_dep->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED)) { + bool was_pullable = kbase_js_ctx_pullable(kctx, x_dep->slot_nr, + false); + x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED; + kbase_js_move_to_tree(x_dep); + if (!was_pullable && kbase_js_ctx_pullable(kctx, x_dep->slot_nr, + false)) + kbase_js_ctx_list_add_pullable_nolock(kbdev, kctx, + x_dep->slot_nr); + } } void kbase_js_sched(struct kbase_device *kbdev, int js_mask) { struct kbasep_js_device_data *js_devdata; - union kbasep_js_policy *js_policy; bool timer_sync = false; js_devdata = &kbdev->js_data; - js_policy = &js_devdata->policy; down(&js_devdata->schedule_sem); mutex_lock(&js_devdata->queue_mutex); @@ -2915,16 +2646,15 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) /* Context can not be used at this time */ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); - if (kctx->as_pending || - kbase_js_ctx_pullable(kctx, js, false) + if (kbase_js_ctx_pullable(kctx, js, false) || (kctx->jctx.sched_info.ctx.flags & KBASE_CTX_FLAG_PRIVILEGED)) timer_sync |= - kbase_js_ctx_list_add_pullable_head( + kbase_js_ctx_list_add_pullable_head_nolock( kctx->kbdev, kctx, js); else timer_sync |= - kbase_js_ctx_list_add_unpullable( + kbase_js_ctx_list_add_unpullable_nolock( kctx->kbdev, kctx, js); spin_unlock_irqrestore( &js_devdata->runpool_irq.lock, flags); @@ -2953,12 +2683,12 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) /* Failed to pull jobs - push to head of list */ if (kbase_js_ctx_pullable(kctx, js, true)) timer_sync |= - kbase_js_ctx_list_add_pullable_head( + kbase_js_ctx_list_add_pullable_head_nolock( kctx->kbdev, kctx, js); else timer_sync |= - kbase_js_ctx_list_add_unpullable( + kbase_js_ctx_list_add_unpullable_nolock( kctx->kbdev, kctx, js); @@ -2984,10 +2714,12 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) /* Push to back of list */ if (kbase_js_ctx_pullable(kctx, js, 
true)) - timer_sync |= kbase_js_ctx_list_add_pullable( + timer_sync |= + kbase_js_ctx_list_add_pullable_nolock( kctx->kbdev, kctx, js); else - timer_sync |= kbase_js_ctx_list_add_unpullable( + timer_sync |= + kbase_js_ctx_list_add_unpullable_nolock( kctx->kbdev, kctx, js); spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); @@ -3018,6 +2750,7 @@ void kbase_js_zap_context(struct kbase_context *kctx) /* First, atomically do the following: * - mark the context as dying * - try to evict it from the policy queue */ + mutex_lock(&kctx->jctx.lock); mutex_lock(&js_devdata->queue_mutex); mutex_lock(&js_kctx_info->ctx.jsctx_mutex); js_kctx_info->ctx.is_dying = true; @@ -3093,6 +2826,7 @@ void kbase_js_zap_context(struct kbase_context *kctx) mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); mutex_unlock(&js_devdata->queue_mutex); + mutex_unlock(&kctx->jctx.lock); } else { unsigned long flags; bool was_retained; @@ -3128,6 +2862,7 @@ void kbase_js_zap_context(struct kbase_context *kctx) spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); mutex_unlock(&js_devdata->queue_mutex); + mutex_unlock(&kctx->jctx.lock); dev_dbg(kbdev->dev, "Zap: Ctx %p Release (may or may not schedule out immediately)", kctx); @@ -3194,7 +2929,6 @@ static void kbase_js_foreach_ctx_job(struct kbase_context *kctx, u32 js; kbdev = kctx->kbdev; - js_devdata = &kbdev->js_data; spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); @@ -3204,7 +2938,7 @@ static void kbase_js_foreach_ctx_job(struct kbase_context *kctx, /* Invoke callback on jobs on each slot in turn */ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) - jsctx_rb_foreach(kctx, js, callback); + jsctx_queue_foreach(kctx, js, callback); spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); } diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_js.h b/drivers/gpu/arm/midgard_for_linux/mali_kbase_js.h index 868c6808d628..66b213293016 100644 --- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_js.h +++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_js.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -491,15 +491,22 @@ void kbasep_js_resume(struct kbase_device *kbdev); * @param[in] kctx Context pointer * @param[in] atom Pointer to the atom to submit * - * @return 0 if submit succeeded - * error code if the atom can not be submitted at this - * time, due to insufficient space in the ringbuffer, or dependencies - * that can not be represented. - */ -int kbase_js_dep_resolved_submit(struct kbase_context *kctx, - struct kbase_jd_atom *katom, - bool *enqueue_required); + * @return Whether the context requires to be enqueued. */ +bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, + struct kbase_jd_atom *katom); +/** + * jsctx_ll_flush_to_rb() - Pushes atoms from the linked list to ringbuffer. + * @kctx: Context Pointer + * @prio: Priority (specifies the queue together with js). + * @js: Job slot (specifies the queue together with prio). + * + * Pushes all possible atoms from the linked list to the ringbuffer. + * Number of atoms are limited to free space in the ringbuffer and + * number of available atoms in the linked list. 
+ * + */ +void jsctx_ll_flush_to_rb(struct kbase_context *kctx, int prio, int js); /** * @brief Pull an atom from a context in the job scheduler for execution. * @@ -601,6 +608,16 @@ void kbase_js_zap_context(struct kbase_context *kctx); bool kbase_js_is_atom_valid(struct kbase_device *kbdev, struct kbase_jd_atom *katom); +/** + * kbase_js_set_timeouts - update all JS timeouts with user specified data + * @kbdev: Device pointer + * + * Timeouts are specified through the 'js_timeouts' sysfs file. If a timeout is + * set to a positive number then that becomes the new value used, if a timeout + * is negative then the default is set. + */ +void kbase_js_set_timeouts(struct kbase_device *kbdev); + /* * Helpers follow */ diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_js_defs.h b/drivers/gpu/arm/midgard_for_linux/mali_kbase_js_defs.h index 04f7809f79d3..e1342045b394 100644 --- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_js_defs.h +++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_js_defs.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -348,6 +348,9 @@ struct kbasep_js_device_data { u32 cfs_ctx_runtime_init_slices; /**< Value for DEFAULT_JS_CFS_CTX_RUNTIME_INIT_SLICES */ u32 cfs_ctx_runtime_min_slices; /**< Value for DEFAULT_JS_CFS_CTX_RUNTIME_MIN_SLICES */ + /**< Value for JS_SOFT_JOB_TIMEOUT */ + atomic_t soft_job_timeout_ms; + /** List of suspended soft jobs */ struct list_head suspended_soft_jobs_list; @@ -402,7 +405,7 @@ struct kbasep_js_kctx_info { * * You may not access any of these members from IRQ context. */ - struct { + struct kbase_jsctx { struct mutex jsctx_mutex; /**< Job Scheduler Context lock */ /** Number of jobs ready to run - does \em not include the jobs waiting in diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_js_policy_cfs.c b/drivers/gpu/arm/midgard_for_linux/mali_kbase_js_policy_cfs.c index 692460710ce0..90c13458ec7c 100644 --- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_js_policy_cfs.c +++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_js_policy_cfs.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -235,16 +235,11 @@ int kbasep_js_policy_init_ctx(struct kbase_device *kbdev, struct kbase_context * void kbasep_js_policy_term_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx) { - struct kbasep_js_policy_cfs_ctx *ctx_info; - struct kbasep_js_policy_cfs *policy_info; struct kbase_device *kbdev; KBASE_DEBUG_ASSERT(js_policy != NULL); KBASE_DEBUG_ASSERT(kctx != NULL); - policy_info = &js_policy->cfs; - ctx_info = &kctx->jctx.sched_info.runpool.policy_ctx.cfs; - kbdev = container_of(js_policy, struct kbase_device, js_data.policy); KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_POLICY_TERM_CTX, kctx, NULL, 0u, kbasep_js_policy_trace_get_refcnt(kbdev, kctx)); diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem.c b/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem.c index ffc12a538af6..c1851caa95a0 100644 --- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem.c +++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -24,22 +24,21 @@ #ifdef CONFIG_DMA_SHARED_BUFFER #include #endif /* CONFIG_DMA_SHARED_BUFFER */ - +#ifdef CONFIG_UMP +#include +#endif /* CONFIG_UMP */ #include #include #include +#include #include #include #include #include #include -#include #include - -#if defined(CONFIG_MALI_MIPE_ENABLED) #include -#endif /** * @brief Check the zone compatibility of two regions. @@ -392,13 +391,33 @@ int kbase_add_va_region(struct kbase_context *kctx, { u64 start_pfn; - tmp = kbase_region_tracker_find_region_meeting_reqs(kctx, reg, nr_pages, align); - if (!tmp) { + /* + * Depending on the zone the allocation request is for + * we might need to retry it. + */ + do { + tmp = kbase_region_tracker_find_region_meeting_reqs( + kctx, reg, nr_pages, align); + if (tmp) { + start_pfn = (tmp->start_pfn + align - 1) & + ~(align - 1); + err = kbase_insert_va_region_nolock(kctx, reg, + tmp, start_pfn, nr_pages); + break; + } + + /* + * If the allocation is not from the same zone as JIT + * then don't retry, we're out of VA and there is + * nothing which can be done about it. + */ + if ((reg->flags & KBASE_REG_ZONE_MASK) != + KBASE_REG_ZONE_CUSTOM_VA) + break; + } while (kbase_jit_evict(kctx)); + + if (!tmp) err = -ENOMEM; - goto exit; - } - start_pfn = (tmp->start_pfn + align - 1) & ~(align - 1); - err = kbase_insert_va_region_nolock(kctx, reg, tmp, start_pfn, nr_pages); } exit: @@ -410,7 +429,10 @@ KBASE_EXPORT_TEST_API(kbase_add_va_region); /** * @brief Initialize the internal region tracker data structure. 
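The do/while added to kbase_add_va_region() above retries the VA search only for CUSTOM_VA allocations, since that is the zone JIT shares, so evicting JIT regions can only ever free VA there. A simplified restatement of the loop (error handling and the actual insertion trimmed; all identifiers are the driver's own):

    do {
            tmp = kbase_region_tracker_find_region_meeting_reqs(kctx, reg,
                            nr_pages, align);
            if (tmp) {
                    /* Free VA found: align start_pfn and insert reg. */
                    break;
            }
            /* Other zones cannot gain anything from JIT eviction, so
             * give up immediately for them. */
            if ((reg->flags & KBASE_REG_ZONE_MASK) != KBASE_REG_ZONE_CUSTOM_VA)
                    break;
    } while (kbase_jit_evict(kctx));    /* retry while eviction frees VA */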
*/ -static void kbase_region_tracker_ds_init(struct kbase_context *kctx, struct kbase_va_region *same_va_reg, struct kbase_va_region *exec_reg, struct kbase_va_region *custom_va_reg) +static void kbase_region_tracker_ds_init(struct kbase_context *kctx, + struct kbase_va_region *same_va_reg, + struct kbase_va_region *exec_reg, + struct kbase_va_region *custom_va_reg) { kctx->reg_rbtree = RB_ROOT; kbase_region_tracker_insert(kctx, same_va_reg); @@ -448,6 +470,11 @@ int kbase_region_tracker_init(struct kbase_context *kctx) size_t same_va_bits = sizeof(void *) * BITS_PER_BYTE; u64 custom_va_size = KBASE_REG_ZONE_CUSTOM_VA_SIZE; u64 gpu_va_limit = (1ULL << kctx->kbdev->gpu_props.mmu.va_bits) >> PAGE_SHIFT; + u64 same_va_pages; + int err; + + /* Take the lock as kbase_free_alloced_region requires it */ + kbase_gpu_vm_lock(kctx); #if defined(CONFIG_ARM64) same_va_bits = VA_BITS; @@ -464,24 +491,29 @@ int kbase_region_tracker_init(struct kbase_context *kctx) same_va_bits = 33; #endif - if (kctx->kbdev->gpu_props.mmu.va_bits < same_va_bits) - return -EINVAL; + if (kctx->kbdev->gpu_props.mmu.va_bits < same_va_bits) { + err = -EINVAL; + goto fail_unlock; + } + same_va_pages = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1; /* all have SAME_VA */ same_va_reg = kbase_alloc_free_region(kctx, 1, - (1ULL << (same_va_bits - PAGE_SHIFT)) - 1, + same_va_pages, KBASE_REG_ZONE_SAME_VA); - if (!same_va_reg) - return -ENOMEM; + if (!same_va_reg) { + err = -ENOMEM; + goto fail_unlock; + } #ifdef CONFIG_64BIT - /* only 32-bit clients have the other two zones */ + /* 32-bit clients have exec and custom VA zones */ if (kctx->is_compat) { #endif if (gpu_va_limit <= KBASE_REG_ZONE_CUSTOM_VA_BASE) { - kbase_free_alloced_region(same_va_reg); - return -EINVAL; + err = -EINVAL; + goto fail_free_same_va; } /* If the current size of TMEM is out of range of the * virtual address space addressable by the MMU then @@ -496,8 +528,8 @@ int kbase_region_tracker_init(struct kbase_context *kctx) KBASE_REG_ZONE_EXEC); if (!exec_reg) { - kbase_free_alloced_region(same_va_reg); - return -ENOMEM; + err = -ENOMEM; + goto fail_free_same_va; } custom_va_reg = kbase_alloc_free_region(kctx, @@ -505,9 +537,8 @@ int kbase_region_tracker_init(struct kbase_context *kctx) custom_va_size, KBASE_REG_ZONE_CUSTOM_VA); if (!custom_va_reg) { - kbase_free_alloced_region(same_va_reg); - kbase_free_alloced_region(exec_reg); - return -ENOMEM; + err = -ENOMEM; + goto fail_free_exec; } #ifdef CONFIG_64BIT } @@ -515,7 +546,108 @@ int kbase_region_tracker_init(struct kbase_context *kctx) kbase_region_tracker_ds_init(kctx, same_va_reg, exec_reg, custom_va_reg); + kctx->same_va_end = same_va_pages + 1; + + kbase_gpu_vm_unlock(kctx); + return 0; + +fail_free_exec: + kbase_free_alloced_region(exec_reg); +fail_free_same_va: + kbase_free_alloced_region(same_va_reg); +fail_unlock: + kbase_gpu_vm_unlock(kctx); + return err; +} + +int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages) +{ +#ifdef CONFIG_64BIT + struct kbase_va_region *same_va; + struct kbase_va_region *custom_va_reg; + u64 same_va_bits; + u64 total_va_size; + int err; + + /* + * Nothing to do for 32-bit clients, JIT uses the existing + * custom VA zone. 
+ */ + if (kctx->is_compat) + return 0; + +#if defined(CONFIG_ARM64) + same_va_bits = VA_BITS; +#elif defined(CONFIG_X86_64) + same_va_bits = 47; +#elif defined(CONFIG_64BIT) +#error Unsupported 64-bit architecture +#endif + + if (kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA)) + same_va_bits = 33; + + total_va_size = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1; + + kbase_gpu_vm_lock(kctx); + + /* + * Modify the same VA free region after creation. Be careful to ensure + * that allocations haven't been made as they could cause an overlap + * to happen with existing same VA allocations and the custom VA zone. + */ + same_va = kbase_region_tracker_find_region_base_address(kctx, + PAGE_SIZE); + if (!same_va) { + err = -ENOMEM; + goto fail_unlock; + } + + /* The region flag or region size has changed since creation so bail. */ + if ((!(same_va->flags & KBASE_REG_FREE)) || + (same_va->nr_pages != total_va_size)) { + err = -ENOMEM; + goto fail_unlock; + } + + if (same_va->nr_pages < jit_va_pages || + kctx->same_va_end < jit_va_pages) { + err = -ENOMEM; + goto fail_unlock; + } + + /* It's safe to adjust the same VA zone now */ + same_va->nr_pages -= jit_va_pages; + kctx->same_va_end -= jit_va_pages; + + /* + * Create a custom VA zone at the end of the VA for allocations which + * JIT can use so it doesn't have to allocate VA from the kernel. + */ + custom_va_reg = kbase_alloc_free_region(kctx, + kctx->same_va_end, + jit_va_pages, + KBASE_REG_ZONE_CUSTOM_VA); + if (!custom_va_reg) { + /* + * The context will be destroyed if we fail here so no point + * reverting the change we made to same_va. + */ + err = -ENOMEM; + goto fail_unlock; + } + + kbase_region_tracker_insert(kctx, custom_va_reg); + + kbase_gpu_vm_unlock(kctx); + return 0; + +fail_unlock: + kbase_gpu_vm_unlock(kctx); + return err; +#else return 0; +#endif } int kbase_mem_init(struct kbase_device *kbdev) @@ -613,8 +745,46 @@ KBASE_EXPORT_TEST_API(kbase_alloc_free_region); */ void kbase_free_alloced_region(struct kbase_va_region *reg) { - KBASE_DEBUG_ASSERT(NULL != reg); if (!(reg->flags & KBASE_REG_FREE)) { + /* + * The physical allocation should have been removed from the + * eviction list before this function is called. However, in the + * case of abnormal process termination or the app leaking the + * memory kbase_mem_free_region is not called so it can still be + * on the list at termination time of the region tracker. + */ + if (!list_empty(®->gpu_alloc->evict_node)) { + /* + * Unlink the physical allocation before unmaking it + * evictable so that the allocation isn't grown back to + * its last backed size as we're going to unmap it + * anyway. + */ + reg->cpu_alloc->reg = NULL; + if (reg->cpu_alloc != reg->gpu_alloc) + reg->gpu_alloc->reg = NULL; + + /* + * If a region has been made evictable then we must + * unmake it before trying to free it. + * If the memory hasn't been reclaimed it will be + * unmapped and freed below, if it has been reclaimed + * then the operations below are no-ops. + */ + if (reg->flags & KBASE_REG_DONT_NEED) { + KBASE_DEBUG_ASSERT(reg->cpu_alloc->type == + KBASE_MEM_TYPE_NATIVE); + kbase_mem_evictable_unmake(reg->gpu_alloc); + } + } + + /* + * Remove the region from the sticky resource metadata + * list should it be there. 
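kbase_region_tracker_init_jit() above carves the JIT zone out of the top of a 64-bit client's SAME_VA range rather than reserving new VA: it shrinks the free SAME_VA region by jit_va_pages and plants a CUSTOM_VA region at the lowered kctx->same_va_end. A worked example of that arithmetic, assuming a 33-bit GPU VA space (BASE_HW_FEATURE_33BIT_VA), 4 KiB pages, and a made-up jit_va_pages figure:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            const uint64_t page_shift = 12;                 /* 4 KiB pages */
            const uint64_t same_va_bits = 33;               /* 33-bit VA   */
            uint64_t same_va_pages =
                    (1ULL << (same_va_bits - page_shift)) - 1;  /* 0x1fffff */
            uint64_t same_va_end = same_va_pages + 1;           /* 0x200000 */
            const uint64_t jit_va_pages = 0x10000;              /* 256 MiB  */

            /* kbase_region_tracker_init_jit() shrinks SAME_VA ... */
            same_va_pages -= jit_va_pages;                      /* 0x1effff */
            same_va_end -= jit_va_pages;                        /* 0x1f0000 */

            /* ... and the JIT CUSTOM_VA zone starts right after it. */
            printf("SAME_VA:       pfn 1 .. 0x%llx\n",
                   (unsigned long long)same_va_pages);
            printf("JIT CUSTOM_VA: pfn 0x%llx, 0x%llx pages\n",
                   (unsigned long long)same_va_end,
                   (unsigned long long)jit_va_pages);
            return 0;
    }

If allocations had already been made from SAME_VA the free region would no longer span the whole range, which is why the function bails out with -ENOMEM in that case rather than risking an overlap.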
+ */ + kbase_sticky_resource_release(reg->kctx, NULL, + reg->start_pfn << PAGE_SHIFT); + kbase_mem_phy_alloc_put(reg->cpu_alloc); kbase_mem_phy_alloc_put(reg->gpu_alloc); /* To detect use-after-free in debug builds */ @@ -625,41 +795,6 @@ void kbase_free_alloced_region(struct kbase_va_region *reg) KBASE_EXPORT_TEST_API(kbase_free_alloced_region); -void kbase_mmu_update(struct kbase_context *kctx) -{ - KBASE_DEBUG_ASSERT(NULL != kctx); - lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); - /* ASSERT that the context has a valid as_nr, which is only the case - * when it's scheduled in. - * - * as_nr won't change because the caller has the runpool_irq lock */ - KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); - lockdep_assert_held(&kctx->kbdev->as[kctx->as_nr].transaction_mutex); - - kctx->kbdev->mmu_mode->update(kctx); -} - -KBASE_EXPORT_TEST_API(kbase_mmu_update); - -void kbase_mmu_disable(struct kbase_context *kctx) -{ - KBASE_DEBUG_ASSERT(NULL != kctx); - /* ASSERT that the context has a valid as_nr, which is only the case - * when it's scheduled in. - * - * as_nr won't change because the caller has the runpool_irq lock */ - KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); - - kctx->kbdev->mmu_mode->disable_as(kctx->kbdev, kctx->as_nr); -} - -KBASE_EXPORT_TEST_API(kbase_mmu_disable); - -void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr) -{ - kbdev->mmu_mode->disable_as(kbdev, as_nr); -} - int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align) { int err; @@ -891,10 +1026,10 @@ static int kbase_do_syncset(struct kbase_context *kctx, /* find the region where the virtual address is contained */ reg = kbase_region_tracker_find_region_enclosing_address(kctx, - sset->mem_handle); + sset->mem_handle.basep.handle); if (!reg) { dev_warn(kctx->kbdev->dev, "Can't find region at VA 0x%016llX", - sset->mem_handle); + sset->mem_handle.basep.handle); err = -EINVAL; goto out_unlock; } @@ -908,7 +1043,7 @@ static int kbase_do_syncset(struct kbase_context *kctx, map = kbasep_find_enclosing_cpu_mapping_of_region(reg, start, size); if (!map) { dev_warn(kctx->kbdev->dev, "Can't find CPU mapping 0x%016lX for VA 0x%016llX", - start, sset->mem_handle); + start, sset->mem_handle.basep.handle); err = -EINVAL; goto out_unlock; } @@ -989,17 +1124,34 @@ int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *re KBASE_DEBUG_ASSERT(NULL != kctx); KBASE_DEBUG_ASSERT(NULL != reg); lockdep_assert_held(&kctx->reg_lock); + + /* + * Unlink the physical allocation before unmaking it evictable so + * that the allocation isn't grown back to its last backed size + * as we're going to unmap it anyway. + */ + reg->cpu_alloc->reg = NULL; + if (reg->cpu_alloc != reg->gpu_alloc) + reg->gpu_alloc->reg = NULL; + + /* + * If a region has been made evictable then we must unmake it + * before trying to free it. + * If the memory hasn't been reclaimed it will be unmapped and freed + * below, if it has been reclaimed then the operations below are no-ops. 
+ */ + if (reg->flags & KBASE_REG_DONT_NEED) { + KBASE_DEBUG_ASSERT(reg->cpu_alloc->type == + KBASE_MEM_TYPE_NATIVE); + kbase_mem_evictable_unmake(reg->gpu_alloc); + } + err = kbase_gpu_munmap(kctx, reg); if (err) { dev_warn(reg->kctx->kbdev->dev, "Could not unmap from the GPU...\n"); goto out; } -#ifndef CONFIG_MALI_NO_MALI - if (kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_6367)) { - /* Wait for GPU to flush write buffer before freeing physical pages */ - kbase_wait_write_flush(kctx); - } -#endif + /* This will also free the physical pages */ kbase_free_alloced_region(reg); @@ -1046,7 +1198,6 @@ int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr) kbase_free_alloced_region(reg); } else { /* A real GPU va */ - /* Validate the region */ reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr); if (!reg || (reg->flags & KBASE_REG_FREE)) { @@ -1063,7 +1214,6 @@ int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr) err = -EINVAL; goto out_unlock; } - err = kbase_mem_free_region(kctx, reg); } @@ -1125,8 +1275,8 @@ int kbase_alloc_phy_pages_helper( size_t nr_pages_requested) { int new_page_count __maybe_unused; + size_t old_page_count = alloc->nents; - KBASE_DEBUG_ASSERT(alloc); KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE); KBASE_DEBUG_ASSERT(alloc->imported.kctx); @@ -1142,14 +1292,21 @@ int kbase_alloc_phy_pages_helper( kbase_process_page_usage_inc(alloc->imported.kctx, nr_pages_requested); if (kbase_mem_pool_alloc_pages(&alloc->imported.kctx->mem_pool, - nr_pages_requested, alloc->pages + alloc->nents) != 0) + nr_pages_requested, alloc->pages + old_page_count) != 0) goto no_alloc; -#if defined(CONFIG_MALI_MIPE_ENABLED) + /* + * Request a zone cache update, this scans only the new pages an + * appends their information to the zone cache. if the update + * fails then clear the cache so we fall-back to doing things + * page by page. + */ + if (kbase_zone_cache_update(alloc, old_page_count) != 0) + kbase_zone_cache_clear(alloc); + kbase_tlstream_aux_pagesalloc( (u32)alloc->imported.kctx->id, (u64)new_page_count); -#endif alloc->nents += nr_pages_requested; done: @@ -1167,11 +1324,12 @@ int kbase_free_phy_pages_helper( struct kbase_mem_phy_alloc *alloc, size_t nr_pages_to_free) { + struct kbase_context *kctx = alloc->imported.kctx; bool syncback; + bool reclaimed = (alloc->evicted != 0); phys_addr_t *start_free; int new_page_count __maybe_unused; - KBASE_DEBUG_ASSERT(alloc); KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE); KBASE_DEBUG_ASSERT(alloc->imported.kctx); KBASE_DEBUG_ASSERT(alloc->nents >= nr_pages_to_free); @@ -1184,22 +1342,37 @@ int kbase_free_phy_pages_helper( syncback = alloc->properties & KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED; - kbase_mem_pool_free_pages(&alloc->imported.kctx->mem_pool, + /* + * Clear the zone cache, we don't expect JIT allocations to be + * shrunk in parts so there is no point trying to optimize for that + * by scanning for the changes caused by freeing this memory and + * updating the existing cache entries. 
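+	 * A fresh cache is rebuilt on demand by the next call to
+	 * kbase_zone_cache_build().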
+ */ + kbase_zone_cache_clear(alloc); + + kbase_mem_pool_free_pages(&kctx->mem_pool, nr_pages_to_free, start_free, - syncback); + syncback, + reclaimed); alloc->nents -= nr_pages_to_free; - kbase_process_page_usage_dec(alloc->imported.kctx, nr_pages_to_free); - new_page_count = kbase_atomic_sub_pages( - nr_pages_to_free, &alloc->imported.kctx->used_pages); - kbase_atomic_sub_pages(nr_pages_to_free, &alloc->imported.kctx->kbdev->memdev.used_pages); -#if defined(CONFIG_MALI_MIPE_ENABLED) - kbase_tlstream_aux_pagesalloc( - (u32)alloc->imported.kctx->id, - (u64)new_page_count); -#endif + /* + * If the allocation was not evicted (i.e. evicted == 0) then + * the page accounting needs to be done. + */ + if (!reclaimed) { + kbase_process_page_usage_dec(kctx, nr_pages_to_free); + new_page_count = kbase_atomic_sub_pages(nr_pages_to_free, + &kctx->used_pages); + kbase_atomic_sub_pages(nr_pages_to_free, + &kctx->kbdev->memdev.used_pages); + + kbase_tlstream_aux_pagesalloc( + (u32)kctx->id, + (u64)new_page_count); + } return 0; } @@ -1212,7 +1385,12 @@ void kbase_mem_kref_free(struct kref *kref) switch (alloc->type) { case KBASE_MEM_TYPE_NATIVE: { - KBASE_DEBUG_ASSERT(alloc->imported.kctx); + WARN_ON(!alloc->imported.kctx); + /* + * The physical allocation must have been removed from the + * eviction list before trying to free it. + */ + WARN_ON(!list_empty(&alloc->evict_node)); kbase_free_phy_pages_helper(alloc, alloc->nents); break; } @@ -1246,6 +1424,8 @@ void kbase_mem_kref_free(struct kref *kref) break; #endif case KBASE_MEM_TYPE_IMPORTED_USER_BUF: + if (alloc->imported.user_buf.mm) + mmdrop(alloc->imported.user_buf.mm); kfree(alloc->imported.user_buf.pages); break; case KBASE_MEM_TYPE_TB:{ @@ -1290,9 +1470,11 @@ int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size if (kbase_alloc_phy_pages_helper(reg->cpu_alloc, size) != 0) goto out_term; + reg->cpu_alloc->reg = reg; if (reg->cpu_alloc != reg->gpu_alloc) { if (kbase_alloc_phy_pages_helper(reg->gpu_alloc, size) != 0) goto out_rollback; + reg->gpu_alloc->reg = reg; } return 0; @@ -1332,6 +1514,10 @@ bool kbase_check_alloc_flags(unsigned long flags) if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0) return false; + /* BASE_MEM_IMPORT_SHARED is only valid for imported memory */ + if ((flags & BASE_MEM_IMPORT_SHARED) == BASE_MEM_IMPORT_SHARED) + return false; + return true; } @@ -1386,3 +1572,923 @@ void kbase_gpu_vm_unlock(struct kbase_context *kctx) } KBASE_EXPORT_TEST_API(kbase_gpu_vm_unlock); + +#ifdef CONFIG_DEBUG_FS +struct kbase_jit_debugfs_data { + int (*func)(struct kbase_jit_debugfs_data *); + struct mutex lock; + struct kbase_context *kctx; + u64 active_value; + u64 pool_value; + u64 destroy_value; + char buffer[50]; +}; + +static int kbase_jit_debugfs_common_open(struct inode *inode, + struct file *file, int (*func)(struct kbase_jit_debugfs_data *)) +{ + struct kbase_jit_debugfs_data *data; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + data->func = func; + mutex_init(&data->lock); + data->kctx = (struct kbase_context *) inode->i_private; + + file->private_data = data; + + return nonseekable_open(inode, file); +} + +static ssize_t kbase_jit_debugfs_common_read(struct file *file, + char __user *buf, size_t len, loff_t *ppos) +{ + struct kbase_jit_debugfs_data *data; + size_t size; + int ret; + + data = (struct kbase_jit_debugfs_data *) file->private_data; + mutex_lock(&data->lock); + + if (*ppos) { + size = strnlen(data->buffer, sizeof(data->buffer)); + } else { + 
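+		/* First read: run the per-file callback and format the counters */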
if (!data->func) { + ret = -EACCES; + goto out_unlock; + } + + if (data->func(data)) { + ret = -EACCES; + goto out_unlock; + } + + size = scnprintf(data->buffer, sizeof(data->buffer), + "%llu,%llu,%llu", data->active_value, + data->pool_value, data->destroy_value); + } + + ret = simple_read_from_buffer(buf, len, ppos, data->buffer, size); + +out_unlock: + mutex_unlock(&data->lock); + return ret; +} + +static int kbase_jit_debugfs_common_release(struct inode *inode, + struct file *file) +{ + kfree(file->private_data); + return 0; +} + +#define KBASE_JIT_DEBUGFS_DECLARE(__fops, __func) \ +static int __fops ## _open(struct inode *inode, struct file *file) \ +{ \ + return kbase_jit_debugfs_common_open(inode, file, __func); \ +} \ +static const struct file_operations __fops = { \ + .owner = THIS_MODULE, \ + .open = __fops ## _open, \ + .release = kbase_jit_debugfs_common_release, \ + .read = kbase_jit_debugfs_common_read, \ + .write = NULL, \ + .llseek = generic_file_llseek, \ +} + +static int kbase_jit_debugfs_count_get(struct kbase_jit_debugfs_data *data) +{ + struct kbase_context *kctx = data->kctx; + struct list_head *tmp; + + mutex_lock(&kctx->jit_lock); + list_for_each(tmp, &kctx->jit_active_head) { + data->active_value++; + } + + list_for_each(tmp, &kctx->jit_pool_head) { + data->pool_value++; + } + + list_for_each(tmp, &kctx->jit_destroy_head) { + data->destroy_value++; + } + mutex_unlock(&kctx->jit_lock); + + return 0; +} +KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_count_fops, + kbase_jit_debugfs_count_get); + +static int kbase_jit_debugfs_vm_get(struct kbase_jit_debugfs_data *data) +{ + struct kbase_context *kctx = data->kctx; + struct kbase_va_region *reg; + + mutex_lock(&kctx->jit_lock); + list_for_each_entry(reg, &kctx->jit_active_head, jit_node) { + data->active_value += reg->nr_pages; + } + + list_for_each_entry(reg, &kctx->jit_pool_head, jit_node) { + data->pool_value += reg->nr_pages; + } + + list_for_each_entry(reg, &kctx->jit_destroy_head, jit_node) { + data->destroy_value += reg->nr_pages; + } + mutex_unlock(&kctx->jit_lock); + + return 0; +} +KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_vm_fops, + kbase_jit_debugfs_vm_get); + +static int kbase_jit_debugfs_phys_get(struct kbase_jit_debugfs_data *data) +{ + struct kbase_context *kctx = data->kctx; + struct kbase_va_region *reg; + + mutex_lock(&kctx->jit_lock); + list_for_each_entry(reg, &kctx->jit_active_head, jit_node) { + data->active_value += reg->gpu_alloc->nents; + } + + list_for_each_entry(reg, &kctx->jit_pool_head, jit_node) { + data->pool_value += reg->gpu_alloc->nents; + } + + list_for_each_entry(reg, &kctx->jit_destroy_head, jit_node) { + data->destroy_value += reg->gpu_alloc->nents; + } + mutex_unlock(&kctx->jit_lock); + + return 0; +} +KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_phys_fops, + kbase_jit_debugfs_phys_get); + +void kbase_jit_debugfs_add(struct kbase_context *kctx) +{ + /* Debugfs entry for getting the number of JIT allocations. */ + debugfs_create_file("mem_jit_count", S_IRUGO, kctx->kctx_dentry, + kctx, &kbase_jit_debugfs_count_fops); + + /* + * Debugfs entry for getting the total number of virtual pages + * used by JIT allocations. + */ + debugfs_create_file("mem_jit_vm", S_IRUGO, kctx->kctx_dentry, + kctx, &kbase_jit_debugfs_vm_fops); + + /* + * Debugfs entry for getting the number of physical pages used + * by JIT allocations. 
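+	 * Each of these files reads back a single "active,pool,destroy" line.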
+ */ + debugfs_create_file("mem_jit_phys", S_IRUGO, kctx->kctx_dentry, + kctx, &kbase_jit_debugfs_phys_fops); +} +#endif /* CONFIG_DEBUG_FS */ + +/** + * kbase_jit_destroy_worker - Deferred worker which frees JIT allocations + * @work: Work item + * + * This function does the work of freeing JIT allocations whose physical + * backing has been released. + */ +static void kbase_jit_destroy_worker(struct work_struct *work) +{ + struct kbase_context *kctx; + struct kbase_va_region *reg; + + kctx = container_of(work, struct kbase_context, jit_work); + do { + mutex_lock(&kctx->jit_lock); + if (list_empty(&kctx->jit_destroy_head)) + reg = NULL; + else + reg = list_first_entry(&kctx->jit_destroy_head, + struct kbase_va_region, jit_node); + + if (reg) { + list_del(®->jit_node); + mutex_unlock(&kctx->jit_lock); + + kbase_gpu_vm_lock(kctx); + kbase_mem_free_region(kctx, reg); + kbase_gpu_vm_unlock(kctx); + } else + mutex_unlock(&kctx->jit_lock); + } while (reg); +} + +int kbase_jit_init(struct kbase_context *kctx) +{ + INIT_LIST_HEAD(&kctx->jit_active_head); + INIT_LIST_HEAD(&kctx->jit_pool_head); + INIT_LIST_HEAD(&kctx->jit_destroy_head); + mutex_init(&kctx->jit_lock); + INIT_WORK(&kctx->jit_work, kbase_jit_destroy_worker); + + return 0; +} + +struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, + struct base_jit_alloc_info *info) +{ + struct kbase_va_region *reg = NULL; + struct kbase_va_region *walker; + struct kbase_va_region *temp; + size_t current_diff = SIZE_MAX; + + int ret; + + mutex_lock(&kctx->jit_lock); + /* + * Scan the pool for an existing allocation which meets our + * requirements and remove it. + */ + list_for_each_entry_safe(walker, temp, &kctx->jit_pool_head, jit_node) { + + if (walker->nr_pages >= info->va_pages) { + size_t min_size, max_size, diff; + + /* + * The JIT allocations VA requirements have been + * meet, it's suitable but other allocations + * might be a better fit. + */ + min_size = min_t(size_t, walker->gpu_alloc->nents, + info->commit_pages); + max_size = max_t(size_t, walker->gpu_alloc->nents, + info->commit_pages); + diff = max_size - min_size; + + if (current_diff > diff) { + current_diff = diff; + reg = walker; + } + + /* The allocation is an exact match, stop looking */ + if (current_diff == 0) + break; + } + } + + if (reg) { + /* + * Remove the found region from the pool and add it to the + * active list. + */ + list_del_init(®->jit_node); + list_add(®->jit_node, &kctx->jit_active_head); + + /* Release the jit lock before modifying the allocation */ + mutex_unlock(&kctx->jit_lock); + + kbase_gpu_vm_lock(kctx); + + /* Make the physical backing no longer reclaimable */ + if (!kbase_mem_evictable_unmake(reg->gpu_alloc)) + goto update_failed; + + /* Grow the backing if required */ + if (reg->gpu_alloc->nents < info->commit_pages) { + size_t delta; + size_t old_size = reg->gpu_alloc->nents; + + /* Allocate some more pages */ + delta = info->commit_pages - reg->gpu_alloc->nents; + if (kbase_alloc_phy_pages_helper(reg->gpu_alloc, delta) + != 0) + goto update_failed; + + if (reg->cpu_alloc != reg->gpu_alloc) { + if (kbase_alloc_phy_pages_helper( + reg->cpu_alloc, delta) != 0) { + kbase_free_phy_pages_helper( + reg->gpu_alloc, delta); + goto update_failed; + } + } + + ret = kbase_mem_grow_gpu_mapping(kctx, reg, + info->commit_pages, old_size); + /* + * The grow failed so put the allocation back in the + * pool and return failure. 
+ */ + if (ret) + goto update_failed; + } + kbase_gpu_vm_unlock(kctx); + } else { + /* No suitable JIT allocation was found so create a new one */ + u64 flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD | + BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF | + BASE_MEM_COHERENT_LOCAL; + u64 gpu_addr; + u16 alignment; + + mutex_unlock(&kctx->jit_lock); + + reg = kbase_mem_alloc(kctx, info->va_pages, info->commit_pages, + info->extent, &flags, &gpu_addr, &alignment); + if (!reg) + goto out_unlocked; + + mutex_lock(&kctx->jit_lock); + list_add(®->jit_node, &kctx->jit_active_head); + mutex_unlock(&kctx->jit_lock); + } + + return reg; + +update_failed: + /* + * An update to an allocation from the pool failed, chances + * are slim a new allocation would fair any better so return + * the allocation to the pool and return the function with failure. + */ + kbase_gpu_vm_unlock(kctx); + mutex_lock(&kctx->jit_lock); + list_del_init(®->jit_node); + list_add(®->jit_node, &kctx->jit_pool_head); + mutex_unlock(&kctx->jit_lock); +out_unlocked: + return NULL; +} + +void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg) +{ + /* The physical backing of memory in the pool is always reclaimable */ + down_read(&kctx->process_mm->mmap_sem); + kbase_gpu_vm_lock(kctx); + kbase_mem_evictable_make(reg->gpu_alloc); + kbase_gpu_vm_unlock(kctx); + up_read(&kctx->process_mm->mmap_sem); + + mutex_lock(&kctx->jit_lock); + list_del_init(®->jit_node); + list_add(®->jit_node, &kctx->jit_pool_head); + mutex_unlock(&kctx->jit_lock); +} + +void kbase_jit_backing_lost(struct kbase_va_region *reg) +{ + struct kbase_context *kctx = reg->kctx; + + /* + * JIT allocations will always be on a list, if the region + * is not on a list then it's not a JIT allocation. + */ + if (list_empty(®->jit_node)) + return; + + /* + * Freeing the allocation requires locks we might not be able + * to take now, so move the allocation to the free list and kick + * the worker which will do the freeing. + */ + mutex_lock(&kctx->jit_lock); + list_del_init(®->jit_node); + list_add(®->jit_node, &kctx->jit_destroy_head); + mutex_unlock(&kctx->jit_lock); + + schedule_work(&kctx->jit_work); +} + +bool kbase_jit_evict(struct kbase_context *kctx) +{ + struct kbase_va_region *reg = NULL; + + lockdep_assert_held(&kctx->reg_lock); + + /* Free the oldest allocation from the pool */ + mutex_lock(&kctx->jit_lock); + if (!list_empty(&kctx->jit_pool_head)) { + reg = list_entry(kctx->jit_pool_head.prev, + struct kbase_va_region, jit_node); + list_del(®->jit_node); + } + mutex_unlock(&kctx->jit_lock); + + if (reg) + kbase_mem_free_region(kctx, reg); + + return (reg != NULL); +} + +void kbase_jit_term(struct kbase_context *kctx) +{ + struct kbase_va_region *walker; + + /* Free all allocations for this context */ + + /* + * Flush the freeing of allocations whose backing has been freed + * (i.e. everything in jit_destroy_head). 
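+	 * cancel_work_sync() also waits for an already running
+	 * kbase_jit_destroy_worker() to complete.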
+ */ + cancel_work_sync(&kctx->jit_work); + + kbase_gpu_vm_lock(kctx); + /* Free all allocations from the pool */ + while (!list_empty(&kctx->jit_pool_head)) { + walker = list_first_entry(&kctx->jit_pool_head, + struct kbase_va_region, jit_node); + list_del(&walker->jit_node); + kbase_mem_free_region(kctx, walker); + } + + /* Free all allocations from active list */ + while (!list_empty(&kctx->jit_active_head)) { + walker = list_first_entry(&kctx->jit_active_head, + struct kbase_va_region, jit_node); + list_del(&walker->jit_node); + kbase_mem_free_region(kctx, walker); + } + kbase_gpu_vm_unlock(kctx); +} + +static int kbase_jd_user_buf_map(struct kbase_context *kctx, + struct kbase_va_region *reg) +{ + long pinned_pages; + struct kbase_mem_phy_alloc *alloc; + struct page **pages; + phys_addr_t *pa; + long i; + int err = -ENOMEM; + unsigned long address; + struct mm_struct *mm; + struct device *dev; + unsigned long offset; + unsigned long local_size; + + alloc = reg->gpu_alloc; + pa = kbase_get_gpu_phy_pages(reg); + address = alloc->imported.user_buf.address; + mm = alloc->imported.user_buf.mm; + + KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF); + + pages = alloc->imported.user_buf.pages; + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) + pinned_pages = get_user_pages(NULL, mm, + address, + alloc->imported.user_buf.nr_pages, + reg->flags & KBASE_REG_GPU_WR, + 0, pages, NULL); +#else + pinned_pages = get_user_pages_remote(NULL, mm, + address, + alloc->imported.user_buf.nr_pages, + reg->flags & KBASE_REG_GPU_WR, + 0, pages, NULL); +#endif + + if (pinned_pages <= 0) + return pinned_pages; + + if (pinned_pages != alloc->imported.user_buf.nr_pages) { + for (i = 0; i < pinned_pages; i++) + put_page(pages[i]); + return -ENOMEM; + } + + dev = kctx->kbdev->dev; + offset = address & ~PAGE_MASK; + local_size = alloc->imported.user_buf.size; + + for (i = 0; i < pinned_pages; i++) { + dma_addr_t dma_addr; + unsigned long min; + + min = MIN(PAGE_SIZE - offset, local_size); + dma_addr = dma_map_page(dev, pages[i], + offset, min, + DMA_BIDIRECTIONAL); + if (dma_mapping_error(dev, dma_addr)) + goto unwind; + + alloc->imported.user_buf.dma_addrs[i] = dma_addr; + pa[i] = page_to_phys(pages[i]); + + local_size -= min; + offset = 0; + } + + alloc->nents = pinned_pages; + + err = kbase_mmu_insert_pages(kctx, reg->start_pfn, pa, + kbase_reg_current_backed_size(reg), + reg->flags); + if (err == 0) + return 0; + + alloc->nents = 0; + /* fall down */ +unwind: + while (i--) { + dma_unmap_page(kctx->kbdev->dev, + alloc->imported.user_buf.dma_addrs[i], + PAGE_SIZE, DMA_BIDIRECTIONAL); + put_page(pages[i]); + pages[i] = NULL; + } + + return err; +} + +static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, + struct kbase_mem_phy_alloc *alloc, bool writeable) +{ + long i; + struct page **pages; + unsigned long size = alloc->imported.user_buf.size; + + KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF); + pages = alloc->imported.user_buf.pages; + for (i = 0; i < alloc->imported.user_buf.nr_pages; i++) { + unsigned long local_size; + dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i]; + + local_size = MIN(size, PAGE_SIZE - (dma_addr & ~PAGE_MASK)); + dma_unmap_page(kctx->kbdev->dev, dma_addr, local_size, + DMA_BIDIRECTIONAL); + if (writeable) + set_page_dirty_lock(pages[i]); + put_page(pages[i]); + pages[i] = NULL; + + size -= local_size; + } + alloc->nents = 0; +} + + +/* to replace sg_dma_len. 
*/ +#define MALI_SG_DMA_LEN(sg) ((sg)->length) + +#ifdef CONFIG_DMA_SHARED_BUFFER +static int kbase_jd_umm_map(struct kbase_context *kctx, + struct kbase_va_region *reg) +{ + struct sg_table *sgt; + struct scatterlist *s; + int i; + phys_addr_t *pa; + int err; + size_t count = 0; + struct kbase_mem_phy_alloc *alloc; + + alloc = reg->gpu_alloc; + + KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM); + KBASE_DEBUG_ASSERT(NULL == alloc->imported.umm.sgt); + sgt = dma_buf_map_attachment(alloc->imported.umm.dma_attachment, + DMA_BIDIRECTIONAL); + + if (IS_ERR_OR_NULL(sgt)) + return -EINVAL; + + /* save for later */ + alloc->imported.umm.sgt = sgt; + + pa = kbase_get_gpu_phy_pages(reg); + KBASE_DEBUG_ASSERT(pa); + + for_each_sg(sgt->sgl, s, sgt->nents, i) { + int j; + size_t pages = PFN_UP(MALI_SG_DMA_LEN(s)); + + WARN_ONCE(MALI_SG_DMA_LEN(s) & (PAGE_SIZE-1), + "MALI_SG_DMA_LEN(s)=%u is not a multiple of PAGE_SIZE\n", + MALI_SG_DMA_LEN(s)); + + WARN_ONCE(sg_dma_address(s) & (PAGE_SIZE-1), + "sg_dma_address(s)=%llx is not aligned to PAGE_SIZE\n", + (unsigned long long) sg_dma_address(s)); + + for (j = 0; (j < pages) && (count < reg->nr_pages); j++, + count++) + *pa++ = sg_dma_address(s) + (j << PAGE_SHIFT); + WARN_ONCE(j < pages, + "sg list from dma_buf_map_attachment > dma_buf->size=%zu\n", + alloc->imported.umm.dma_buf->size); + } + + if (WARN_ONCE(count < reg->nr_pages, + "sg list from dma_buf_map_attachment < dma_buf->size=%zu\n", + alloc->imported.umm.dma_buf->size)) { + err = -EINVAL; + goto out; + } + + /* Update nents as we now have pages to map */ + alloc->nents = count; + + err = kbase_mmu_insert_pages(kctx, reg->start_pfn, + kbase_get_gpu_phy_pages(reg), + kbase_reg_current_backed_size(reg), + reg->flags | KBASE_REG_GPU_WR | KBASE_REG_GPU_RD); + +out: + if (err) { + dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment, + alloc->imported.umm.sgt, DMA_BIDIRECTIONAL); + alloc->imported.umm.sgt = NULL; + } + + return err; +} + +static void kbase_jd_umm_unmap(struct kbase_context *kctx, + struct kbase_mem_phy_alloc *alloc) +{ + KBASE_DEBUG_ASSERT(kctx); + KBASE_DEBUG_ASSERT(alloc); + KBASE_DEBUG_ASSERT(alloc->imported.umm.dma_attachment); + KBASE_DEBUG_ASSERT(alloc->imported.umm.sgt); + dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment, + alloc->imported.umm.sgt, DMA_BIDIRECTIONAL); + alloc->imported.umm.sgt = NULL; + alloc->nents = 0; +} +#endif /* CONFIG_DMA_SHARED_BUFFER */ + +#if (defined(CONFIG_KDS) && defined(CONFIG_UMP)) \ + || defined(CONFIG_DMA_SHARED_BUFFER_USES_KDS) +static void add_kds_resource(struct kds_resource *kds_res, + struct kds_resource **kds_resources, u32 *kds_res_count, + unsigned long *kds_access_bitmap, bool exclusive) +{ + u32 i; + + for (i = 0; i < *kds_res_count; i++) { + /* Duplicate resource, ignore */ + if (kds_resources[i] == kds_res) + return; + } + + kds_resources[*kds_res_count] = kds_res; + if (exclusive) + set_bit(*kds_res_count, kds_access_bitmap); + (*kds_res_count)++; +} +#endif + +struct kbase_mem_phy_alloc *kbase_map_external_resource( + struct kbase_context *kctx, struct kbase_va_region *reg, + struct mm_struct *locked_mm +#ifdef CONFIG_KDS + , u32 *kds_res_count, struct kds_resource **kds_resources, + unsigned long *kds_access_bitmap, bool exclusive +#endif + ) +{ + int err; + + /* decide what needs to happen for this resource */ + switch (reg->gpu_alloc->type) { + case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { + if (reg->gpu_alloc->imported.user_buf.mm != locked_mm) + goto exit; + + 
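+		/* Pin and DMA-map the user pages the first time this resource is mapped */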
reg->gpu_alloc->imported.user_buf.current_mapping_usage_count++; + if (1 == reg->gpu_alloc->imported.user_buf.current_mapping_usage_count) { + err = kbase_jd_user_buf_map(kctx, reg); + if (err) { + reg->gpu_alloc->imported.user_buf.current_mapping_usage_count--; + goto exit; + } + } + } + break; + case KBASE_MEM_TYPE_IMPORTED_UMP: { +#if defined(CONFIG_KDS) && defined(CONFIG_UMP) + if (kds_res_count) { + struct kds_resource *kds_res; + + kds_res = ump_dd_kds_resource_get( + reg->gpu_alloc->imported.ump_handle); + if (kds_res) + add_kds_resource(kds_res, kds_resources, + kds_res_count, + kds_access_bitmap, exclusive); + } +#endif /*defined(CONFIG_KDS) && defined(CONFIG_UMP) */ + break; + } +#ifdef CONFIG_DMA_SHARED_BUFFER + case KBASE_MEM_TYPE_IMPORTED_UMM: { +#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS + if (kds_res_count) { + struct kds_resource *kds_res; + + kds_res = get_dma_buf_kds_resource( + reg->gpu_alloc->imported.umm.dma_buf); + if (kds_res) + add_kds_resource(kds_res, kds_resources, + kds_res_count, + kds_access_bitmap, exclusive); + } +#endif + reg->gpu_alloc->imported.umm.current_mapping_usage_count++; + if (1 == reg->gpu_alloc->imported.umm.current_mapping_usage_count) { + err = kbase_jd_umm_map(kctx, reg); + if (err) { + reg->gpu_alloc->imported.umm.current_mapping_usage_count--; + goto exit; + } + } + break; + } +#endif + default: + goto exit; + } + + return kbase_mem_phy_alloc_get(reg->gpu_alloc); +exit: + return NULL; +} + +void kbase_unmap_external_resource(struct kbase_context *kctx, + struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc) +{ + switch (alloc->type) { +#ifdef CONFIG_DMA_SHARED_BUFFER + case KBASE_MEM_TYPE_IMPORTED_UMM: { + alloc->imported.umm.current_mapping_usage_count--; + + if (0 == alloc->imported.umm.current_mapping_usage_count) { + if (reg && reg->gpu_alloc == alloc) + kbase_mmu_teardown_pages( + kctx, + reg->start_pfn, + kbase_reg_current_backed_size(reg)); + + kbase_jd_umm_unmap(kctx, alloc); + } + } + break; +#endif /* CONFIG_DMA_SHARED_BUFFER */ + case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { + alloc->imported.user_buf.current_mapping_usage_count--; + + if (0 == alloc->imported.user_buf.current_mapping_usage_count) { + bool writeable = true; + + if (reg && reg->gpu_alloc == alloc) + kbase_mmu_teardown_pages( + kctx, + reg->start_pfn, + kbase_reg_current_backed_size(reg)); + + if (reg && ((reg->flags & KBASE_REG_GPU_WR) == 0)) + writeable = false; + + kbase_jd_user_buf_unmap(kctx, alloc, writeable); + } + } + break; + default: + break; + } + kbase_mem_phy_alloc_put(alloc); +} + +struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire( + struct kbase_context *kctx, u64 gpu_addr) +{ + struct kbase_ctx_ext_res_meta *meta = NULL; + struct kbase_ctx_ext_res_meta *walker; + + lockdep_assert_held(&kctx->reg_lock); + + /* + * Walk the per context external resource metadata list for the + * metadata which matches the region which is being acquired. + */ + list_for_each_entry(walker, &kctx->ext_res_meta_head, ext_res_node) { + if (walker->gpu_addr == gpu_addr) { + meta = walker; + break; + } + } + + /* No metadata exists so create one. 
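+	 * The mapping below takes a reference on the physical allocation
+	 * which is dropped again by kbase_sticky_resource_release().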
*/ + if (!meta) { + struct kbase_va_region *reg; + + /* Find the region */ + reg = kbase_region_tracker_find_region_enclosing_address( + kctx, gpu_addr); + if (NULL == reg || (reg->flags & KBASE_REG_FREE)) + goto failed; + + /* Allocate the metadata object */ + meta = kzalloc(sizeof(*meta), GFP_KERNEL); + if (!meta) + goto failed; + + /* + * Fill in the metadata object and acquire a reference + * for the physical resource. + */ + meta->alloc = kbase_map_external_resource(kctx, reg, NULL +#ifdef CONFIG_KDS + , NULL, NULL, + NULL, false +#endif + ); + + if (!meta->alloc) + goto fail_map; + + meta->gpu_addr = reg->start_pfn << PAGE_SHIFT; + + list_add(&meta->ext_res_node, &kctx->ext_res_meta_head); + } + + return meta; + +fail_map: + kfree(meta); +failed: + return NULL; +} + +bool kbase_sticky_resource_release(struct kbase_context *kctx, + struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr) +{ + struct kbase_ctx_ext_res_meta *walker; + struct kbase_va_region *reg; + + lockdep_assert_held(&kctx->reg_lock); + + /* Search of the metadata if one isn't provided. */ + if (!meta) { + /* + * Walk the per context external resource metadata list for the + * metadata which matches the region which is being released. + */ + list_for_each_entry(walker, &kctx->ext_res_meta_head, + ext_res_node) { + if (walker->gpu_addr == gpu_addr) { + meta = walker; + break; + } + } + } + + /* No metadata so just return. */ + if (!meta) + return false; + + /* Drop the physical memory reference and free the metadata. */ + reg = kbase_region_tracker_find_region_enclosing_address( + kctx, + meta->gpu_addr); + + kbase_unmap_external_resource(kctx, reg, meta->alloc); + list_del(&meta->ext_res_node); + kfree(meta); + + return true; +} + +int kbase_sticky_resource_init(struct kbase_context *kctx) +{ + INIT_LIST_HEAD(&kctx->ext_res_meta_head); + + return 0; +} + +void kbase_sticky_resource_term(struct kbase_context *kctx) +{ + struct kbase_ctx_ext_res_meta *walker; + + lockdep_assert_held(&kctx->reg_lock); + + /* + * Free any sticky resources which haven't been unmapped. + * + * Note: + * We don't care about refcounts at this point as no future + * references to the meta data will be made. + * Region termination would find these if we didn't free them + * here, but it's more efficient if we do the clean up here. + */ + while (!list_empty(&kctx->ext_res_meta_head)) { + walker = list_first_entry(&kctx->ext_res_meta_head, + struct kbase_ctx_ext_res_meta, ext_res_node); + + kbase_sticky_resource_release(kctx, walker, 0); + } +} diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem.h b/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem.h index 7372e1088bd4..7b2433e868bd 100644 --- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem.h +++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -30,7 +30,9 @@ #endif #include - +#ifdef CONFIG_KDS +#include +#endif /* CONFIG_KDS */ #ifdef CONFIG_UMP #include #endif /* CONFIG_UMP */ @@ -41,6 +43,8 @@ #if defined(CONFIG_MALI_GATOR_SUPPORT) #include "mali_kbase_gator.h" #endif +/* Required for kbase_mem_evictable_unmake */ +#include "mali_kbase_mem_linux.h" /* Part of the workaround for uTLB invalid pages is to ensure we grow/shrink tmem by 4 pages at a time */ #define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316 (2) /* round to 4 pages */ @@ -112,11 +116,23 @@ struct kbase_mem_phy_alloc { /* kbase_cpu_mappings */ struct list_head mappings; + /* Node used to store this allocation on the eviction list */ + struct list_head evict_node; + /* Physical backing size when the pages where evicted */ + size_t evicted; + /* + * Back reference to the region structure which created this + * allocation, or NULL if it has been freed. + */ + struct kbase_va_region *reg; + /* type of buffer */ enum kbase_memory_type type; unsigned long properties; + struct list_head zone_cache; + /* member in union valid based on @a type */ union { #ifdef CONFIG_UMP @@ -143,7 +159,7 @@ struct kbase_mem_phy_alloc { unsigned long nr_pages; struct page **pages; unsigned int current_mapping_usage_count; - struct task_struct *owner; + struct mm_struct *mm; dma_addr_t *dma_addrs; } user_buf; } imported; @@ -242,6 +258,8 @@ struct kbase_va_region { #define KBASE_REG_SECURE (1ul << 19) +#define KBASE_REG_DONT_NEED (1ul << 20) + #define KBASE_REG_ZONE_SAME_VA KBASE_REG_ZONE(0) /* only used with 32-bit clients */ @@ -276,6 +294,8 @@ struct kbase_va_region { /* non-NULL if this memory object is a kds_resource */ struct kds_resource *kds_res; + /* List head used to store the region in the JIT allocation pool */ + struct list_head jit_node; }; /* Common functions */ @@ -355,6 +375,7 @@ static inline struct kbase_mem_phy_alloc *kbase_alloc_create(size_t nr_pages, en alloc->pages = (void *)(alloc + 1); INIT_LIST_HEAD(&alloc->mappings); alloc->type = type; + INIT_LIST_HEAD(&alloc->zone_cache); if (type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) alloc->imported.user_buf.dma_addrs = @@ -378,14 +399,17 @@ static inline int kbase_reg_prepare_native(struct kbase_va_region *reg, else if (!reg->cpu_alloc) return -ENOMEM; reg->cpu_alloc->imported.kctx = kctx; + INIT_LIST_HEAD(®->cpu_alloc->evict_node); if (kctx->infinite_cache_active && (reg->flags & KBASE_REG_CPU_CACHED)) { reg->gpu_alloc = kbase_alloc_create(reg->nr_pages, KBASE_MEM_TYPE_NATIVE); reg->gpu_alloc->imported.kctx = kctx; + INIT_LIST_HEAD(®->gpu_alloc->evict_node); } else { reg->gpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc); } + INIT_LIST_HEAD(®->jit_node); reg->flags &= ~KBASE_REG_FREE; return 0; } @@ -505,11 +529,13 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_pages, * @pages: Pointer to array holding the physical addresses of the pages to * free. * @dirty: Whether any pages may be dirty in the cache. + * @reclaimed: Whether the pages where reclaimable and thus should bypass + * the pool and go straight to the kernel. * * Like kbase_mem_pool_free() but optimized for freeing many pages. 
*/ void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages, - phys_addr_t *pages, bool dirty); + phys_addr_t *pages, bool dirty, bool reclaimed); /** * kbase_mem_pool_size - Get number of free pages in memory pool @@ -561,6 +587,7 @@ size_t kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size); int kbase_region_tracker_init(struct kbase_context *kctx); +int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages); void kbase_region_tracker_term(struct kbase_context *kctx); struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(struct kbase_context *kctx, u64 gpu_addr); @@ -591,6 +618,9 @@ void kbase_mmu_term(struct kbase_context *kctx); phys_addr_t kbase_mmu_alloc_pgd(struct kbase_context *kctx); void kbase_mmu_free_pgd(struct kbase_context *kctx); +int kbase_mmu_insert_pages_no_flush(struct kbase_context *kctx, u64 vpfn, + phys_addr_t *phys, size_t nr, + unsigned long flags); int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn, phys_addr_t *phys, size_t nr, unsigned long flags); @@ -623,6 +653,12 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg); void kbase_mmu_update(struct kbase_context *kctx); /** + * kbase_mmu_disable() - Disable the MMU for a previously active kbase context. + * @kctx: Kbase context + * + * Disable and perform the required cache maintenance to remove the all + * data from provided kbase context from the GPU caches. + * * The caller has the following locking conditions: * - It must hold kbase_as::transaction_mutex on kctx's address space * - It must hold the kbasep_js_device_data::runpool_irq::lock @@ -630,11 +666,13 @@ void kbase_mmu_update(struct kbase_context *kctx); void kbase_mmu_disable(struct kbase_context *kctx); /** - * kbase_mmu_disable_as() - set the MMU in unmapped mode for an address space. - * + * kbase_mmu_disable_as() - Set the MMU to unmapped mode for the specified + * address space. * @kbdev: Kbase device - * @as_nr: Number of the address space for which the MMU - * should be set in unmapped mode. + * @as_nr: The address space number to set to unmapped. + * + * This function must only be called during reset/power-up and it used to + * ensure the registers are in a known state. * * The caller must hold kbdev->as[as_nr].transaction_mutex. */ @@ -854,4 +892,166 @@ void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle, void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle, size_t size, enum dma_data_direction dir); +#ifdef CONFIG_DEBUG_FS +/** + * kbase_jit_debugfs_add - Add per context debugfs entry for JIT. + * @kctx: kbase context + */ +void kbase_jit_debugfs_add(struct kbase_context *kctx); +#endif /* CONFIG_DEBUG_FS */ + +/** + * kbase_jit_init - Initialize the JIT memory pool management + * @kctx: kbase context + * + * Returns zero on success or negative error number on failure. + */ +int kbase_jit_init(struct kbase_context *kctx); + +/** + * kbase_jit_allocate - Allocate JIT memory + * @kctx: kbase context + * @info: JIT allocation information + * + * Return: JIT allocation on success or NULL on failure. + */ +struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, + struct base_jit_alloc_info *info); + +/** + * kbase_jit_free - Free a JIT allocation + * @kctx: kbase context + * @reg: JIT allocation + * + * Frees a JIT allocation and places it into the free pool for later reuse. 
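+ * The physical backing is made evictable, so it may be reclaimed under
+ * memory pressure while the allocation sits in the pool.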
+ */ +void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg); + +/** + * kbase_jit_backing_lost - Inform JIT that an allocation has lost backing + * @reg: JIT allocation + */ +void kbase_jit_backing_lost(struct kbase_va_region *reg); + +/** + * kbase_jit_evict - Evict a JIT allocation from the pool + * @kctx: kbase context + * + * Evict the least recently used JIT allocation from the pool. This can be + * required if normal VA allocations are failing due to VA exhaustion. + * + * Return: True if a JIT allocation was freed, false otherwise. + */ +bool kbase_jit_evict(struct kbase_context *kctx); + +/** + * kbase_jit_term - Terminate the JIT memory pool management + * @kctx: kbase context + */ +void kbase_jit_term(struct kbase_context *kctx); + +/** + * kbase_map_external_resource - Map an external resource to the GPU. + * @kctx: kbase context. + * @reg: The region to map. + * @locked_mm: The mm_struct which has been locked for this operation. + * @kds_res_count: The number of KDS resources. + * @kds_resources: Array of KDS resources. + * @kds_access_bitmap: Access bitmap for KDS. + * @exclusive: If the KDS resource requires exclusive access. + * + * Return: The physical allocation which backs the region on success or NULL + * on failure. + */ +struct kbase_mem_phy_alloc *kbase_map_external_resource( + struct kbase_context *kctx, struct kbase_va_region *reg, + struct mm_struct *locked_mm +#ifdef CONFIG_KDS + , u32 *kds_res_count, struct kds_resource **kds_resources, + unsigned long *kds_access_bitmap, bool exclusive +#endif + ); + +/** + * kbase_unmap_external_resource - Unmap an external resource from the GPU. + * @kctx: kbase context. + * @reg: The region to unmap or NULL if it has already been released. + * @alloc: The physical allocation being unmapped. + */ +void kbase_unmap_external_resource(struct kbase_context *kctx, + struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc); + +/** + * kbase_sticky_resource_init - Initialize sticky resource management. + * @kctx: kbase context + * + * Returns zero on success or negative error number on failure. + */ +int kbase_sticky_resource_init(struct kbase_context *kctx); + +/** + * kbase_sticky_resource_acquire - Acquire a reference on a sticky resource. + * @kctx: kbase context. + * @gpu_addr: The GPU address of the external resource. + * + * Return: The metadata object which represents the binding between the + * external resource and the kbase context on success or NULL on failure. + */ +struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire( + struct kbase_context *kctx, u64 gpu_addr); + +/** + * kbase_sticky_resource_release - Release a reference on a sticky resource. + * @kctx: kbase context. + * @meta: Binding metadata. + * @gpu_addr: GPU address of the external resource. + * + * If meta is NULL then gpu_addr will be used to scan the metadata list and + * find the matching metadata (if any), otherwise the provided meta will be + * used and gpu_addr will be ignored. + * + * Return: True if the release found the metadata and the reference was dropped. + */ +bool kbase_sticky_resource_release(struct kbase_context *kctx, + struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr); + +/** + * kbase_sticky_resource_term - Terminate sticky resource management. + * @kctx: kbase context + */ +void kbase_sticky_resource_term(struct kbase_context *kctx); + +/** + * kbase_zone_cache_update - Update the memory zone cache after new pages have + * been added. + * @alloc: The physical memory allocation to build the cache for. 
+ * @start_offset: Offset to where the new pages start. + * + * Updates an existing memory zone cache, updating the counters for the + * various zones. + * If the memory allocation doesn't already have a zone cache assume that + * one isn't created and thus don't do anything. + * + * Return: Zero cache was updated, negative error code on error. + */ +int kbase_zone_cache_update(struct kbase_mem_phy_alloc *alloc, + size_t start_offset); + +/** + * kbase_zone_cache_build - Build the memory zone cache. + * @alloc: The physical memory allocation to build the cache for. + * + * Create a new zone cache for the provided physical memory allocation if + * one doesn't already exist, if one does exist then just return. + * + * Return: Zero if the zone cache was created, negative error code on error. + */ +int kbase_zone_cache_build(struct kbase_mem_phy_alloc *alloc); + +/** + * kbase_zone_cache_clear - Clear the memory zone cache. + * @alloc: The physical memory allocation to clear the cache on. + */ +void kbase_zone_cache_clear(struct kbase_mem_phy_alloc *alloc); + #endif /* _KBASE_MEM_H_ */ diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem_linux.c b/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem_linux.c index b359f4d94148..f91d3c916355 100644 --- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem_linux.c +++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem_linux.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -36,15 +36,52 @@ #ifdef CONFIG_DMA_SHARED_BUFFER #include #endif /* defined(CONFIG_DMA_SHARED_BUFFER) */ +#include #include #include #include #include +#include static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma); static const struct vm_operations_struct kbase_vm_ops; +/** + * kbase_mem_shrink_cpu_mapping - Shrink the CPU mapping(s) of an allocation + * @kctx: Context the region belongs to + * @reg: The GPU region + * @new_pages: The number of pages after the shrink + * @old_pages: The number of pages before the shrink + * + * Return: 0 on success, -errno on error. + * + * Shrink (or completely remove) all CPU mappings which reference the shrunk + * part of the allocation. + * + * Note: Caller must be holding the processes mmap_sem lock. + */ +static int kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx, + struct kbase_va_region *reg, + u64 new_pages, u64 old_pages); + +/** + * kbase_mem_shrink_gpu_mapping - Shrink the GPU mapping of an allocation + * @kctx: Context the region belongs to + * @reg: The GPU region or NULL if there isn't one + * @new_pages: The number of pages after the shrink + * @old_pages: The number of pages before the shrink + * + * Return: 0 on success, negative -errno on error + * + * Unmap the shrunk pages from the GPU mapping. Note that the size of the region + * itself is unmodified as we still need to reserve the VA, only the page tables + * will be modified by this function. 
+ */ +static int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx, + struct kbase_va_region *reg, + u64 new_pages, u64 old_pages); + struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages, u64 commit_pages, u64 extent, u64 *flags, u64 *gpu_va, u16 *va_alignment) { int zone; @@ -77,9 +114,6 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages #if defined(CONFIG_64BIT) if (kctx->is_compat) cpu_va_bits = 32; - else - /* force SAME_VA if a 64-bit client */ - *flags |= BASE_MEM_SAME_VA; #endif if (!kbase_check_alloc_flags(*flags)) { @@ -195,8 +229,9 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages if (*flags & BASE_MEM_PROT_CPU_WR) prot |= PROT_WRITE; - cpu_addr = vm_mmap(kctx->filp, 0, va_map, prot, MAP_SHARED, - cookie); + cpu_addr = vm_mmap(kctx->filp, 0, va_map, prot, + MAP_SHARED, cookie); + if (IS_ERR_VALUE(cpu_addr)) { kctx->pending_regions[cookie_nr] = NULL; kctx->cookies |= (1UL << cookie_nr); @@ -343,12 +378,412 @@ out_unlock: return ret; } +/** + * kbase_mem_evictable_reclaim_count_objects - Count number of pages in the + * Ephemeral memory eviction list. + * @s: Shrinker + * @sc: Shrinker control + * + * Return: Number of pages which can be freed. + */ +static +unsigned long kbase_mem_evictable_reclaim_count_objects(struct shrinker *s, + struct shrink_control *sc) +{ + struct kbase_context *kctx; + struct kbase_mem_phy_alloc *alloc; + unsigned long pages = 0; + + kctx = container_of(s, struct kbase_context, reclaim); + + mutex_lock(&kctx->evict_lock); + + list_for_each_entry(alloc, &kctx->evict_list, evict_node) + pages += alloc->nents; + + mutex_unlock(&kctx->evict_lock); + return pages; +} + +/** + * kbase_mem_evictable_reclaim_scan_objects - Scan the Ephemeral memory eviction + * list for pages and try to reclaim them. + * @s: Shrinker + * @sc: Shrinker control + * + * Return: Number of pages freed (can be less then requested) or -1 if the + * shrinker failed to free pages in its pool. + * + * Note: + * This function accesses region structures without taking the region lock, + * this is required as the OOM killer can call the shrinker after the region + * lock has already been held. + * This is safe as we can guarantee that a region on the eviction list will + * not be freed (kbase_mem_free_region removes the allocation from the list + * before destroying it), or modified by other parts of the driver. + * The eviction list itself is guarded by the eviction lock and the MMU updates + * are protected by their own lock. + */ +static +unsigned long kbase_mem_evictable_reclaim_scan_objects(struct shrinker *s, + struct shrink_control *sc) +{ + struct kbase_context *kctx; + struct kbase_mem_phy_alloc *alloc; + struct kbase_mem_phy_alloc *tmp; + unsigned long freed = 0; + + kctx = container_of(s, struct kbase_context, reclaim); + mutex_lock(&kctx->evict_lock); + + list_for_each_entry_safe(alloc, tmp, &kctx->evict_list, evict_node) { + int err; + + err = kbase_mem_shrink_gpu_mapping(kctx, alloc->reg, + 0, alloc->nents); + if (err != 0) { + /* + * Failed to remove GPU mapping, tell the shrinker + * to stop trying to shrink our slab even though we + * have pages in it. + */ + freed = -1; + goto out_unlock; + } + + /* + * Update alloc->evicted before freeing the backing so the + * helper can determine that it needs to bypass the accounting + * and memory pool. 
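+		 * (see the reclaimed path in kbase_free_phy_pages_helper()).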
+ */ + alloc->evicted = alloc->nents; + + kbase_free_phy_pages_helper(alloc, alloc->evicted); + freed += alloc->evicted; + list_del_init(&alloc->evict_node); + + /* + * Inform the JIT allocator this region has lost backing + * as it might need to free the allocation. + */ + kbase_jit_backing_lost(alloc->reg); + + /* Enough pages have been freed so stop now */ + if (freed > sc->nr_to_scan) + break; + } +out_unlock: + mutex_unlock(&kctx->evict_lock); + + return freed; +} + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) +static int kbase_mem_evictable_reclaim_shrink(struct shrinker *s, + struct shrink_control *sc) +{ + if (sc->nr_to_scan == 0) + return kbase_mem_evictable_reclaim_count_objects(s, sc); + + return kbase_mem_evictable_reclaim_scan_objects(s, sc); +} +#endif + +int kbase_mem_evictable_init(struct kbase_context *kctx) +{ + INIT_LIST_HEAD(&kctx->evict_list); + mutex_init(&kctx->evict_lock); + + /* Register shrinker */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) + kctx->reclaim.shrink = kbase_mem_evictable_reclaim_shrink; +#else + kctx->reclaim.count_objects = kbase_mem_evictable_reclaim_count_objects; + kctx->reclaim.scan_objects = kbase_mem_evictable_reclaim_scan_objects; +#endif + kctx->reclaim.seeks = DEFAULT_SEEKS; + /* Kernel versions prior to 3.1 : + * struct shrinker does not define batch */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0) + kctx->reclaim.batch = 0; +#endif + register_shrinker(&kctx->reclaim); + return 0; +} + +void kbase_mem_evictable_deinit(struct kbase_context *kctx) +{ + unregister_shrinker(&kctx->reclaim); +} + +struct kbase_mem_zone_cache_entry { + /* List head used to link the cache entry to the memory allocation. */ + struct list_head zone_node; + /* The zone the cacheline is for. */ + struct zone *zone; + /* The number of pages in the allocation which belong to this zone. */ + u64 count; +}; + +static bool kbase_zone_cache_builder(struct kbase_mem_phy_alloc *alloc, + size_t start_offset) +{ + struct kbase_mem_zone_cache_entry *cache = NULL; + size_t i; + int ret = 0; + + for (i = start_offset; i < alloc->nents; i++) { + struct page *p = phys_to_page(alloc->pages[i]); + struct zone *zone = page_zone(p); + bool create = true; + + if (cache && (cache->zone == zone)) { + /* + * Fast path check as most of the time adjacent + * pages come from the same zone. + */ + create = false; + } else { + /* + * Slow path check, walk all the cache entries to see + * if we already know about this zone. + */ + list_for_each_entry(cache, &alloc->zone_cache, zone_node) { + if (cache->zone == zone) { + create = false; + break; + } + } + } + + /* This zone wasn't found in the cache, create an entry for it */ + if (create) { + cache = kmalloc(sizeof(*cache), GFP_KERNEL); + if (!cache) { + ret = -ENOMEM; + goto bail; + } + cache->zone = zone; + cache->count = 0; + list_add(&cache->zone_node, &alloc->zone_cache); + } + + cache->count++; + } + return 0; + +bail: + return ret; +} + +int kbase_zone_cache_update(struct kbase_mem_phy_alloc *alloc, + size_t start_offset) +{ + /* + * Bail if the zone cache is empty, only update the cache if it + * existed in the first place. 
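+	 * Use kbase_zone_cache_build() to create a cache from scratch.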
+ */ + if (list_empty(&alloc->zone_cache)) + return 0; + + return kbase_zone_cache_builder(alloc, start_offset); +} + +int kbase_zone_cache_build(struct kbase_mem_phy_alloc *alloc) +{ + /* Bail if the zone cache already exists */ + if (!list_empty(&alloc->zone_cache)) + return 0; + + return kbase_zone_cache_builder(alloc, 0); +} + +void kbase_zone_cache_clear(struct kbase_mem_phy_alloc *alloc) +{ + struct kbase_mem_zone_cache_entry *walker; + + while(!list_empty(&alloc->zone_cache)){ + walker = list_first_entry(&alloc->zone_cache, + struct kbase_mem_zone_cache_entry, zone_node); + list_del(&walker->zone_node); + kfree(walker); + } +} + +/** + * kbase_mem_evictable_mark_reclaim - Mark the pages as reclaimable. + * @alloc: The physical allocation + */ +static void kbase_mem_evictable_mark_reclaim(struct kbase_mem_phy_alloc *alloc) +{ + struct kbase_context *kctx = alloc->imported.kctx; + struct kbase_mem_zone_cache_entry *zone_cache; + int __maybe_unused new_page_count; + int err; + + /* Attempt to build a zone cache of tracking */ + err = kbase_zone_cache_build(alloc); + if (err == 0) { + /* Bulk update all the zones */ + list_for_each_entry(zone_cache, &alloc->zone_cache, zone_node) { + zone_page_state_add(zone_cache->count, + zone_cache->zone, NR_SLAB_RECLAIMABLE); + } + } else { + /* Fall-back to page by page updates */ + int i; + + for (i = 0; i < alloc->nents; i++) { + struct page *p = phys_to_page(alloc->pages[i]); + struct zone *zone = page_zone(p); + + zone_page_state_add(1, zone, NR_SLAB_RECLAIMABLE); + } + } + + kbase_process_page_usage_dec(kctx, alloc->nents); + new_page_count = kbase_atomic_sub_pages(alloc->nents, + &kctx->used_pages); + kbase_atomic_sub_pages(alloc->nents, &kctx->kbdev->memdev.used_pages); + + kbase_tlstream_aux_pagesalloc( + (u32)kctx->id, + (u64)new_page_count); +} + +/** + * kbase_mem_evictable_unmark_reclaim - Mark the pages as no longer reclaimable. + * @alloc: The physical allocation + */ +static +void kbase_mem_evictable_unmark_reclaim(struct kbase_mem_phy_alloc *alloc) +{ + struct kbase_context *kctx = alloc->imported.kctx; + struct kbase_mem_zone_cache_entry *zone_cache; + int __maybe_unused new_page_count; + int err; + + new_page_count = kbase_atomic_add_pages(alloc->nents, + &kctx->used_pages); + kbase_atomic_add_pages(alloc->nents, &kctx->kbdev->memdev.used_pages); + + /* Increase mm counters so that the allocation is accounted for + * against the process and thus is visible to the OOM killer, + * then remove it from the reclaimable accounting. */ + kbase_process_page_usage_inc(kctx, alloc->nents); + + /* Attempt to build a zone cache of tracking */ + err = kbase_zone_cache_build(alloc); + if (err == 0) { + /* Bulk update all the zones */ + list_for_each_entry(zone_cache, &alloc->zone_cache, zone_node) { + zone_page_state_add(-zone_cache->count, + zone_cache->zone, NR_SLAB_RECLAIMABLE); + } + } else { + /* Fall-back to page by page updates */ + int i; + + for (i = 0; i < alloc->nents; i++) { + struct page *p = phys_to_page(alloc->pages[i]); + struct zone *zone = page_zone(p); + + zone_page_state_add(-1, zone, NR_SLAB_RECLAIMABLE); + } + } + + kbase_tlstream_aux_pagesalloc( + (u32)kctx->id, + (u64)new_page_count); +} + +int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc) +{ + struct kbase_context *kctx = gpu_alloc->imported.kctx; + int err; + + lockdep_assert_held(&kctx->reg_lock); + + /* This alloction can't already be on a list. 
*/ + WARN_ON(!list_empty(&gpu_alloc->evict_node)); + + /* + * Try to shrink the CPU mappings as required, if we fail then + * fail the process of making this allocation evictable. + */ + err = kbase_mem_shrink_cpu_mapping(kctx, gpu_alloc->reg, + 0, gpu_alloc->nents); + if (err) + return -EINVAL; + + /* + * Add the allocation to the eviction list, after this point the shrink + * can reclaim it. + */ + mutex_lock(&kctx->evict_lock); + list_add(&gpu_alloc->evict_node, &kctx->evict_list); + mutex_unlock(&kctx->evict_lock); + kbase_mem_evictable_mark_reclaim(gpu_alloc); + + gpu_alloc->reg->flags |= KBASE_REG_DONT_NEED; + return 0; +} + +bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *gpu_alloc) +{ + struct kbase_context *kctx = gpu_alloc->imported.kctx; + int err = 0; + + lockdep_assert_held(&kctx->reg_lock); + + /* + * First remove the allocation from the eviction list as it's no + * longer eligible for eviction. + */ + mutex_lock(&kctx->evict_lock); + list_del_init(&gpu_alloc->evict_node); + mutex_unlock(&kctx->evict_lock); + + if (gpu_alloc->evicted == 0) { + /* + * The backing is still present, update the VM stats as it's + * in use again. + */ + kbase_mem_evictable_unmark_reclaim(gpu_alloc); + } else { + /* If the region is still alive ... */ + if (gpu_alloc->reg) { + /* ... allocate replacement backing ... */ + err = kbase_alloc_phy_pages_helper(gpu_alloc, + gpu_alloc->evicted); + + /* + * ... and grow the mapping back to its + * pre-eviction size. + */ + if (!err) + err = kbase_mem_grow_gpu_mapping(kctx, + gpu_alloc->reg, + gpu_alloc->evicted, 0); + + gpu_alloc->evicted = 0; + } + } + + /* If the region is still alive remove the DONT_NEED attribute. */ + if (gpu_alloc->reg) + gpu_alloc->reg->flags &= ~KBASE_REG_DONT_NEED; + + return (err == 0); +} + int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned int flags, unsigned int mask) { struct kbase_va_region *reg; int ret = -EINVAL; unsigned int real_flags = 0; unsigned int prev_flags = 0; + bool prev_needed, new_needed; KBASE_DEBUG_ASSERT(kctx); @@ -359,11 +794,11 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in flags &= mask; /* check for only supported flags */ - if (flags & ~(BASE_MEM_COHERENT_SYSTEM | BASE_MEM_COHERENT_LOCAL)) + if (flags & ~(BASE_MEM_FLAGS_MODIFIABLE)) goto out; /* mask covers bits we don't support? */ - if (mask & ~(BASE_MEM_COHERENT_SYSTEM | BASE_MEM_COHERENT_LOCAL)) + if (mask & ~(BASE_MEM_FLAGS_MODIFIABLE)) goto out; /* convert flags */ @@ -373,6 +808,7 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in real_flags |= KBASE_REG_SHARE_IN; /* now we can lock down the context, and find the region */ + down_write(¤t->mm->mmap_sem); kbase_gpu_vm_lock(kctx); /* Validate the region */ @@ -380,6 +816,28 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in if (!reg || (reg->flags & KBASE_REG_FREE)) goto out_unlock; + /* Is the region being transitioning between not needed and needed? 
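+	 * BASE_MEM_DONT_NEED marks an allocation as ephemeral, meaning its
+	 * physical backing may be reclaimed by the shrinker.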
*/ + prev_needed = (KBASE_REG_DONT_NEED & reg->flags) == KBASE_REG_DONT_NEED; + new_needed = (BASE_MEM_DONT_NEED & flags) == BASE_MEM_DONT_NEED; + if (prev_needed != new_needed) { + /* Aliased allocations can't be made ephemeral */ + if (atomic_read(®->cpu_alloc->gpu_mappings) > 1) + goto out_unlock; + + if (new_needed) { + /* Only native allocations can be marked not needed */ + if (reg->cpu_alloc->type != KBASE_MEM_TYPE_NATIVE) { + ret = -EINVAL; + goto out_unlock; + } + ret = kbase_mem_evictable_make(reg->gpu_alloc); + if (ret) + goto out_unlock; + } else { + kbase_mem_evictable_unmake(reg->gpu_alloc); + } + } + /* limit to imported memory */ if ((reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMP) && (reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM)) @@ -422,6 +880,7 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in out_unlock: kbase_gpu_vm_unlock(kctx); + up_write(¤t->mm->mmap_sem); out: return ret; } @@ -552,6 +1011,7 @@ static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, in struct kbase_va_region *reg; struct dma_buf *dma_buf; struct dma_buf_attachment *dma_attachment; + bool shared_zone = false; dma_buf = dma_buf_get(fd); if (IS_ERR_OR_NULL(dma_buf)) @@ -572,15 +1032,23 @@ static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, in /* ignore SAME_VA */ *flags &= ~BASE_MEM_SAME_VA; + if (*flags & BASE_MEM_IMPORT_SHARED) + shared_zone = true; + #ifdef CONFIG_64BIT if (!kctx->is_compat) { - /* 64-bit tasks must MMAP anyway, but not expose this address to clients */ + /* + * 64-bit tasks require us to reserve VA on the CPU that we use + * on the GPU. + */ + shared_zone = true; + } +#endif + + if (shared_zone) { *flags |= BASE_MEM_NEED_MMAP; reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_SAME_VA); } else { -#else - if (1) { -#endif reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_CUSTOM_VA); } @@ -618,7 +1086,7 @@ static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, in /* no read or write permission given on import, only on run do we give the right permissions */ - reg->gpu_alloc->type = BASE_MEM_IMPORT_TYPE_UMM; + reg->gpu_alloc->type = KBASE_MEM_TYPE_IMPORTED_UMM; reg->gpu_alloc->imported.umm.sgt = NULL; reg->gpu_alloc->imported.umm.dma_buf = dma_buf; reg->gpu_alloc->imported.umm.dma_attachment = dma_attachment; @@ -647,6 +1115,7 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( struct kbase_va_region *reg; long faulted_pages; int zone = KBASE_REG_ZONE_CUSTOM_VA; + bool shared_zone = false; *va_pages = (PAGE_ALIGN(address + size) >> PAGE_SHIFT) - PFN_DOWN(address); @@ -660,14 +1129,24 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( /* SAME_VA generally not supported with imported memory (no known use cases) */ *flags &= ~BASE_MEM_SAME_VA; + if (*flags & BASE_MEM_IMPORT_SHARED) + shared_zone = true; + #ifdef CONFIG_64BIT if (!kctx->is_compat) { - /* 64-bit tasks must MMAP anyway, but not expose this address to - * clients */ + /* + * 64-bit tasks require us to reserve VA on the CPU that we use + * on the GPU. 
+ */ + shared_zone = true; + } +#endif + + if (shared_zone) { *flags |= BASE_MEM_NEED_MMAP; zone = KBASE_REG_ZONE_SAME_VA; } -#endif + reg = kbase_alloc_free_region(kctx, 0, *va_pages, zone); if (!reg) @@ -705,8 +1184,13 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( /* We can't really store the page list because that would involve */ /* keeping the pages pinned - instead we pin/unpin around the job */ /* (as part of the external resources handling code) */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) faulted_pages = get_user_pages(current, current->mm, address, *va_pages, reg->flags & KBASE_REG_GPU_WR, 0, NULL, NULL); +#else + faulted_pages = get_user_pages(address, *va_pages, + reg->flags & KBASE_REG_GPU_WR, 0, NULL, NULL); +#endif up_read(¤t->mm->mmap_sem); if (faulted_pages != *va_pages) @@ -717,7 +1201,8 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( reg->gpu_alloc->imported.user_buf.nr_pages = faulted_pages; reg->gpu_alloc->imported.user_buf.pages = kmalloc_array(faulted_pages, sizeof(struct page *), GFP_KERNEL); - reg->gpu_alloc->imported.user_buf.owner = current; + reg->gpu_alloc->imported.user_buf.mm = current->mm; + atomic_inc(¤t->mm->mm_count); if (!reg->gpu_alloc->imported.user_buf.pages) goto no_page_array; @@ -817,8 +1302,9 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, /* validate and add src handles */ for (i = 0; i < nents; i++) { - if (ai[i].handle < BASE_MEM_FIRST_FREE_ADDRESS) { - if (ai[i].handle != BASE_MEM_WRITE_ALLOC_PAGES_HANDLE) + if (ai[i].handle.basep.handle < BASE_MEM_FIRST_FREE_ADDRESS) { + if (ai[i].handle.basep.handle != + BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE) goto bad_handle; /* unsupported magic handle */ if (!ai[i].length) goto bad_handle; /* must be > 0 */ @@ -830,13 +1316,17 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, struct kbase_va_region *aliasing_reg; struct kbase_mem_phy_alloc *alloc; - aliasing_reg = kbase_region_tracker_find_region_base_address(kctx, (ai[i].handle >> PAGE_SHIFT) << PAGE_SHIFT); + aliasing_reg = kbase_region_tracker_find_region_base_address( + kctx, + (ai[i].handle.basep.handle >> PAGE_SHIFT) << PAGE_SHIFT); /* validate found region */ if (!aliasing_reg) goto bad_handle; /* Not found */ if (aliasing_reg->flags & KBASE_REG_FREE) goto bad_handle; /* Free region */ + if (aliasing_reg->flags & KBASE_REG_DONT_NEED) + goto bad_handle; /* Ephemeral region */ if (!aliasing_reg->gpu_alloc) goto bad_handle; /* No alloc */ if (aliasing_reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) @@ -1058,6 +1548,7 @@ static int zap_range_nolock(struct mm_struct *mm, int err = -EINVAL; /* in case end < start */ while (start < end) { + unsigned long local_start; unsigned long local_end; vma = find_vma_intersection(mm, start, end); @@ -1068,12 +1559,17 @@ static int zap_range_nolock(struct mm_struct *mm, if (vma->vm_ops != vm_ops) goto try_next; + local_start = vma->vm_start; + + if (start > local_start) + local_start = start; + local_end = vma->vm_end; if (end < local_end) local_end = end; - err = zap_vma_ptes(vma, start, local_end - start); + err = zap_vma_ptes(vma, local_start, local_end - local_start); if (unlikely(err)) break; @@ -1085,19 +1581,98 @@ try_next: return err; } +int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx, + struct kbase_va_region *reg, + u64 new_pages, u64 old_pages) +{ + phys_addr_t *phy_pages; + u64 delta = new_pages - old_pages; + int ret = 0; + + lockdep_assert_held(&kctx->reg_lock); + + /* Map the new pages into the GPU */ + 
phy_pages = kbase_get_gpu_phy_pages(reg); + ret = kbase_mmu_insert_pages(kctx, reg->start_pfn + old_pages, + phy_pages + old_pages, delta, reg->flags); + + return ret; +} + +static int kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx, + struct kbase_va_region *reg, + u64 new_pages, u64 old_pages) +{ + struct kbase_mem_phy_alloc *cpu_alloc = reg->cpu_alloc; + struct kbase_cpu_mapping *mapping; + int err; + + lockdep_assert_held(&kctx->process_mm->mmap_sem); + + list_for_each_entry(mapping, &cpu_alloc->mappings, mappings_list) { + unsigned long mapping_size; + + mapping_size = (mapping->vm_end - mapping->vm_start) + >> PAGE_SHIFT; + + /* is this mapping affected ?*/ + if ((mapping->page_off + mapping_size) > new_pages) { + unsigned long first_bad = 0; + + if (new_pages > mapping->page_off) + first_bad = new_pages - mapping->page_off; + + err = zap_range_nolock(current->mm, + &kbase_vm_ops, + mapping->vm_start + + (first_bad << PAGE_SHIFT), + mapping->vm_end); + + WARN(err, + "Failed to zap VA range (0x%lx - 0x%lx);\n", + mapping->vm_start + + (first_bad << PAGE_SHIFT), + mapping->vm_end + ); + + /* The zap failed, give up and exit */ + if (err) + goto failed; + } + } + + return 0; + +failed: + return err; +} + +static int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx, + struct kbase_va_region *reg, + u64 new_pages, u64 old_pages) +{ + u64 delta = old_pages - new_pages; + int ret = 0; + + ret = kbase_mmu_teardown_pages(kctx, + reg->start_pfn + new_pages, delta); + + return ret; +} + int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages, enum base_backing_threshold_status *failure_reason) { u64 old_pages; u64 delta; int res = -EINVAL; struct kbase_va_region *reg; - phys_addr_t *phy_pages; + bool read_locked = false; KBASE_DEBUG_ASSERT(kctx); KBASE_DEBUG_ASSERT(failure_reason); KBASE_DEBUG_ASSERT(gpu_addr != 0); - down_read(¤t->mm->mmap_sem); + down_write(¤t->mm->mmap_sem); kbase_gpu_vm_lock(kctx); /* Validate the region */ @@ -1131,6 +1706,11 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages, en *failure_reason = BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE; goto out_unlock; } + /* can't grow regions which are ephemeral */ + if (reg->flags & KBASE_REG_DONT_NEED) { + *failure_reason = BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE; + goto out_unlock; + } if (new_pages == reg->gpu_alloc->nents) { /* no change */ @@ -1138,14 +1718,17 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages, en goto out_unlock; } - phy_pages = kbase_get_gpu_phy_pages(reg); old_pages = kbase_reg_current_backed_size(reg); - if (new_pages > old_pages) { - /* growing */ - int err; - delta = new_pages - old_pages; + + /* + * No update to the mm so downgrade the writer lock to a read + * lock so other readers aren't blocked after this point. + */ + downgrade_write(¤t->mm->mmap_sem); + read_locked = true; + /* Allocate some more pages */ if (kbase_alloc_phy_pages_helper(reg->cpu_alloc, delta) != 0) { *failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM; @@ -1160,9 +1743,15 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages, en goto out_unlock; } } - err = kbase_mmu_insert_pages(kctx, reg->start_pfn + old_pages, - phy_pages + old_pages, delta, reg->flags); - if (err) { + + /* No update required for CPU mappings, that's done on fault. */ + + /* Update GPU mapping. 
*/ + res = kbase_mem_grow_gpu_mapping(kctx, reg, + new_pages, old_pages); + + /* On error free the new pages */ + if (res) { kbase_free_phy_pages_helper(reg->cpu_alloc, delta); if (reg->cpu_alloc != reg->gpu_alloc) kbase_free_phy_pages_helper(reg->gpu_alloc, @@ -1171,60 +1760,35 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages, en goto out_unlock; } } else { - /* shrinking */ - struct kbase_cpu_mapping *mapping; - int err; + delta = old_pages - new_pages; - /* first, unmap from any mappings affected */ - list_for_each_entry(mapping, ®->cpu_alloc->mappings, mappings_list) { - unsigned long mapping_size = (mapping->vm_end - mapping->vm_start) >> PAGE_SHIFT; - - /* is this mapping affected ?*/ - if ((mapping->page_off + mapping_size) > new_pages) { - unsigned long first_bad = 0; - int zap_res; - - if (new_pages > mapping->page_off) - first_bad = new_pages - mapping->page_off; - - zap_res = zap_range_nolock(current->mm, - &kbase_vm_ops, - mapping->vm_start + - (first_bad << PAGE_SHIFT), - mapping->vm_end); - WARN(zap_res, - "Failed to zap VA range (0x%lx - 0x%lx);\n", - mapping->vm_start + - (first_bad << PAGE_SHIFT), - mapping->vm_end - ); - } + /* Update all CPU mapping(s) */ + res = kbase_mem_shrink_cpu_mapping(kctx, reg, + new_pages, old_pages); + if (res) { + *failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM; + goto out_unlock; } - /* Free some pages */ - delta = old_pages - new_pages; - err = kbase_mmu_teardown_pages(kctx, reg->start_pfn + new_pages, - delta); - if (err) { + /* Update the GPU mapping */ + res = kbase_mem_shrink_gpu_mapping(kctx, reg, + new_pages, old_pages); + if (res) { *failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM; goto out_unlock; } -#ifndef CONFIG_MALI_NO_MALI - if (kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_6367)) { - /* Wait for GPU to flush write buffer before freeing physical pages */ - kbase_wait_write_flush(kctx); - } -#endif + kbase_free_phy_pages_helper(reg->cpu_alloc, delta); if (reg->cpu_alloc != reg->gpu_alloc) kbase_free_phy_pages_helper(reg->gpu_alloc, delta); } - res = 0; - out_unlock: kbase_gpu_vm_unlock(kctx); - up_read(¤t->mm->mmap_sem); + if (read_locked) + up_read(¤t->mm->mmap_sem); + else + up_write(¤t->mm->mmap_sem); return res; } @@ -1296,6 +1860,10 @@ static int kbase_cpu_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) if (map->page_off + rel_pgoff >= map->alloc->nents) goto locked_bad_fault; + /* Fault on access to DONT_NEED regions */ + if (map->alloc->reg && (map->alloc->reg->flags & KBASE_REG_DONT_NEED)) + goto locked_bad_fault; + /* insert all valid pages from the fault location */ for (i = rel_pgoff; i < MIN((vma->vm_end - vma->vm_start) >> PAGE_SHIFT, @@ -1663,8 +2231,8 @@ int kbase_mmap(struct file *file, struct vm_area_struct *vma) rcu_read_unlock(); switch (vma->vm_pgoff) { - case PFN_DOWN(BASE_MEM_INVALID_HANDLE): - case PFN_DOWN(BASE_MEM_WRITE_ALLOC_PAGES_HANDLE): + case PFN_DOWN(BASEP_MEM_INVALID_HANDLE): + case PFN_DOWN(BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE): /* Illegal handle for direct map */ err = -EINVAL; goto out_unlock; @@ -1875,8 +2443,8 @@ out: KBASE_EXPORT_TEST_API(kbase_mmap); -void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size, - struct kbase_vmap_struct *map) +void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size, + unsigned long prot_request, struct kbase_vmap_struct *map) { struct kbase_va_region *reg; unsigned long page_index; @@ -1911,6 +2479,14 @@ void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size, if 
(page_index + page_count > kbase_reg_current_backed_size(reg)) goto out_unlock; + if (reg->flags & KBASE_REG_DONT_NEED) + goto out_unlock; + + /* check access permissions can be satisfied + * Intended only for checking KBASE_REG_{CPU,GPU}_{RD,WR} */ + if ((reg->flags & prot_request) != prot_request) + goto out_unlock; + page_array = kbase_get_cpu_phy_pages(reg); if (!page_array) goto out_unlock; @@ -1927,6 +2503,9 @@ void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size, /* Map uncached */ prot = pgprot_writecombine(prot); } + /* Note: enforcing a RO prot_request onto prot is not done, since: + * - CPU-arch-specific integration required + * - kbase_vmap() requires no access checks to be made/enforced */ cpu_addr = vmap(pages, page_count, VM_MAP, prot); @@ -1945,6 +2524,12 @@ void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size, map->is_cached = (reg->flags & KBASE_REG_CPU_CACHED) != 0; sync_needed = map->is_cached; +#ifdef CONFIG_MALI_COH_KERN + /* kernel can use coherent memory if supported */ + if (kctx->kbdev->system_coherency == COHERENCY_ACE) + sync_needed = false; +#endif + if (sync_needed) { /* Sync first page */ size_t sz = MIN(((size_t) PAGE_SIZE - offset), size); @@ -1979,6 +2564,17 @@ out_unlock: kbase_gpu_vm_unlock(kctx); return NULL; } + +void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size, + struct kbase_vmap_struct *map) +{ + /* 0 is specified for prot_request to indicate no access checks should + * be made. + * + * As mentioned in kbase_vmap_prot() this means that a kernel-side + * CPU-RO mapping is not enforced to allow this to work */ + return kbase_vmap_prot(kctx, gpu_addr, size, 0u, map); +} KBASE_EXPORT_TEST_API(kbase_vmap); void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map) @@ -1986,6 +2582,11 @@ void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map) void *addr = (void *)((uintptr_t)map->addr & PAGE_MASK); bool sync_needed = map->is_cached; vunmap(addr); +#ifdef CONFIG_MALI_COH_KERN + /* kernel can use coherent memory if supported */ + if (kctx->kbdev->system_coherency == COHERENCY_ACE) + sync_needed = false; +#endif if (sync_needed) { off_t offset = (uintptr_t)map->addr & ~PAGE_MASK; size_t size = map->size; diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem_linux.h b/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem_linux.h index 6a139fd70234..6471747a7dc5 100644 --- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem_linux.h +++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem_linux.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010, 2012-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010, 2012-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -42,6 +42,72 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages, enum base_backing_threshold_status *failure_reason); int kbase_mmap(struct file *file, struct vm_area_struct *vma); +/** + * kbase_mem_evictable_init - Initialize the Ephemeral memory the eviction + * mechanism. + * @kctx: The kbase context to initialize. + * + * Return: Zero on success or -errno on failure. + */ +int kbase_mem_evictable_init(struct kbase_context *kctx); + +/** + * kbase_mem_evictable_deinit - De-initialize the Ephemeral memory eviction + * mechanism. 
+ * @kctx: The kbase context to de-initialize. + */ +void kbase_mem_evictable_deinit(struct kbase_context *kctx); + +/** + * kbase_mem_grow_gpu_mapping - Grow the GPU mapping of an allocation + * @kctx: Context the region belongs to + * @reg: The GPU region + * @new_pages: The number of pages after the grow + * @old_pages: The number of pages before the grow + * + * Return: 0 on success, -errno on error. + * + * Expand the GPU mapping to encompass the new physical pages which have + * been added to the allocation. + * + * Note: Caller must be holding the region lock. + */ +int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx, + struct kbase_va_region *reg, + u64 new_pages, u64 old_pages); + +/** + * kbase_mem_evictable_make - Make a physical allocation eligible for eviction + * @gpu_alloc: The physical allocation to make evictable + * + * Return: 0 on success, -errno on error. + * + * Take the provided region and make all the physical pages within it + * reclaimable by the kernel, updating the per-process VM stats as well. + * Remove any CPU mappings (as these can't be removed in the shrinker callback + * as mmap_sem might already be taken) but leave the GPU mapping intact as + * and until the shrinker reclaims the allocation. + * + * Note: Must be called with the region lock of the containing context. + */ +int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc); + +/** + * kbase_mem_evictable_unmake - Remove a physical allocation's eligibility for + * eviction. + * @alloc: The physical allocation to remove eviction eligibility from. + * + * Return: True if the allocation had its backing restored and false + * otherwise. + * + * Make the physical pages in the region no longer reclaimable and update the + * per-process stats. If the shrinker has already evicted the memory then + * re-allocate it if the region is still alive. + * + * Note: Must be called with the region lock of the containing context. + */ +bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *alloc); + + struct kbase_vmap_struct { u64 gpu_addr; struct kbase_mem_phy_alloc *cpu_alloc; @@ -52,8 +118,83 @@ struct kbase_vmap_struct { size_t size; bool is_cached; }; + + +/** + * kbase_vmap_prot - Map a GPU VA range into the kernel safely, only if the + * requested access permissions are supported + * @kctx: Context the VA range belongs to + * @gpu_addr: Start address of VA range + * @size: Size of VA range + * @prot_request: Flags indicating how the caller will then access the memory + * @map: Structure to be given to kbase_vunmap() on freeing + * + * Return: Kernel-accessible CPU pointer to the VA range, or NULL on error + * + * Map a GPU VA Range into the kernel. The VA range must be contained within a + * GPU memory region. Appropriate CPU cache-flushing operations are made as + * required, dependent on the CPU mapping for the memory region. + * + * This is safer than using kmap() on the pages directly, + * because the pages here are refcounted to prevent freeing (and hence reuse + * elsewhere in the system) until a kbase_vunmap() call. + * + * The flags in @prot_request should use KBASE_REG_{CPU,GPU}_{RD,WR}, to check + * whether the region should allow the intended access, and return an error if + * disallowed. This is essential for security of imported memory, particularly + * a user buf from SHM mapped into the process as RO. In that case, write + * access must be checked if the intention is for the kernel to write to the + * memory. 
+ * + * The checks are also there to help catch access errors on memory where + * security is not a concern: imported memory that is always RW, and memory + * that was allocated and owned by the process attached to @kctx. In this case, + * it helps to identify memory that was mapped with the wrong access type. + * + * Note: KBASE_REG_GPU_{RD,WR} flags are currently supported for legacy cases + * where either the security of memory is solely dependent on those flags, or + * when userspace code was expecting only the GPU to access the memory (e.g. HW + * workarounds). + * + */ +void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size, + unsigned long prot_request, struct kbase_vmap_struct *map); + +/** + * kbase_vmap - Map a GPU VA range into the kernel safely + * @kctx: Context the VA range belongs to + * @gpu_addr: Start address of VA range + * @size: Size of VA range + * @map: Structure to be given to kbase_vunmap() on freeing + * + * Return: Kernel-accessible CPU pointer to the VA range, or NULL on error + * + * Map a GPU VA Range into the kernel. The VA range must be contained within a + * GPU memory region. Appropriate CPU cache-flushing operations are made as + * required, dependent on the CPU mapping for the memory region. + * + * This is safer than using kmap() on the pages directly, + * because the pages here are refcounted to prevent freeing (and hence reuse + * elsewhere in the system) until a kbase_vunmap() call. + * + * kbase_vmap_prot() should be used in preference, since kbase_vmap() makes no + * checks to ensure the security of e.g. imported user bufs from RO SHM. + */ void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size, struct kbase_vmap_struct *map); + +/** + * kbase_vunmap - Unmap a GPU VA range from the kernel + * @kctx: Context the VA range belongs to + * @map: Structure describing the mapping from the corresponding kbase_vmap() + * call + * + * Unmaps a GPU VA range from the kernel, given its @map structure obtained + * from kbase_vmap(). Appropriate CPU cache-flushing operations are made as + * required, dependent on the CPU mapping for the memory region. + * + * The reference taken on pages during kbase_vmap() is released. + */ void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map); /** @brief Allocate memory from kernel space and map it onto the GPU diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem_pool.c b/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem_pool.c index 153cd4efac49..957061893b00 100644 --- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem_pool.c +++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem_pool.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -43,6 +43,9 @@ int __init kbase_carveout_mem_reserve(phys_addr_t size) kbase_mem_pool_max_size(pool), \ ##__VA_ARGS__) +#define NOT_DIRTY false +#define NOT_RECLAIMED false + static inline void kbase_mem_pool_lock(struct kbase_mem_pool *pool) { spin_lock(&pool->pool_lock); @@ -261,6 +264,8 @@ static size_t kbase_mem_pool_grow(struct kbase_mem_pool *pool, for (i = 0; i < nr_to_grow && !kbase_mem_pool_is_full(pool); i++) { p = kbase_mem_pool_alloc_page(pool); + if (!p) + break; kbase_mem_pool_add(pool, p); } @@ -505,7 +510,7 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_pages, return 0; err_rollback: - kbase_mem_pool_free_pages(pool, i, pages, false); + kbase_mem_pool_free_pages(pool, i, pages, NOT_DIRTY, NOT_RECLAIMED); return err; } @@ -548,7 +553,7 @@ static void kbase_mem_pool_add_array(struct kbase_mem_pool *pool, } void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages, - phys_addr_t *pages, bool dirty) + phys_addr_t *pages, bool dirty, bool reclaimed) { struct kbase_mem_pool *next_pool = pool->next_pool; struct page *p; @@ -558,22 +563,24 @@ void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages, pool_dbg(pool, "free_pages(%zu):\n", nr_pages); - /* Add to this pool */ - nr_to_pool = kbase_mem_pool_capacity(pool); - nr_to_pool = min(nr_pages, nr_to_pool); + if (!reclaimed) { + /* Add to this pool */ + nr_to_pool = kbase_mem_pool_capacity(pool); + nr_to_pool = min(nr_pages, nr_to_pool); - kbase_mem_pool_add_array(pool, nr_to_pool, pages, false, dirty); + kbase_mem_pool_add_array(pool, nr_to_pool, pages, false, dirty); - i += nr_to_pool; + i += nr_to_pool; - if (i != nr_pages && next_pool) { - /* Spill to next pool (may overspill) */ - nr_to_pool = kbase_mem_pool_capacity(next_pool); - nr_to_pool = min(nr_pages - i, nr_to_pool); + if (i != nr_pages && next_pool) { + /* Spill to next pool (may overspill) */ + nr_to_pool = kbase_mem_pool_capacity(next_pool); + nr_to_pool = min(nr_pages - i, nr_to_pool); - kbase_mem_pool_add_array(next_pool, nr_to_pool, pages + i, - true, dirty); - i += nr_to_pool; + kbase_mem_pool_add_array(next_pool, nr_to_pool, + pages + i, true, dirty); + i += nr_to_pool; + } } /* Free any remaining pages to kernel */ @@ -582,6 +589,10 @@ void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages, continue; p = phys_to_page(pages[i]); + if (reclaimed) + zone_page_state_add(-1, page_zone(p), + NR_SLAB_RECLAIMABLE); + kbase_mem_pool_free_page(pool, p); pages[i] = 0; } diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem_profile_debugfs.c b/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem_profile_debugfs.c index 0b19d05c46e8..03594102f7ef 100644 --- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem_profile_debugfs.c +++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem_profile_debugfs.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -15,7 +15,7 @@ -#include +#include #ifdef CONFIG_DEBUG_FS diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem_profile_debugfs.h b/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem_profile_debugfs.h index 9555197f305c..a1dc2e0b165b 100644 --- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem_profile_debugfs.h +++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem_profile_debugfs.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -26,7 +26,6 @@ #ifndef _KBASE_MEM_PROFILE_DEBUGFS_H #define _KBASE_MEM_PROFILE_DEBUGFS_H -#include #include #include diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_mmu.c b/drivers/gpu/arm/midgard_for_linux/mali_kbase_mmu.c index d81ef593e928..48d53723a9b4 100644 --- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_mmu.c +++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_mmu.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -30,9 +30,8 @@ #if defined(CONFIG_MALI_GATOR_SUPPORT) #include #endif -#if defined(CONFIG_MALI_MIPE_ENABLED) #include -#endif +#include #include #define beenthere(kctx, f, a...) dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a) @@ -41,9 +40,31 @@ #include #include #include +#include #define KBASE_MMU_PAGE_ENTRIES 512 +/** + * kbase_mmu_flush_invalidate() - Flush and invalidate the GPU caches. + * @kctx: The KBase context. + * @vpfn: The virtual page frame number to start the flush on. + * @nr: The number of pages to flush. + * @sync: Set if the operation should be synchronous or not. + * + * Issue a cache flush + invalidate to the GPU caches and invalidate the TLBs. + * + * If sync is not set then transactions still in flight when the flush is issued + * may use the old page tables and the data they write will not be written out + * to memory; this function returns after the flush has been issued but + * before all accesses which might affect the flushed region have completed. + * + * If sync is set then accesses in the flushed region will be drained + * before data is flushed and invalidated through L1, L2 and into memory, + * after which point this function will return. + */ +static void kbase_mmu_flush_invalidate(struct kbase_context *kctx, + u64 vpfn, size_t nr, bool sync); + /** * kbase_mmu_sync_pgd - sync page directory to memory * @kbdev: Device pointer. 
@@ -56,8 +77,12 @@ static void kbase_mmu_sync_pgd(struct kbase_device *kbdev, dma_addr_t handle, size_t size) { - - dma_sync_single_for_device(kbdev->dev, handle, size, DMA_TO_DEVICE); + /* If page table is not coherent then ensure the gpu can read + * the pages from memory + */ + if (kbdev->system_coherency != COHERENCY_ACE) + dma_sync_single_for_device(kbdev->dev, handle, size, + DMA_TO_DEVICE); } /* @@ -136,6 +161,18 @@ void page_fault_worker(struct work_struct *data) dev_warn(kbdev->dev, "Access flag unexpectedly set"); goto fault_done; +#ifdef CONFIG_MALI_GPU_MMU_AARCH64 + case AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT: + + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Address size fault"); + goto fault_done; + + case AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT: + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Memory attributes fault"); + goto fault_done; +#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */ default: kbase_mmu_report_fault_and_kill(kctx, faulting_as, @@ -164,6 +201,13 @@ void page_fault_worker(struct work_struct *data) goto fault_done; } + if ((region->flags & KBASE_REG_DONT_NEED)) { + kbase_gpu_vm_unlock(kctx); + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Don't need memory can't be grown"); + goto fault_done; + } + /* find the size we need to grow it by */ /* we know the result fit in a size_t due to kbase_region_tracker_find_region_enclosing_address * validating the fault_adress to be within a size_t from the start_pfn */ @@ -233,19 +277,27 @@ void page_fault_worker(struct work_struct *data) if (grown) { + u64 pfn_offset; u32 op; /* alloc success */ KBASE_DEBUG_ASSERT(kbase_reg_current_backed_size(region) <= region->nr_pages); - /* AS transaction begin */ - mutex_lock(&faulting_as->transaction_mutex); - /* set up the new pages */ - err = kbase_mmu_insert_pages(kctx, region->start_pfn + kbase_reg_current_backed_size(region) - new_pages, &kbase_get_gpu_phy_pages(region)[kbase_reg_current_backed_size(region) - new_pages], new_pages, region->flags); + pfn_offset = kbase_reg_current_backed_size(region) - new_pages; + /* + * Note: + * Issuing an MMU operation will unlock the MMU and cause the + * translation to be replayed. If the page insertion fails then + * rather then trying to continue the context should be killed + * so the no_flush version of insert_pages is used which allows + * us to unlock the MMU as we see fit. 
+ */ + err = kbase_mmu_insert_pages_no_flush(kctx, + region->start_pfn + pfn_offset, + &kbase_get_gpu_phy_pages(region)[pfn_offset], + new_pages, region->flags); if (err) { - /* failed to insert pages, handle as a normal PF */ - mutex_unlock(&faulting_as->transaction_mutex); kbase_free_phy_pages_helper(region->gpu_alloc, new_pages); if (region->gpu_alloc != region->cpu_alloc) kbase_free_phy_pages_helper(region->cpu_alloc, @@ -259,9 +311,10 @@ void page_fault_worker(struct work_struct *data) #if defined(CONFIG_MALI_GATOR_SUPPORT) kbase_trace_mali_page_fault_insert_pages(as_no, new_pages); #endif -#if defined(CONFIG_MALI_MIPE_ENABLED) kbase_tlstream_aux_pagefault(kctx->id, (u64)new_pages); -#endif + + /* AS transaction begin */ + mutex_lock(&faulting_as->transaction_mutex); /* flush L2 and unlock the VA (resumes the MMU) */ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6367)) @@ -324,11 +377,9 @@ phys_addr_t kbase_mmu_alloc_pgd(struct kbase_context *kctx) if (!p) goto sub_pages; -#if defined(CONFIG_MALI_MIPE_ENABLED) kbase_tlstream_aux_pagesalloc( (u32)kctx->id, (u64)new_page_count); -#endif page = kmap(p); if (NULL == page) @@ -365,7 +416,7 @@ static phys_addr_t mmu_get_next_pgd(struct kbase_context *kctx, phys_addr_t pgd, KBASE_DEBUG_ASSERT(pgd); KBASE_DEBUG_ASSERT(NULL != kctx); - lockdep_assert_held(&kctx->reg_lock); + lockdep_assert_held(&kctx->mmu_lock); /* * Architecture spec defines level-0 as being the top-most. @@ -406,8 +457,9 @@ static phys_addr_t mmu_get_bottom_pgd(struct kbase_context *kctx, u64 vpfn) phys_addr_t pgd; int l; - pgd = kctx->pgd; + lockdep_assert_held(&kctx->mmu_lock); + pgd = kctx->pgd; for (l = MIDGARD_MMU_TOPLEVEL; l < MIDGARD_MMU_BOTTOMLEVEL; l++) { pgd = mmu_get_next_pgd(kctx, pgd, vpfn, l); /* Handle failure condition */ @@ -428,7 +480,7 @@ static phys_addr_t mmu_insert_pages_recover_get_next_pgd(struct kbase_context *k KBASE_DEBUG_ASSERT(pgd); KBASE_DEBUG_ASSERT(NULL != kctx); - lockdep_assert_held(&kctx->reg_lock); + lockdep_assert_held(&kctx->mmu_lock); /* * Architecture spec defines level-0 as being the top-most. 
@@ -453,6 +505,8 @@ static phys_addr_t mmu_insert_pages_recover_get_bottom_pgd(struct kbase_context phys_addr_t pgd; int l; + lockdep_assert_held(&kctx->mmu_lock); + pgd = kctx->pgd; for (l = MIDGARD_MMU_TOPLEVEL; l < MIDGARD_MMU_BOTTOMLEVEL; l++) { @@ -476,7 +530,7 @@ static void mmu_insert_pages_failure_recovery(struct kbase_context *kctx, u64 vp /* 64-bit address range is the max */ KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE)); - lockdep_assert_held(&kctx->reg_lock); + lockdep_assert_held(&kctx->mmu_lock); mmu_mode = kctx->kbdev->mmu_mode; @@ -524,22 +578,28 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, bool recover_required = false; u64 recover_vpfn = vpfn; size_t recover_count = 0; + size_t remain = nr; + int err; KBASE_DEBUG_ASSERT(NULL != kctx); KBASE_DEBUG_ASSERT(0 != vpfn); /* 64-bit address range is the max */ KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE)); - lockdep_assert_held(&kctx->reg_lock); + /* Early out if there is nothing to do */ + if (nr == 0) + return 0; - while (nr) { + mutex_lock(&kctx->mmu_lock); + + while (remain) { unsigned int i; unsigned int index = vpfn & 0x1FF; unsigned int count = KBASE_MMU_PAGE_ENTRIES - index; struct page *p; - if (count > nr) - count = nr; + if (count > remain) + count = remain; /* * Repeatedly calling mmu_get_bottom_pte() is clearly @@ -558,7 +618,8 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, recover_vpfn, recover_count); } - return -EINVAL; + err = -EINVAL; + goto fail_unlock; } p = pfn_to_page(PFN_DOWN(pgd)); @@ -572,7 +633,8 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, recover_vpfn, recover_count); } - return -ENOMEM; + err = -ENOMEM; + goto fail_unlock; } for (i = 0; i < count; i++) { @@ -584,7 +646,7 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, } vpfn += count; - nr -= count; + remain -= count; kbase_mmu_sync_pgd(kctx->kbdev, kbase_dma_addr(p) + (index * sizeof(u64)), @@ -597,13 +659,17 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, recover_required = true; recover_count += count; } + mutex_unlock(&kctx->mmu_lock); + kbase_mmu_flush_invalidate(kctx, vpfn, nr, false); return 0; + +fail_unlock: + mutex_unlock(&kctx->mmu_lock); + kbase_mmu_flush_invalidate(kctx, vpfn, nr, false); + return err; } -/* - * Map 'nr' pages pointed to by 'phys' at GPU PFN 'vpfn' - */ -int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn, +int kbase_mmu_insert_pages_no_flush(struct kbase_context *kctx, u64 vpfn, phys_addr_t *phys, size_t nr, unsigned long flags) { @@ -614,22 +680,28 @@ int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn, bool recover_required = false; u64 recover_vpfn = vpfn; size_t recover_count = 0; + size_t remain = nr; + int err; KBASE_DEBUG_ASSERT(NULL != kctx); KBASE_DEBUG_ASSERT(0 != vpfn); /* 64-bit address range is the max */ KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE)); - lockdep_assert_held(&kctx->reg_lock); + /* Early out if there is nothing to do */ + if (nr == 0) + return 0; + + mutex_lock(&kctx->mmu_lock); - while (nr) { + while (remain) { unsigned int i; unsigned int index = vpfn & 0x1FF; unsigned int count = KBASE_MMU_PAGE_ENTRIES - index; struct page *p; - if (count > nr) - count = nr; + if (count > remain) + count = remain; /* * Repeatedly calling mmu_get_bottom_pte() is clearly @@ -648,7 +720,8 @@ int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn, recover_vpfn, recover_count); } - return -EINVAL; + err = -EINVAL; + goto fail_unlock; 
} p = pfn_to_page(PFN_DOWN(pgd)); @@ -662,7 +735,8 @@ int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn, recover_vpfn, recover_count); } - return -ENOMEM; + err = -ENOMEM; + goto fail_unlock; } for (i = 0; i < count; i++) { @@ -675,7 +749,7 @@ int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn, phys += count; vpfn += count; - nr -= count; + remain -= count; kbase_mmu_sync_pgd(kctx->kbdev, kbase_dma_addr(p) + (index * sizeof(u64)), @@ -688,81 +762,209 @@ int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn, recover_required = true; recover_count += count; } + + mutex_unlock(&kctx->mmu_lock); return 0; + +fail_unlock: + mutex_unlock(&kctx->mmu_lock); + return err; +} + +/* + * Map 'nr' pages pointed to by 'phys' at GPU PFN 'vpfn' + */ +int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn, + phys_addr_t *phys, size_t nr, + unsigned long flags) +{ + int err; + + err = kbase_mmu_insert_pages_no_flush(kctx, vpfn, phys, nr, flags); + kbase_mmu_flush_invalidate(kctx, vpfn, nr, false); + return err; } KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages); /** - * This function is responsible for validating the MMU PTs - * triggering reguired flushes. + * kbase_mmu_flush_invalidate_noretain() - Flush and invalidate the GPU caches + * without retaining the kbase context. + * @kctx: The KBase context. + * @vpfn: The virtual page frame number to start the flush on. + * @nr: The number of pages to flush. + * @sync: Set if the operation should be synchronous or not. * - * * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is - * currently scheduled into the runpool, and so potentially uses a lot of locks. - * These locks must be taken in the correct order with respect to others - * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more - * information. + * As per kbase_mmu_flush_invalidate but doesn't retain the kctx or do any + * other locking. */ -static void kbase_mmu_flush(struct kbase_context *kctx, u64 vpfn, size_t nr) +static void kbase_mmu_flush_invalidate_noretain(struct kbase_context *kctx, + u64 vpfn, size_t nr, bool sync) +{ + struct kbase_device *kbdev = kctx->kbdev; + int err; + u32 op; + + /* Early out if there is nothing to do */ + if (nr == 0) + return; + + if (sync) + op = AS_COMMAND_FLUSH_MEM; + else + op = AS_COMMAND_FLUSH_PT; + + err = kbase_mmu_hw_do_operation(kbdev, + &kbdev->as[kctx->as_nr], + kctx, vpfn, nr, op, 0); +#if KBASE_GPU_RESET_EN + if (err) { + /* Flush failed to complete, assume the + * GPU has hung and perform a reset to + * recover */ + dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover\n"); + + if (kbase_prepare_to_reset_gpu_locked(kbdev)) + kbase_reset_gpu_locked(kbdev); + } +#endif /* KBASE_GPU_RESET_EN */ + +#ifndef CONFIG_MALI_NO_MALI + /* + * As this function could be called in interrupt context the sync + * request can't block. Instead log the request and the next flush + * request will pick it up. 
+ */ + if ((!err) && sync && + kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_6367)) + atomic_set(&kctx->drain_pending, 1); +#endif /* !CONFIG_MALI_NO_MALI */ +} + +static void kbase_mmu_flush_invalidate(struct kbase_context *kctx, + u64 vpfn, size_t nr, bool sync) { struct kbase_device *kbdev; bool ctx_is_in_runpool; +#ifndef CONFIG_MALI_NO_MALI + bool drain_pending = false; - KBASE_DEBUG_ASSERT(NULL != kctx); + if (atomic_xchg(&kctx->drain_pending, 0)) + drain_pending = true; +#endif /* !CONFIG_MALI_NO_MALI */ - kbdev = kctx->kbdev; + /* Early out if there is nothing to do */ + if (nr == 0) + return; - /* We must flush if we're currently running jobs. At the very least, we need to retain the - * context to ensure it doesn't schedule out whilst we're trying to flush it */ + kbdev = kctx->kbdev; ctx_is_in_runpool = kbasep_js_runpool_retain_ctx(kbdev, kctx); if (ctx_is_in_runpool) { KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); - /* Second level check is to try to only do this when jobs are running. The refcount is - * a heuristic for this. */ - if (kbdev->js_data.runpool_irq.per_as_data[kctx->as_nr].as_busy_refcount >= 2) { - if (!kbase_pm_context_active_handle_suspend(kbdev, - KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { - int ret; - u32 op; - - /* AS transaction begin */ - mutex_lock(&kbdev->as[ - kctx->as_nr].transaction_mutex); - - if (kbase_hw_has_issue(kbdev, - BASE_HW_ISSUE_6367)) - op = AS_COMMAND_FLUSH; - else - op = AS_COMMAND_FLUSH_MEM; - - ret = kbase_mmu_hw_do_operation(kbdev, - &kbdev->as[kctx->as_nr], - kctx, vpfn, nr, - op, 0); + if (!kbase_pm_context_active_handle_suspend(kbdev, + KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { + int err; + u32 op; + + /* AS transaction begin */ + mutex_lock(&kbdev->as[ + kctx->as_nr].transaction_mutex); + + if (sync) + op = AS_COMMAND_FLUSH_MEM; + else + op = AS_COMMAND_FLUSH_PT; + + err = kbase_mmu_hw_do_operation(kbdev, + &kbdev->as[kctx->as_nr], + kctx, vpfn, nr, op, 0); + #if KBASE_GPU_RESET_EN - if (ret) { - /* Flush failed to complete, assume the - * GPU has hung and perform a reset to - * recover */ - dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issueing GPU soft-reset to recover\n"); - if (kbase_prepare_to_reset_gpu(kbdev)) - kbase_reset_gpu(kbdev); - } + if (err) { + /* Flush failed to complete, assume the + * GPU has hung and perform a reset to + * recover */ + dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issueing GPU soft-reset to recover\n"); + + if (kbase_prepare_to_reset_gpu(kbdev)) + kbase_reset_gpu(kbdev); + } #endif /* KBASE_GPU_RESET_EN */ - mutex_unlock(&kbdev->as[ - kctx->as_nr].transaction_mutex); - /* AS transaction end */ + mutex_unlock(&kbdev->as[ + kctx->as_nr].transaction_mutex); + /* AS transaction end */ - kbase_pm_context_idle(kbdev); +#ifndef CONFIG_MALI_NO_MALI + /* + * The transaction lock must be dropped before here + * as kbase_wait_write_flush could take it if + * the GPU was powered down (static analysis doesn't + * know this can't happen). 
+ */ + drain_pending |= (!err) && sync && + kbase_hw_has_issue(kctx->kbdev, + BASE_HW_ISSUE_6367); + if (drain_pending) { + /* Wait for GPU to flush write buffer */ + kbase_wait_write_flush(kctx); } +#endif /* !CONFIG_MALI_NO_MALI */ + + kbase_pm_context_idle(kbdev); } kbasep_js_runpool_release_ctx(kbdev, kctx); } } +void kbase_mmu_update(struct kbase_context *kctx) +{ + lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); + /* ASSERT that the context has a valid as_nr, which is only the case + * when it's scheduled in. + * + * as_nr won't change because the caller has the runpool_irq lock */ + KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); + lockdep_assert_held(&kctx->kbdev->as[kctx->as_nr].transaction_mutex); + + kctx->kbdev->mmu_mode->update(kctx); +} +KBASE_EXPORT_TEST_API(kbase_mmu_update); + +void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr) +{ + lockdep_assert_held(&kbdev->as[as_nr].transaction_mutex); + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + + kbdev->mmu_mode->disable_as(kbdev, as_nr); +} + +void kbase_mmu_disable(struct kbase_context *kctx) +{ + /* ASSERT that the context has a valid as_nr, which is only the case + * when it's scheduled in. + * + * as_nr won't change because the caller has the runpool_irq lock */ + KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); + + lockdep_assert_held(&kctx->kbdev->as[kctx->as_nr].transaction_mutex); + lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); + + /* + * The address space is being disabled, drain all knowledge of it out + * from the caches as pages and page tables might be freed after this. + * + * The job scheduler code will already be holding the locks and context + * so just do the flush. + */ + kbase_mmu_flush_invalidate_noretain(kctx, 0, ~0, true); + + kctx->kbdev->mmu_mode->disable_as(kctx->kbdev, kctx->as_nr); +} +KBASE_EXPORT_TEST_API(kbase_mmu_disable); + /* * We actually only discard the ATE, and not the page table * pages. 
There is a potential DoS here, as we'll leak memory by @@ -782,17 +984,18 @@ int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr) struct kbase_device *kbdev; size_t requested_nr = nr; struct kbase_mmu_mode const *mmu_mode; + int err; KBASE_DEBUG_ASSERT(NULL != kctx); beenthere(kctx, "kctx %p vpfn %lx nr %zd", (void *)kctx, (unsigned long)vpfn, nr); - lockdep_assert_held(&kctx->reg_lock); - if (0 == nr) { /* early out if nothing to do */ return 0; } + mutex_lock(&kctx->mmu_lock); + kbdev = kctx->kbdev; mmu_mode = kbdev->mmu_mode; @@ -808,14 +1011,16 @@ int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr) pgd = mmu_get_bottom_pgd(kctx, vpfn); if (!pgd) { dev_warn(kbdev->dev, "kbase_mmu_teardown_pages: mmu_get_bottom_pgd failure\n"); - return -EINVAL; + err = -EINVAL; + goto fail_unlock; } p = pfn_to_page(PFN_DOWN(pgd)); pgd_page = kmap(p); if (!pgd_page) { dev_warn(kbdev->dev, "kbase_mmu_teardown_pages: kmap failure\n"); - return -ENOMEM; + err = -ENOMEM; + goto fail_unlock; } for (i = 0; i < count; i++) @@ -831,8 +1036,14 @@ int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr) kunmap(p); } - kbase_mmu_flush(kctx, vpfn, requested_nr); + mutex_unlock(&kctx->mmu_lock); + kbase_mmu_flush_invalidate(kctx, vpfn, requested_nr, true); return 0; + +fail_unlock: + mutex_unlock(&kctx->mmu_lock); + kbase_mmu_flush_invalidate(kctx, vpfn, requested_nr, true); + return err; } KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages); @@ -855,12 +1066,17 @@ int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, phys_addr_t *ph u64 *pgd_page; size_t requested_nr = nr; struct kbase_mmu_mode const *mmu_mode; + int err; KBASE_DEBUG_ASSERT(NULL != kctx); KBASE_DEBUG_ASSERT(0 != vpfn); KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE)); - lockdep_assert_held(&kctx->reg_lock); + /* Early out if there is nothing to do */ + if (nr == 0) + return 0; + + mutex_lock(&kctx->mmu_lock); mmu_mode = kctx->kbdev->mmu_mode; @@ -879,14 +1095,16 @@ int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, phys_addr_t *ph pgd = mmu_get_bottom_pgd(kctx, vpfn); if (!pgd) { dev_warn(kctx->kbdev->dev, "mmu_get_bottom_pgd failure\n"); - return -EINVAL; + err = -EINVAL; + goto fail_unlock; } p = pfn_to_page(PFN_DOWN(pgd)); pgd_page = kmap(p); if (!pgd_page) { dev_warn(kctx->kbdev->dev, "kmap failure\n"); - return -ENOMEM; + err = -ENOMEM; + goto fail_unlock; } for (i = 0; i < count; i++) @@ -904,9 +1122,14 @@ int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, phys_addr_t *ph kunmap(pfn_to_page(PFN_DOWN(pgd))); } - kbase_mmu_flush(kctx, vpfn, requested_nr); - + mutex_unlock(&kctx->mmu_lock); + kbase_mmu_flush_invalidate(kctx, vpfn, requested_nr, true); return 0; + +fail_unlock: + mutex_unlock(&kctx->mmu_lock); + kbase_mmu_flush_invalidate(kctx, vpfn, requested_nr, true); + return err; } /* This is a debug feature only */ @@ -934,7 +1157,7 @@ static void mmu_teardown_level(struct kbase_context *kctx, phys_addr_t pgd, int struct kbase_mmu_mode const *mmu_mode; KBASE_DEBUG_ASSERT(NULL != kctx); - lockdep_assert_held(&kctx->reg_lock); + lockdep_assert_held(&kctx->mmu_lock); pgd_page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd))); /* kmap_atomic should NEVER fail. 
*/ @@ -979,6 +1202,8 @@ int kbase_mmu_init(struct kbase_context *kctx) KBASE_DEBUG_ASSERT(NULL != kctx); KBASE_DEBUG_ASSERT(NULL == kctx->mmu_teardown_pages); + mutex_init(&kctx->mmu_lock); + /* Preallocate MMU depth of four pages for mmu_teardown_level to use */ kctx->mmu_teardown_pages = kmalloc(PAGE_SIZE * 4, GFP_KERNEL); @@ -1004,9 +1229,9 @@ void kbase_mmu_free_pgd(struct kbase_context *kctx) KBASE_DEBUG_ASSERT(NULL != kctx); KBASE_DEBUG_ASSERT(NULL != kctx->mmu_teardown_pages); - lockdep_assert_held(&kctx->reg_lock); - + mutex_lock(&kctx->mmu_lock); mmu_teardown_level(kctx, kctx->pgd, MIDGARD_MMU_TOPLEVEL, 1, kctx->mmu_teardown_pages); + mutex_unlock(&kctx->mmu_lock); beenthere(kctx, "pgd %lx", (unsigned long)kctx->pgd); kbase_mem_pool_free(&kctx->mem_pool, phys_to_page(kctx->pgd), true); @@ -1014,11 +1239,9 @@ void kbase_mmu_free_pgd(struct kbase_context *kctx) new_page_count = kbase_atomic_sub_pages(1, &kctx->used_pages); kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages); -#if defined(CONFIG_MALI_MIPE_ENABLED) kbase_tlstream_aux_pagesalloc( (u32)kctx->id, (u64)new_page_count); -#endif } KBASE_EXPORT_TEST_API(kbase_mmu_free_pgd); @@ -1033,7 +1256,7 @@ static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, struct kbase_mmu_mode const *mmu_mode; KBASE_DEBUG_ASSERT(NULL != kctx); - lockdep_assert_held(&kctx->reg_lock); + lockdep_assert_held(&kctx->mmu_lock); mmu_mode = kctx->kbdev->mmu_mode; @@ -1088,13 +1311,13 @@ void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages) KBASE_DEBUG_ASSERT(kctx); - lockdep_assert_held(&kctx->reg_lock); - if (0 == nr_pages) { /* can't dump in a 0 sized buffer, early out */ return NULL; } + mutex_lock(&kctx->mmu_lock); + size_left = nr_pages * PAGE_SIZE; KBASE_DEBUG_ASSERT(0 != size_left); @@ -1116,7 +1339,7 @@ void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages) kctx->kbdev->mmu_mode->get_as_setup(kctx, &as_setup); config[0] = as_setup.transtab; config[1] = as_setup.memattr; - config[2] = 0; + config[2] = as_setup.transcfg; memcpy(buffer, &config, sizeof(config)); mmu_dump_buffer += sizeof(config); size_left -= sizeof(config); @@ -1130,10 +1353,8 @@ void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages) &mmu_dump_buffer, &size_left); - if (!size) { - vfree(kaddr); - return NULL; - } + if (!size) + goto fail_free; /* Add on the size for the end marker */ size += sizeof(u64); @@ -1144,15 +1365,20 @@ void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages) if (size > nr_pages * PAGE_SIZE || size_left < sizeof(u64)) { /* The buffer isn't big enough - free the memory and return failure */ - vfree(kaddr); - return NULL; + goto fail_free; } /* Add the end marker */ memcpy(mmu_dump_buffer, &end_marker, sizeof(u64)); } + mutex_unlock(&kctx->mmu_lock); return kaddr; + +fail_free: + vfree(kaddr); + mutex_unlock(&kctx->mmu_lock); + return NULL; } KBASE_EXPORT_TEST_API(kbase_mmu_dump); @@ -1193,13 +1419,17 @@ void bus_fault_worker(struct work_struct *data) #endif /* KBASE_GPU_RESET_EN */ /* NOTE: If GPU already powered off for suspend, we don't need to switch to unmapped */ if (!kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { + unsigned long flags; /* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */ /* AS transaction begin */ mutex_lock(&kbdev->as[as_no].transaction_mutex); /* Set the MMU into unmapped mode */ - kbase_mmu_disable_as(kbdev, as_no); + spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags); + 
kbase_mmu_disable(kctx); + spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, + flags); mutex_unlock(&kbdev->as[as_no].transaction_mutex); /* AS transaction end */ @@ -1317,6 +1547,15 @@ const char *kbase_exception_name(struct kbase_device *kbdev, u32 exception_code) e = "TRANSLATION_FAULT"; break; case 0xC8: +#ifdef CONFIG_MALI_GPU_MMU_AARCH64 + case 0xC9: + case 0xCA: + case 0xCB: + case 0xCC: + case 0xCD: + case 0xCE: + case 0xCF: +#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */ e = "PERMISSION_FAULT"; break; case 0xD0: @@ -1330,8 +1569,38 @@ const char *kbase_exception_name(struct kbase_device *kbdev, u32 exception_code) e = "TRANSTAB_BUS_FAULT"; break; case 0xD8: +#ifdef CONFIG_MALI_GPU_MMU_AARCH64 + case 0xD9: + case 0xDA: + case 0xDB: + case 0xDC: + case 0xDD: + case 0xDE: + case 0xDF: +#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */ e = "ACCESS_FLAG"; break; +#ifdef CONFIG_MALI_GPU_MMU_AARCH64 + case 0xE0: + case 0xE1: + case 0xE2: + case 0xE3: + case 0xE4: + case 0xE5: + case 0xE6: + case 0xE7: + e = "ADDRESS_SIZE_FAULT"; + break; + case 0xE8: + case 0xE9: + case 0xEA: + case 0xEB: + case 0xEC: + case 0xED: + case 0xEE: + case 0xEF: + e = "MEMORY_ATTRIBUTES_FAULT"; +#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */ break; default: e = "UNKNOWN"; @@ -1345,7 +1614,12 @@ static const char *access_type_name(struct kbase_device *kbdev, u32 fault_status) { switch (fault_status & AS_FAULTSTATUS_ACCESS_TYPE_MASK) { + case AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC: +#ifdef CONFIG_MALI_GPU_MMU_AARCH64 + return "ATOMIC"; +#else return "UNKNOWN"; +#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */ case AS_FAULTSTATUS_ACCESS_TYPE_READ: return "READ"; case AS_FAULTSTATUS_ACCESS_TYPE_WRITE: @@ -1441,7 +1715,9 @@ static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, } #endif /* KBASE_GPU_RESET_EN */ /* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */ - kbase_mmu_disable_as(kbdev, as_no); + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + kbase_mmu_disable(kctx); + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); mutex_unlock(&as->transaction_mutex); /* AS transaction end */ @@ -1678,8 +1954,15 @@ void kbase_mmu_interrupt_process(struct kbase_device *kbdev, struct kbase_contex */ kbasep_js_clear_submit_allowed(js_devdata, kctx); +#ifdef CONFIG_MALI_GPU_MMU_AARCH64 + dev_warn(kbdev->dev, + "Bus error in AS%d at VA=0x%016llx, IPA=0x%016llx\n", + as->number, as->fault_addr, + as->fault_extra_addr); +#else dev_warn(kbdev->dev, "Bus error in AS%d at 0x%016llx\n", as->number, as->fault_addr); +#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */ /* * We need to switch to UNMAPPED mode - but we do this in a diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_mmu_mode_lpae.c b/drivers/gpu/arm/midgard_for_linux/mali_kbase_mmu_mode_lpae.c index 079ef81d06d1..683cabb797db 100644 --- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_mmu_mode_lpae.c +++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_mmu_mode_lpae.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -77,17 +77,25 @@ static void mmu_get_as_setup(struct kbase_context *kctx, (AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY << (AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY * 8)) | (AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL << - (AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8)) | + (AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8)) | (AS_MEMATTR_LPAE_WRITE_ALLOC << - (AS_MEMATTR_INDEX_WRITE_ALLOC * 8)) | + (AS_MEMATTR_INDEX_WRITE_ALLOC * 8)) | + (AS_MEMATTR_LPAE_OUTER_IMPL_DEF << + (AS_MEMATTR_INDEX_OUTER_IMPL_DEF * 8)) | + (AS_MEMATTR_LPAE_OUTER_WA << + (AS_MEMATTR_INDEX_OUTER_WA * 8)) | 0; /* The other indices are unused for now */ - setup->transtab = (u64)kctx->pgd & - ((0xFFFFFFFFULL << 32) | AS_TRANSTAB_LPAE_ADDR_SPACE_MASK); - - setup->transtab |= AS_TRANSTAB_LPAE_ADRMODE_TABLE; - setup->transtab |= AS_TRANSTAB_LPAE_READ_INNER; + setup->transtab = ((u64)kctx->pgd & + ((0xFFFFFFFFULL << 32) | AS_TRANSTAB_LPAE_ADDR_SPACE_MASK)) | + AS_TRANSTAB_LPAE_ADRMODE_TABLE | + AS_TRANSTAB_LPAE_READ_INNER; +#ifdef CONFIG_MALI_GPU_MMU_AARCH64 + setup->transcfg = AS_TRANSCFG_ADRMODE_LEGACY; +#else + setup->transcfg = 0; +#endif } static void mmu_update(struct kbase_context *kctx) @@ -109,6 +117,9 @@ static void mmu_disable_as(struct kbase_device *kbdev, int as_nr) current_setup->transtab = AS_TRANSTAB_LPAE_ADRMODE_UNMAPPED; +#ifdef CONFIG_MALI_GPU_MMU_AARCH64 + current_setup->transcfg = AS_TRANSCFG_ADRMODE_LEGACY; +#endif /* Apply the address space setting */ kbase_mmu_hw_configure(kbdev, as, NULL); diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_platform_fake.c b/drivers/gpu/arm/midgard_for_linux/mali_kbase_platform_fake.c index 5bbd6d485638..1a44957fe44a 100644 --- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_platform_fake.c +++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_platform_fake.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2014, 2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -23,9 +23,6 @@ #include #include -#ifdef CONFIG_MACH_MANTA -#include -#endif /* * This file is included only for type definitions and functions belonging to @@ -62,6 +59,7 @@ static void kbasep_config_parse_io_resources(const struct kbase_io_resources *io linux_resources[0].start = io_resources->io_memory_region.start; linux_resources[0].end = io_resources->io_memory_region.end; linux_resources[0].flags = IORESOURCE_MEM; + linux_resources[1].start = io_resources->job_irq_number; linux_resources[1].end = io_resources->job_irq_number; linux_resources[1].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL; diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_pm.c b/drivers/gpu/arm/midgard_for_linux/mali_kbase_pm.c index 261441fa145b..97d543464c28 100644 --- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_pm.c +++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_pm.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -21,10 +21,10 @@ * @file mali_kbase_pm.c * Base kernel power management APIs */ + #include #include -#include -#include +#include #include @@ -151,6 +151,10 @@ void kbase_pm_suspend(struct kbase_device *kbdev) { KBASE_DEBUG_ASSERT(kbdev); + /* Suspend vinstr. + * This call will block until vinstr is suspended. */ + kbase_vinstr_suspend(kbdev->vinstr_ctx); + mutex_lock(&kbdev->pm.lock); KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev)); kbdev->pm.suspending = true; @@ -164,9 +168,6 @@ void kbase_pm_suspend(struct kbase_device *kbdev) * the PM active count references */ kbasep_js_suspend(kbdev); - /* Suspend any counter collection that might be happening */ - kbase_instr_hwcnt_suspend(kbdev); - /* Wait for the active count to reach zero. This is not the same as * waiting for a power down, since not all policies power down when this * reaches zero. */ @@ -186,9 +187,6 @@ void kbase_pm_resume(struct kbase_device *kbdev) /* Initial active call, to power on the GPU/cores if needed */ kbase_pm_context_active(kbdev); - /* Re-enable instrumentation, if it was previously disabled */ - kbase_instr_hwcnt_resume(kbdev); - /* Resume any blocked atoms (which may cause contexts to be scheduled in * and dependent atoms to run) */ kbase_resume_suspended_soft_jobs(kbdev); @@ -200,5 +198,8 @@ void kbase_pm_resume(struct kbase_device *kbdev) /* Matching idle call, to power off the GPU/cores if we didn't actually * need it and the policy doesn't want it on */ kbase_pm_context_idle(kbdev); + + /* Resume vinstr operation */ + kbase_vinstr_resume(kbdev->vinstr_ctx); } diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_regs_history_debugfs.c b/drivers/gpu/arm/midgard_for_linux/mali_kbase_regs_history_debugfs.c new file mode 100644 index 000000000000..c970650069cd --- /dev/null +++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_regs_history_debugfs.c @@ -0,0 +1,130 @@ +/* + * + * (C) COPYRIGHT 2016 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +#include "mali_kbase.h" + +#include "mali_kbase_regs_history_debugfs.h" + +#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI) + +#include + + +static int regs_history_size_get(void *data, u64 *val) +{ + struct kbase_io_history *const h = data; + + *val = h->size; + + return 0; +} + +static int regs_history_size_set(void *data, u64 val) +{ + struct kbase_io_history *const h = data; + + return kbase_io_history_resize(h, (u16)val); +} + + +DEFINE_SIMPLE_ATTRIBUTE(regs_history_size_fops, + regs_history_size_get, + regs_history_size_set, + "%llu\n"); + + +/** + * regs_history_show - show callback for the register access history file. + * + * @sfile: The debugfs entry + * @data: Data associated with the entry + * + * This function is called to dump all recent accesses to the GPU registers. 
+ * + * @return 0 if successfully prints data in debugfs entry file, failure + * otherwise + */ +static int regs_history_show(struct seq_file *sfile, void *data) +{ + struct kbase_io_history *const h = sfile->private; + u16 i; + size_t iters; + unsigned long flags; + + if (!h->enabled) { + seq_puts(sfile, "The register access history is disabled\n"); + goto out; + } + + spin_lock_irqsave(&h->lock, flags); + + iters = (h->size > h->count) ? h->count : h->size; + seq_printf(sfile, "Last %zu register accesses of %zu total:\n", iters, + h->count); + for (i = 0; i < iters; ++i) { + struct kbase_io_access *io = + &h->buf[(h->count - iters + i) % h->size]; + char const access = (io->addr & 1) ? 'w' : 'r'; + + seq_printf(sfile, "%6i: %c: reg 0x%p val %08x\n", i, access, + (void *)(io->addr & ~0x1), io->value); + } + + spin_unlock_irqrestore(&h->lock, flags); + +out: + return 0; +} + + +/** + * regs_history_open - open operation for regs_history debugfs file + * + * @in: &struct inode pointer + * @file: &struct file pointer + * + * @return file descriptor + */ +static int regs_history_open(struct inode *in, struct file *file) +{ + return single_open(file, ®s_history_show, in->i_private); +} + + +static const struct file_operations regs_history_fops = { + .open = ®s_history_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + + +void kbasep_regs_history_debugfs_init(struct kbase_device *kbdev) +{ + debugfs_create_bool("regs_history_enabled", S_IRUGO | S_IWUSR, + kbdev->mali_debugfs_directory, + &kbdev->io_history.enabled); + debugfs_create_file("regs_history_size", S_IRUGO | S_IWUSR, + kbdev->mali_debugfs_directory, + &kbdev->io_history, ®s_history_size_fops); + debugfs_create_file("regs_history", S_IRUGO, + kbdev->mali_debugfs_directory, &kbdev->io_history, + ®s_history_fops); +} + + +#endif /* CONFIG_DEBUG_FS */ diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_regs_history_debugfs.h b/drivers/gpu/arm/midgard_for_linux/mali_kbase_regs_history_debugfs.h new file mode 100644 index 000000000000..f10837002330 --- /dev/null +++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_regs_history_debugfs.h @@ -0,0 +1,50 @@ +/* + * + * (C) COPYRIGHT 2016 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +/** + * Header file for register access history support via debugfs + * + * This interface is made available via /sys/kernel/debug/mali#/regs_history*. + * + * Usage: + * - regs_history_enabled: whether recording of register accesses is enabled. + * Write 'y' to enable, 'n' to disable. + * - regs_history_size: size of the register history buffer, must be > 0 + * - regs_history: return the information about last accesses to the registers. 
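A userspace sketch of how these debugfs entries might be exercised once debugfs is mounted; the mali0 instance name and the 512-entry size are assumptions for illustration, not values mandated by the driver.

    #include <stdio.h>

    /* Assumed path for device instance 0; the actual name depends on how many
     * Mali devices are probed (mali0, mali1, ...). */
    #define REGS_HISTORY_DIR "/sys/kernel/debug/mali0"

    int main(void)
    {
            FILE *f;
            char line[256];

            /* Enable recording and pick an arbitrary buffer size (assumption). */
            f = fopen(REGS_HISTORY_DIR "/regs_history_enabled", "w");
            if (f) { fputs("y\n", f); fclose(f); }

            f = fopen(REGS_HISTORY_DIR "/regs_history_size", "w");
            if (f) { fputs("512\n", f); fclose(f); }

            /* Dump the recorded register accesses. */
            f = fopen(REGS_HISTORY_DIR "/regs_history", "r");
            if (!f)
                    return 1;
            while (fgets(line, sizeof(line), f))
                    fputs(line, stdout);
            fclose(f);
            return 0;
    }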
+ */ + +#ifndef _KBASE_REGS_HISTORY_DEBUGFS_H +#define _KBASE_REGS_HISTORY_DEBUGFS_H + +struct kbase_device; + +#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI) + +/** + * kbasep_regs_history_debugfs_init - add debugfs entries for register history + * + * @kbdev: Pointer to kbase_device containing the register history + */ +void kbasep_regs_history_debugfs_init(struct kbase_device *kbdev); + +#else /* CONFIG_DEBUG_FS */ + +#define kbasep_regs_history_debugfs_init CSTD_NOP + +#endif /* CONFIG_DEBUG_FS */ + +#endif /*_KBASE_REGS_HISTORY_DEBUGFS_H*/ diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_replay.c b/drivers/gpu/arm/midgard_for_linux/mali_kbase_replay.c index 71f005e32521..d3a3dbfa5241 100644 --- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_replay.c +++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_replay.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -27,15 +27,11 @@ #include #define JOB_NOT_STARTED 0 -#define JOB_TYPE_MASK 0xfe -#define JOB_TYPE_NULL (1 << 1) -#define JOB_TYPE_VERTEX (5 << 1) -#define JOB_TYPE_TILER (7 << 1) -#define JOB_TYPE_FUSED (8 << 1) -#define JOB_TYPE_FRAGMENT (9 << 1) - -#define JOB_FLAG_DESC_SIZE (1 << 0) -#define JOB_FLAG_PERFORM_JOB_BARRIER (1 << 8) +#define JOB_TYPE_NULL (1) +#define JOB_TYPE_VERTEX (5) +#define JOB_TYPE_TILER (7) +#define JOB_TYPE_FUSED (8) +#define JOB_TYPE_FRAGMENT (9) #define JOB_HEADER_32_FBD_OFFSET (31*4) #define JOB_HEADER_64_FBD_OFFSET (44*4) @@ -58,17 +54,9 @@ #define JOB_SOURCE_ID(status) (((status) >> 16) & 0xFFFF) #define JOB_POLYGON_LIST (0x03) -struct job_head { - u32 status; - u32 not_complete_index; - u64 fault_addr; - u16 flags; - u16 index; - u16 dependencies[2]; - union { - u64 _64; - u32 _32; - } next; +struct fragment_job { + struct job_descriptor_header header; + u32 x[2]; union { u64 _64; @@ -77,28 +65,43 @@ struct job_head { }; static void dump_job_head(struct kbase_context *kctx, char *head_str, - struct job_head *job) + struct job_descriptor_header *job) { #ifdef CONFIG_MALI_DEBUG dev_dbg(kctx->kbdev->dev, "%s\n", head_str); - dev_dbg(kctx->kbdev->dev, "addr = %p\n" - "status = %x\n" - "not_complete_index = %x\n" - "fault_addr = %llx\n" - "flags = %x\n" - "index = %x\n" - "dependencies = %x,%x\n", - job, job->status, job->not_complete_index, - job->fault_addr, job->flags, job->index, - job->dependencies[0], - job->dependencies[1]); - - if (job->flags & JOB_FLAG_DESC_SIZE) + dev_dbg(kctx->kbdev->dev, + "addr = %p\n" + "exception_status = %x (Source ID: 0x%x Access: 0x%x Exception: 0x%x)\n" + "first_incomplete_task = %x\n" + "fault_pointer = %llx\n" + "job_descriptor_size = %x\n" + "job_type = %x\n" + "job_barrier = %x\n" + "_reserved_01 = %x\n" + "_reserved_02 = %x\n" + "_reserved_03 = %x\n" + "_reserved_04/05 = %x,%x\n" + "job_index = %x\n" + "dependencies = %x,%x\n", + job, job->exception_status, + JOB_SOURCE_ID(job->exception_status), + (job->exception_status >> 8) & 0x3, + job->exception_status & 0xFF, + job->first_incomplete_task, + job->fault_pointer, job->job_descriptor_size, + job->job_type, job->job_barrier, job->_reserved_01, + job->_reserved_02, job->_reserved_03, + job->_reserved_04, job->_reserved_05, + job->job_index, + job->job_dependency_index_1, + job->job_dependency_index_2); + + if (job->job_descriptor_size) 
dev_dbg(kctx->kbdev->dev, "next = %llx\n", - job->next._64); + job->next_job._64); else dev_dbg(kctx->kbdev->dev, "next = %x\n", - job->next._32); + job->next_job._32); #endif } @@ -372,77 +375,81 @@ static int kbasep_replay_reset_job(struct kbase_context *kctx, u32 default_weight, u16 hw_job_id_offset, bool first_in_chain, bool fragment_chain) { - struct job_head *job; + struct fragment_job *frag_job; + struct job_descriptor_header *job; u64 new_job_header; struct kbase_vmap_struct map; - job = kbase_vmap(kctx, *job_header, sizeof(*job), &map); - if (!job) { + frag_job = kbase_vmap(kctx, *job_header, sizeof(*frag_job), &map); + if (!frag_job) { dev_err(kctx->kbdev->dev, "kbasep_replay_parse_jc: failed to map jc\n"); return -EINVAL; } + job = &frag_job->header; dump_job_head(kctx, "Job header:", job); - if (job->status == JOB_NOT_STARTED && !fragment_chain) { + if (job->exception_status == JOB_NOT_STARTED && !fragment_chain) { dev_err(kctx->kbdev->dev, "Job already not started\n"); goto out_unmap; } - job->status = JOB_NOT_STARTED; + job->exception_status = JOB_NOT_STARTED; - if ((job->flags & JOB_TYPE_MASK) == JOB_TYPE_VERTEX) - job->flags = (job->flags & ~JOB_TYPE_MASK) | JOB_TYPE_NULL; + if (job->job_type == JOB_TYPE_VERTEX) + job->job_type = JOB_TYPE_NULL; - if ((job->flags & JOB_TYPE_MASK) == JOB_TYPE_FUSED) { + if (job->job_type == JOB_TYPE_FUSED) { dev_err(kctx->kbdev->dev, "Fused jobs can not be replayed\n"); goto out_unmap; } if (first_in_chain) - job->flags |= JOB_FLAG_PERFORM_JOB_BARRIER; + job->job_barrier = 1; - if ((job->dependencies[0] + hw_job_id_offset) > JOB_HEADER_ID_MAX || - (job->dependencies[1] + hw_job_id_offset) > JOB_HEADER_ID_MAX || - (job->index + hw_job_id_offset) > JOB_HEADER_ID_MAX) { + if ((job->job_dependency_index_1 + hw_job_id_offset) > + JOB_HEADER_ID_MAX || + (job->job_dependency_index_2 + hw_job_id_offset) > + JOB_HEADER_ID_MAX || + (job->job_index + hw_job_id_offset) > JOB_HEADER_ID_MAX) { dev_err(kctx->kbdev->dev, "Job indicies/dependencies out of valid range\n"); goto out_unmap; } - if (job->dependencies[0]) - job->dependencies[0] += hw_job_id_offset; - if (job->dependencies[1]) - job->dependencies[1] += hw_job_id_offset; + if (job->job_dependency_index_1) + job->job_dependency_index_1 += hw_job_id_offset; + if (job->job_dependency_index_2) + job->job_dependency_index_2 += hw_job_id_offset; - job->index += hw_job_id_offset; + job->job_index += hw_job_id_offset; - if (job->flags & JOB_FLAG_DESC_SIZE) { - new_job_header = job->next._64; - if (!job->next._64) - job->next._64 = prev_jc; + if (job->job_descriptor_size) { + new_job_header = job->next_job._64; + if (!job->next_job._64) + job->next_job._64 = prev_jc; } else { - new_job_header = job->next._32; - if (!job->next._32) - job->next._32 = prev_jc; + new_job_header = job->next_job._32; + if (!job->next_job._32) + job->next_job._32 = prev_jc; } dump_job_head(kctx, "Updated to:", job); - if ((job->flags & JOB_TYPE_MASK) == JOB_TYPE_TILER) { - bool job_64 = (job->flags & JOB_FLAG_DESC_SIZE) != 0; + if (job->job_type == JOB_TYPE_TILER) { + bool job_64 = job->job_descriptor_size != 0; if (kbasep_replay_reset_tiler_job(kctx, *job_header, tiler_heap_free, hierarchy_mask, default_weight, job_64) != 0) goto out_unmap; - } else if ((job->flags & JOB_TYPE_MASK) == JOB_TYPE_FRAGMENT) { + } else if (job->job_type == JOB_TYPE_FRAGMENT) { u64 fbd_address; - if (job->flags & JOB_FLAG_DESC_SIZE) - fbd_address = job->fragment_fbd._64; + if (job->job_descriptor_size) + fbd_address = frag_job->fragment_fbd._64; else - 
fbd_address = (u64)job->fragment_fbd._32; + fbd_address = (u64)frag_job->fragment_fbd._32; if (fbd_address & FBD_TYPE) { if (kbasep_replay_reset_mfbd(kctx, @@ -485,7 +492,7 @@ static int kbasep_replay_find_hw_job_id(struct kbase_context *kctx, u64 jc, u16 *hw_job_id) { while (jc) { - struct job_head *job; + struct job_descriptor_header *job; struct kbase_vmap_struct map; dev_dbg(kctx->kbdev->dev, @@ -498,13 +505,13 @@ static int kbasep_replay_find_hw_job_id(struct kbase_context *kctx, return -EINVAL; } - if (job->index > *hw_job_id) - *hw_job_id = job->index; + if (job->job_index > *hw_job_id) + *hw_job_id = job->job_index; - if (job->flags & JOB_FLAG_DESC_SIZE) - jc = job->next._64; + if (job->job_descriptor_size) + jc = job->next_job._64; else - jc = job->next._32; + jc = job->next_job._32; kbase_vunmap(kctx, &map); } @@ -749,7 +756,7 @@ static int kbasep_replay_parse_payload(struct kbase_context *kctx, struct base_jd_atom_v2 *t_atom, struct base_jd_atom_v2 *f_atom) { - base_jd_replay_payload *payload; + base_jd_replay_payload *payload = NULL; u64 next; u64 prev_jc = 0; u16 hw_job_id_offset = 0; @@ -760,12 +767,27 @@ static int kbasep_replay_parse_payload(struct kbase_context *kctx, replay_atom->jc, sizeof(payload)); payload = kbase_vmap(kctx, replay_atom->jc, sizeof(*payload), &map); - if (!payload) { dev_err(kctx->kbdev->dev, "kbasep_replay_parse_payload: failed to map payload into kernel space\n"); return -EINVAL; } +#ifdef BASE_LEGACY_UK10_2_SUPPORT + if (KBASE_API_VERSION(10, 3) > replay_atom->kctx->api_version) { + base_jd_replay_payload_uk10_2 *payload_uk10_2; + u16 tiler_core_req; + u16 fragment_core_req; + + payload_uk10_2 = (base_jd_replay_payload_uk10_2 *) payload; + memcpy(&tiler_core_req, &payload_uk10_2->tiler_core_req, + sizeof(tiler_core_req)); + memcpy(&fragment_core_req, &payload_uk10_2->fragment_core_req, + sizeof(fragment_core_req)); + payload->tiler_core_req = (u32)(tiler_core_req & 0x7fff); + payload->fragment_core_req = (u32)(fragment_core_req & 0x7fff); + } +#endif /* BASE_LEGACY_UK10_2_SUPPORT */ + #ifdef CONFIG_MALI_DEBUG dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_payload: payload=%p\n", payload); dev_dbg(kctx->kbdev->dev, "Payload structure:\n" @@ -787,18 +809,35 @@ static int kbasep_replay_parse_payload(struct kbase_context *kctx, payload->fragment_core_req); payload_dump(kctx, payload); #endif - t_atom->core_req = payload->tiler_core_req | BASEP_JD_REQ_EVENT_NEVER; f_atom->core_req = payload->fragment_core_req | BASEP_JD_REQ_EVENT_NEVER; /* Sanity check core requirements*/ - if ((t_atom->core_req & BASEP_JD_REQ_ATOM_TYPE & - ~BASE_JD_REQ_COHERENT_GROUP) != BASE_JD_REQ_T || - (f_atom->core_req & BASEP_JD_REQ_ATOM_TYPE & - ~BASE_JD_REQ_COHERENT_GROUP) != BASE_JD_REQ_FS || + if ((t_atom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_T || + (f_atom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_FS || t_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES || f_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) { - dev_err(kctx->kbdev->dev, "Invalid core requirements\n"); + + int t_atom_type = t_atom->core_req & BASE_JD_REQ_ATOM_TYPE & ~BASE_JD_REQ_COHERENT_GROUP; + int f_atom_type = f_atom->core_req & BASE_JD_REQ_ATOM_TYPE & ~BASE_JD_REQ_COHERENT_GROUP & ~BASE_JD_REQ_FS_AFBC; + int t_has_ex_res = t_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES; + int f_has_ex_res = f_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES; + + if (t_atom_type != BASE_JD_REQ_T) { + dev_err(kctx->kbdev->dev, "Invalid core requirement: Tiler atom not a tiler job. 
Was: 0x%x\n Expected: 0x%x", + t_atom_type, BASE_JD_REQ_T); + } + if (f_atom_type != BASE_JD_REQ_FS) { + dev_err(kctx->kbdev->dev, "Invalid core requirement: Fragment shader atom not a fragment shader. Was 0x%x Expected: 0x%x\n", + f_atom_type, BASE_JD_REQ_FS); + } + if (t_has_ex_res) { + dev_err(kctx->kbdev->dev, "Invalid core requirement: Tiler atom has external resources.\n"); + } + if (f_has_ex_res) { + dev_err(kctx->kbdev->dev, "Invalid core requirement: Fragment shader atom has external resources.\n"); + } + goto out; } @@ -957,7 +996,7 @@ static bool kbase_replay_fault_check(struct kbase_jd_atom *katom) base_jd_replay_payload *payload; u64 job_header; u64 job_loop_detect; - struct job_head *job; + struct job_descriptor_header *job; struct kbase_vmap_struct job_map; struct kbase_vmap_struct map; bool err = false; @@ -1012,41 +1051,22 @@ static bool kbase_replay_fault_check(struct kbase_jd_atom *katom) } -#ifdef CONFIG_MALI_DEBUG - dev_dbg(dev, "\njob_head structure:\n" - "Source ID:0x%x Access:0x%x Exception:0x%x\n" - "at job addr = %p\n" - "not_complete_index = 0x%x\n" - "fault_addr = 0x%llx\n" - "flags = 0x%x\n" - "index = 0x%x\n" - "dependencies = 0x%x,0x%x\n", - JOB_SOURCE_ID(job->status), - ((job->status >> 8) & 0x3), - (job->status & 0xFF), - job, - job->not_complete_index, - job->fault_addr, - job->flags, - job->index, - job->dependencies[0], - job->dependencies[1]); -#endif + dump_job_head(kctx, "\njob_head structure:\n", job); /* Replay only when the polygon list reader caused the * DATA_INVALID_FAULT */ if ((BASE_JD_EVENT_DATA_INVALID_FAULT == katom->event_code) && - (JOB_POLYGON_LIST == JOB_SOURCE_ID(job->status))) { + (JOB_POLYGON_LIST == JOB_SOURCE_ID(job->exception_status))) { err = true; kbase_vunmap(kctx, &job_map); break; } /* Move on to next fragment job in the list */ - if (job->flags & JOB_FLAG_DESC_SIZE) - job_header = job->next._64; + if (job->job_descriptor_size) + job_header = job->next_job._64; else - job_header = job->next._32; + job_header = job->next_job._32; kbase_vunmap(kctx, &job_map); diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_softjobs.c b/drivers/gpu/arm/midgard_for_linux/mali_kbase_softjobs.c index 108c49d9150a..07b862546f80 100644 --- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_softjobs.c +++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_softjobs.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -19,14 +19,23 @@ #include +#if defined(CONFIG_DMA_SHARED_BUFFER) +#include +#include +#endif /* defined(CONFIG_DMA_SHARED_BUFFER) */ #include #ifdef CONFIG_SYNC #include "sync.h" #include #include "mali_kbase_sync.h" #endif +#include #include +#include #include +#include +#include +#include /* Mask to check cache alignment of data structures */ #define KBASE_CACHE_ALIGNMENT_MASK ((1<kctx; + unsigned long lflags; + + spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags); + list_add_tail(&katom->queue, &kctx->waiting_soft_jobs); + spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags); +} + +void kbasep_remove_waiting_soft_job(struct kbase_jd_atom *katom) +{ + struct kbase_context *kctx = katom->kctx; + unsigned long lflags; + + spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags); + list_del(&katom->queue); + spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags); +} + +static void kbasep_add_waiting_with_timeout(struct kbase_jd_atom *katom) +{ + struct kbase_context *kctx = katom->kctx; + + /* Record the start time of this atom so we could cancel it at + * the right time. + */ + katom->start_timestamp = ktime_get(); + + /* Add the atom to the waiting list before the timer is + * (re)started to make sure that it gets processed. + */ + kbasep_add_waiting_soft_job(katom); + + /* Schedule timeout of this atom after a period if it is not active */ + if (!timer_pending(&kctx->soft_job_timeout)) { + int timeout_ms = atomic_read( + &kctx->kbdev->js_data.soft_job_timeout_ms); + mod_timer(&kctx->soft_job_timeout, + jiffies + msecs_to_jiffies(timeout_ms)); + } +} + +static int kbasep_read_soft_event_status( + struct kbase_context *kctx, u64 evt, unsigned char *status) +{ + unsigned char *mapped_evt; + struct kbase_vmap_struct map; + + mapped_evt = kbase_vmap(kctx, evt, sizeof(*mapped_evt), &map); + if (!mapped_evt) + return -EFAULT; + + *status = *mapped_evt; + + kbase_vunmap(kctx, &map); + + return 0; +} + +static int kbasep_write_soft_event_status( + struct kbase_context *kctx, u64 evt, unsigned char new_status) +{ + unsigned char *mapped_evt; + struct kbase_vmap_struct map; + + if ((new_status != BASE_JD_SOFT_EVENT_SET) && + (new_status != BASE_JD_SOFT_EVENT_RESET)) + return -EINVAL; + + mapped_evt = kbase_vmap(kctx, evt, sizeof(*mapped_evt), &map); + if (!mapped_evt) + return -EFAULT; + + *mapped_evt = new_status; + + kbase_vunmap(kctx, &map); + + return 0; +} + static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom) { - struct kbase_va_region *reg; - phys_addr_t addr = 0; - u64 pfn; - u32 offset; - char *page; + struct kbase_vmap_struct map; + void *user_result; struct timespec ts; struct base_dump_cpu_gpu_counters data; u64 system_time; @@ -63,11 +150,16 @@ static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom) struct kbasep_js_device_data *js_devdata = &kctx->kbdev->js_data; /* We're suspended - queue this on the list of suspended jobs - * Use dep_item[1], because dep_item[0] is in use for 'waiting_soft_jobs' */ + * Use dep_item[1], because dep_item[0] was previously in use + * for 'waiting_soft_jobs'. 
+ */ mutex_lock(&js_devdata->runpool_mutex); list_add_tail(&katom->dep_item[1], &js_devdata->suspended_soft_jobs_list); mutex_unlock(&js_devdata->runpool_mutex); + /* Also adding this to the list of waiting soft job */ + kbasep_add_waiting_soft_job(katom); + return pm_active_err; } @@ -81,44 +173,20 @@ static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom) data.system_time = system_time; data.cycle_counter = cycle_counter; - pfn = jc >> PAGE_SHIFT; - offset = jc & ~PAGE_MASK; - /* Assume this atom will be cancelled until we know otherwise */ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; - if (offset > 0x1000 - sizeof(data)) { - /* Wouldn't fit in the page */ - return 0; - } - - kbase_gpu_vm_lock(kctx); - reg = kbase_region_tracker_find_region_enclosing_address(kctx, jc); - if (reg && - (reg->flags & KBASE_REG_GPU_WR) && - reg->cpu_alloc && reg->cpu_alloc->pages) - addr = reg->cpu_alloc->pages[pfn - reg->start_pfn]; - - kbase_gpu_vm_unlock(kctx); - if (!addr) - return 0; - page = kmap(pfn_to_page(PFN_DOWN(addr))); - if (!page) + /* GPU_WR access is checked on the range for returning the result to + * userspace for the following reasons: + * - security, this is currently how imported user bufs are checked. + * - userspace ddk guaranteed to assume region was mapped as GPU_WR */ + user_result = kbase_vmap_prot(kctx, jc, sizeof(data), KBASE_REG_GPU_WR, &map); + if (!user_result) return 0; - kbase_sync_single_for_cpu(katom->kctx->kbdev, - kbase_dma_addr(pfn_to_page(PFN_DOWN(addr))) + - offset, sizeof(data), - DMA_BIDIRECTIONAL); + memcpy(user_result, &data, sizeof(data)); - memcpy(page + offset, &data, sizeof(data)); - - kbase_sync_single_for_device(katom->kctx->kbdev, - kbase_dma_addr(pfn_to_page(PFN_DOWN(addr))) + - offset, sizeof(data), - DMA_BIDIRECTIONAL); - - kunmap(pfn_to_page(PFN_DOWN(addr))); + kbase_vunmap(kctx, &map); /* Atom was fine - mark it as done */ katom->event_code = BASE_JD_EVENT_DONE; @@ -128,22 +196,6 @@ static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom) #ifdef CONFIG_SYNC -/* Complete an atom that has returned '1' from kbase_process_soft_job (i.e. has waited) - * - * @param katom The atom to complete - */ -static void complete_soft_job(struct kbase_jd_atom *katom) -{ - struct kbase_context *kctx = katom->kctx; - - mutex_lock(&kctx->jctx.lock); - list_del(&katom->dep_item[0]); - kbase_finish_soft_job(katom); - if (jd_done_nolock(katom, NULL)) - kbase_js_sched_all(kctx->kbdev); - mutex_unlock(&kctx->jctx.lock); -} - static enum base_jd_event_code kbase_fence_trigger(struct kbase_jd_atom *katom, int result) { struct sync_pt *pt; @@ -185,7 +237,12 @@ static void kbase_fence_wait_worker(struct work_struct *data) katom = container_of(data, struct kbase_jd_atom, work); kctx = katom->kctx; - complete_soft_job(katom); + mutex_lock(&kctx->jctx.lock); + kbasep_remove_waiting_soft_job(katom); + kbase_finish_soft_job(katom); + if (jd_done_nolock(katom, NULL)) + kbase_js_sched_all(kctx->kbdev); + mutex_unlock(&kctx->jctx.lock); } static void kbase_fence_wait_callback(struct sync_fence *fence, struct sync_fence_waiter *waiter) @@ -202,11 +259,7 @@ static void kbase_fence_wait_callback(struct sync_fence *fence, struct sync_fenc /* Propagate the fence status to the atom. * If negative then cancel this atom and its dependencies. 
*/ -#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) - if (fence->status < 0) -#else - if (atomic_read(&fence->status) < 0) -#endif + if (kbase_fence_get_status(fence) < 0) katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; /* To prevent a potential deadlock we schedule the work onto the job_done_wq workqueue @@ -234,18 +287,25 @@ static int kbase_fence_wait(struct kbase_jd_atom *katom) if (ret == 1) { /* Already signalled */ return 0; - } else if (ret < 0) { - goto cancel_atom; } - return 1; - cancel_atom: - katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; - /* We should cause the dependant jobs in the bag to be failed, - * to do this we schedule the work queue to complete this job */ - KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work)); - INIT_WORK(&katom->work, kbase_fence_wait_worker); - queue_work(katom->kctx->jctx.job_done_wq, &katom->work); + if (ret < 0) { + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + /* We should cause the dependent jobs in the bag to be failed, + * to do this we schedule the work queue to complete this job */ + KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work)); + INIT_WORK(&katom->work, kbase_fence_wait_worker); + queue_work(katom->kctx->jctx.job_done_wq, &katom->work); + } + +#ifdef CONFIG_MALI_FENCE_DEBUG + /* The timeout code will add this job to the list of waiting soft jobs. + */ + kbasep_add_waiting_with_timeout(katom); +#else + kbasep_add_waiting_soft_job(katom); +#endif + return 1; } @@ -276,6 +336,7 @@ static void kbase_fence_cancel_wait(struct kbase_jd_atom *katom) finish_softjob: katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + kbasep_remove_waiting_soft_job(katom); kbase_finish_soft_job(katom); if (jd_done_nolock(katom, NULL)) @@ -283,9 +344,959 @@ finish_softjob: } #endif /* CONFIG_SYNC */ +static void kbasep_soft_event_complete_job(struct work_struct *work) +{ + struct kbase_jd_atom *katom = container_of(work, struct kbase_jd_atom, + work); + struct kbase_context *kctx = katom->kctx; + int resched; + + mutex_lock(&kctx->jctx.lock); + resched = jd_done_nolock(katom, NULL); + mutex_unlock(&kctx->jctx.lock); + + if (resched) + kbase_js_sched_all(kctx->kbdev); +} + +void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt) +{ + int cancel_timer = 1; + struct list_head *entry, *tmp; + unsigned long lflags; + + spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags); + list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) { + struct kbase_jd_atom *katom = list_entry( + entry, struct kbase_jd_atom, queue); + + switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { + case BASE_JD_REQ_SOFT_EVENT_WAIT: + if (katom->jc == evt) { + list_del(&katom->queue); + + katom->event_code = BASE_JD_EVENT_DONE; + INIT_WORK(&katom->work, + kbasep_soft_event_complete_job); + queue_work(kctx->jctx.job_done_wq, + &katom->work); + } else { + /* There are still other waiting jobs, we cannot + * cancel the timer yet. + */ + cancel_timer = 0; + } + break; +#ifdef CONFIG_MALI_FENCE_DEBUG + case BASE_JD_REQ_SOFT_FENCE_WAIT: + /* Keep the timer running if fence debug is enabled and + * there are waiting fence jobs. 
+ */ + cancel_timer = 0; + break; +#endif + } + } + + if (cancel_timer) + del_timer(&kctx->soft_job_timeout); + spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags); +} + +#ifdef CONFIG_MALI_FENCE_DEBUG +static char *kbase_fence_debug_status_string(int status) +{ + if (status == 0) + return "signaled"; + else if (status > 0) + return "active"; + else + return "error"; +} + +static void kbase_fence_debug_check_atom(struct kbase_jd_atom *katom) +{ + struct kbase_context *kctx = katom->kctx; + struct device *dev = kctx->kbdev->dev; + int i; + + for (i = 0; i < 2; i++) { + struct kbase_jd_atom *dep; + + list_for_each_entry(dep, &katom->dep_head[i], dep_item[i]) { + if (dep->status == KBASE_JD_ATOM_STATE_UNUSED || + dep->status == KBASE_JD_ATOM_STATE_COMPLETED) + continue; + + if ((dep->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) + == BASE_JD_REQ_SOFT_FENCE_TRIGGER) { + struct sync_fence *fence = dep->fence; + int status = kbase_fence_get_status(fence); + + /* Found blocked trigger fence. */ + dev_warn(dev, + "\tVictim trigger atom %d fence [%p] %s: %s\n", + kbase_jd_atom_id(kctx, dep), + fence, fence->name, + kbase_fence_debug_status_string(status)); + } + + kbase_fence_debug_check_atom(dep); + } + } +} + +static void kbase_fence_debug_wait_timeout(struct kbase_jd_atom *katom) +{ + struct kbase_context *kctx = katom->kctx; + struct device *dev = katom->kctx->kbdev->dev; + struct sync_fence *fence = katom->fence; + int timeout_ms = atomic_read(&kctx->kbdev->js_data.soft_job_timeout_ms); + int status = kbase_fence_get_status(fence); + unsigned long lflags; + + spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags); + + dev_warn(dev, "ctx %d_%d: Atom %d still waiting for fence [%p] after %dms\n", + kctx->tgid, kctx->id, + kbase_jd_atom_id(kctx, katom), + fence, timeout_ms); + dev_warn(dev, "\tGuilty fence [%p] %s: %s\n", + fence, fence->name, + kbase_fence_debug_status_string(status)); + + /* Search for blocked trigger atoms */ + kbase_fence_debug_check_atom(katom); + + spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags); + + /* Dump out the full state of all the Android sync fences. + * The function sync_dump() isn't exported to modules, so force + * sync_fence_wait() to time out to trigger sync_dump(). + */ + sync_fence_wait(fence, 1); +} + +struct kbase_fence_debug_work { + struct kbase_jd_atom *katom; + struct work_struct work; +}; + +static void kbase_fence_debug_wait_timeout_worker(struct work_struct *work) +{ + struct kbase_fence_debug_work *w = container_of(work, + struct kbase_fence_debug_work, work); + struct kbase_jd_atom *katom = w->katom; + struct kbase_context *kctx = katom->kctx; + + mutex_lock(&kctx->jctx.lock); + kbase_fence_debug_wait_timeout(katom); + mutex_unlock(&kctx->jctx.lock); + + kfree(w); +} + +static void kbase_fence_debug_timeout(struct kbase_jd_atom *katom) +{ + struct kbase_fence_debug_work *work; + struct kbase_context *kctx = katom->kctx; + + /* Enqueue fence debug worker. Use job_done_wq to get + * debug print ordered with job completion. + */ + work = kzalloc(sizeof(struct kbase_fence_debug_work), GFP_ATOMIC); + /* Ignore allocation failure. 
*/ + if (work) { + work->katom = katom; + INIT_WORK(&work->work, kbase_fence_debug_wait_timeout_worker); + queue_work(kctx->jctx.job_done_wq, &work->work); + } +} +#endif /* CONFIG_MALI_FENCE_DEBUG */ + +void kbasep_soft_job_timeout_worker(unsigned long data) +{ + struct kbase_context *kctx = (struct kbase_context *)data; + u32 timeout_ms = (u32)atomic_read( + &kctx->kbdev->js_data.soft_job_timeout_ms); + struct timer_list *timer = &kctx->soft_job_timeout; + ktime_t cur_time = ktime_get(); + bool restarting = false; + unsigned long lflags; + struct list_head *entry, *tmp; + + spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags); + list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) { + struct kbase_jd_atom *katom = list_entry(entry, + struct kbase_jd_atom, queue); + s64 elapsed_time = ktime_to_ms(ktime_sub(cur_time, + katom->start_timestamp)); + + if (elapsed_time < (s64)timeout_ms) { + restarting = true; + continue; + } + + switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { + case BASE_JD_REQ_SOFT_EVENT_WAIT: + /* Take it out of the list to ensure that it + * will be cancelled in all cases + */ + list_del(&katom->queue); + + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + INIT_WORK(&katom->work, kbasep_soft_event_complete_job); + queue_work(kctx->jctx.job_done_wq, &katom->work); + break; +#ifdef CONFIG_MALI_FENCE_DEBUG + case BASE_JD_REQ_SOFT_FENCE_WAIT: + kbase_fence_debug_timeout(katom); + break; +#endif + } + } + + if (restarting) + mod_timer(timer, jiffies + msecs_to_jiffies(timeout_ms)); + spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags); +} + +static int kbasep_soft_event_wait(struct kbase_jd_atom *katom) +{ + struct kbase_context *kctx = katom->kctx; + unsigned char status; + + /* The status of this soft-job is stored in jc */ + if (kbasep_read_soft_event_status(kctx, katom->jc, &status)) { + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + return 0; + } + + if (status == BASE_JD_SOFT_EVENT_SET) + return 0; /* Event already set, nothing to do */ + + kbasep_add_waiting_with_timeout(katom); + + return 1; +} + +static void kbasep_soft_event_update_locked(struct kbase_jd_atom *katom, + unsigned char new_status) +{ + /* Complete jobs waiting on the same event */ + struct kbase_context *kctx = katom->kctx; + + if (kbasep_write_soft_event_status(kctx, katom->jc, new_status) != 0) { + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + return; + } + + if (new_status == BASE_JD_SOFT_EVENT_SET) + kbasep_complete_triggered_soft_events(kctx, katom->jc); +} + +/** + * kbase_soft_event_update() - Update soft event state + * @kctx: Pointer to context + * @event: Event to update + * @new_status: New status value of event + * + * Update the event, and wake up any atoms waiting for the event. + * + * Return: 0 on success, a negative error code on failure. 
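The event itself is a single byte of GPU-visible memory: writing SET releases every atom waiting on that address, writing RESET re-arms it. A reduced standalone model of that state machine is sketched below, using illustrative constants and names rather than the driver's BASE_JD_SOFT_EVENT_* values.

    #include <stdio.h>

    /* Illustrative states only; the real values come from mali_base_kernel.h. */
    enum soft_event_state { EVT_RESET, EVT_SET };

    struct soft_event {
            enum soft_event_state state;
            int waiters; /* atoms parked on this event */
    };

    /* Mirrors the flow of kbase_soft_event_update(): store the new status,
     * then complete waiters only on a SET transition. */
    static void soft_event_update(struct soft_event *evt,
                                  enum soft_event_state new_state)
    {
            evt->state = new_state;
            if (new_state == EVT_SET && evt->waiters) {
                    printf("releasing %d waiting atom(s)\n", evt->waiters);
                    evt->waiters = 0;
            }
    }

    int main(void)
    {
            struct soft_event evt = { EVT_RESET, 2 };

            soft_event_update(&evt, EVT_SET);   /* wakes both waiters */
            soft_event_update(&evt, EVT_RESET); /* re-arms, nothing to wake */
            return 0;
    }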
+ */ +int kbase_soft_event_update(struct kbase_context *kctx, + u64 event, + unsigned char new_status) +{ + int err = 0; + + mutex_lock(&kctx->jctx.lock); + + if (kbasep_write_soft_event_status(kctx, event, new_status)) { + err = -ENOENT; + goto out; + } + + if (new_status == BASE_JD_SOFT_EVENT_SET) + kbasep_complete_triggered_soft_events(kctx, event); + +out: + mutex_unlock(&kctx->jctx.lock); + + return err; +} + +static void kbasep_soft_event_cancel_job(struct kbase_jd_atom *katom) +{ + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + if (jd_done_nolock(katom, NULL)) + kbase_js_sched_all(katom->kctx->kbdev); +} + +struct kbase_debug_copy_buffer { + size_t size; + struct page **pages; + int nr_pages; + size_t offset; + /*To find memory region*/ + u64 gpu_addr; + + struct page **extres_pages; + int nr_extres_pages; +}; + +static inline void free_user_buffer(struct kbase_debug_copy_buffer *buffer) +{ + struct page **pages = buffer->extres_pages; + int nr_pages = buffer->nr_extres_pages; + + if (pages) { + int i; + + for (i = 0; i < nr_pages; i++) { + struct page *pg = pages[i]; + + if (pg) + put_page(pg); + } + kfree(pages); + } +} + +static void kbase_debug_copy_finish(struct kbase_jd_atom *katom) +{ + struct kbase_debug_copy_buffer *buffers = + (struct kbase_debug_copy_buffer *)(uintptr_t)katom->jc; + unsigned int i; + unsigned int nr = katom->nr_extres; + + if (!buffers) + return; + + kbase_gpu_vm_lock(katom->kctx); + for (i = 0; i < nr; i++) { + int p; + struct kbase_va_region *reg; + + reg = kbase_region_tracker_find_region_enclosing_address( + katom->kctx, buffers[i].gpu_addr); + + if (!buffers[i].pages) + break; + for (p = 0; p < buffers[i].nr_pages; p++) { + struct page *pg = buffers[i].pages[p]; + + if (pg) + put_page(pg); + } + kfree(buffers[i].pages); + if (reg && reg->gpu_alloc) { + switch (reg->gpu_alloc->type) { + case KBASE_MEM_TYPE_IMPORTED_USER_BUF: + { + free_user_buffer(&buffers[i]); + break; + } + default: + /* Nothing to be done. 
*/ + break; + } + kbase_mem_phy_alloc_put(reg->gpu_alloc); + } + } + kbase_gpu_vm_unlock(katom->kctx); + kfree(buffers); + + katom->jc = 0; +} + +static int kbase_debug_copy_prepare(struct kbase_jd_atom *katom) +{ + struct kbase_debug_copy_buffer *buffers; + struct base_jd_debug_copy_buffer *user_buffers = NULL; + unsigned int i; + unsigned int nr = katom->nr_extres; + int ret = 0; + void __user *user_structs = (void __user *)(uintptr_t)katom->jc; + + if (!user_structs) + return -EINVAL; + + buffers = kcalloc(nr, sizeof(*buffers), GFP_KERNEL); + if (!buffers) { + ret = -ENOMEM; + katom->jc = 0; + goto out_cleanup; + } + katom->jc = (u64)(uintptr_t)buffers; + + user_buffers = kmalloc_array(nr, sizeof(*user_buffers), GFP_KERNEL); + + if (!user_buffers) { + ret = -ENOMEM; + goto out_cleanup; + } + + ret = copy_from_user(user_buffers, user_structs, + sizeof(*user_buffers)*nr); + if (ret) + goto out_cleanup; + + for (i = 0; i < nr; i++) { + u64 addr = user_buffers[i].address; + u64 page_addr = addr & PAGE_MASK; + u64 end_page_addr = addr + user_buffers[i].size - 1; + u64 last_page_addr = end_page_addr & PAGE_MASK; + int nr_pages = (last_page_addr-page_addr)/PAGE_SIZE+1; + int pinned_pages; + struct kbase_va_region *reg; + struct base_external_resource user_extres; + + if (!addr) + continue; + + buffers[i].nr_pages = nr_pages; + buffers[i].offset = addr & ~PAGE_MASK; + if (buffers[i].offset >= PAGE_SIZE) { + ret = -EINVAL; + goto out_cleanup; + } + buffers[i].size = user_buffers[i].size; + + buffers[i].pages = kcalloc(nr_pages, sizeof(struct page *), + GFP_KERNEL); + if (!buffers[i].pages) { + ret = -ENOMEM; + goto out_cleanup; + } + + pinned_pages = get_user_pages_fast(page_addr, + nr_pages, + 1, /* Write */ + buffers[i].pages); + if (pinned_pages < 0) { + ret = pinned_pages; + goto out_cleanup; + } + if (pinned_pages != nr_pages) { + ret = -EINVAL; + goto out_cleanup; + } + + user_extres = user_buffers[i].extres; + if (user_extres.ext_resource == 0ULL) { + ret = -EINVAL; + goto out_cleanup; + } + + buffers[i].gpu_addr = user_extres.ext_resource & + ~BASE_EXT_RES_ACCESS_EXCLUSIVE; + kbase_gpu_vm_lock(katom->kctx); + reg = kbase_region_tracker_find_region_enclosing_address( + katom->kctx, buffers[i].gpu_addr); + + if (NULL == reg || NULL == reg->cpu_alloc || + (reg->flags & KBASE_REG_FREE)) { + ret = -EINVAL; + goto out_unlock; + } + kbase_mem_phy_alloc_get(reg->gpu_alloc); + + buffers[i].nr_extres_pages = reg->nr_pages; + if (reg->nr_pages*PAGE_SIZE != buffers[i].size) + dev_warn(katom->kctx->kbdev->dev, "Copy buffer is not of same size as the external resource to copy.\n"); + + switch (reg->gpu_alloc->type) { + case KBASE_MEM_TYPE_IMPORTED_USER_BUF: + { + struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc; + unsigned long nr_pages = + alloc->imported.user_buf.nr_pages; + + if (alloc->imported.user_buf.mm != current->mm) { + ret = -EINVAL; + goto out_unlock; + } + buffers[i].extres_pages = kcalloc(nr_pages, + sizeof(struct page *), GFP_KERNEL); + if (!buffers[i].extres_pages) { + ret = -ENOMEM; + goto out_unlock; + } + + ret = get_user_pages_fast( + alloc->imported.user_buf.address, + nr_pages, 0, + buffers[i].extres_pages); + if (ret != nr_pages) + goto out_unlock; + ret = 0; + break; + } + case KBASE_MEM_TYPE_IMPORTED_UMP: + { + dev_warn(katom->kctx->kbdev->dev, + "UMP is not supported for debug_copy jobs\n"); + ret = -EINVAL; + goto out_unlock; + } + default: + /* Nothing to be done. 
*/ + break; + } + kbase_gpu_vm_unlock(katom->kctx); + } + kfree(user_buffers); + + return ret; + +out_unlock: + kbase_gpu_vm_unlock(katom->kctx); + +out_cleanup: + kfree(buffers); + kfree(user_buffers); + + /* Frees allocated memory for kbase_debug_copy_job struct, including + * members, and sets jc to 0 */ + kbase_debug_copy_finish(katom); + return ret; +} + +static void kbase_mem_copy_from_extres_page(struct kbase_context *kctx, + void *extres_page, struct page **pages, unsigned int nr_pages, + unsigned int *target_page_nr, size_t offset, size_t *to_copy) +{ + void *target_page = kmap(pages[*target_page_nr]); + size_t chunk = PAGE_SIZE-offset; + + if (!target_page) { + *target_page_nr += 1; + dev_warn(kctx->kbdev->dev, "kmap failed in debug_copy job."); + return; + } + + chunk = min(chunk, *to_copy); + + memcpy(target_page + offset, extres_page, chunk); + *to_copy -= chunk; + + kunmap(pages[*target_page_nr]); + + *target_page_nr += 1; + if (*target_page_nr >= nr_pages) + return; + + target_page = kmap(pages[*target_page_nr]); + if (!target_page) { + *target_page_nr += 1; + dev_warn(kctx->kbdev->dev, "kmap failed in debug_copy job."); + return; + } + + KBASE_DEBUG_ASSERT(target_page); + + chunk = min(offset, *to_copy); + memcpy(target_page, extres_page + PAGE_SIZE-offset, chunk); + *to_copy -= chunk; + + kunmap(pages[*target_page_nr]); +} + +static int kbase_mem_copy_from_extres(struct kbase_context *kctx, + struct kbase_debug_copy_buffer *buf_data) +{ + unsigned int i; + unsigned int target_page_nr = 0; + struct kbase_va_region *reg; + struct page **pages = buf_data->pages; + u64 offset = buf_data->offset; + size_t extres_size = buf_data->nr_extres_pages*PAGE_SIZE; + size_t to_copy = min(extres_size, buf_data->size); + int ret = 0; + + KBASE_DEBUG_ASSERT(pages != NULL); + + kbase_gpu_vm_lock(kctx); + reg = kbase_region_tracker_find_region_enclosing_address( + kctx, buf_data->gpu_addr); + + if (!reg) { + ret = -EINVAL; + goto out_unlock; + } + + switch (reg->gpu_alloc->type) { + case KBASE_MEM_TYPE_IMPORTED_USER_BUF: + { + for (i = 0; i < buf_data->nr_extres_pages; i++) { + struct page *pg = buf_data->extres_pages[i]; + void *extres_page = kmap(pg); + + if (extres_page) + kbase_mem_copy_from_extres_page(kctx, + extres_page, pages, + buf_data->nr_pages, + &target_page_nr, + offset, &to_copy); + + kunmap(pg); + if (target_page_nr >= buf_data->nr_pages) + break; + } + break; + } + break; +#ifdef CONFIG_DMA_SHARED_BUFFER + case KBASE_MEM_TYPE_IMPORTED_UMM: { + struct dma_buf *dma_buf = reg->gpu_alloc->imported.umm.dma_buf; + + KBASE_DEBUG_ASSERT(dma_buf != NULL); + KBASE_DEBUG_ASSERT(dma_buf->size == + buf_data->nr_extres_pages * PAGE_SIZE); + + ret = dma_buf_begin_cpu_access(dma_buf, +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) + 0, buf_data->nr_extres_pages*PAGE_SIZE, +#endif + DMA_FROM_DEVICE); + if (ret) + goto out_unlock; + + for (i = 0; i < buf_data->nr_extres_pages; i++) { + + void *extres_page = dma_buf_kmap(dma_buf, i); + + if (extres_page) + kbase_mem_copy_from_extres_page(kctx, + extres_page, pages, + buf_data->nr_pages, + &target_page_nr, + offset, &to_copy); + + dma_buf_kunmap(dma_buf, i, extres_page); + if (target_page_nr >= buf_data->nr_pages) + break; + } + dma_buf_end_cpu_access(dma_buf, +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) + 0, buf_data->nr_extres_pages*PAGE_SIZE, +#endif + DMA_FROM_DEVICE); + break; + } +#endif + default: + ret = -EINVAL; + } +out_unlock: + kbase_gpu_vm_unlock(kctx); + return ret; + +} + +static int kbase_debug_copy(struct kbase_jd_atom 
*katom) +{ + struct kbase_debug_copy_buffer *buffers = + (struct kbase_debug_copy_buffer *)(uintptr_t)katom->jc; + unsigned int i; + + for (i = 0; i < katom->nr_extres; i++) { + int res = kbase_mem_copy_from_extres(katom->kctx, &buffers[i]); + + if (res) + return res; + } + + return 0; +} + +static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom) +{ + __user void *data = (__user void *)(uintptr_t) katom->jc; + struct base_jit_alloc_info *info; + int ret; + + /* Fail the job if there is no info structure */ + if (!data) { + ret = -EINVAL; + goto fail; + } + + /* Copy the information for safe access and future storage */ + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) { + ret = -ENOMEM; + goto fail; + } + + if (copy_from_user(info, data, sizeof(*info)) != 0) { + ret = -EINVAL; + goto free_info; + } + + /* If the ID is zero then fail the job */ + if (info->id == 0) { + ret = -EINVAL; + goto free_info; + } + + /* Sanity check that the PA fits within the VA */ + if (info->va_pages < info->commit_pages) { + ret = -EINVAL; + goto free_info; + } + + /* Ensure the GPU address is correctly aligned */ + if ((info->gpu_alloc_addr & 0x7) != 0) { + ret = -EINVAL; + goto free_info; + } + + /* Replace the user pointer with our kernel allocated info structure */ + katom->jc = (u64)(uintptr_t) info; + + /* + * Note: + * The provided info->gpu_alloc_addr isn't validated here as + * userland can cache allocations which means that even + * though the region is valid it doesn't represent the + * same thing it used to. + * + * Complete validation of va_pages, commit_pages and extent + * isn't done here as it will be done during the call to + * kbase_mem_alloc. + */ + return 0; + +free_info: + kfree(info); +fail: + katom->jc = 0; + return ret; +} + +static void kbase_jit_allocate_process(struct kbase_jd_atom *katom) +{ + struct kbase_context *kctx = katom->kctx; + struct base_jit_alloc_info *info; + struct kbase_va_region *reg; + struct kbase_vmap_struct mapping; + u64 *ptr; + + info = (struct base_jit_alloc_info *) (uintptr_t) katom->jc; + + /* The JIT ID is still in use so fail the allocation */ + if (kctx->jit_alloc[info->id]) { + katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED; + return; + } + + /* + * Mark the allocation so we know it's in use even if the + * allocation itself fails. + */ + kctx->jit_alloc[info->id] = (struct kbase_va_region *) -1; + + /* Create a JIT allocation */ + reg = kbase_jit_allocate(kctx, info); + if (!reg) { + katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED; + return; + } + + /* + * Write the address of the JIT allocation to the user provided + * GPU allocation. + */ + ptr = kbase_vmap(kctx, info->gpu_alloc_addr, sizeof(*ptr), + &mapping); + if (!ptr) { + /* + * Leave the allocation "live" as the JIT free jit will be + * submitted anyway. + */ + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + return; + } + + *ptr = reg->start_pfn << PAGE_SHIFT; + kbase_vunmap(kctx, &mapping); + + katom->event_code = BASE_JD_EVENT_DONE; + + /* + * Bind it to the user provided ID. Do this last so we can check for + * the JIT free racing this JIT alloc job. 
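The id slot is deliberately written twice: a sentinel first, so a JIT free racing with this allocation still sees the id as occupied, and the real region only after everything else has succeeded. A reduced model of that ordering follows; the names and the fake allocator standing in for kbase_jit_allocate() are invented for illustration.

    #include <stdio.h>

    #define JIT_IDS 256
    #define JIT_PENDING ((void *)-1) /* id reserved, allocation not bound yet */

    static void *jit_alloc[JIT_IDS];

    /* Hypothetical allocator standing in for kbase_jit_allocate(). */
    static void *fake_region_alloc(int fail)
    {
            static int region;
            return fail ? NULL : &region;
    }

    static int jit_allocate(unsigned int id, int fail)
    {
            void *reg;

            if (jit_alloc[id])
                    return -1;              /* id already in use */

            jit_alloc[id] = JIT_PENDING;    /* reserve before doing any work */

            reg = fake_region_alloc(fail);
            if (!reg)
                    return -1;              /* slot stays pending; the paired free clears it */

            jit_alloc[id] = reg;            /* bind last, after all checks passed */
            return 0;
    }

    static void jit_free(unsigned int id)
    {
            if (jit_alloc[id] && jit_alloc[id] != JIT_PENDING)
                    printf("freeing region for id %u\n", id);
            jit_alloc[id] = NULL;
    }

    int main(void)
    {
            jit_allocate(1, 0);
            jit_free(1);
            jit_allocate(2, 1);     /* allocation fails, id 2 stays reserved */
            jit_free(2);            /* clears the sentinel without freeing anything */
            return 0;
    }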
+ */ + kctx->jit_alloc[info->id] = reg; +} + +static void kbase_jit_allocate_finish(struct kbase_jd_atom *katom) +{ + struct base_jit_alloc_info *info; + + info = (struct base_jit_alloc_info *) (uintptr_t) katom->jc; + /* Free the info structure */ + kfree(info); +} + +static void kbase_jit_free_process(struct kbase_jd_atom *katom) +{ + struct kbase_context *kctx = katom->kctx; + u8 id = (u8) katom->jc; + + /* + * If the ID is zero or it is not in use yet then fail the job. + */ + if ((id == 0) || (kctx->jit_alloc[id] == NULL)) { + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + return; + } + + /* + * If the ID is valid but the allocation request failed still succeed + * this soft job but don't try and free the allocation. + */ + if (kctx->jit_alloc[id] != (struct kbase_va_region *) -1) + kbase_jit_free(kctx, kctx->jit_alloc[id]); + + kctx->jit_alloc[id] = NULL; +} + +static int kbase_ext_res_prepare(struct kbase_jd_atom *katom) +{ + __user struct base_external_resource_list *user_ext_res; + struct base_external_resource_list *ext_res; + u64 count = 0; + size_t copy_size; + int ret; + + user_ext_res = (__user struct base_external_resource_list *) + (uintptr_t) katom->jc; + + /* Fail the job if there is no info structure */ + if (!user_ext_res) { + ret = -EINVAL; + goto fail; + } + + if (copy_from_user(&count, &user_ext_res->count, sizeof(u64)) != 0) { + ret = -EINVAL; + goto fail; + } + + /* Is the number of external resources in range? */ + if (!count || count > BASE_EXT_RES_COUNT_MAX) { + ret = -EINVAL; + goto fail; + } + + /* Copy the information for safe access and future storage */ + copy_size = sizeof(*ext_res); + copy_size += sizeof(struct base_external_resource) * (count - 1); + ext_res = kzalloc(copy_size, GFP_KERNEL); + if (!ext_res) { + ret = -ENOMEM; + goto fail; + } + + if (copy_from_user(ext_res, user_ext_res, copy_size) != 0) { + ret = -EINVAL; + goto free_info; + } + + /* + * Overwrite the count with the first value incase it was changed + * after the fact. + */ + ext_res->count = count; + + /* + * Replace the user pointer with our kernel allocated + * ext_res structure. + */ + katom->jc = (u64)(uintptr_t) ext_res; + + return 0; + +free_info: + kfree(ext_res); +fail: + return ret; +} + +static void kbase_ext_res_process(struct kbase_jd_atom *katom, bool map) +{ + struct base_external_resource_list *ext_res; + int i; + bool failed = false; + + ext_res = (struct base_external_resource_list *) (uintptr_t) katom->jc; + if (!ext_res) + goto failed_jc; + + kbase_gpu_vm_lock(katom->kctx); + + for (i = 0; i < ext_res->count; i++) { + u64 gpu_addr; + + gpu_addr = ext_res->ext_res[i].ext_resource & + ~BASE_EXT_RES_ACCESS_EXCLUSIVE; + if (map) { + if (!kbase_sticky_resource_acquire(katom->kctx, + gpu_addr)) + goto failed_loop; + } else + if (!kbase_sticky_resource_release(katom->kctx, NULL, + gpu_addr)) + failed = true; + } + + /* + * In the case of unmap we continue unmapping other resources in the + * case of failure but will always report failure if _any_ unmap + * request fails. 
+ */ + if (failed) + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + else + katom->event_code = BASE_JD_EVENT_DONE; + + kbase_gpu_vm_unlock(katom->kctx); + + return; + +failed_loop: + while (--i > 0) { + u64 gpu_addr; + + gpu_addr = ext_res->ext_res[i].ext_resource & + ~BASE_EXT_RES_ACCESS_EXCLUSIVE; + + kbase_sticky_resource_release(katom->kctx, NULL, gpu_addr); + } + + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + kbase_gpu_vm_unlock(katom->kctx); + +failed_jc: + return; +} + +static void kbase_ext_res_finish(struct kbase_jd_atom *katom) +{ + struct base_external_resource_list *ext_res; + + ext_res = (struct base_external_resource_list *) (uintptr_t) katom->jc; + /* Free the info structure */ + kfree(ext_res); +} + int kbase_process_soft_job(struct kbase_jd_atom *katom) { - switch (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) { + switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME: return kbase_dump_cpu_gpu_time(katom); #ifdef CONFIG_SYNC @@ -301,6 +1312,34 @@ int kbase_process_soft_job(struct kbase_jd_atom *katom) #endif /* CONFIG_SYNC */ case BASE_JD_REQ_SOFT_REPLAY: return kbase_replay_process(katom); + case BASE_JD_REQ_SOFT_EVENT_WAIT: + return kbasep_soft_event_wait(katom); + case BASE_JD_REQ_SOFT_EVENT_SET: + kbasep_soft_event_update_locked(katom, BASE_JD_SOFT_EVENT_SET); + break; + case BASE_JD_REQ_SOFT_EVENT_RESET: + kbasep_soft_event_update_locked(katom, BASE_JD_SOFT_EVENT_RESET); + break; + case BASE_JD_REQ_SOFT_DEBUG_COPY: + { + int res = kbase_debug_copy(katom); + + if (res) + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + break; + } + case BASE_JD_REQ_SOFT_JIT_ALLOC: + kbase_jit_allocate_process(katom); + break; + case BASE_JD_REQ_SOFT_JIT_FREE: + kbase_jit_free_process(katom); + break; + case BASE_JD_REQ_SOFT_EXT_RES_MAP: + kbase_ext_res_process(katom, true); + break; + case BASE_JD_REQ_SOFT_EXT_RES_UNMAP: + kbase_ext_res_process(katom, false); + break; } /* Atom is complete */ @@ -309,12 +1348,15 @@ int kbase_process_soft_job(struct kbase_jd_atom *katom) void kbase_cancel_soft_job(struct kbase_jd_atom *katom) { - switch (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) { + switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { #ifdef CONFIG_SYNC case BASE_JD_REQ_SOFT_FENCE_WAIT: kbase_fence_cancel_wait(katom); break; #endif + case BASE_JD_REQ_SOFT_EVENT_WAIT: + kbasep_soft_event_cancel_job(katom); + break; default: /* This soft-job doesn't support cancellation! 
*/ KBASE_DEBUG_ASSERT(0); @@ -323,7 +1365,7 @@ void kbase_cancel_soft_job(struct kbase_jd_atom *katom) int kbase_prepare_soft_job(struct kbase_jd_atom *katom) { - switch (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) { + switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME: { if (0 != (katom->jc & KBASE_CACHE_ALIGNMENT_MASK)) @@ -372,8 +1414,23 @@ int kbase_prepare_soft_job(struct kbase_jd_atom *katom) } break; #endif /* CONFIG_SYNC */ + case BASE_JD_REQ_SOFT_JIT_ALLOC: + return kbase_jit_allocate_prepare(katom); case BASE_JD_REQ_SOFT_REPLAY: + case BASE_JD_REQ_SOFT_JIT_FREE: break; + case BASE_JD_REQ_SOFT_EVENT_WAIT: + case BASE_JD_REQ_SOFT_EVENT_SET: + case BASE_JD_REQ_SOFT_EVENT_RESET: + if (katom->jc == 0) + return -EINVAL; + break; + case BASE_JD_REQ_SOFT_DEBUG_COPY: + return kbase_debug_copy_prepare(katom); + case BASE_JD_REQ_SOFT_EXT_RES_MAP: + return kbase_ext_res_prepare(katom); + case BASE_JD_REQ_SOFT_EXT_RES_UNMAP: + return kbase_ext_res_prepare(katom); default: /* Unsupported soft-job */ return -EINVAL; @@ -383,7 +1440,7 @@ int kbase_prepare_soft_job(struct kbase_jd_atom *katom) void kbase_finish_soft_job(struct kbase_jd_atom *katom) { - switch (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) { + switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME: /* Nothing to do */ break; @@ -405,6 +1462,19 @@ void kbase_finish_soft_job(struct kbase_jd_atom *katom) } break; #endif /* CONFIG_SYNC */ + + case BASE_JD_REQ_SOFT_DEBUG_COPY: + kbase_debug_copy_finish(katom); + break; + case BASE_JD_REQ_SOFT_JIT_ALLOC: + kbase_jit_allocate_finish(katom); + break; + case BASE_JD_REQ_SOFT_EXT_RES_MAP: + kbase_ext_res_finish(katom); + break; + case BASE_JD_REQ_SOFT_EXT_RES_UNMAP: + kbase_ext_res_finish(katom); + break; } } @@ -439,18 +1509,15 @@ void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev) /* Remove from the global list */ list_del(&katom_iter->dep_item[1]); /* Remove from the context's list of waiting soft jobs */ - list_del(&katom_iter->dep_item[0]); + kbasep_remove_waiting_soft_job(katom_iter); if (kbase_process_soft_job(katom_iter) == 0) { kbase_finish_soft_job(katom_iter); resched |= jd_done_nolock(katom_iter, NULL); } else { - /* The job has not completed */ KBASE_DEBUG_ASSERT((katom_iter->core_req & - BASEP_JD_REQ_ATOM_TYPE) + BASE_JD_REQ_SOFT_JOB_TYPE) != BASE_JD_REQ_SOFT_REPLAY); - list_add_tail(&katom_iter->dep_item[0], - &kctx->waiting_soft_jobs); } mutex_unlock(&kctx->jctx.lock); diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_sync.h b/drivers/gpu/arm/midgard_for_linux/mali_kbase_sync.h index 6d8e34d3c3ae..820bddc8c8b1 100644 --- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_sync.h +++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_sync.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -36,6 +36,15 @@ static inline struct sync_timeline *sync_pt_parent(struct sync_pt *pt) } #endif +static inline int kbase_fence_get_status(struct sync_fence *fence) +{ +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) + return fence->status; +#else + return atomic_read(&fence->status); +#endif +} + /* * Create a stream object. * Built on top of timeline object. 
diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_sync_user.c b/drivers/gpu/arm/midgard_for_linux/mali_kbase_sync_user.c index ddd0847a69c5..b9baa913a693 100644 --- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_sync_user.c +++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_sync_user.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -33,7 +33,6 @@ #include #include #include -#include static int kbase_stream_close(struct inode *inode, struct file *file) { diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_tlstream.c b/drivers/gpu/arm/midgard_for_linux/mali_kbase_tlstream.c index 0994ddd7c95f..ad88b7b69018 100644 --- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_tlstream.c +++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_tlstream.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -41,9 +41,6 @@ /* The number of nanoseconds in a second. */ #define NSECS_IN_SEC 1000000000ull /* ns */ -/* The number of nanoseconds to wait before autoflushing the stream. */ -#define AUTOFLUSH_TIMEOUT (2ull * NSECS_IN_SEC) /* ns */ - /* The period of autoflush checker execution in milliseconds. */ #define AUTOFLUSH_INTERVAL 1000 /* ms */ @@ -142,6 +139,8 @@ enum tl_msg_id_obj { KBASE_TL_RET_ATOM_AS, KBASE_TL_NRET_ATOM_AS, KBASE_TL_DEP_ATOM_ATOM, + KBASE_TL_NDEP_ATOM_ATOM, + KBASE_TL_RDEP_ATOM_ATOM, KBASE_TL_ATTRIB_ATOM_CONFIG, KBASE_TL_ATTRIB_AS_CONFIG, @@ -152,7 +151,9 @@ enum tl_msg_id_obj { /* Message ids of trace events that are recorded in the auxiliary stream. */ enum tl_msg_id_aux { KBASE_AUX_PM_STATE, + KBASE_AUX_ISSUE_JOB_SOFTSTOP, KBASE_AUX_JOB_SOFTSTOP, + KBASE_AUX_JOB_SOFTSTOP_EX, KBASE_AUX_PAGEFAULT, KBASE_AUX_PAGESALLOC }; @@ -166,7 +167,7 @@ enum tl_msg_id_aux { * @wbi: write buffer index * @rbi: read buffer index * @numbered: if non-zero stream's packets are sequentially numbered - * @last_write_time: timestamp indicating last write + * @autoflush_counter: counter tracking stream's autoflush state * * This structure holds information needed to construct proper packets in the * timeline stream. Each message in sequence must bear timestamp that is greater @@ -177,6 +178,11 @@ enum tl_msg_id_aux { * Each packet in timeline body stream has sequence number embedded (this value * must increment monotonically and is used by packets receiver to discover * buffer overflows. + * Autoflush counter is set to negative number when there is no data pending + * for flush and it is set to zero on every update of the buffer. Autoflush + * timer will increment the counter by one on every expiry. In case there will + * be no activity on the buffer during two consecutive timer expiries, stream + * buffer will be flushed. */ struct tl_stream { spinlock_t lock; @@ -190,7 +196,7 @@ struct tl_stream { atomic_t rbi; int numbered; - u64 last_write_time; + atomic_t autoflush_counter; }; /** @@ -241,9 +247,6 @@ static atomic_t autoflush_timer_active; * streams at any given time. */ static DEFINE_MUTEX(tl_reader_lock); -/* Indicator of whether the timeline stream file descriptor is already used. 
*/ -static atomic_t tlstream_busy = {0}; - /* Timeline stream event queue. */ static DECLARE_WAIT_QUEUE_HEAD(tl_event_queue); @@ -269,8 +272,8 @@ static const struct tp_desc tp_desc_obj[] = { KBASE_TL_NEW_CTX, __stringify(KBASE_TL_NEW_CTX), "object ctx is created", - "@pI", - "ctx,ctx_nr" + "@pII", + "ctx,ctx_nr,tgid" }, { KBASE_TL_NEW_GPU, @@ -405,6 +408,20 @@ static const struct tp_desc tp_desc_obj[] = { "@pp", "atom1,atom2" }, + { + KBASE_TL_NDEP_ATOM_ATOM, + __stringify(KBASE_TL_NDEP_ATOM_ATOM), + "atom2 no longer depends on atom1", + "@pp", + "atom1,atom2" + }, + { + KBASE_TL_RDEP_ATOM_ATOM, + __stringify(KBASE_TL_RDEP_ATOM_ATOM), + "resolved dependency of atom2 depending on atom1", + "@pp", + "atom1,atom2" + }, { KBASE_TL_ATTRIB_ATOM_CONFIG, __stringify(KBASE_TL_ATTRIB_ATOM_CONFIG), @@ -437,6 +454,13 @@ static const struct tp_desc tp_desc_aux[] = { "@IL", "core_type,core_state_bitset" }, + { + KBASE_AUX_ISSUE_JOB_SOFTSTOP, + __stringify(KBASE_AUX_ISSUE_JOB_SOFTSTOP), + "Issuing job soft stop", + "@p", + "atom" + }, { KBASE_AUX_JOB_SOFTSTOP, __stringify(KBASE_AUX_JOB_SOFTSTOP), @@ -444,6 +468,13 @@ static const struct tp_desc tp_desc_aux[] = { "@I", "tag_id" }, + { + KBASE_AUX_JOB_SOFTSTOP_EX, + __stringify(KBASE_AUX_JOB_SOFTSTOP_EX), + "Job soft stop, more details", + "@pI", + "atom,job_type" + }, { KBASE_AUX_PAGEFAULT, __stringify(KBASE_AUX_PAGEFAULT), @@ -470,6 +501,11 @@ static atomic_t tlstream_bytes_generated = {0}; /*****************************************************************************/ +/* Indicator of whether the timeline stream file descriptor is used. */ +atomic_t kbase_tlstream_enabled = {0}; + +/*****************************************************************************/ + /** * kbasep_tlstream_get_timestamp - return timestamp * @@ -773,6 +809,9 @@ static size_t kbasep_tlstream_msgbuf_submit( unsigned int rb_idx_raw = atomic_read(&stream->rbi); unsigned int wb_idx = wb_idx_raw % PACKET_COUNT; + /* Set stream as flushed. */ + atomic_set(&stream->autoflush_counter, -1); + kbasep_tlstream_packet_header_update( stream->buffer[wb_idx].data, wb_size - PACKET_HEADER_SIZE); @@ -828,7 +867,7 @@ static size_t kbasep_tlstream_msgbuf_submit( static char *kbasep_tlstream_msgbuf_acquire( enum tl_stream_type stream_type, size_t msg_size, - unsigned long *flags) + unsigned long *flags) __acquires(&stream->lock) { struct tl_stream *stream; unsigned int wb_idx_raw; @@ -875,14 +914,16 @@ static char *kbasep_tlstream_msgbuf_acquire( */ static void kbasep_tlstream_msgbuf_release( enum tl_stream_type stream_type, - unsigned long flags) + unsigned long flags) __releases(&stream->lock) { struct tl_stream *stream; KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type); stream = tl_stream[stream_type]; - stream->last_write_time = kbasep_tlstream_get_timestamp(); + + /* Mark stream as containing unflushed data. */ + atomic_set(&stream->autoflush_counter, 0); spin_unlock_irqrestore(&stream->lock, flags); } @@ -944,6 +985,22 @@ static void kbasep_tlstream_autoflush_timer_callback(unsigned long data) size_t wb_size; size_t min_size = PACKET_HEADER_SIZE; + int af_cnt = atomic_read(&stream->autoflush_counter); + + /* Check if stream contains unflushed data. */ + if (0 > af_cnt) + continue; + + /* Check if stream should be flushed now. */ + if (af_cnt != atomic_cmpxchg( + &stream->autoflush_counter, + af_cnt, + af_cnt + 1)) + continue; + if (!af_cnt) + continue; + + /* Autoflush this stream.
*/ if (stream->numbered) min_size += PACKET_NUMBER_SIZE; @@ -954,16 +1011,11 @@ static void kbasep_tlstream_autoflush_timer_callback(unsigned long data) wb_size = atomic_read(&stream->buffer[wb_idx].size); if (wb_size > min_size) { - u64 timestamp = kbasep_tlstream_get_timestamp(); - - if (timestamp - stream->last_write_time - > AUTOFLUSH_TIMEOUT) { - wb_size = kbasep_tlstream_msgbuf_submit( - stream, wb_idx_raw, wb_size); - wb_idx = (wb_idx_raw + 1) % PACKET_COUNT; - atomic_set(&stream->buffer[wb_idx].size, - wb_size); - } + wb_size = kbasep_tlstream_msgbuf_submit( + stream, wb_idx_raw, wb_size); + wb_idx = (wb_idx_raw + 1) % PACKET_COUNT; + atomic_set(&stream->buffer[wb_idx].size, + wb_size); } spin_unlock_irqrestore(&stream->lock, flags); } @@ -1044,7 +1096,7 @@ static ssize_t kbasep_tlstream_read( while (copy_len < size) { enum tl_stream_type stype; - unsigned int rb_idx_raw; + unsigned int rb_idx_raw = 0; unsigned int rb_idx; size_t rb_size; @@ -1134,7 +1186,12 @@ static int kbasep_tlstream_release(struct inode *inode, struct file *filp) KBASE_DEBUG_ASSERT(filp); CSTD_UNUSED(inode); CSTD_UNUSED(filp); - atomic_set(&tlstream_busy, 0); + + /* Stop autoflush timer before releasing access to streams. */ + atomic_set(&autoflush_timer_active, 0); + del_timer_sync(&autoflush_timer); + + atomic_set(&kbase_tlstream_enabled, 0); return 0; } @@ -1223,7 +1280,6 @@ static void kbasep_tlstream_timeline_header( int kbase_tlstream_init(void) { enum tl_stream_type i; - int rcode; /* Prepare stream structures. */ for (i = 0; i < TL_STREAM_TYPE_COUNT; i++) { @@ -1241,14 +1297,10 @@ int kbase_tlstream_init(void) } /* Initialize autoflush timer. */ - atomic_set(&autoflush_timer_active, 1); + atomic_set(&autoflush_timer_active, 0); setup_timer(&autoflush_timer, kbasep_tlstream_autoflush_timer_callback, 0); - rcode = mod_timer( - &autoflush_timer, - jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL)); - CSTD_UNUSED(rcode); return 0; } @@ -1257,9 +1309,6 @@ void kbase_tlstream_term(void) { enum tl_stream_type i; - atomic_set(&autoflush_timer_active, 0); - del_timer_sync(&autoflush_timer); - for (i = 0; i < TL_STREAM_TYPE_COUNT; i++) { kbasep_timeline_stream_term(tl_stream[i]); kfree(tl_stream[i]); @@ -1268,14 +1317,16 @@ void kbase_tlstream_term(void) int kbase_tlstream_acquire(struct kbase_context *kctx, int *fd) { - if (0 == atomic_cmpxchg(&tlstream_busy, 0, 1)) { + if (0 == atomic_cmpxchg(&kbase_tlstream_enabled, 0, 1)) { + int rcode; + *fd = anon_inode_getfd( "[mali_tlstream]", &kbasep_tlstream_fops, kctx, O_RDONLY | O_CLOEXEC); if (0 > *fd) { - atomic_set(&tlstream_busy, 0); + atomic_set(&kbase_tlstream_enabled, 0); return *fd; } @@ -1294,6 +1345,14 @@ int kbase_tlstream_acquire(struct kbase_context *kctx, int *fd) TL_STREAM_TYPE_AUX_HEADER, tp_desc_aux, ARRAY_SIZE(tp_desc_aux)); + + /* Start autoflush timer. 
*/ + atomic_set(&autoflush_timer_active, 1); + rcode = mod_timer( + &autoflush_timer, + jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL)); + CSTD_UNUSED(rcode); + } else { *fd = -EBUSY; } @@ -1329,11 +1388,12 @@ void kbase_tlstream_stats(u32 *bytes_collected, u32 *bytes_generated) /*****************************************************************************/ -void kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr) +void __kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr, u32 tgid) { const u32 msg_id = KBASE_TL_NEW_CTX; const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(nr); + sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(nr) + + sizeof(tgid); unsigned long flags; char *buffer; size_t pos = 0; @@ -1349,12 +1409,15 @@ void kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr) buffer, pos, &context, sizeof(context)); pos = kbasep_tlstream_write_bytes( buffer, pos, &nr, sizeof(nr)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &tgid, sizeof(tgid)); + KBASE_DEBUG_ASSERT(msg_size == pos); kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags); } -void kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count) +void __kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count) { const u32 msg_id = KBASE_TL_NEW_GPU; const size_t msg_size = @@ -1382,7 +1445,7 @@ void kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags); } -void kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn) +void __kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn) { const u32 msg_id = KBASE_TL_NEW_LPU; const size_t msg_size = @@ -1410,7 +1473,7 @@ void kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags); } -void kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu) +void __kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu) { const u32 msg_id = KBASE_TL_LIFELINK_LPU_GPU; const size_t msg_size = @@ -1435,7 +1498,7 @@ void kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags); } -void kbase_tlstream_tl_summary_new_as(void *as, u32 nr) +void __kbase_tlstream_tl_summary_new_as(void *as, u32 nr) { const u32 msg_id = KBASE_TL_NEW_AS; const size_t msg_size = @@ -1460,7 +1523,7 @@ void kbase_tlstream_tl_summary_new_as(void *as, u32 nr) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags); } -void kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu) +void __kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu) { const u32 msg_id = KBASE_TL_LIFELINK_AS_GPU; const size_t msg_size = @@ -1487,11 +1550,12 @@ void kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu) /*****************************************************************************/ -void kbase_tlstream_tl_new_ctx(void *context, u32 nr) +void __kbase_tlstream_tl_new_ctx(void *context, u32 nr, u32 tgid) { const u32 msg_id = KBASE_TL_NEW_CTX; const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(nr); + sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(nr) + + sizeof(tgid); unsigned long flags; char *buffer; size_t pos = 0; @@ -1507,12 +1571,14 @@ void kbase_tlstream_tl_new_ctx(void *context, u32 nr) buffer, pos, &context, sizeof(context)); pos = kbasep_tlstream_write_bytes( buffer, pos, &nr, sizeof(nr)); + pos = kbasep_tlstream_write_bytes( + 
buffer, pos, &tgid, sizeof(tgid)); KBASE_DEBUG_ASSERT(msg_size == pos); kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void kbase_tlstream_tl_new_atom(void *atom, u32 nr) +void __kbase_tlstream_tl_new_atom(void *atom, u32 nr) { const u32 msg_id = KBASE_TL_NEW_ATOM; const size_t msg_size = @@ -1537,7 +1603,7 @@ void kbase_tlstream_tl_new_atom(void *atom, u32 nr) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void kbase_tlstream_tl_del_ctx(void *context) +void __kbase_tlstream_tl_del_ctx(void *context) { const u32 msg_id = KBASE_TL_DEL_CTX; const size_t msg_size = @@ -1560,7 +1626,7 @@ void kbase_tlstream_tl_del_ctx(void *context) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void kbase_tlstream_tl_del_atom(void *atom) +void __kbase_tlstream_tl_del_atom(void *atom) { const u32 msg_id = KBASE_TL_DEL_ATOM; const size_t msg_size = @@ -1583,7 +1649,7 @@ void kbase_tlstream_tl_del_atom(void *atom) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void kbase_tlstream_tl_ret_ctx_lpu(void *context, void *lpu) +void __kbase_tlstream_tl_ret_ctx_lpu(void *context, void *lpu) { const u32 msg_id = KBASE_TL_RET_CTX_LPU; const size_t msg_size = @@ -1608,7 +1674,7 @@ void kbase_tlstream_tl_ret_ctx_lpu(void *context, void *lpu) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context) +void __kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context) { const u32 msg_id = KBASE_TL_RET_ATOM_CTX; const size_t msg_size = @@ -1633,7 +1699,7 @@ void kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void kbase_tlstream_tl_ret_atom_lpu( +void __kbase_tlstream_tl_ret_atom_lpu( void *atom, void *lpu, const char *attrib_match_list) { const u32 msg_id = KBASE_TL_RET_ATOM_LPU; @@ -1664,7 +1730,7 @@ void kbase_tlstream_tl_ret_atom_lpu( kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void kbase_tlstream_tl_nret_ctx_lpu(void *context, void *lpu) +void __kbase_tlstream_tl_nret_ctx_lpu(void *context, void *lpu) { const u32 msg_id = KBASE_TL_NRET_CTX_LPU; const size_t msg_size = @@ -1689,7 +1755,7 @@ void kbase_tlstream_tl_nret_ctx_lpu(void *context, void *lpu) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context) +void __kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context) { const u32 msg_id = KBASE_TL_NRET_ATOM_CTX; const size_t msg_size = @@ -1714,7 +1780,7 @@ void kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void kbase_tlstream_tl_dep_atom_atom(void *atom1, void *atom2) +void __kbase_tlstream_tl_dep_atom_atom(void *atom1, void *atom2) { const u32 msg_id = KBASE_TL_DEP_ATOM_ATOM; const size_t msg_size = @@ -1739,7 +1805,57 @@ void kbase_tlstream_tl_dep_atom_atom(void *atom1, void *atom2) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu) +void __kbase_tlstream_tl_ndep_atom_atom(void *atom1, void *atom2) +{ + const u32 msg_id = KBASE_TL_NDEP_ATOM_ATOM; + const size_t msg_size = + sizeof(msg_id) + sizeof(u64) + sizeof(atom1) + sizeof(atom2); + unsigned long flags; + char *buffer; + size_t pos = 0; + + buffer = kbasep_tlstream_msgbuf_acquire( + TL_STREAM_TYPE_OBJ, + msg_size, &flags); + KBASE_DEBUG_ASSERT(buffer); + + pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = 
kbasep_tlstream_write_timestamp(buffer, pos); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &atom1, sizeof(atom1)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &atom2, sizeof(atom2)); + KBASE_DEBUG_ASSERT(msg_size == pos); + + kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); +} + +void __kbase_tlstream_tl_rdep_atom_atom(void *atom1, void *atom2) +{ + const u32 msg_id = KBASE_TL_RDEP_ATOM_ATOM; + const size_t msg_size = + sizeof(msg_id) + sizeof(u64) + sizeof(atom1) + sizeof(atom2); + unsigned long flags; + char *buffer; + size_t pos = 0; + + buffer = kbasep_tlstream_msgbuf_acquire( + TL_STREAM_TYPE_OBJ, + msg_size, &flags); + KBASE_DEBUG_ASSERT(buffer); + + pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_tlstream_write_timestamp(buffer, pos); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &atom1, sizeof(atom1)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &atom2, sizeof(atom2)); + KBASE_DEBUG_ASSERT(msg_size == pos); + + kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); +} + +void __kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu) { const u32 msg_id = KBASE_TL_NRET_ATOM_LPU; const size_t msg_size = @@ -1764,7 +1880,7 @@ void kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx) +void __kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx) { const u32 msg_id = KBASE_TL_RET_AS_CTX; const size_t msg_size = @@ -1789,7 +1905,7 @@ void kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx) +void __kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx) { const u32 msg_id = KBASE_TL_NRET_AS_CTX; const size_t msg_size = @@ -1814,7 +1930,7 @@ void kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void kbase_tlstream_tl_ret_atom_as(void *atom, void *as) +void __kbase_tlstream_tl_ret_atom_as(void *atom, void *as) { const u32 msg_id = KBASE_TL_RET_ATOM_AS; const size_t msg_size = @@ -1839,7 +1955,7 @@ void kbase_tlstream_tl_ret_atom_as(void *atom, void *as) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void kbase_tlstream_tl_nret_atom_as(void *atom, void *as) +void __kbase_tlstream_tl_nret_atom_as(void *atom, void *as) { const u32 msg_id = KBASE_TL_NRET_ATOM_AS; const size_t msg_size = @@ -1864,7 +1980,7 @@ void kbase_tlstream_tl_nret_atom_as(void *atom, void *as) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void kbase_tlstream_tl_attrib_atom_config( +void __kbase_tlstream_tl_attrib_atom_config( void *atom, u64 jd, u64 affinity, u32 config) { const u32 msg_id = KBASE_TL_ATTRIB_ATOM_CONFIG; @@ -1895,7 +2011,7 @@ void kbase_tlstream_tl_attrib_atom_config( kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void kbase_tlstream_tl_attrib_as_config( +void __kbase_tlstream_tl_attrib_as_config( void *as, u64 transtab, u64 memattr, u64 transcfg) { const u32 msg_id = KBASE_TL_ATTRIB_AS_CONFIG; @@ -1926,7 +2042,7 @@ void kbase_tlstream_tl_attrib_as_config( kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void kbase_tlstream_jd_gpu_soft_reset(void *gpu) +void __kbase_tlstream_jd_gpu_soft_reset(void *gpu) { const u32 msg_id = KBASE_JD_GPU_SOFT_RESET; const size_t msg_size = @@ -1951,7 +2067,7 @@ void kbase_tlstream_jd_gpu_soft_reset(void *gpu) 
/*****************************************************************************/ -void kbase_tlstream_aux_pm_state(u32 core_type, u64 state) +void __kbase_tlstream_aux_pm_state(u32 core_type, u64 state) { const u32 msg_id = KBASE_AUX_PM_STATE; const size_t msg_size = @@ -1976,7 +2092,28 @@ void kbase_tlstream_aux_pm_state(u32 core_type, u64 state) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); } -void kbase_tlstream_aux_job_softstop(u32 js_id) +void __kbase_tlstream_aux_issue_job_softstop(void *katom) +{ + const u32 msg_id = KBASE_AUX_ISSUE_JOB_SOFTSTOP; + const size_t msg_size = + sizeof(msg_id) + sizeof(u64) + sizeof(katom); + unsigned long flags; + char *buffer; + size_t pos = 0; + + buffer = kbasep_tlstream_msgbuf_acquire( + TL_STREAM_TYPE_AUX, msg_size, &flags); + KBASE_DEBUG_ASSERT(buffer); + + pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_tlstream_write_timestamp(buffer, pos); + pos = kbasep_tlstream_write_bytes(buffer, pos, &katom, sizeof(katom)); + KBASE_DEBUG_ASSERT(msg_size == pos); + + kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); +} + +void __kbase_tlstream_aux_job_softstop(u32 js_id) { const u32 msg_id = KBASE_AUX_JOB_SOFTSTOP; const size_t msg_size = @@ -1998,7 +2135,66 @@ void kbase_tlstream_aux_job_softstop(u32 js_id) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); } -void kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change) +/** + * __kbase_tlstream_aux_job_softstop_ex_record - record the trace point + * @katom: the atom that has been soft-stopped + * @job_type: the job type + */ +static void __kbase_tlstream_aux_job_softstop_ex_record( + void *katom, u32 job_type) +{ + const u32 msg_id = KBASE_AUX_JOB_SOFTSTOP_EX; + const size_t msg_size = + sizeof(msg_id) + sizeof(u64) + sizeof(katom) + sizeof(job_type); + unsigned long flags; + char *buffer; + size_t pos = 0; + + buffer = kbasep_tlstream_msgbuf_acquire( + TL_STREAM_TYPE_AUX, msg_size, &flags); + KBASE_DEBUG_ASSERT(buffer); + + pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_tlstream_write_timestamp(buffer, pos); + pos = kbasep_tlstream_write_bytes(buffer, pos, &katom, sizeof(katom)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &job_type, sizeof(job_type)); + KBASE_DEBUG_ASSERT(msg_size == pos); + + kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); +} + +void __kbase_tlstream_aux_job_softstop_ex(struct kbase_jd_atom *katom) +{ + struct kbase_context *kctx = katom->kctx; + u64 jd = katom->jc; + + while (jd != 0) { + struct job_descriptor_header *job; + struct kbase_vmap_struct map; + + job = kbase_vmap(kctx, jd, sizeof(*job), &map); + if (!job) { + dev_err(kctx->kbdev->dev, + "__kbase_tlstream_aux_job_softstop_ex: failed to map job descriptor 0x%llx for atom 0x%p\n", + jd, (void *)katom); + break; + } + if (job->exception_status != BASE_JD_EVENT_STOPPED) { + kbase_vunmap(kctx, &map); + break; + } + + __kbase_tlstream_aux_job_softstop_ex_record( + katom, job->job_type); + + jd = job->job_descriptor_size ? 
+ job->next_job._64 : job->next_job._32; + kbase_vunmap(kctx, &map); + } +} + +void __kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change) { const u32 msg_id = KBASE_AUX_PAGEFAULT; const size_t msg_size = @@ -2023,7 +2219,7 @@ void kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); } -void kbase_tlstream_aux_pagesalloc(u32 ctx_nr, u64 page_count) +void __kbase_tlstream_aux_pagesalloc(u32 ctx_nr, u64 page_count) { const u32 msg_id = KBASE_AUX_PAGESALLOC; const size_t msg_size = diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_tlstream.h b/drivers/gpu/arm/midgard_for_linux/mali_kbase_tlstream.h index eccd469a86e8..22a0d96f9a72 100644 --- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_tlstream.h +++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_tlstream.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -102,18 +102,67 @@ void kbase_tlstream_stats(u32 *bytes_collected, u32 *bytes_generated); /*****************************************************************************/ +void __kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr, u32 tgid); +void __kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count); +void __kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn); +void __kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu); +void __kbase_tlstream_tl_summary_new_as(void *as, u32 nr); +void __kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu); +void __kbase_tlstream_tl_new_ctx(void *context, u32 nr, u32 tgid); +void __kbase_tlstream_tl_new_atom(void *atom, u32 nr); +void __kbase_tlstream_tl_del_ctx(void *context); +void __kbase_tlstream_tl_del_atom(void *atom); +void __kbase_tlstream_tl_ret_ctx_lpu(void *context, void *lpu); +void __kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context); +void __kbase_tlstream_tl_ret_atom_lpu( + void *atom, void *lpu, const char *attrib_match_list); +void __kbase_tlstream_tl_nret_ctx_lpu(void *context, void *lpu); +void __kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context); +void __kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu); +void __kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx); +void __kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx); +void __kbase_tlstream_tl_ret_atom_as(void *atom, void *as); +void __kbase_tlstream_tl_nret_atom_as(void *atom, void *as); +void __kbase_tlstream_tl_dep_atom_atom(void *atom1, void *atom2); +void __kbase_tlstream_tl_ndep_atom_atom(void *atom1, void *atom2); +void __kbase_tlstream_tl_rdep_atom_atom(void *atom1, void *atom2); +void __kbase_tlstream_tl_attrib_atom_config( + void *atom, u64 jd, u64 affinity, u32 config); +void __kbase_tlstream_tl_attrib_as_config( + void *as, u64 transtab, u64 memattr, u64 transcfg); +void __kbase_tlstream_jd_gpu_soft_reset(void *gpu); +void __kbase_tlstream_aux_pm_state(u32 core_type, u64 state); +void __kbase_tlstream_aux_issue_job_softstop(void *katom); +void __kbase_tlstream_aux_job_softstop(u32 js_id); +void __kbase_tlstream_aux_job_softstop_ex(struct kbase_jd_atom *katom); +void __kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change); +void __kbase_tlstream_aux_pagesalloc(u32 ctx_nr, u64 page_count); + +extern atomic_t kbase_tlstream_enabled; + +#define 
__TRACE_IF_ENABLED(trace_name, ...) \ + do { \ + int enabled = atomic_read(&kbase_tlstream_enabled); \ + if (enabled) \ + __kbase_tlstream_##trace_name(__VA_ARGS__); \ + } while (0) + +/*****************************************************************************/ + /** * kbase_tlstream_tl_summary_new_ctx - create context object in timeline * summary * @context: name of the context object * @nr: context number + * @tgid: thread Group Id * * Function emits a timeline message informing about context creation. Context * is created with context number (its attribute), that can be used to link * kbase context with userspace context. * This message is directed to timeline summary stream. */ -void kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr); +#define kbase_tlstream_tl_summary_new_ctx(context, nr, tgid) \ + __TRACE_IF_ENABLED(tl_summary_new_ctx, context, nr, tgid) /** * kbase_tlstream_tl_summary_new_gpu - create GPU object in timeline summary @@ -125,7 +174,8 @@ void kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr); * created with two attributes: id and core count. * This message is directed to timeline summary stream. */ -void kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count); +#define kbase_tlstream_tl_summary_new_gpu(gpu, id, core_count) \ + __TRACE_IF_ENABLED(tl_summary_new_gpu, gpu, id, core_count) /** * kbase_tlstream_tl_summary_new_lpu - create LPU object in timeline summary @@ -138,7 +188,8 @@ void kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count); * and function bearing information about this LPU abilities. * This message is directed to timeline summary stream. */ -void kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn); +#define kbase_tlstream_tl_summary_new_lpu(lpu, nr, fn) \ + __TRACE_IF_ENABLED(tl_summary_new_lpu, lpu, nr, fn) /** * kbase_tlstream_tl_summary_lifelink_lpu_gpu - lifelink LPU object to GPU @@ -149,7 +200,8 @@ void kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn); * along with GPU object. * This message is directed to timeline summary stream. */ -void kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu); +#define kbase_tlstream_tl_summary_lifelink_lpu_gpu(lpu, gpu) \ + __TRACE_IF_ENABLED(tl_summary_lifelink_lpu_gpu, lpu, gpu) /** * kbase_tlstream_tl_summary_new_as - create address space object in timeline summary @@ -161,7 +213,8 @@ void kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu); * address space. * This message is directed to timeline summary stream. */ -void kbase_tlstream_tl_summary_new_as(void *as, u32 nr); +#define kbase_tlstream_tl_summary_new_as(as, nr) \ + __TRACE_IF_ENABLED(tl_summary_new_as, as, nr) /** * kbase_tlstream_tl_summary_lifelink_as_gpu - lifelink address space object to GPU @@ -172,18 +225,21 @@ void kbase_tlstream_tl_summary_new_as(void *as, u32 nr); * shall be deleted along with GPU object. * This message is directed to timeline summary stream. */ -void kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu); +#define kbase_tlstream_tl_summary_lifelink_as_gpu(as, gpu) \ + __TRACE_IF_ENABLED(tl_summary_lifelink_as_gpu, as, gpu) /** * kbase_tlstream_tl_new_ctx - create context object in timeline * @context: name of the context object * @nr: context number + * @tgid: thread Group Id * * Function emits a timeline message informing about context creation. Context * is created with context number (its attribute), that can be used to link * kbase context with userspace context. 
*/ -void kbase_tlstream_tl_new_ctx(void *context, u32 nr); +#define kbase_tlstream_tl_new_ctx(context, nr, tgid) \ + __TRACE_IF_ENABLED(tl_new_ctx, context, nr, tgid) /** * kbase_tlstream_tl_new_atom - create atom object in timeline @@ -194,7 +250,8 @@ void kbase_tlstream_tl_new_ctx(void *context, u32 nr); * created with atom number (its attribute) that links it with actual work * bucket id understood by hardware. */ -void kbase_tlstream_tl_new_atom(void *atom, u32 nr); +#define kbase_tlstream_tl_new_atom(atom, nr) \ + __TRACE_IF_ENABLED(tl_new_atom, atom, nr) /** * kbase_tlstream_tl_del_ctx - destroy context object in timeline @@ -203,7 +260,8 @@ void kbase_tlstream_tl_new_atom(void *atom, u32 nr); * Function emits a timeline message informing that context object ceased to * exist. */ -void kbase_tlstream_tl_del_ctx(void *context); +#define kbase_tlstream_tl_del_ctx(context) \ + __TRACE_IF_ENABLED(tl_del_ctx, context) /** * kbase_tlstream_tl_del_atom - destroy atom object in timeline @@ -212,7 +270,8 @@ void kbase_tlstream_tl_del_ctx(void *context); * Function emits a timeline message informing that atom object ceased to * exist. */ -void kbase_tlstream_tl_del_atom(void *atom); +#define kbase_tlstream_tl_del_atom(atom) \ + __TRACE_IF_ENABLED(tl_del_atom, atom) /** * kbase_tlstream_tl_ret_ctx_lpu - retain context by LPU @@ -222,7 +281,8 @@ void kbase_tlstream_tl_del_atom(void *atom); * Function emits a timeline message informing that context is being held * by LPU and must not be deleted unless it is released. */ -void kbase_tlstream_tl_ret_ctx_lpu(void *context, void *lpu); +#define kbase_tlstream_tl_ret_ctx_lpu(context, lpu) \ + __TRACE_IF_ENABLED(tl_ret_ctx_lpu, context, lpu) /** * kbase_tlstream_tl_ret_atom_ctx - retain atom by context @@ -232,7 +292,8 @@ void kbase_tlstream_tl_ret_ctx_lpu(void *context, void *lpu); * Function emits a timeline message informing that atom object is being held * by context and must not be deleted unless it is released. */ -void kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context); +#define kbase_tlstream_tl_ret_atom_ctx(atom, context) \ + __TRACE_IF_ENABLED(tl_ret_atom_ctx, atom, context) /** * kbase_tlstream_tl_ret_atom_lpu - retain atom by LPU @@ -243,8 +304,8 @@ void kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context); * Function emits a timeline message informing that atom object is being held * by LPU and must not be deleted unless it is released. */ -void kbase_tlstream_tl_ret_atom_lpu( - void *atom, void *lpu, const char *attrib_match_list); +#define kbase_tlstream_tl_ret_atom_lpu(atom, lpu, attrib_match_list) \ + __TRACE_IF_ENABLED(tl_ret_atom_lpu, atom, lpu, attrib_match_list) /** * kbase_tlstream_tl_nret_ctx_lpu - release context by LPU @@ -254,7 +315,8 @@ void kbase_tlstream_tl_ret_atom_lpu( * Function emits a timeline message informing that context is being released * by LPU object. */ -void kbase_tlstream_tl_nret_ctx_lpu(void *context, void *lpu); +#define kbase_tlstream_tl_nret_ctx_lpu(context, lpu) \ + __TRACE_IF_ENABLED(tl_nret_ctx_lpu, context, lpu) /** * kbase_tlstream_tl_nret_atom_ctx - release atom by context @@ -264,7 +326,8 @@ void kbase_tlstream_tl_nret_ctx_lpu(void *context, void *lpu); * Function emits a timeline message informing that atom object is being * released by context. 
*/ -void kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context); +#define kbase_tlstream_tl_nret_atom_ctx(atom, context) \ + __TRACE_IF_ENABLED(tl_nret_atom_ctx, atom, context) /** * kbase_tlstream_tl_nret_atom_lpu - release atom by LPU @@ -274,7 +337,8 @@ void kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context); * Function emits a timeline message informing that atom object is being * released by LPU. */ -void kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu); +#define kbase_tlstream_tl_nret_atom_lpu(atom, lpu) \ + __TRACE_IF_ENABLED(tl_nret_atom_lpu, atom, lpu) /** * kbase_tlstream_tl_ret_as_ctx - lifelink address space object to context @@ -284,7 +348,8 @@ void kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu); * Function emits a timeline message informing that address space object * is being held by the context object. */ -void kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx); +#define kbase_tlstream_tl_ret_as_ctx(as, ctx) \ + __TRACE_IF_ENABLED(tl_ret_as_ctx, as, ctx) /** * kbase_tlstream_tl_nret_as_ctx - release address space by context @@ -294,7 +359,8 @@ void kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx); * Function emits a timeline message informing that address space object * is being released by atom. */ -void kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx); +#define kbase_tlstream_tl_nret_as_ctx(as, ctx) \ + __TRACE_IF_ENABLED(tl_nret_as_ctx, as, ctx) /** * kbase_tlstream_tl_ret_atom_as - retain atom by address space @@ -304,7 +370,8 @@ void kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx); * Function emits a timeline message informing that atom object is being held * by address space and must not be deleted unless it is released. */ -void kbase_tlstream_tl_ret_atom_as(void *atom, void *as); +#define kbase_tlstream_tl_ret_atom_as(atom, as) \ + __TRACE_IF_ENABLED(tl_ret_atom_as, atom, as) /** * kbase_tlstream_tl_nret_atom_as - release atom by address space @@ -314,7 +381,8 @@ void kbase_tlstream_tl_ret_atom_as(void *atom, void *as); * Function emits a timeline message informing that atom object is being * released by address space. */ -void kbase_tlstream_tl_nret_atom_as(void *atom, void *as); +#define kbase_tlstream_tl_nret_atom_as(atom, as) \ + __TRACE_IF_ENABLED(tl_nret_atom_as, atom, as) /** * kbase_tlstream_tl_dep_atom_atom - parent atom depends on child atom @@ -324,7 +392,30 @@ void kbase_tlstream_tl_nret_atom_as(void *atom, void *as); * Function emits a timeline message informing that parent atom waits for * child atom object to be completed before start its execution. */ -void kbase_tlstream_tl_dep_atom_atom(void *atom1, void *atom2); +#define kbase_tlstream_tl_dep_atom_atom(atom1, atom2) \ + __TRACE_IF_ENABLED(tl_dep_atom_atom, atom1, atom2) + +/** + * kbase_tlstream_tl_ndep_atom_atom - dependency between atoms resolved + * @atom1: name of the child atom object + * @atom2: name of the parent atom object that depended on child atom + * + * Function emits a timeline message informing that parent atom execution + * dependency on child atom has been resolved. + */ +#define kbase_tlstream_tl_ndep_atom_atom(atom1, atom2) \ + __TRACE_IF_ENABLED(tl_ndep_atom_atom, atom1, atom2) + +/** + * kbase_tlstream_tl_rdep_atom_atom - information about already resolved dependency between atoms + * @atom1: name of the child atom object + * @atom2: name of the parent atom object that depended on child atom + * + * Function emits a timeline message informing that parent atom execution + * dependency on child atom has been resolved. 
+ */ +#define kbase_tlstream_tl_rdep_atom_atom(atom1, atom2) \ + __TRACE_IF_ENABLED(tl_rdep_atom_atom, atom1, atom2) /** * kbase_tlstream_tl_attrib_atom_config - atom job slot attributes @@ -335,8 +426,8 @@ void kbase_tlstream_tl_dep_atom_atom(void *atom1, void *atom2); * * Function emits a timeline message containing atom attributes. */ -void kbase_tlstream_tl_attrib_atom_config( - void *atom, u64 jd, u64 affinity, u32 config); +#define kbase_tlstream_tl_attrib_atom_config(atom, jd, affinity, config) \ + __TRACE_IF_ENABLED(tl_attrib_atom_config, atom, jd, affinity, config) /** * kbase_tlstream_tl_attrib_as_config - address space attributes @@ -347,8 +438,8 @@ void kbase_tlstream_tl_attrib_atom_config( * * Function emits a timeline message containing address space attributes. */ -void kbase_tlstream_tl_attrib_as_config( - void *as, u64 transtab, u64 memattr, u64 transcfg); +#define kbase_tlstream_tl_attrib_as_config(as, transtab, memattr, transcfg) \ + __TRACE_IF_ENABLED(tl_attrib_as_config, as, transtab, memattr, transcfg) /** * kbase_tlstream_jd_gpu_soft_reset - The GPU is being soft reset @@ -357,20 +448,44 @@ void kbase_tlstream_tl_attrib_as_config( * This imperative tracepoint is specific to job dumping. * Function emits a timeline message indicating GPU soft reset. */ -void kbase_tlstream_jd_gpu_soft_reset(void *gpu); +#define kbase_tlstream_jd_gpu_soft_reset(gpu) \ + __TRACE_IF_ENABLED(jd_gpu_soft_reset, gpu) /** * kbase_tlstream_aux_pm_state - timeline message: power management state * @core_type: core type (shader, tiler, l2 cache, l3 cache) * @state: 64bits bitmask reporting power state of the cores (1-ON, 0-OFF) */ -void kbase_tlstream_aux_pm_state(u32 core_type, u64 state); +#define kbase_tlstream_aux_pm_state(core_type, state) \ + __TRACE_IF_ENABLED(aux_pm_state, core_type, state) + +/** + * kbase_tlstream_aux_issue_job_softstop - a soft-stop command is being issued + * @katom: the atom that is being soft-stopped + */ +#define kbase_tlstream_aux_issue_job_softstop(katom) \ + __TRACE_IF_ENABLED(aux_issue_job_softstop, katom) /** * kbase_tlstream_aux_job_softstop - soft job stop occurred * @js_id: job slot id */ -void kbase_tlstream_aux_job_softstop(u32 js_id); +#define kbase_tlstream_aux_job_softstop(js_id) \ + __TRACE_IF_ENABLED(aux_job_softstop, js_id) + +/** + * kbase_tlstream_aux_job_softstop_ex - extra info about soft-stopped atom + * @katom: the atom that has been soft-stopped + * + * This trace point adds more details about the soft-stopped atom. These details + * can't be safety collected inside the interrupt handler so we're doing it + * inside a worker. + * + * Note: this is not the same information that is recorded in the trace point, + * refer to __kbase_tlstream_aux_job_softstop_ex() for more details. 
+ */ +#define kbase_tlstream_aux_job_softstop_ex(katom) \ + __TRACE_IF_ENABLED(aux_job_softstop_ex, katom) /** * kbase_tlstream_aux_pagefault - timeline message: MMU page fault event @@ -378,7 +493,8 @@ void kbase_tlstream_aux_job_softstop(u32 js_id); * @ctx_nr: kernel context number * @page_count_change: number of pages to be added */ -void kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change); +#define kbase_tlstream_aux_pagefault(ctx_nr, page_count_change) \ + __TRACE_IF_ENABLED(aux_pagefault, ctx_nr, page_count_change) /** * kbase_tlstream_aux_pagesalloc - timeline message: total number of allocated @@ -386,7 +502,8 @@ void kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change); * @ctx_nr: kernel context number * @page_count: number of pages used by the context */ -void kbase_tlstream_aux_pagesalloc(u32 ctx_nr, u64 page_count); +#define kbase_tlstream_aux_pagesalloc(ctx_nr, page_count) \ + __TRACE_IF_ENABLED(aux_pagesalloc, ctx_nr, page_count) #endif /* _KBASE_TLSTREAM_H */ diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_trace_timeline.c b/drivers/gpu/arm/midgard_for_linux/mali_kbase_trace_timeline.c index aac9858875ad..a606ae810656 100644 --- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_trace_timeline.c +++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_trace_timeline.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -102,7 +102,7 @@ static const struct file_operations kbasep_trace_timeline_debugfs_fops = { .open = kbasep_trace_timeline_debugfs_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = seq_release, }; void kbasep_trace_timeline_debugfs_init(struct kbase_device *kbdev) diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_trace_timeline.h b/drivers/gpu/arm/midgard_for_linux/mali_kbase_trace_timeline.h index d92caf054804..22a36494e72e 100644 --- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_trace_timeline.h +++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_trace_timeline.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -213,7 +213,6 @@ void kbasep_trace_timeline_debugfs_init(struct kbase_device *kbdev); js, _producerof_atom_number_completed); \ } while (0) - /** Trace beginning/end of a call to kbase_pm_check_transitions_nolock from a * certin caller */ #define KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code) \ @@ -319,7 +318,6 @@ void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev); #define KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, count) CSTD_NOP() - static inline void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom, int js) { diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_uku.h b/drivers/gpu/arm/midgard_for_linux/mali_kbase_uku.h index 39514685b752..e880d9663d0e 100644 --- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_uku.h +++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_uku.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2008-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2008-2016 ARM Limited. 
All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -45,9 +45,20 @@ * 10.1: * - Do mmap in kernel for SAME_VA memory allocations rather then * calling back into the kernel as a 2nd stage of the allocation request. + * + * 10.2: + * - Add KBASE_FUNC_MEM_JIT_INIT which allows clients to request a custom VA + * region for use with JIT (ignored on 32-bit platforms) + * + * 10.3: + * - base_jd_core_req typedef-ed to u32 (instead of to u16) + * - two flags added: BASE_JD_REQ_SKIP_CACHE_STAT / _END + * + * 10.4: + * - Removed KBASE_FUNC_EXT_BUFFER_LOCK used only in internal tests */ #define BASE_UK_VERSION_MAJOR 10 -#define BASE_UK_VERSION_MINOR 1 +#define BASE_UK_VERSION_MINOR 4 struct kbase_uk_mem_alloc { union uk_header header; @@ -292,16 +303,6 @@ struct kbase_uk_model_control_params { }; #endif /* SUPPORT_MALI_NO_MALI */ -#define KBASE_MAXIMUM_EXT_RESOURCES 255 - -struct kbase_uk_ext_buff_kds_data { - union uk_header header; - union kbase_pointer external_resource; - union kbase_pointer file_descriptor; - u32 num_res; /* limited to KBASE_MAXIMUM_EXT_RESOURCES */ - u32 padding; -}; - #ifdef BASE_LEGACY_UK8_SUPPORT struct kbase_uk_keep_gpu_powered { union uk_header header; @@ -327,8 +328,8 @@ struct kbase_uk_context_id { int id; }; -#if (defined(MALI_KTLSTREAM_ENABLED) && MALI_KTLSTREAM_ENABLED) || \ - defined(CONFIG_MALI_MIPE_ENABLED) +#if (defined(MALI_MIPE_ENABLED) && MALI_MIPE_ENABLED) || \ + !defined(MALI_MIPE_ENABLED) /** * struct kbase_uk_tlstream_acquire - User/Kernel space data exchange structure * @header: UK structure header @@ -396,7 +397,7 @@ struct kbase_uk_tlstream_stats { u32 bytes_generated; }; #endif /* MALI_UNIT_TEST */ -#endif /* MALI_KTLSTREAM_ENABLED */ +#endif /* MALI_MIPE_ENABLED */ /** * struct struct kbase_uk_prfcnt_value for the KBASE_FUNC_SET_PRFCNT_VALUES ioctl @@ -411,6 +412,38 @@ struct kbase_uk_prfcnt_values { u32 size; }; +/** + * struct kbase_uk_soft_event_update - User/Kernel space data exchange structure + * @header: UK structure header + * @evt: the GPU address containing the event + * @new_status: the new event status, must be either BASE_JD_SOFT_EVENT_SET or + * BASE_JD_SOFT_EVENT_RESET + * @flags: reserved for future uses, must be set to 0 + * + * This structure is used to update the status of a software event. If the + * event's status is set to BASE_JD_SOFT_EVENT_SET, any job currently waiting + * on this event will complete. + */ +struct kbase_uk_soft_event_update { + union uk_header header; + /* IN */ + u64 evt; + u32 new_status; + u32 flags; +}; + +/** + * struct kbase_uk_mem_jit_init - User/Kernel space data exchange structure + * @header: UK structure header + * @va_pages: Number of virtual pages required for JIT + * + * This structure is used when requesting initialization of JIT. 
+ */ +struct kbase_uk_mem_jit_init { + union uk_header header; + /* IN */ + u64 va_pages; +}; enum kbase_uk_function_id { KBASE_FUNC_MEM_ALLOC = (UK_FUNC_ID + 0), @@ -438,7 +471,6 @@ enum kbase_uk_function_id { KBASE_FUNC_FIND_CPU_OFFSET = (UK_FUNC_ID + 15), KBASE_FUNC_GET_VERSION = (UK_FUNC_ID + 16), - KBASE_FUNC_EXT_BUFFER_LOCK = (UK_FUNC_ID + 17), KBASE_FUNC_SET_FLAGS = (UK_FUNC_ID + 18), KBASE_FUNC_SET_TEST_DATA = (UK_FUNC_ID + 19), @@ -463,15 +495,15 @@ enum kbase_uk_function_id { KBASE_FUNC_GET_CONTEXT_ID = (UK_FUNC_ID + 31), -#if (defined(MALI_KTLSTREAM_ENABLED) && MALI_KTLSTREAM_ENABLED) || \ - defined(CONFIG_MALI_MIPE_ENABLED) +#if (defined(MALI_MIPE_ENABLED) && MALI_MIPE_ENABLED) || \ + !defined(MALI_MIPE_ENABLED) KBASE_FUNC_TLSTREAM_ACQUIRE = (UK_FUNC_ID + 32), #if MALI_UNIT_TEST KBASE_FUNC_TLSTREAM_TEST = (UK_FUNC_ID + 33), KBASE_FUNC_TLSTREAM_STATS = (UK_FUNC_ID + 34), #endif /* MALI_UNIT_TEST */ KBASE_FUNC_TLSTREAM_FLUSH = (UK_FUNC_ID + 35), -#endif /* MALI_KTLSTREAM_ENABLED */ +#endif /* MALI_MIPE_ENABLED */ KBASE_FUNC_HWCNT_READER_SETUP = (UK_FUNC_ID + 36), @@ -479,6 +511,10 @@ enum kbase_uk_function_id { KBASE_FUNC_SET_PRFCNT_VALUES = (UK_FUNC_ID + 37), #endif + KBASE_FUNC_SOFT_EVENT_UPDATE = (UK_FUNC_ID + 38), + + KBASE_FUNC_MEM_JIT_INIT = (UK_FUNC_ID + 39), + KBASE_FUNC_MAX }; diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_vinstr.c b/drivers/gpu/arm/midgard_for_linux/mali_kbase_vinstr.c index d3d27e2958d7..bd6095f77480 100644 --- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_vinstr.c +++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_vinstr.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -28,8 +28,10 @@ #include #include +#include #include #include +#include /*****************************************************************************/ @@ -61,6 +63,14 @@ enum { JM_HWCNT_BM }; +enum vinstr_state { + VINSTR_IDLE, + VINSTR_DUMPING, + VINSTR_SUSPENDING, + VINSTR_SUSPENDED, + VINSTR_RESUMING +}; + /** * struct kbase_vinstr_context - vinstr context per device * @lock: protects the entire vinstr context @@ -74,7 +84,12 @@ enum { * with hardware * @reprogram: when true, reprogram hwcnt block with the new set of * counters - * @suspended: when true, the context has been suspended + * @state: vinstr state + * @state_lock: protects information about vinstr state + * @suspend_waitq: notification queue to trigger state re-validation + * @suspend_cnt: reference counter of vinstr's suspend state + * @suspend_work: worker to execute on entering suspended state + * @resume_work: worker to execute on leaving suspended state * @nclients: number of attached clients, pending or otherwise * @waiting_clients: head of list of clients being periodically sampled * @idle_clients: head of list of clients being idle @@ -94,7 +109,13 @@ struct kbase_vinstr_context { size_t dump_size; u32 bitmap[4]; bool reprogram; - bool suspended; + + enum vinstr_state state; + struct spinlock state_lock; + wait_queue_head_t suspend_waitq; + unsigned int suspend_cnt; + struct work_struct suspend_work; + struct work_struct resume_work; u32 nclients; struct list_head waiting_clients; @@ -189,7 +210,10 @@ static const struct file_operations vinstr_client_fops = { static int enable_hwcnt(struct kbase_vinstr_context *vinstr_ctx) { + struct kbase_context 
*kctx = vinstr_ctx->kctx; + struct kbase_device *kbdev = kctx->kbdev; struct kbase_uk_hwcnt_setup setup; + int err; setup.dump_buffer = vinstr_ctx->gpu_va; setup.jm_bm = vinstr_ctx->bitmap[JM_HWCNT_BM]; @@ -197,12 +221,46 @@ static int enable_hwcnt(struct kbase_vinstr_context *vinstr_ctx) setup.shader_bm = vinstr_ctx->bitmap[SHADER_HWCNT_BM]; setup.mmu_l2_bm = vinstr_ctx->bitmap[MMU_L2_HWCNT_BM]; - return kbase_instr_hwcnt_enable(vinstr_ctx->kctx, &setup); + /* Mark the context as active so the GPU is kept turned on */ + /* A suspend won't happen here, because we're in a syscall from a + * userspace thread. */ + kbase_pm_context_active(kbdev); + + /* Schedule the context in */ + kbasep_js_schedule_privileged_ctx(kbdev, kctx); + err = kbase_instr_hwcnt_enable_internal(kbdev, kctx, &setup); + if (err) { + /* Release the context. This had its own Power Manager Active + * reference */ + kbasep_js_release_privileged_ctx(kbdev, kctx); + + /* Also release our Power Manager Active reference */ + kbase_pm_context_idle(kbdev); + } + + return err; } static void disable_hwcnt(struct kbase_vinstr_context *vinstr_ctx) { - kbase_instr_hwcnt_disable(vinstr_ctx->kctx); + struct kbase_context *kctx = vinstr_ctx->kctx; + struct kbase_device *kbdev = kctx->kbdev; + int err; + + err = kbase_instr_hwcnt_disable_internal(kctx); + if (err) { + dev_warn(kbdev->dev, "Failed to disable HW counters (ctx:%p)", + kctx); + return; + } + + /* Release the context. This had its own Power Manager Active reference. */ + kbasep_js_release_privileged_ctx(kbdev, kctx); + + /* Also release our Power Manager Active reference. */ + kbase_pm_context_idle(kbdev); + + dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p", kctx); } static int reprogram_hwcnt(struct kbase_vinstr_context *vinstr_ctx) @@ -309,6 +367,10 @@ static void kbasep_vinstr_unmap_kernel_dump_buffer( */ static int kbasep_vinstr_create_kctx(struct kbase_vinstr_context *vinstr_ctx) { + struct kbase_device *kbdev = vinstr_ctx->kbdev; + struct kbasep_kctx_list_element *element; + unsigned long flags; + bool enable_backend = false; int err; vinstr_ctx->kctx = kbase_create_context(vinstr_ctx->kbdev, true); @@ -324,10 +386,48 @@ static int kbasep_vinstr_create_kctx(struct kbase_vinstr_context *vinstr_ctx) return err; } - err = enable_hwcnt(vinstr_ctx); + /* Add kernel context to list of contexts associated with device. */ + element = kzalloc(sizeof(*element), GFP_KERNEL); + if (element) { + element->kctx = vinstr_ctx->kctx; + mutex_lock(&kbdev->kctx_list_lock); + list_add(&element->link, &kbdev->kctx_list); + + /* Inform timeline client about new context. + * Do this while holding the lock to avoid tracepoint + * being created in both body and summary stream. */ + kbase_tlstream_tl_new_ctx( + vinstr_ctx->kctx, + (u32)(vinstr_ctx->kctx->id), + (u32)(vinstr_ctx->kctx->tgid)); + + mutex_unlock(&kbdev->kctx_list_lock); + } else { + /* Don't treat this as a fail - just warn about it. */ + dev_warn(kbdev->dev, + "couldn't add kctx to kctx_list\n"); + } + + /* Don't enable hardware counters if vinstr is suspended. + * Note that vinstr resume code is run under vinstr context lock, + * lower layer will be enabled as needed on resume. 
*/ + spin_lock_irqsave(&vinstr_ctx->state_lock, flags); + if (VINSTR_IDLE == vinstr_ctx->state) + enable_backend = true; + spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); + if (enable_backend) + err = enable_hwcnt(vinstr_ctx); + if (err) { kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx); kbase_destroy_context(vinstr_ctx->kctx); + if (element) { + mutex_lock(&kbdev->kctx_list_lock); + list_del(&element->link); + kfree(element); + mutex_unlock(&kbdev->kctx_list_lock); + } + kbase_tlstream_tl_del_ctx(vinstr_ctx->kctx); vinstr_ctx->kctx = NULL; return err; } @@ -340,6 +440,13 @@ static int kbasep_vinstr_create_kctx(struct kbase_vinstr_context *vinstr_ctx) disable_hwcnt(vinstr_ctx); kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx); kbase_destroy_context(vinstr_ctx->kctx); + if (element) { + mutex_lock(&kbdev->kctx_list_lock); + list_del(&element->link); + kfree(element); + mutex_unlock(&kbdev->kctx_list_lock); + } + kbase_tlstream_tl_del_ctx(vinstr_ctx->kctx); vinstr_ctx->kctx = NULL; return -EFAULT; } @@ -353,11 +460,34 @@ static int kbasep_vinstr_create_kctx(struct kbase_vinstr_context *vinstr_ctx) */ static void kbasep_vinstr_destroy_kctx(struct kbase_vinstr_context *vinstr_ctx) { + struct kbase_device *kbdev = vinstr_ctx->kbdev; + struct kbasep_kctx_list_element *element; + struct kbasep_kctx_list_element *tmp; + bool found = false; + /* Release hw counters dumping resources. */ vinstr_ctx->thread = NULL; disable_hwcnt(vinstr_ctx); kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx); kbase_destroy_context(vinstr_ctx->kctx); + + /* Remove kernel context from the device's contexts list. */ + mutex_lock(&kbdev->kctx_list_lock); + list_for_each_entry_safe(element, tmp, &kbdev->kctx_list, link) { + if (element->kctx == vinstr_ctx->kctx) { + list_del(&element->link); + kfree(element); + found = true; + } + } + mutex_unlock(&kbdev->kctx_list_lock); + + if (!found) + dev_warn(kbdev->dev, "kctx not in kctx_list\n"); + + /* Inform timeline client about context destruction. */ + kbase_tlstream_tl_del_ctx(vinstr_ctx->kctx); + vinstr_ctx->kctx = NULL; } @@ -379,9 +509,10 @@ static struct kbase_vinstr_client *kbasep_vinstr_attach_client( struct kbase_vinstr_client *cli; KBASE_DEBUG_ASSERT(vinstr_ctx); - KBASE_DEBUG_ASSERT(buffer_count >= 0); - KBASE_DEBUG_ASSERT(buffer_count <= MAX_BUFFER_COUNT); - KBASE_DEBUG_ASSERT(!(buffer_count & (buffer_count - 1))); + + if (buffer_count > MAX_BUFFER_COUNT + || (buffer_count & (buffer_count - 1))) + return NULL; cli = kzalloc(sizeof(*cli), GFP_KERNEL); if (!cli) @@ -435,7 +566,7 @@ static struct kbase_vinstr_client *kbasep_vinstr_attach_client( /* Allocate required number of dumping buffers. */ cli->dump_buffers = (char *)__get_free_pages( - GFP_KERNEL, + GFP_KERNEL | __GFP_ZERO, get_order(cli->dump_size * cli->buffer_count)); if (!cli->dump_buffers) goto error; @@ -802,6 +933,7 @@ static void kbasep_vinstr_add_dump_request( static int kbasep_vinstr_collect_and_accumulate( struct kbase_vinstr_context *vinstr_ctx, u64 *timestamp) { + unsigned long flags; int rcode; #ifdef CONFIG_MALI_NO_MALI @@ -809,6 +941,15 @@ static int kbasep_vinstr_collect_and_accumulate( gpu_model_set_dummy_prfcnt_base_cpu(vinstr_ctx->cpu_va); #endif + spin_lock_irqsave(&vinstr_ctx->state_lock, flags); + if (VINSTR_IDLE != vinstr_ctx->state) { + spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); + return -EAGAIN; + } else { + vinstr_ctx->state = VINSTR_DUMPING; + } + spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); + /* Request HW counters dump. 
* Disable preemption to make dump timestamp more accurate. */ preempt_disable(); @@ -820,6 +961,21 @@ static int kbasep_vinstr_collect_and_accumulate( rcode = kbase_instr_hwcnt_wait_for_dump(vinstr_ctx->kctx); WARN_ON(rcode); + spin_lock_irqsave(&vinstr_ctx->state_lock, flags); + switch (vinstr_ctx->state) + { + case VINSTR_SUSPENDING: + schedule_work(&vinstr_ctx->suspend_work); + break; + case VINSTR_DUMPING: + vinstr_ctx->state = VINSTR_IDLE; + wake_up_all(&vinstr_ctx->suspend_waitq); + break; + default: + break; + } + spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); + /* Accumulate values of collected counters. */ if (!rcode) accum_clients(vinstr_ctx); @@ -907,6 +1063,20 @@ static int kbasep_vinstr_fill_dump_buffer_kernel( static void kbasep_vinstr_reprogram( struct kbase_vinstr_context *vinstr_ctx) { + unsigned long flags; + bool suspended = false; + + /* Don't enable hardware counters if vinstr is suspended. */ + spin_lock_irqsave(&vinstr_ctx->state_lock, flags); + if (VINSTR_IDLE != vinstr_ctx->state) + suspended = true; + spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); + if (suspended) + return; + + /* Change to suspended state is done while holding vinstr context + * lock. The code below will then not re-enable the instrumentation. */ + if (vinstr_ctx->reprogram) { struct kbase_vinstr_client *iter; @@ -1011,6 +1181,7 @@ static int kbasep_vinstr_service_task(void *data) while (!kthread_should_stop()) { struct kbase_vinstr_client *cli = NULL; struct kbase_vinstr_client *tmp; + int rcode; u64 timestamp = kbasep_vinstr_get_timestamp(); u64 dump_time = 0; @@ -1053,7 +1224,8 @@ static int kbasep_vinstr_service_task(void *data) continue; } - kbasep_vinstr_collect_and_accumulate(vinstr_ctx, &timestamp); + rcode = kbasep_vinstr_collect_and_accumulate(vinstr_ctx, + &timestamp); INIT_LIST_HEAD(&expired_requests); @@ -1082,10 +1254,11 @@ static int kbasep_vinstr_service_task(void *data) /* Expect only periodically sampled clients. */ BUG_ON(0 == cli->dump_interval); - kbasep_vinstr_update_client( - cli, - timestamp, - BASE_HWCNT_READER_EVENT_PERIODIC); + if (!rcode) + kbasep_vinstr_update_client( + cli, + timestamp, + BASE_HWCNT_READER_EVENT_PERIODIC); /* Set new dumping time. Drop missed probing times.
*/ do { @@ -1214,11 +1387,6 @@ static long kbasep_vinstr_hwcnt_reader_ioctl_set_interval( mutex_lock(&vinstr_ctx->lock); - if (vinstr_ctx->suspended) { - mutex_unlock(&vinstr_ctx->lock); - return -EBUSY; - } - list_del(&cli->list); cli->dump_interval = interval; @@ -1455,7 +1623,8 @@ static int kbasep_vinstr_hwcnt_reader_mmap(struct file *filp, struct vm_area_struct *vma) { struct kbase_vinstr_client *cli; - size_t size; + unsigned long size, addr, pfn, offset; + unsigned long vm_size = vma->vm_end - vma->vm_start; KBASE_DEBUG_ASSERT(filp); KBASE_DEBUG_ASSERT(vma); @@ -1464,14 +1633,24 @@ static int kbasep_vinstr_hwcnt_reader_mmap(struct file *filp, KBASE_DEBUG_ASSERT(cli); size = cli->buffer_count * cli->dump_size; - if (vma->vm_end - vma->vm_start > size) - return -ENOMEM; + + if (vma->vm_pgoff > (size >> PAGE_SHIFT)) + return -EINVAL; + if (vm_size > size) + return -EINVAL; + + offset = vma->vm_pgoff << PAGE_SHIFT; + if ((vm_size + offset) > size) + return -EINVAL; + + addr = __pa((unsigned long)cli->dump_buffers + offset); + pfn = addr >> PAGE_SHIFT; return remap_pfn_range( vma, vma->vm_start, - __pa((unsigned long)cli->dump_buffers) >> PAGE_SHIFT, - size, + pfn, + vm_size, vma->vm_page_prot); } @@ -1498,6 +1677,84 @@ static int kbasep_vinstr_hwcnt_reader_release(struct inode *inode, /*****************************************************************************/ +/** + * kbasep_vinstr_kick_scheduler - trigger scheduler cycle + * @kbdev: pointer to kbase device structure + */ +static void kbasep_vinstr_kick_scheduler(struct kbase_device *kbdev) +{ + struct kbasep_js_device_data *js_devdata = &kbdev->js_data; + unsigned long flags; + + down(&js_devdata->schedule_sem); + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + kbase_jm_kick_all(kbdev); + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + up(&js_devdata->schedule_sem); +} + +/** + * kbasep_vinstr_suspend_worker - worker suspending vinstr module + * @data: pointer to work structure + */ +static void kbasep_vinstr_suspend_worker(struct work_struct *data) +{ + struct kbase_vinstr_context *vinstr_ctx; + unsigned long flags; + + vinstr_ctx = container_of(data, struct kbase_vinstr_context, + suspend_work); + + mutex_lock(&vinstr_ctx->lock); + + if (vinstr_ctx->kctx) + disable_hwcnt(vinstr_ctx); + + spin_lock_irqsave(&vinstr_ctx->state_lock, flags); + vinstr_ctx->state = VINSTR_SUSPENDED; + wake_up_all(&vinstr_ctx->suspend_waitq); + spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); + + mutex_unlock(&vinstr_ctx->lock); + + /* Kick GPU scheduler to allow entering protected mode. + * This must happen after vinstr was suspended. */ + kbasep_vinstr_kick_scheduler(vinstr_ctx->kbdev); +} + +/** + * kbasep_vinstr_resume_worker - worker resuming vinstr module + * @data: pointer to work structure + */ +static void kbasep_vinstr_resume_worker(struct work_struct *data) +{ + struct kbase_vinstr_context *vinstr_ctx; + unsigned long flags; + + vinstr_ctx = container_of(data, struct kbase_vinstr_context, + resume_work); + + mutex_lock(&vinstr_ctx->lock); + + if (vinstr_ctx->kctx) + enable_hwcnt(vinstr_ctx); + + spin_lock_irqsave(&vinstr_ctx->state_lock, flags); + vinstr_ctx->state = VINSTR_IDLE; + wake_up_all(&vinstr_ctx->suspend_waitq); + spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); + + mutex_unlock(&vinstr_ctx->lock); + + /* Kick GPU scheduler to allow entering protected mode. + * Note that scheduler state machine might have requested re-entry to + * protected mode before vinstr was resumed.
struct kbase_vinstr_context *kbase_vinstr_init(struct kbase_device *kbdev) { struct kbase_vinstr_context *vinstr_ctx; @@ -1509,8 +1766,14 @@ struct kbase_vinstr_context *kbase_vinstr_init(struct kbase_device *kbdev) INIT_LIST_HEAD(&vinstr_ctx->idle_clients); INIT_LIST_HEAD(&vinstr_ctx->waiting_clients); mutex_init(&vinstr_ctx->lock); + spin_lock_init(&vinstr_ctx->state_lock); vinstr_ctx->kbdev = kbdev; vinstr_ctx->thread = NULL; + vinstr_ctx->state = VINSTR_IDLE; + vinstr_ctx->suspend_cnt = 0; + INIT_WORK(&vinstr_ctx->suspend_work, kbasep_vinstr_suspend_worker); + INIT_WORK(&vinstr_ctx->resume_work, kbasep_vinstr_resume_worker); + init_waitqueue_head(&vinstr_ctx->suspend_waitq); atomic_set(&vinstr_ctx->request_pending, 0); init_waitqueue_head(&vinstr_ctx->waitq); @@ -1526,6 +1789,10 @@ void kbase_vinstr_term(struct kbase_vinstr_context *vinstr_ctx) if (vinstr_ctx->thread) kthread_stop(vinstr_ctx->thread); + /* Wait for workers. */ + flush_work(&vinstr_ctx->suspend_work); + flush_work(&vinstr_ctx->resume_work); + while (1) { struct list_head *list = &vinstr_ctx->idle_clients; @@ -1658,11 +1925,6 @@ int kbase_vinstr_hwc_dump(struct kbase_vinstr_client *cli, mutex_lock(&vinstr_ctx->lock); - if (vinstr_ctx->suspended) { - rcode = -EBUSY; - goto exit; - } - if (event_mask & cli->event_mask) { rcode = kbasep_vinstr_collect_and_accumulate( vinstr_ctx, @@ -1698,11 +1960,6 @@ int kbase_vinstr_hwc_clear(struct kbase_vinstr_client *cli) mutex_lock(&vinstr_ctx->lock); - if (vinstr_ctx->suspended) { - rcode = -EBUSY; - goto exit; - } - rcode = kbasep_vinstr_collect_and_accumulate(vinstr_ctx, &unused); if (rcode) goto exit; @@ -1719,40 +1976,66 @@ exit: return rcode; } -void kbase_vinstr_hwc_suspend(struct kbase_vinstr_context *vinstr_ctx) +int kbase_vinstr_try_suspend(struct kbase_vinstr_context *vinstr_ctx) { - u64 unused; + unsigned long flags; + int ret = -EAGAIN; KBASE_DEBUG_ASSERT(vinstr_ctx); - mutex_lock(&vinstr_ctx->lock); - if (!vinstr_ctx->nclients || vinstr_ctx->suspended) { - mutex_unlock(&vinstr_ctx->lock); - return; + spin_lock_irqsave(&vinstr_ctx->state_lock, flags); + switch (vinstr_ctx->state) { + case VINSTR_SUSPENDED: + vinstr_ctx->suspend_cnt++; + /* overflow shall not happen */ + BUG_ON(0 == vinstr_ctx->suspend_cnt); + ret = 0; + break; + + case VINSTR_IDLE: + vinstr_ctx->state = VINSTR_SUSPENDING; + schedule_work(&vinstr_ctx->suspend_work); + break; + + case VINSTR_DUMPING: + vinstr_ctx->state = VINSTR_SUSPENDING; + break; + + case VINSTR_SUSPENDING: + /* fall through */ + case VINSTR_RESUMING: + break; + + default: + BUG(); + break; } + spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); - kbasep_vinstr_collect_and_accumulate(vinstr_ctx, &unused); - vinstr_ctx->suspended = true; - vinstr_ctx->suspended_clients = vinstr_ctx->waiting_clients; - INIT_LIST_HEAD(&vinstr_ctx->waiting_clients); - mutex_unlock(&vinstr_ctx->lock); + return ret; } -void kbase_vinstr_hwc_resume(struct kbase_vinstr_context *vinstr_ctx) +void kbase_vinstr_suspend(struct kbase_vinstr_context *vinstr_ctx) { + wait_event(vinstr_ctx->suspend_waitq, + (0 == kbase_vinstr_try_suspend(vinstr_ctx))); +} + +void kbase_vinstr_resume(struct kbase_vinstr_context *vinstr_ctx) +{ + unsigned long flags; + KBASE_DEBUG_ASSERT(vinstr_ctx); - mutex_lock(&vinstr_ctx->lock); - if (!vinstr_ctx->nclients || !vinstr_ctx->suspended) { - mutex_unlock(&vinstr_ctx->lock); - return; + spin_lock_irqsave(&vinstr_ctx->state_lock, flags); + BUG_ON(VINSTR_SUSPENDING == vinstr_ctx->state); + if (VINSTR_SUSPENDED == vinstr_ctx->state) { + BUG_ON(0 == vinstr_ctx->suspend_cnt); + vinstr_ctx->suspend_cnt--; + if (0 == vinstr_ctx->suspend_cnt) { + vinstr_ctx->state = VINSTR_RESUMING; + schedule_work(&vinstr_ctx->resume_work); + } } - - vinstr_ctx->suspended = false; - vinstr_ctx->waiting_clients = vinstr_ctx->suspended_clients; - vinstr_ctx->reprogram = true; - kbasep_vinstr_reprogram(vinstr_ctx); - atomic_set(&vinstr_ctx->request_pending, 1); - wake_up_all(&vinstr_ctx->waitq); - mutex_unlock(&vinstr_ctx->lock); + spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); }
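kbase_vinstr_try_suspend() and kbase_vinstr_resume() above form a small refcounted state machine: the first suspend request moves an IDLE context to SUSPENDING and schedules the suspend worker, further requests while already SUSPENDED only bump suspend_cnt, and the last resume schedules the resume worker. The userspace model below walks through that sequence; -11 stands in for -EAGAIN, and all locking and waitqueue details are omitted.

#include <stdio.h>

enum state { IDLE, DUMPING, SUSPENDING, SUSPENDED, RESUMING };

static enum state state = IDLE;
static unsigned int suspend_cnt;

/* Mirrors kbase_vinstr_try_suspend(): 0 once suspended, "try again" otherwise. */
static int try_suspend(void)
{
	switch (state) {
	case SUSPENDED:
		suspend_cnt++;		/* nested suspends are just counted */
		return 0;
	case IDLE:
		state = SUSPENDING;	/* real driver schedules suspend_work here */
		return -11;
	default:
		return -11;		/* a transition is already in flight */
	}
}

/* Stands in for kbasep_vinstr_suspend_worker() completing. */
static void suspend_worker_done(void)
{
	state = SUSPENDED;
}

/* Mirrors kbase_vinstr_resume(): the last resume schedules the resume worker. */
static void resume(void)
{
	if (state == SUSPENDED && suspend_cnt && --suspend_cnt == 0)
		state = RESUMING;
}

int main(void)
{
	printf("first try  : %d\n", try_suspend());	/* worker still pending */
	suspend_worker_done();
	printf("retry      : %d\n", try_suspend());	/* suspended, cnt = 1   */
	printf("second user: %d\n", try_suspend());	/* suspended, cnt = 2   */
	resume();					/* cnt 2 -> 1           */
	resume();					/* cnt 1 -> 0, resuming */
	printf("final state %d, suspend_cnt %u\n", state, suspend_cnt);
	return 0;
}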
diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_vinstr.h b/drivers/gpu/arm/midgard_for_linux/mali_kbase_vinstr.h index d32462aec653..6207d25aef06 100644 --- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_vinstr.h +++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_vinstr.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -103,18 +103,39 @@ int kbase_vinstr_hwc_dump( int kbase_vinstr_hwc_clear(struct kbase_vinstr_client *cli); /** - * kbase_vinstr_hwc_suspend - suspends hardware counter collection for - * a given kbase context + * kbase_vinstr_try_suspend - try suspending operation of a given vinstr context * @vinstr_ctx: vinstr context + * + * Return: 0 on success, or negative if state change is in progress + * + * Warning: This API call is non-generic. It is meant to be used only by the + * job scheduler state machine. + * + * The function initiates the switch to the suspended state. Once it has been + * called, vinstr enters the suspending state. If the function returns a + * non-zero value, the state switch is not complete and the function must be + * called again. On the state switch, vinstr triggers a job scheduler state + * machine cycle. + */ +int kbase_vinstr_try_suspend(struct kbase_vinstr_context *vinstr_ctx); + +/** + * kbase_vinstr_suspend - suspends operation of a given vinstr context + * @vinstr_ctx: vinstr context + * + * The function initiates the switch to the suspended state, then blocks until + * the operation has completed. */ -void kbase_vinstr_hwc_suspend(struct kbase_vinstr_context *vinstr_ctx); +void kbase_vinstr_suspend(struct kbase_vinstr_context *vinstr_ctx); /** - * kbase_vinstr_hwc_resume - resumes hardware counter collection for - * a given kbase context + * kbase_vinstr_resume - resumes operation of a given vinstr context + * @vinstr_ctx: vinstr context + * + * This function may only be called if it was preceded by a successful call + * to kbase_vinstr_suspend. */ -void kbase_vinstr_hwc_resume(struct kbase_vinstr_context *vinstr_ctx); +void kbase_vinstr_resume(struct kbase_vinstr_context *vinstr_ctx); /** * kbase_vinstr_dump_size - Return required size of dump buffer @@ -126,7 +147,7 @@ size_t kbase_vinstr_dump_size(struct kbase_device *kbdev); /** * kbase_vinstr_detach_client - Detach a client from the vinstr core - * @cli: Pointer to vinstr client + * @cli: pointer to vinstr client */ void kbase_vinstr_detach_client(struct kbase_vinstr_client *cli);
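For callers outside the scheduler, the blocking pair above is the whole contract: suspend, do the work that must not race with counter dumps, resume. A hedged kernel-style sketch (the surrounding function and the work in the middle are invented; only the two calls come from this header):

/* Sketch only: bracket work that must not overlap a hardware counter dump. */
static void example_quiesce_counters(struct kbase_vinstr_context *vinstr_ctx)
{
	kbase_vinstr_suspend(vinstr_ctx);	/* blocks until VINSTR_SUSPENDED */

	/* ... work that requires the counters to be switched off ... */

	kbase_vinstr_resume(vinstr_ctx);	/* schedules the resume worker */
}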
diff --git a/drivers/gpu/arm/midgard_for_linux/mali_linux_trace.h b/drivers/gpu/arm/midgard_for_linux/mali_linux_trace.h index fc3cf32ba4d2..2be06a552768 100644 --- a/drivers/gpu/arm/midgard_for_linux/mali_linux_trace.h +++ b/drivers/gpu/arm/midgard_for_linux/mali_linux_trace.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -15,19 +15,15 @@ - - #if !defined(_TRACE_MALI_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_MALI_H -#include <linux/stringify.h> -#include <linux/tracepoint.h> - #undef TRACE_SYSTEM #define TRACE_SYSTEM mali -#define TRACE_SYSTEM_STRING __stringify(TRACE_SYSTEM) #define TRACE_INCLUDE_FILE mali_linux_trace +#include <linux/tracepoint.h> + #define MALI_JOB_SLOTS_EVENT_CHANGED /** @@ -183,24 +179,6 @@ TRACE_EVENT(mali_total_alloc_pages_change, TP_printk("event=%lld", __entry->event_id) ); -/** - * mali_sw_counter - not currently used - * @event_id: counter id - */ -TRACE_EVENT(mali_sw_counter, - TP_PROTO(unsigned int event_id, signed long long value), - TP_ARGS(event_id, value), - TP_STRUCT__entry( - __field(int, event_id) - __field(long long, value) - ), - TP_fast_assign( - __entry->event_id = event_id; - __entry->value = value; - ), - TP_printk("event %d = %lld", __entry->event_id, __entry->value) -); - #endif /* _TRACE_MALI_H */ #undef TRACE_INCLUDE_PATH diff --git a/drivers/gpu/arm/midgard_for_linux/mali_midg_regmap.h b/drivers/gpu/arm/midgard_for_linux/mali_midg_regmap.h index 778104b57fa9..de6c206f3c71 100644 --- a/drivers/gpu/arm/midgard_for_linux/mali_midg_regmap.h +++ b/drivers/gpu/arm/midgard_for_linux/mali_midg_regmap.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -58,7 +58,7 @@ #define GPU_COMMAND 0x030 /* (WO) */ #define GPU_STATUS 0x034 /* (RO) */ - +#define LATEST_FLUSH 0x038 /* (RO) */ #define GROUPS_L2_COHERENT (1 << 0) /* Cores groups are l2 coherent */ @@ -169,6 +169,8 @@ #define L2_PWRACTIVE_LO 0x260 /* (RO) Level 2 cache active bitmap, low word */ #define L2_PWRACTIVE_HI 0x264 /* (RO) Level 2 cache active bitmap, high word */ +#define COHERENCY_FEATURES 0x300 /* (RO) Coherency features present */ +#define COHERENCY_ENABLE 0x304 /* (RW) Coherency enable */ #define JM_CONFIG 0xF00 /* (RW) Job Manager configuration register (Implementation specific register) */ #define SHADER_CONFIG 0xF04 /* (RW) Shader core configuration settings (Implementation specific register) */ @@ -212,6 +214,8 @@ #define JS_AFFINITY_LO 0x10 /* (RO) Core affinity mask for job slot n, low word */ #define JS_AFFINITY_HI 0x14 /* (RO) Core affinity mask for job slot n, high word */ #define JS_CONFIG 0x18 /* (RO) Configuration settings for job slot n */ +#define JS_XAFFINITY 0x1C /* (RO) Extended affinity mask for job + slot n */ #define JS_COMMAND 0x20 /* (WO) Command register for job slot n */ #define JS_STATUS 0x24 /* (RO) Status register for job slot n */ @@ -222,9 +226,12 @@ #define JS_AFFINITY_NEXT_LO 0x50 /* (RW) Next core affinity mask for job slot n, low word */ #define JS_AFFINITY_NEXT_HI 0x54 /* (RW) Next core affinity mask for job slot n, high word */ #define JS_CONFIG_NEXT 0x58 /* (RW) Next configuration settings for job slot n */ +#define JS_XAFFINITY_NEXT 0x5C /* (RW) Next extended affinity mask for + job slot n */ #define JS_COMMAND_NEXT 0x60 /* (RW) Next command register for job slot n */ +#define JS_FLUSH_ID_NEXT 0x70 /* (RW) Next job slot n cache flush ID */ #define MEMORY_MANAGEMENT_BASE 0x2000 #define MMU_REG(r) (MEMORY_MANAGEMENT_BASE + (r)) @@ -266,6 +273,14 @@ #define AS_STATUS 0x28 /* (RO) Status flags for address space n */ +/* (RW) Translation table configuration for address space n, low word */ +#define AS_TRANSCFG_LO 0x30 +/* (RW) Translation table configuration for address space n, high word */ +#define AS_TRANSCFG_HI 0x34 +/* (RO) Secondary fault address for address space n, low word */ +#define AS_FAULTEXTRA_LO 0x38 +/* (RO) Secondary fault address for address space n, high word */ +#define AS_FAULTEXTRA_HI 0x3C /* End Register Offsets */ @@ -293,6 +308,11 @@ #define AS_TRANSTAB_LPAE_ADRMODE_MASK 0x00000003 +/* + * Begin AARCH64 MMU TRANSTAB register values + */ +#define MMU_HW_OUTA_BITS 40 +#define AS_TRANSTAB_BASE_MASK ((1ULL << MMU_HW_OUTA_BITS) - (1ULL << 4)) /* * Begin MMU STATUS register values @@ -305,12 +325,38 @@ #define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT (0x2<<3) #define AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG (0x3<<3) +#define AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT (0x4<<3) +#define AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT (0x5<<3) #define AS_FAULTSTATUS_ACCESS_TYPE_MASK (0x3<<8) +#define AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC (0x0<<8) #define AS_FAULTSTATUS_ACCESS_TYPE_EX (0x1<<8) #define AS_FAULTSTATUS_ACCESS_TYPE_READ (0x2<<8) #define AS_FAULTSTATUS_ACCESS_TYPE_WRITE (0x3<<8) +/* + * Begin MMU TRANSCFG register values + */ + +#define AS_TRANSCFG_ADRMODE_LEGACY 0 +#define AS_TRANSCFG_ADRMODE_UNMAPPED 1 +#define AS_TRANSCFG_ADRMODE_IDENTITY 2 +#define AS_TRANSCFG_ADRMODE_AARCH64_4K 6 +#define AS_TRANSCFG_ADRMODE_AARCH64_64K 8 + +#define 
AS_TRANSCFG_ADRMODE_MASK 0xF + + +/* + * Begin TRANSCFG register values + */ +#define AS_TRANSCFG_PTW_MEMATTR_MASK (3 << 24) +#define AS_TRANSCFG_PTW_MEMATTR_NON_CACHEABLE (1 << 24) +#define AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK (2 << 24) + +#define AS_TRANSCFG_PTW_SH_MASK ((3 << 28)) +#define AS_TRANSCFG_PTW_SH_OS (2 << 28) +#define AS_TRANSCFG_PTW_SH_IS (3 << 28) /* * Begin Command Values @@ -348,8 +394,15 @@ #define JS_CONFIG_END_FLUSH_NO_ACTION JS_CONFIG_START_FLUSH_NO_ACTION #define JS_CONFIG_END_FLUSH_CLEAN (1u << 12) #define JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE (3u << 12) +#define JS_CONFIG_ENABLE_FLUSH_REDUCTION (1u << 14) +#define JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK (1u << 15) #define JS_CONFIG_THREAD_PRI(n) ((n) << 16) +/* JS_XAFFINITY register values */ +#define JS_XAFFINITY_XAFFINITY_ENABLE (1u << 0) +#define JS_XAFFINITY_TILER_ENABLE (1u << 8) +#define JS_XAFFINITY_CACHE_ENABLE (1u << 16) + /* JS_STATUS register values */ /* NOTE: Please keep this values in sync with enum base_jd_event_code in mali_base_kernel.h. @@ -400,19 +453,35 @@ #define GPU_COMMAND_CYCLE_COUNT_STOP 0x06 /* Stops the cycle counter, and system timestamp propagation */ #define GPU_COMMAND_CLEAN_CACHES 0x07 /* Clean all caches */ #define GPU_COMMAND_CLEAN_INV_CACHES 0x08 /* Clean and invalidate all caches */ +#define GPU_COMMAND_SET_PROTECTED_MODE 0x09 /* Places the GPU in protected mode */ /* End Command Values */ /* GPU_STATUS values */ #define GPU_STATUS_PRFCNT_ACTIVE (1 << 2) /* Set if the performance counters are active. */ +#define GPU_STATUS_PROTECTED_MODE_ACTIVE (1 << 7) /* Set if protected mode is active */ /* PRFCNT_CONFIG register values */ -#define PRFCNT_CONFIG_AS_SHIFT 4 /* address space bitmap starts from bit 4 of the register */ +#define PRFCNT_CONFIG_MODE_SHIFT 0 /* Counter mode position. */ +#define PRFCNT_CONFIG_AS_SHIFT 4 /* Address space bitmap position. */ +#define PRFCNT_CONFIG_SETSELECT_SHIFT 8 /* Set select position. */ + #define PRFCNT_CONFIG_MODE_OFF 0 /* The performance counters are disabled. */ #define PRFCNT_CONFIG_MODE_MANUAL 1 /* The performance counters are enabled, but are only written out when a PRFCNT_SAMPLE command is issued using the GPU_COMMAND register. */ #define PRFCNT_CONFIG_MODE_TILE 2 /* The performance counters are enabled, and are written out each time a tile finishes rendering. */ /* AS_MEMATTR values: */ +/* Use GPU implementation-defined caching policy. */ +#define AS_MEMATTR_IMPL_DEF_CACHE_POLICY 0x88ull +/* The attribute set to force all resources to be cached. */ +#define AS_MEMATTR_FORCE_TO_CACHE_ALL 0x8Full +/* Inner write-alloc cache setup, no outer caching */ +#define AS_MEMATTR_WRITE_ALLOC 0x8Dull + +/* Set to implementation defined, outer caching */ +#define AS_MEMATTR_AARCH64_OUTER_IMPL_DEF 0x88ull +/* Set to write back memory, outer caching */ +#define AS_MEMATTR_AARCH64_OUTER_WA 0x8Dull /* Use GPU implementation-defined caching policy. 
*/ #define AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY 0x48ull @@ -457,6 +526,8 @@ /* End JS_FEATURES register */ /* L2_MMU_CONFIG register */ +#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT (23) +#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY (0x1 << L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT) #define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT (24) #define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT) #define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_OCTANT (0x1 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT) @@ -505,5 +576,4 @@ /* End TILER_CONFIG register */ - #endif /* _MIDGARD_REGMAP_H_ */ diff --git a/drivers/gpu/arm/midgard_for_linux/mali_timeline.h b/drivers/gpu/arm/midgard_for_linux/mali_timeline.h index c3563723cb63..bd5f6614b6bb 100644 --- a/drivers/gpu/arm/midgard_for_linux/mali_timeline.h +++ b/drivers/gpu/arm/midgard_for_linux/mali_timeline.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -386,7 +386,6 @@ TRACE_EVENT(mali_timeline_context_active, __entry->count) ); - #endif /* _MALI_TIMELINE_H */ #undef TRACE_INCLUDE_PATH diff --git a/drivers/gpu/arm/midgard_for_linux/platform/devicetree/mali_kbase_config_platform.h b/drivers/gpu/arm/midgard_for_linux/platform/devicetree/mali_kbase_config_platform.h index d4813f7f8a35..34f6d57382e2 100644 --- a/drivers/gpu/arm/midgard_for_linux/platform/devicetree/mali_kbase_config_platform.h +++ b/drivers/gpu/arm/midgard_for_linux/platform/devicetree/mali_kbase_config_platform.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -73,8 +73,8 @@ extern struct kbase_pm_callback_conf pm_callbacks; /** - * Secure mode switch + * Protected mode switch * - * Attached value: pointer to @ref kbase_secure_ops + * Attached value: pointer to @ref kbase_protected_ops */ -#define SECURE_CALLBACKS (NULL) +#define PROTECTED_CALLBACKS (NULL) diff --git a/drivers/gpu/arm/midgard_for_linux/platform/juno_soc/mali_kbase_config_juno_soc.c b/drivers/gpu/arm/midgard_for_linux/platform/juno_soc/mali_kbase_config_juno_soc.c index 3baf3d96d41a..c65481810927 100644 --- a/drivers/gpu/arm/midgard_for_linux/platform/juno_soc/mali_kbase_config_juno_soc.c +++ b/drivers/gpu/arm/midgard_for_linux/platform/juno_soc/mali_kbase_config_juno_soc.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -66,48 +66,76 @@ struct kbase_pm_callback_conf pm_callbacks = { }; /* - * Juno Secure Mode integration + * Juno Protected Mode integration */ /* SMC Function Numbers */ -#define JUNO_SMC_SECURE_ENABLE_FUNC 0xff06 -#define JUNO_SMC_SECURE_DISABLE_FUNC 0xff07 +#define JUNO_SMC_PROTECTED_ENTER_FUNC 0xff06 +#define JUNO_SMC_PROTECTED_RESET_FUNC 0xff07 -static int juno_secure_mode_enable(struct kbase_device *kbdev) +static int juno_protected_mode_enter(struct kbase_device *kbdev) { - u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; + /* T62X in SoC detected */ + u64 ret = kbase_invoke_smc(SMC_OEN_SIP, + JUNO_SMC_PROTECTED_ENTER_FUNC, false, + 0, 0, 0); + return ret; +} - if (gpu_id == GPU_ID_MAKE(GPU_ID_PI_T62X, 0, 1, 0) && - kbdev->reg_start == 0x2d000000) { - /* T62X in SoC detected */ - u64 ret = kbase_invoke_smc(SMC_OEN_SIP, - JUNO_SMC_SECURE_ENABLE_FUNC, false, - 0, 0, 0); - return ret; - } - - return -EINVAL; /* Not supported */ +/* TODO: Remove these externs, reset should be done by the firmware */ +extern void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value, + struct kbase_context *kctx); + +extern u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset, + struct kbase_context *kctx); + +static int juno_protected_mode_reset(struct kbase_device *kbdev) +{ + + /* T62X in SoC detected */ + u64 ret = kbase_invoke_smc(SMC_OEN_SIP, + JUNO_SMC_PROTECTED_RESET_FUNC, false, + 0, 0, 0); + + /* TODO: Remove this reset, it should be done by the firmware */ + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_HARD_RESET, NULL); + + while ((kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL) + & RESET_COMPLETED) != RESET_COMPLETED) + ; + + return ret; } -static int juno_secure_mode_disable(struct kbase_device *kbdev) +static bool juno_protected_mode_supported(struct kbase_device *kbdev) { u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; + /* + * Protected mode is only supported for the built-in GPU + * _and_ only if the right firmware is running. + * + * Given that at init time the GPU is not powered up, the + * juno_protected_mode_reset function can't be used, as + * it needs to access GPU registers. + * However, although we don't want the GPU to boot into + * protected mode, we know a GPU reset will be done after + * this function is called, so although we set the GPU to + * protected mode it will exit protected mode before the + * driver is ready to run work.
+ */ if (gpu_id == GPU_ID_MAKE(GPU_ID_PI_T62X, 0, 1, 0) && - kbdev->reg_start == 0x2d000000) { - /* T62X in SoC detected */ - u64 ret = kbase_invoke_smc(SMC_OEN_SIP, - JUNO_SMC_SECURE_DISABLE_FUNC, false, - 0, 0, 0); - return ret; - } - - return -EINVAL; /* Not supported */ + (kbdev->reg_start == 0x2d000000)) + return juno_protected_mode_enter(kbdev) == 0; + + return false; } -struct kbase_secure_ops juno_secure_ops = { - .secure_mode_enable = juno_secure_mode_enable, - .secure_mode_disable = juno_secure_mode_disable, +struct kbase_protected_ops juno_protected_ops = { + .protected_mode_enter = juno_protected_mode_enter, + .protected_mode_reset = juno_protected_mode_reset, + .protected_mode_supported = juno_protected_mode_supported, }; static struct kbase_platform_config versatile_platform_config = { diff --git a/drivers/gpu/arm/midgard_for_linux/platform/juno_soc/mali_kbase_config_platform.h b/drivers/gpu/arm/midgard_for_linux/platform/juno_soc/mali_kbase_config_platform.h index 5fc6d9e1c312..ab29e9d9c0a6 100644 --- a/drivers/gpu/arm/midgard_for_linux/platform/juno_soc/mali_kbase_config_platform.h +++ b/drivers/gpu/arm/midgard_for_linux/platform/juno_soc/mali_kbase_config_platform.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -71,14 +71,14 @@ #define PLATFORM_FUNCS (NULL) /** - * Secure mode switch + * Protected mode switch * - * Attached value: pointer to @ref kbase_secure_ops + * Attached value: pointer to @ref kbase_protected_ops */ -#define SECURE_CALLBACKS (&juno_secure_ops) +#define PROTECTED_CALLBACKS (&juno_protected_ops) extern struct kbase_pm_callback_conf pm_callbacks; #ifdef CONFIG_DEVFREQ_THERMAL extern struct devfreq_cooling_ops juno_model_ops; #endif -extern struct kbase_secure_ops juno_secure_ops; +extern struct kbase_protected_ops juno_protected_ops; diff --git a/drivers/gpu/arm/midgard_for_linux/platform/vexpress/mali_kbase_config_platform.h b/drivers/gpu/arm/midgard_for_linux/platform/vexpress/mali_kbase_config_platform.h index 6384586371d0..dc4471beae67 100644 --- a/drivers/gpu/arm/midgard_for_linux/platform/vexpress/mali_kbase_config_platform.h +++ b/drivers/gpu/arm/midgard_for_linux/platform/vexpress/mali_kbase_config_platform.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -24,7 +24,7 @@ * Attached value: number in kHz * Default value: NA */ -#define GPU_FREQ_KHZ_MAX (5000) +#define GPU_FREQ_KHZ_MAX kbase_get_platform_max_freq() /** * Minimum frequency GPU will be clocked at. Given in kHz. * This must be specified as there is no default value. 
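The Juno file above is the reference implementation of the new kbase_protected_ops interface, and each platform header advertises its implementation (or opts out with NULL) through PROTECTED_CALLBACKS. Below is a hedged sketch of what another platform might provide; the example_* names and the pmode_smc_*() helpers are hypothetical, only the three callback fields and the macro come from this patch.

/* Hypothetical platform glue; assumes SoC-specific pmode_smc_enter() and
 * pmode_smc_exit() helpers exist elsewhere and return 0 on success. */
static int example_protected_mode_enter(struct kbase_device *kbdev)
{
	return pmode_smc_enter();
}

static int example_protected_mode_reset(struct kbase_device *kbdev)
{
	/* Must leave the GPU reset and out of protected mode, as Juno does. */
	return pmode_smc_exit();
}

static bool example_protected_mode_supported(struct kbase_device *kbdev)
{
	return true;	/* or probe the SoC/firmware, as juno_protected_mode_supported() does */
}

struct kbase_protected_ops example_protected_ops = {
	.protected_mode_enter     = example_protected_mode_enter,
	.protected_mode_reset     = example_protected_mode_reset,
	.protected_mode_supported = example_protected_mode_supported,
};

/* And in that platform's mali_kbase_config_platform.h: */
extern struct kbase_protected_ops example_protected_ops;
#define PROTECTED_CALLBACKS (&example_protected_ops)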
@@ -32,16 +32,7 @@ * Attached value: number in kHz * Default value: NA */ -#define GPU_FREQ_KHZ_MIN (5000) - -/** - * Values used for determining the GPU frequency based on the LogicTile type - * Used by the function kbase_get_platform_logic_tile_type - */ -#define VE_VIRTEX6_GPU_FREQ_MIN 5000 -#define VE_VIRTEX6_GPU_FREQ_MAX 5000 -#define VE_VIRTEX7_GPU_FREQ_MIN 40000 -#define VE_VIRTEX7_GPU_FREQ_MAX 40000 +#define GPU_FREQ_KHZ_MIN kbase_get_platform_min_freq() /** * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock @@ -82,10 +73,10 @@ #define PLATFORM_FUNCS (NULL) /** - * Secure mode switch + * Protected mode switch * - * Attached value: pointer to @ref kbase_secure_ops + * Attached value: pointer to @ref kbase_protected_ops */ -#define SECURE_CALLBACKS (NULL) +#define PROTECTED_CALLBACKS (NULL) extern struct kbase_pm_callback_conf pm_callbacks; diff --git a/drivers/gpu/arm/midgard_for_linux/platform/vexpress/mali_kbase_config_vexpress.c b/drivers/gpu/arm/midgard_for_linux/platform/vexpress/mali_kbase_config_vexpress.c index 687b1a8c0431..15ce2bc5eea5 100644 --- a/drivers/gpu/arm/midgard_for_linux/platform/vexpress/mali_kbase_config_vexpress.c +++ b/drivers/gpu/arm/midgard_for_linux/platform/vexpress/mali_kbase_config_vexpress.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software diff --git a/drivers/gpu/arm/midgard_for_linux/platform/vexpress/mali_kbase_cpu_vexpress.c b/drivers/gpu/arm/midgard_for_linux/platform/vexpress/mali_kbase_cpu_vexpress.c index 9bc51f1e2da8..4665f98cbbe4 100644 --- a/drivers/gpu/arm/midgard_for_linux/platform/vexpress/mali_kbase_cpu_vexpress.c +++ b/drivers/gpu/arm/midgard_for_linux/platform/vexpress/mali_kbase_cpu_vexpress.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -52,6 +52,18 @@ #define IS_SINGLE_BIT_SET(val, pos) (val&(1<