3 * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
5 * This program is free software and is provided to you under the terms of the
6 * GNU General Public License version 2 as published by the Free Software
7 * Foundation, and any use by you of this program is subject to the terms
10 * A copy of the licence is included with the program, and can also be obtained
11 * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
12 * Boston, MA 02110-1301, USA.
21 * @file mali_kbase_mmu.c
22 * Base kernel MMU management.
26 #include <linux/kernel.h>
27 #include <linux/dma-mapping.h>
28 #include <mali_kbase.h>
29 #include <mali_midg_regmap.h>
30 #if defined(CONFIG_MALI_GATOR_SUPPORT)
31 #include <mali_kbase_gator.h>
33 #include <mali_kbase_tlstream.h>
34 #include <mali_kbase_instr_defs.h>
35 #include <mali_kbase_debug.h>
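/* beenthere(): debug trace helper that prefixes each message with the name of
 * the calling function via dev_dbg(). */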
37 #define beenthere(kctx, f, a...) dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a)
39 #include <mali_kbase_defs.h>
40 #include <mali_kbase_hw.h>
41 #include <mali_kbase_mmu_hw.h>
42 #include <mali_kbase_hwaccess_jm.h>
43 #include <mali_kbase_time.h>
44 #include <mali_kbase_mem.h>
46 #define KBASE_MMU_PAGE_ENTRIES 512
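/* Number of 64-bit entries in one 4kB page table page; each level of the
 * table therefore resolves 9 bits of the virtual PFN. */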
49 * kbase_mmu_flush_invalidate() - Flush and invalidate the GPU caches.
50 * @kctx: The KBase context.
51 * @vpfn: The virtual page frame number to start the flush on.
52 * @nr: The number of pages to flush.
53 * @sync: Set if the operation should be synchronous or not.
55 * Issue a cache flush + invalidate to the GPU caches and invalidate the TLBs.
57 * If sync is not set then transactions still in flight when the flush is issued
58 * may use the old page tables and the data they write will not be written out
59 * to memory. This function returns after the flush has been issued but
60 * before all accesses which might affect the flushed region have completed.
62 * If sync is set then accesses in the flushed region will be drained
63 * before data is flushed and invalidated through L1, L2 and into memory,
64 * after which point this function will return.
66 static void kbase_mmu_flush_invalidate(struct kbase_context *kctx,
67 u64 vpfn, size_t nr, bool sync);
70 * kbase_mmu_sync_pgd - sync page directory to memory
71 * @kbdev: Device pointer.
72 * @handle: Address of DMA region.
73 * @size: Size of the region to sync.
75 * This should be called after each page directory update.
78 static void kbase_mmu_sync_pgd(struct kbase_device *kbdev,
79 dma_addr_t handle, size_t size)
81 /* If the page table is not coherent then ensure the GPU can read
82 * the pages from memory
84 if (kbdev->system_coherency != COHERENCY_ACE)
85 dma_sync_single_for_device(kbdev->dev, handle, size,
91 * - PGD: Page Directory.
92 * - PTE: Page Table Entry. A 64bit value pointing to the next
93 * level of translation
94 * - ATE: Address Translation Entry. A 64bit value pointing to
95 * a 4kB physical page.
98 static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx,
99 struct kbase_as *as, const char *reason_str);
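/**
 * make_multiple() - Round @minimum up to a whole multiple of @multiple.
 * @minimum:  Value to round up.
 * @multiple: Granule to round to; must be non-zero.
 *
 * Return: @minimum rounded up to a whole multiple of @multiple.
 */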
102 static size_t make_multiple(size_t minimum, size_t multiple)
104 size_t remainder = minimum % multiple;
109 return minimum + multiple - remainder;
112 void page_fault_worker(struct work_struct *data)
117 size_t fault_rel_pfn;
118 struct kbase_as *faulting_as;
120 struct kbase_context *kctx;
121 struct kbase_device *kbdev;
122 struct kbase_va_region *region;
126 faulting_as = container_of(data, struct kbase_as, work_pagefault);
127 fault_pfn = faulting_as->fault_addr >> PAGE_SHIFT;
128 as_no = faulting_as->number;
130 kbdev = container_of(faulting_as, struct kbase_device, as[as_no]);
132 /* Grab the context that was already refcounted in kbase_mmu_interrupt().
133 * Therefore, it cannot be scheduled out of this AS until we explicitly release it
135 kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as_no);
136 if (WARN_ON(!kctx)) {
137 atomic_dec(&kbdev->faults_pending);
141 KBASE_DEBUG_ASSERT(kctx->kbdev == kbdev);
143 if (unlikely(faulting_as->protected_mode))
145 kbase_mmu_report_fault_and_kill(kctx, faulting_as,
146 "Protected mode fault");
147 kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
148 KBASE_MMU_FAULT_TYPE_PAGE);
153 fault_status = faulting_as->fault_status;
154 switch (fault_status & AS_FAULTSTATUS_EXCEPTION_CODE_MASK) {
156 case AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT:
157 /* need to check against the region to handle this one */
160 case AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT:
161 kbase_mmu_report_fault_and_kill(kctx, faulting_as,
162 "Permission failure");
165 case AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT:
166 kbase_mmu_report_fault_and_kill(kctx, faulting_as,
167 "Translation table bus fault");
170 case AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG:
171 /* nothing to do, but we don't expect this fault currently */
172 dev_warn(kbdev->dev, "Access flag unexpectedly set");
175 #ifdef CONFIG_MALI_GPU_MMU_AARCH64
176 case AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT:
178 kbase_mmu_report_fault_and_kill(kctx, faulting_as,
179 "Address size fault");
182 case AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT:
183 kbase_mmu_report_fault_and_kill(kctx, faulting_as,
184 "Memory attributes fault");
186 #endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
189 kbase_mmu_report_fault_and_kill(kctx, faulting_as,
190 "Unknown fault code");
194 /* so we have a translation fault, let's see if it is for growable
196 kbase_gpu_vm_lock(kctx);
198 region = kbase_region_tracker_find_region_enclosing_address(kctx,
199 faulting_as->fault_addr);
200 if (!region || region->flags & KBASE_REG_FREE) {
201 kbase_gpu_vm_unlock(kctx);
202 kbase_mmu_report_fault_and_kill(kctx, faulting_as,
203 "Memory is not mapped on the GPU");
207 if ((region->flags & GROWABLE_FLAGS_REQUIRED)
208 != GROWABLE_FLAGS_REQUIRED) {
209 kbase_gpu_vm_unlock(kctx);
210 kbase_mmu_report_fault_and_kill(kctx, faulting_as,
211 "Memory is not growable");
215 if ((region->flags & KBASE_REG_DONT_NEED)) {
216 kbase_gpu_vm_unlock(kctx);
217 kbase_mmu_report_fault_and_kill(kctx, faulting_as,
218 "Don't need memory can't be grown");
222 /* find the size we need to grow it by */
223 /* we know the result fits in a size_t due to kbase_region_tracker_find_region_enclosing_address
224 * validating the fault_address to be within a size_t from the start_pfn */
225 fault_rel_pfn = fault_pfn - region->start_pfn;
227 if (fault_rel_pfn < kbase_reg_current_backed_size(region)) {
228 dev_dbg(kbdev->dev, "Page fault @ 0x%llx in allocated region 0x%llx-0x%llx of growable TMEM: Ignoring",
229 faulting_as->fault_addr, region->start_pfn,
231 kbase_reg_current_backed_size(region));
233 mutex_lock(&kbdev->mmu_hw_mutex);
235 kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
236 KBASE_MMU_FAULT_TYPE_PAGE);
237 /* [1] in case another page fault occurred while we were
238 * handling the (duplicate) page fault we need to ensure we
239 * don't lose the other page fault as a result of us clearing
240 * the MMU IRQ. Therefore, after we clear the MMU IRQ we send
241 * an UNLOCK command that will retry any stalled memory
242 * transaction (which should cause the other page fault to be
245 kbase_mmu_hw_do_operation(kbdev, faulting_as, NULL, 0, 0,
246 AS_COMMAND_UNLOCK, 1);
248 mutex_unlock(&kbdev->mmu_hw_mutex);
250 kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
251 KBASE_MMU_FAULT_TYPE_PAGE);
252 kbase_gpu_vm_unlock(kctx);
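/* Grow the backing by at least enough pages to cover the faulting PFN (the
 * "+ 1" accounts for the faulting page itself); the result is capped below so
 * the backing never exceeds the region's maximum number of pages. */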
257 new_pages = make_multiple(fault_rel_pfn -
258 kbase_reg_current_backed_size(region) + 1,
261 /* cap to max vsize */
262 if (new_pages + kbase_reg_current_backed_size(region) >
264 new_pages = region->nr_pages -
265 kbase_reg_current_backed_size(region);
267 if (0 == new_pages) {
268 mutex_lock(&kbdev->mmu_hw_mutex);
270 /* Duplicate of a fault we've already handled, nothing to do */
271 kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
272 KBASE_MMU_FAULT_TYPE_PAGE);
273 /* See comment [1] about UNLOCK usage */
274 kbase_mmu_hw_do_operation(kbdev, faulting_as, NULL, 0, 0,
275 AS_COMMAND_UNLOCK, 1);
277 mutex_unlock(&kbdev->mmu_hw_mutex);
279 kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
280 KBASE_MMU_FAULT_TYPE_PAGE);
281 kbase_gpu_vm_unlock(kctx);
285 if (kbase_alloc_phy_pages_helper(region->gpu_alloc, new_pages) == 0) {
286 if (region->gpu_alloc != region->cpu_alloc) {
287 if (kbase_alloc_phy_pages_helper(
288 region->cpu_alloc, new_pages) == 0) {
291 kbase_free_phy_pages_helper(region->gpu_alloc,
305 KBASE_DEBUG_ASSERT(kbase_reg_current_backed_size(region) <= region->nr_pages);
307 /* set up the new pages */
308 pfn_offset = kbase_reg_current_backed_size(region) - new_pages;
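/* The backing was grown by new_pages above, so the newly allocated pages
 * occupy the tail of the backed range, starting at pfn_offset. */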
311 * Issuing an MMU operation will unlock the MMU and cause the
312 * translation to be replayed. If the page insertion fails then,
313 * rather than trying to continue, the context should be killed,
314 * so the no_flush version of insert_pages is used which allows
315 * us to unlock the MMU as we see fit.
317 err = kbase_mmu_insert_pages_no_flush(kctx,
318 region->start_pfn + pfn_offset,
319 &kbase_get_gpu_phy_pages(region)[pfn_offset],
320 new_pages, region->flags);
322 kbase_free_phy_pages_helper(region->gpu_alloc, new_pages);
323 if (region->gpu_alloc != region->cpu_alloc)
324 kbase_free_phy_pages_helper(region->cpu_alloc,
326 kbase_gpu_vm_unlock(kctx);
327 /* The locked VA region will be unlocked and the cache invalidated in here */
328 kbase_mmu_report_fault_and_kill(kctx, faulting_as,
329 "Page table update failure");
332 #if defined(CONFIG_MALI_GATOR_SUPPORT)
333 kbase_trace_mali_page_fault_insert_pages(as_no, new_pages);
335 kbase_tlstream_aux_pagefault(kctx->id, (u64)new_pages);
337 /* AS transaction begin */
338 mutex_lock(&kbdev->mmu_hw_mutex);
340 /* flush L2 and unlock the VA (resumes the MMU) */
341 if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6367))
342 op = AS_COMMAND_FLUSH;
344 op = AS_COMMAND_FLUSH_PT;
346 /* clear MMU interrupt - this needs to be done after updating
347 * the page tables but before issuing a FLUSH command. The
348 * FLUSH cmd has a side effect that it restarts stalled memory
349 * transactions in other address spaces which may cause
350 * another fault to occur. If we didn't clear the interrupt at
351 * this stage a new IRQ might not be raised when the GPU finds
352 * an MMU IRQ is already pending.
354 kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
355 KBASE_MMU_FAULT_TYPE_PAGE);
357 kbase_mmu_hw_do_operation(kbdev, faulting_as, kctx,
358 faulting_as->fault_addr >> PAGE_SHIFT,
362 mutex_unlock(&kbdev->mmu_hw_mutex);
363 /* AS transaction end */
365 /* reenable this in the mask */
366 kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
367 KBASE_MMU_FAULT_TYPE_PAGE);
368 kbase_gpu_vm_unlock(kctx);
370 /* failed to extend, handle as a normal PF */
371 kbase_gpu_vm_unlock(kctx);
372 kbase_mmu_report_fault_and_kill(kctx, faulting_as,
373 "Page allocation failure");
378 * By this point, the fault was handled in some way,
379 * so release the ctx refcount
381 kbasep_js_runpool_release_ctx(kbdev, kctx);
383 atomic_dec(&kbdev->faults_pending);
386 phys_addr_t kbase_mmu_alloc_pgd(struct kbase_context *kctx)
391 int new_page_count __maybe_unused;
393 KBASE_DEBUG_ASSERT(NULL != kctx);
394 new_page_count = kbase_atomic_add_pages(1, &kctx->used_pages);
395 kbase_atomic_add_pages(1, &kctx->kbdev->memdev.used_pages);
397 p = kbase_mem_pool_alloc(&kctx->mem_pool);
401 kbase_tlstream_aux_pagesalloc(
403 (u64)new_page_count);
409 kbase_process_page_usage_inc(kctx, 1);
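/* Start the new PGD with every entry invalid so the MMU never walks
 * stale data through it. */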
411 for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++)
412 kctx->kbdev->mmu_mode->entry_invalidate(&page[i]);
414 kbase_mmu_sync_pgd(kctx->kbdev, kbase_dma_addr(p), PAGE_SIZE);
417 return page_to_phys(p);
420 kbase_mem_pool_free(&kctx->mem_pool, p, false);
422 kbase_atomic_sub_pages(1, &kctx->used_pages);
423 kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages);
428 KBASE_EXPORT_TEST_API(kbase_mmu_alloc_pgd);
430 /* Given PGD PFN for level N, return PGD PFN for level N+1, allocating the
431 * new table from the pool if needed and possible
433 static int mmu_get_next_pgd(struct kbase_context *kctx,
434 phys_addr_t *pgd, u64 vpfn, int level)
437 phys_addr_t target_pgd;
440 KBASE_DEBUG_ASSERT(*pgd);
441 KBASE_DEBUG_ASSERT(NULL != kctx);
443 lockdep_assert_held(&kctx->mmu_lock);
446 * Architecture spec defines level-0 as being the top-most.
447 * This is a bit unfortunate here, but we keep the same convention.
449 vpfn >>= (3 - level) * 9;
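/* Each level of the table indexes 9 bits of the VPFN (KBASE_MMU_PAGE_ENTRIES
 * entries per level), so this shift leaves the current level's index in the
 * low-order bits. */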
452 p = pfn_to_page(PFN_DOWN(*pgd));
455 dev_warn(kctx->kbdev->dev, "mmu_get_next_pgd: kmap failure\n");
459 target_pgd = kctx->kbdev->mmu_mode->pte_to_phy_addr(page[vpfn]);
462 target_pgd = kbase_mmu_alloc_pgd(kctx);
464 dev_dbg(kctx->kbdev->dev, "mmu_get_next_pgd: kbase_mmu_alloc_pgd failure\n");
469 kctx->kbdev->mmu_mode->entry_set_pte(&page[vpfn], target_pgd);
471 kbase_mmu_sync_pgd(kctx->kbdev, kbase_dma_addr(p), PAGE_SIZE);
472 /* Rely on the caller to update the address space flags. */
481 static int mmu_get_bottom_pgd(struct kbase_context *kctx,
482 u64 vpfn, phys_addr_t *out_pgd)
487 lockdep_assert_held(&kctx->mmu_lock);
490 for (l = MIDGARD_MMU_TOPLEVEL; l < MIDGARD_MMU_BOTTOMLEVEL; l++) {
491 int err = mmu_get_next_pgd(kctx, &pgd, vpfn, l);
492 /* Handle failure condition */
494 dev_dbg(kctx->kbdev->dev, "mmu_get_bottom_pgd: mmu_get_next_pgd failure\n");
504 static phys_addr_t mmu_insert_pages_recover_get_next_pgd(struct kbase_context *kctx, phys_addr_t pgd, u64 vpfn, int level)
507 phys_addr_t target_pgd;
509 KBASE_DEBUG_ASSERT(pgd);
510 KBASE_DEBUG_ASSERT(NULL != kctx);
512 lockdep_assert_held(&kctx->mmu_lock);
515 * Architecture spec defines level-0 as being the top-most.
516 * This is a bit unfortunate here, but we keep the same convention.
518 vpfn >>= (3 - level) * 9;
521 page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd)));
522 /* kmap_atomic should NEVER fail */
523 KBASE_DEBUG_ASSERT(NULL != page);
525 target_pgd = kctx->kbdev->mmu_mode->pte_to_phy_addr(page[vpfn]);
526 /* As we are recovering from what has already been set up, we should have a target_pgd */
527 KBASE_DEBUG_ASSERT(0 != target_pgd);
532 static phys_addr_t mmu_insert_pages_recover_get_bottom_pgd(struct kbase_context *kctx, u64 vpfn)
537 lockdep_assert_held(&kctx->mmu_lock);
541 for (l = MIDGARD_MMU_TOPLEVEL; l < MIDGARD_MMU_BOTTOMLEVEL; l++) {
542 pgd = mmu_insert_pages_recover_get_next_pgd(kctx, pgd, vpfn, l);
543 /* Should never fail */
544 KBASE_DEBUG_ASSERT(0 != pgd);
550 static void mmu_insert_pages_failure_recovery(struct kbase_context *kctx, u64 vpfn,
555 struct kbase_mmu_mode const *mmu_mode;
557 KBASE_DEBUG_ASSERT(NULL != kctx);
558 KBASE_DEBUG_ASSERT(0 != vpfn);
559 /* 64-bit address range is the max */
560 KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
562 lockdep_assert_held(&kctx->mmu_lock);
564 mmu_mode = kctx->kbdev->mmu_mode;
568 unsigned int index = vpfn & 0x1FF;
569 unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
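/* index/count address entries within a single bottom-level table, so one
 * pass never crosses a 512-entry table boundary. */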
575 pgd = mmu_insert_pages_recover_get_bottom_pgd(kctx, vpfn);
576 KBASE_DEBUG_ASSERT(0 != pgd);
578 p = pfn_to_page(PFN_DOWN(pgd));
580 pgd_page = kmap_atomic(p);
581 KBASE_DEBUG_ASSERT(NULL != pgd_page);
583 /* Invalidate the entries we added */
584 for (i = 0; i < count; i++)
585 mmu_mode->entry_invalidate(&pgd_page[index + i]);
590 kbase_mmu_sync_pgd(kctx->kbdev, kbase_dma_addr(p), PAGE_SIZE);
592 kunmap_atomic(pgd_page);
597 * Map the single page 'phys' 'nr' times, starting at GPU PFN 'vpfn'
599 int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
600 phys_addr_t phys, size_t nr,
605 /* In case the insert_single_page only partially completes we need to be
607 bool recover_required = false;
608 u64 recover_vpfn = vpfn;
609 size_t recover_count = 0;
613 KBASE_DEBUG_ASSERT(NULL != kctx);
614 KBASE_DEBUG_ASSERT(0 != vpfn);
615 /* 64-bit address range is the max */
616 KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
618 /* Early out if there is nothing to do */
622 mutex_lock(&kctx->mmu_lock);
626 unsigned int index = vpfn & 0x1FF;
627 unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
634 * Repeatedly calling mmu_get_bottom_pgd() is clearly
635 * suboptimal. We don't have to re-parse the whole tree
636 * each time (just cache the l0-l2 sequence).
637 * On the other hand, it's only a gain when we map more than
638 * 256 pages at once (on average). Do we really care?
641 err = mmu_get_bottom_pgd(kctx, vpfn, &pgd);
644 /* Fill the memory pool with enough pages for
645 * the page walk to succeed
647 mutex_unlock(&kctx->mmu_lock);
648 err = kbase_mem_pool_grow(&kctx->mem_pool,
649 MIDGARD_MMU_BOTTOMLEVEL);
650 mutex_lock(&kctx->mmu_lock);
653 dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_single_page: mmu_get_bottom_pgd failure\n");
654 if (recover_required) {
655 /* Invalidate the pages we have partially
657 mmu_insert_pages_failure_recovery(kctx,
664 p = pfn_to_page(PFN_DOWN(pgd));
667 dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_single_page: kmap failure\n");
668 if (recover_required) {
669 /* Invalidate the pages we have partially
671 mmu_insert_pages_failure_recovery(kctx,
679 for (i = 0; i < count; i++) {
680 unsigned int ofs = index + i;
682 KBASE_DEBUG_ASSERT(0 == (pgd_page[ofs] & 1UL));
683 kctx->kbdev->mmu_mode->entry_set_ate(&pgd_page[ofs],
690 kbase_mmu_sync_pgd(kctx->kbdev,
691 kbase_dma_addr(p) + (index * sizeof(u64)),
692 count * sizeof(u64));
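/* Only the entries just written (count entries from index) are synced,
 * rather than the whole table page. */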
695 /* We have started modifying the page table.
696 * If further pages need inserting and fail we need to undo what
697 * has already taken place */
698 recover_required = true;
699 recover_count += count;
701 mutex_unlock(&kctx->mmu_lock);
702 kbase_mmu_flush_invalidate(kctx, vpfn, nr, false);
706 mutex_unlock(&kctx->mmu_lock);
707 kbase_mmu_flush_invalidate(kctx, vpfn, nr, false);
711 int kbase_mmu_insert_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
712 phys_addr_t *phys, size_t nr,
717 /* In case the insert_pages only partially completes we need to be able
719 bool recover_required = false;
720 u64 recover_vpfn = vpfn;
721 size_t recover_count = 0;
725 KBASE_DEBUG_ASSERT(NULL != kctx);
726 KBASE_DEBUG_ASSERT(0 != vpfn);
727 /* 64-bit address range is the max */
728 KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
730 /* Early out if there is nothing to do */
734 mutex_lock(&kctx->mmu_lock);
738 unsigned int index = vpfn & 0x1FF;
739 unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
746 * Repeatedly calling mmu_get_bottom_pgd() is clearly
747 * suboptimal. We don't have to re-parse the whole tree
748 * each time (just cache the l0-l2 sequence).
749 * On the other hand, it's only a gain when we map more than
750 * 256 pages at once (on average). Do we really care?
753 err = mmu_get_bottom_pgd(kctx, vpfn, &pgd);
756 /* Fill the memory pool with enough pages for
757 * the page walk to succeed
759 mutex_unlock(&kctx->mmu_lock);
760 err = kbase_mem_pool_grow(&kctx->mem_pool,
761 MIDGARD_MMU_BOTTOMLEVEL);
762 mutex_lock(&kctx->mmu_lock);
765 dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: mmu_get_bottom_pgd failure\n");
766 if (recover_required) {
767 /* Invalidate the pages we have partially
769 mmu_insert_pages_failure_recovery(kctx,
776 p = pfn_to_page(PFN_DOWN(pgd));
779 dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: kmap failure\n");
780 if (recover_required) {
781 /* Invalidate the pages we have partially
783 mmu_insert_pages_failure_recovery(kctx,
791 for (i = 0; i < count; i++) {
792 unsigned int ofs = index + i;
794 KBASE_DEBUG_ASSERT(0 == (pgd_page[ofs] & 1UL));
795 kctx->kbdev->mmu_mode->entry_set_ate(&pgd_page[ofs],
803 kbase_mmu_sync_pgd(kctx->kbdev,
804 kbase_dma_addr(p) + (index * sizeof(u64)),
805 count * sizeof(u64));
808 /* We have started modifying the page table. If further pages
809 * need inserting and fail we need to undo what has already
811 recover_required = true;
812 recover_count += count;
815 mutex_unlock(&kctx->mmu_lock);
819 mutex_unlock(&kctx->mmu_lock);
824 * Map 'nr' pages pointed to by 'phys' at GPU PFN 'vpfn'
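* Like kbase_mmu_insert_pages_no_flush(), but also issues an asynchronous
* GPU cache flush/invalidate afterwards so the new entries become visible.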
826 int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn,
827 phys_addr_t *phys, size_t nr,
832 err = kbase_mmu_insert_pages_no_flush(kctx, vpfn, phys, nr, flags);
833 kbase_mmu_flush_invalidate(kctx, vpfn, nr, false);
837 KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages);
840 * kbase_mmu_flush_invalidate_noretain() - Flush and invalidate the GPU caches
841 * without retaining the kbase context.
842 * @kctx: The KBase context.
843 * @vpfn: The virtual page frame number to start the flush on.
844 * @nr: The number of pages to flush.
845 * @sync: Set if the operation should be synchronous or not.
847 * As per kbase_mmu_flush_invalidate but doesn't retain the kctx or do any
850 static void kbase_mmu_flush_invalidate_noretain(struct kbase_context *kctx,
851 u64 vpfn, size_t nr, bool sync)
853 struct kbase_device *kbdev = kctx->kbdev;
857 /* Early out if there is nothing to do */
862 op = AS_COMMAND_FLUSH_MEM;
864 op = AS_COMMAND_FLUSH_PT;
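/* A synchronous request (FLUSH_MEM) drains outstanding accesses to memory
 * before completing; a non-synchronous one (FLUSH_PT) does not wait for
 * them - see the kbase_mmu_flush_invalidate() description above. */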
866 err = kbase_mmu_hw_do_operation(kbdev,
867 &kbdev->as[kctx->as_nr],
868 kctx, vpfn, nr, op, 0);
869 #if KBASE_GPU_RESET_EN
871 /* Flush failed to complete, assume the
872 * GPU has hung and perform a reset to
874 dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover\n");
876 if (kbase_prepare_to_reset_gpu_locked(kbdev))
877 kbase_reset_gpu_locked(kbdev);
879 #endif /* KBASE_GPU_RESET_EN */
881 #ifndef CONFIG_MALI_NO_MALI
883 * As this function could be called in interrupt context the sync
884 * request can't block. Instead log the request and the next flush
885 * request will pick it up.
887 if ((!err) && sync &&
888 kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_6367))
889 atomic_set(&kctx->drain_pending, 1);
890 #endif /* !CONFIG_MALI_NO_MALI */
893 static void kbase_mmu_flush_invalidate(struct kbase_context *kctx,
894 u64 vpfn, size_t nr, bool sync)
896 struct kbase_device *kbdev;
897 bool ctx_is_in_runpool;
898 #ifndef CONFIG_MALI_NO_MALI
899 bool drain_pending = false;
901 if (atomic_xchg(&kctx->drain_pending, 0))
902 drain_pending = true;
903 #endif /* !CONFIG_MALI_NO_MALI */
905 /* Early out if there is nothing to do */
910 ctx_is_in_runpool = kbasep_js_runpool_retain_ctx(kbdev, kctx);
912 if (ctx_is_in_runpool) {
913 KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
915 if (!kbase_pm_context_active_handle_suspend(kbdev,
916 KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
920 /* AS transaction begin */
921 mutex_lock(&kbdev->mmu_hw_mutex);
924 op = AS_COMMAND_FLUSH_MEM;
926 op = AS_COMMAND_FLUSH_PT;
928 err = kbase_mmu_hw_do_operation(kbdev,
929 &kbdev->as[kctx->as_nr],
930 kctx, vpfn, nr, op, 0);
932 #if KBASE_GPU_RESET_EN
934 /* Flush failed to complete, assume the
935 * GPU has hung and perform a reset to
937 dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover\n");
939 if (kbase_prepare_to_reset_gpu(kbdev))
940 kbase_reset_gpu(kbdev);
942 #endif /* KBASE_GPU_RESET_EN */
944 mutex_unlock(&kbdev->mmu_hw_mutex);
945 /* AS transaction end */
947 #ifndef CONFIG_MALI_NO_MALI
949 * The transaction lock must be dropped before here
950 * as kbase_wait_write_flush could take it if
951 * the GPU was powered down (static analysis doesn't
952 * know this can't happen).
954 drain_pending |= (!err) && sync &&
955 kbase_hw_has_issue(kctx->kbdev,
958 /* Wait for GPU to flush write buffer */
959 kbase_wait_write_flush(kctx);
961 #endif /* !CONFIG_MALI_NO_MALI */
963 kbase_pm_context_idle(kbdev);
965 kbasep_js_runpool_release_ctx(kbdev, kctx);
969 void kbase_mmu_update(struct kbase_context *kctx)
971 lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
972 lockdep_assert_held(&kctx->kbdev->mmu_hw_mutex);
973 /* ASSERT that the context has a valid as_nr, which is only the case
974 * when it's scheduled in.
976 * as_nr won't change because the caller has the hwaccess_lock */
977 KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
979 kctx->kbdev->mmu_mode->update(kctx);
981 KBASE_EXPORT_TEST_API(kbase_mmu_update);
983 void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr)
985 lockdep_assert_held(&kbdev->hwaccess_lock);
986 lockdep_assert_held(&kbdev->mmu_hw_mutex);
988 kbdev->mmu_mode->disable_as(kbdev, as_nr);
991 void kbase_mmu_disable(struct kbase_context *kctx)
993 /* ASSERT that the context has a valid as_nr, which is only the case
994 * when it's scheduled in.
996 * as_nr won't change because the caller has the hwaccess_lock */
997 KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
999 lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
1002 * The address space is being disabled, drain all knowledge of it out
1003 * from the caches as pages and page tables might be freed after this.
1005 * The job scheduler code will already be holding the locks and context
1006 * so just do the flush.
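* The flush below covers the entire GPU virtual range (VPFN 0, ~0 pages)
* and is requested as synchronous, so the drain completes before the
* address space is disabled.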
1008 kbase_mmu_flush_invalidate_noretain(kctx, 0, ~0, true);
1010 kctx->kbdev->mmu_mode->disable_as(kctx->kbdev, kctx->as_nr);
1012 KBASE_EXPORT_TEST_API(kbase_mmu_disable);
1015 * We actually only discard the ATEs, and not the page table
1016 * pages. There is a potential DoS here, as we'll leak memory by
1017 * having PTEs that are potentially unused. Fixing this will require
1018 * physical page accounting, so that MMU pages are part of the process allocation.
1020 * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is
1021 * currently scheduled into the runpool, and so potentially uses a lot of locks.
1022 * These locks must be taken in the correct order with respect to others
1023 * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more
1026 int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr)
1030 struct kbase_device *kbdev;
1031 size_t requested_nr = nr;
1032 struct kbase_mmu_mode const *mmu_mode;
1035 KBASE_DEBUG_ASSERT(NULL != kctx);
1036 beenthere(kctx, "kctx %p vpfn %lx nr %zd", (void *)kctx, (unsigned long)vpfn, nr);
1039 /* early out if nothing to do */
1043 mutex_lock(&kctx->mmu_lock);
1045 kbdev = kctx->kbdev;
1046 mmu_mode = kbdev->mmu_mode;
1050 unsigned int index = vpfn & 0x1FF;
1051 unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
1057 err = mmu_get_bottom_pgd(kctx, vpfn, &pgd);
1059 dev_warn(kbdev->dev, "kbase_mmu_teardown_pages: mmu_get_bottom_pgd failure\n");
1064 p = pfn_to_page(PFN_DOWN(pgd));
1067 dev_warn(kbdev->dev, "kbase_mmu_teardown_pages: kmap failure\n");
1072 for (i = 0; i < count; i++)
1073 mmu_mode->entry_invalidate(&pgd_page[index + i]);
1078 kbase_mmu_sync_pgd(kctx->kbdev,
1079 kbase_dma_addr(p) + (index * sizeof(u64)),
1080 count * sizeof(u64));
1085 mutex_unlock(&kctx->mmu_lock);
1086 kbase_mmu_flush_invalidate(kctx, vpfn, requested_nr, true);
1090 mutex_unlock(&kctx->mmu_lock);
1091 kbase_mmu_flush_invalidate(kctx, vpfn, requested_nr, true);
1095 KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages);
1098 * Update the entries for the specified number of pages pointed to by 'phys' at GPU PFN 'vpfn'.
1099 * This call is triggered in response to changes of the memory attributes
1101 * @pre : The caller is responsible for validating the memory attributes
1103 * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is
1104 * currently scheduled into the runpool, and so potentially uses a lot of locks.
1105 * These locks must be taken in the correct order with respect to others
1106 * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more
1109 int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, phys_addr_t *phys, size_t nr, unsigned long flags)
1113 size_t requested_nr = nr;
1114 struct kbase_mmu_mode const *mmu_mode;
1117 KBASE_DEBUG_ASSERT(NULL != kctx);
1118 KBASE_DEBUG_ASSERT(0 != vpfn);
1119 KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
1121 /* Early out if there is nothing to do */
1125 mutex_lock(&kctx->mmu_lock);
1127 mmu_mode = kctx->kbdev->mmu_mode;
1129 dev_warn(kctx->kbdev->dev, "kbase_mmu_update_pages(): updating page share flags on GPU PFN 0x%llx from phys %p, %zu pages",
1134 unsigned int index = vpfn & 0x1FF;
1135 size_t count = KBASE_MMU_PAGE_ENTRIES - index;
1142 err = mmu_get_bottom_pgd(kctx, vpfn, &pgd);
1145 /* Fill the memory pool with enough pages for
1146 * the page walk to succeed
1148 mutex_unlock(&kctx->mmu_lock);
1149 err = kbase_mem_pool_grow(&kctx->mem_pool,
1150 MIDGARD_MMU_BOTTOMLEVEL);
1151 mutex_lock(&kctx->mmu_lock);
1154 dev_warn(kctx->kbdev->dev, "mmu_get_bottom_pgd failure\n");
1158 p = pfn_to_page(PFN_DOWN(pgd));
1161 dev_warn(kctx->kbdev->dev, "kmap failure\n");
1166 for (i = 0; i < count; i++)
1167 mmu_mode->entry_set_ate(&pgd_page[index + i], phys[i],
1174 kbase_mmu_sync_pgd(kctx->kbdev,
1175 kbase_dma_addr(p) + (index * sizeof(u64)),
1176 count * sizeof(u64));
1178 kunmap(pfn_to_page(PFN_DOWN(pgd)));
1181 mutex_unlock(&kctx->mmu_lock);
1182 kbase_mmu_flush_invalidate(kctx, vpfn, requested_nr, true);
1186 mutex_unlock(&kctx->mmu_lock);
1187 kbase_mmu_flush_invalidate(kctx, vpfn, requested_nr, true);
1191 /* This is a debug feature only */
1192 static void mmu_check_unused(struct kbase_context *kctx, phys_addr_t pgd)
1197 page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd)));
1198 /* kmap_atomic should NEVER fail. */
1199 KBASE_DEBUG_ASSERT(NULL != page);
1201 for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
1202 if (kctx->kbdev->mmu_mode->ate_is_valid(page[i]))
1203 beenthere(kctx, "live pte %016lx", (unsigned long)page[i]);
1205 kunmap_atomic(page);
1208 static void mmu_teardown_level(struct kbase_context *kctx, phys_addr_t pgd, int level, int zap, u64 *pgd_page_buffer)
1210 phys_addr_t target_pgd;
1213 struct kbase_mmu_mode const *mmu_mode;
1215 KBASE_DEBUG_ASSERT(NULL != kctx);
1216 lockdep_assert_held(&kctx->mmu_lock);
1218 pgd_page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd)));
1219 /* kmap_atomic should NEVER fail. */
1220 KBASE_DEBUG_ASSERT(NULL != pgd_page);
1221 /* Copy the page to our preallocated buffer so that we can minimize kmap_atomic usage */
1222 memcpy(pgd_page_buffer, pgd_page, PAGE_SIZE);
1223 kunmap_atomic(pgd_page);
1224 pgd_page = pgd_page_buffer;
1226 mmu_mode = kctx->kbdev->mmu_mode;
1228 for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
1229 target_pgd = mmu_mode->pte_to_phy_addr(pgd_page[i]);
1232 if (level < (MIDGARD_MMU_BOTTOMLEVEL - 1)) {
1233 mmu_teardown_level(kctx, target_pgd, level + 1, zap, pgd_page_buffer + (PAGE_SIZE / sizeof(u64)));
1236 * So target_pgd is a level-3 page table.
1237 * As a leaf, it is safe to free it.
1238 * Unless we have live pages attached to it!
1240 mmu_check_unused(kctx, target_pgd);
1243 beenthere(kctx, "pte %lx level %d", (unsigned long)target_pgd, level + 1);
1245 struct page *p = phys_to_page(target_pgd);
1247 kbase_mem_pool_free(&kctx->mem_pool, p, true);
1248 kbase_process_page_usage_dec(kctx, 1);
1249 kbase_atomic_sub_pages(1, &kctx->used_pages);
1250 kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages);
1256 int kbase_mmu_init(struct kbase_context *kctx)
1258 KBASE_DEBUG_ASSERT(NULL != kctx);
1259 KBASE_DEBUG_ASSERT(NULL == kctx->mmu_teardown_pages);
1261 mutex_init(&kctx->mmu_lock);
1263 /* Preallocate one page per MMU level (four levels) for mmu_teardown_level to use */
1264 kctx->mmu_teardown_pages = kmalloc(PAGE_SIZE * 4, GFP_KERNEL);
1266 if (NULL == kctx->mmu_teardown_pages)
1272 void kbase_mmu_term(struct kbase_context *kctx)
1274 KBASE_DEBUG_ASSERT(NULL != kctx);
1275 KBASE_DEBUG_ASSERT(NULL != kctx->mmu_teardown_pages);
1277 kfree(kctx->mmu_teardown_pages);
1278 kctx->mmu_teardown_pages = NULL;
1281 void kbase_mmu_free_pgd(struct kbase_context *kctx)
1283 int new_page_count __maybe_unused;
1285 KBASE_DEBUG_ASSERT(NULL != kctx);
1286 KBASE_DEBUG_ASSERT(NULL != kctx->mmu_teardown_pages);
1288 mutex_lock(&kctx->mmu_lock);
1289 mmu_teardown_level(kctx, kctx->pgd, MIDGARD_MMU_TOPLEVEL, 1, kctx->mmu_teardown_pages);
1290 mutex_unlock(&kctx->mmu_lock);
1292 beenthere(kctx, "pgd %lx", (unsigned long)kctx->pgd);
1293 kbase_mem_pool_free(&kctx->mem_pool, phys_to_page(kctx->pgd), true);
1294 kbase_process_page_usage_dec(kctx, 1);
1295 new_page_count = kbase_atomic_sub_pages(1, &kctx->used_pages);
1296 kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages);
1298 kbase_tlstream_aux_pagesalloc(
1300 (u64)new_page_count);
1303 KBASE_EXPORT_TEST_API(kbase_mmu_free_pgd);
1305 static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, int level, char ** const buffer, size_t *size_left)
1307 phys_addr_t target_pgd;
1310 size_t size = KBASE_MMU_PAGE_ENTRIES * sizeof(u64) + sizeof(u64);
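/* Each dump record is one u64 header (the PGD address with the level encoded
 * in its low bits) followed by the 512 entries of that table. */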
1312 struct kbase_mmu_mode const *mmu_mode;
1314 KBASE_DEBUG_ASSERT(NULL != kctx);
1315 lockdep_assert_held(&kctx->mmu_lock);
1317 mmu_mode = kctx->kbdev->mmu_mode;
1319 pgd_page = kmap(pfn_to_page(PFN_DOWN(pgd)));
1321 dev_warn(kctx->kbdev->dev, "kbasep_mmu_dump_level: kmap failure\n");
1325 if (*size_left >= size) {
1326 /* A modified physical address that contains the page table level */
1327 u64 m_pgd = pgd | level;
1329 /* Put the modified physical address in the output buffer */
1330 memcpy(*buffer, &m_pgd, sizeof(m_pgd));
1331 *buffer += sizeof(m_pgd);
1333 /* Followed by the page table itself */
1334 memcpy(*buffer, pgd_page, sizeof(u64) * KBASE_MMU_PAGE_ENTRIES);
1335 *buffer += sizeof(u64) * KBASE_MMU_PAGE_ENTRIES;
1340 if (level < MIDGARD_MMU_BOTTOMLEVEL) {
1341 for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
1342 if (mmu_mode->pte_is_valid(pgd_page[i])) {
1343 target_pgd = mmu_mode->pte_to_phy_addr(
1346 dump_size = kbasep_mmu_dump_level(kctx,
1347 target_pgd, level + 1,
1350 kunmap(pfn_to_page(PFN_DOWN(pgd)));
1358 kunmap(pfn_to_page(PFN_DOWN(pgd)));
1363 void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages)
1368 KBASE_DEBUG_ASSERT(kctx);
1370 if (0 == nr_pages) {
1371 /* can't dump in a 0 sized buffer, early out */
1375 size_left = nr_pages * PAGE_SIZE;
1377 KBASE_DEBUG_ASSERT(0 != size_left);
1378 kaddr = vmalloc_user(size_left);
1380 mutex_lock(&kctx->mmu_lock);
1383 u64 end_marker = 0xFFULL;
1385 char *mmu_dump_buffer;
1389 buffer = (char *)kaddr;
1390 mmu_dump_buffer = buffer;
1392 if (kctx->api_version >= KBASE_API_VERSION(8, 4)) {
1393 struct kbase_mmu_setup as_setup;
1395 kctx->kbdev->mmu_mode->get_as_setup(kctx, &as_setup);
1396 config[0] = as_setup.transtab;
1397 config[1] = as_setup.memattr;
1398 config[2] = as_setup.transcfg;
1399 memcpy(buffer, &config, sizeof(config));
1400 mmu_dump_buffer += sizeof(config);
1401 size_left -= sizeof(config);
1406 size = kbasep_mmu_dump_level(kctx,
1408 MIDGARD_MMU_TOPLEVEL,
1415 /* Add on the size for the end marker */
1416 size += sizeof(u64);
1417 /* Add on the size for the config */
1418 if (kctx->api_version >= KBASE_API_VERSION(8, 4))
1419 size += sizeof(config);
1422 if (size > nr_pages * PAGE_SIZE || size_left < sizeof(u64)) {
1423 /* The buffer isn't big enough - free the memory and return failure */
1427 /* Add the end marker */
1428 memcpy(mmu_dump_buffer, &end_marker, sizeof(u64));
1431 mutex_unlock(&kctx->mmu_lock);
1436 mutex_unlock(&kctx->mmu_lock);
1439 KBASE_EXPORT_TEST_API(kbase_mmu_dump);
1441 void bus_fault_worker(struct work_struct *data)
1443 struct kbase_as *faulting_as;
1445 struct kbase_context *kctx;
1446 struct kbase_device *kbdev;
1447 #if KBASE_GPU_RESET_EN
1448 bool reset_status = false;
1449 #endif /* KBASE_GPU_RESET_EN */
1451 faulting_as = container_of(data, struct kbase_as, work_busfault);
1453 as_no = faulting_as->number;
1455 kbdev = container_of(faulting_as, struct kbase_device, as[as_no]);
1457 /* Grab the context that was already refcounted in kbase_mmu_interrupt().
1458 * Therefore, it cannot be scheduled out of this AS until we explicitly release it
1460 kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as_no);
1461 if (WARN_ON(!kctx)) {
1462 atomic_dec(&kbdev->faults_pending);
1466 if (unlikely(faulting_as->protected_mode))
1468 kbase_mmu_report_fault_and_kill(kctx, faulting_as,
1469 "Permission failure");
1470 kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
1471 KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
1472 kbasep_js_runpool_release_ctx(kbdev, kctx);
1473 atomic_dec(&kbdev->faults_pending);
1478 #if KBASE_GPU_RESET_EN
1479 if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) {
1480 /* Due to H/W issue 8245 we need to reset the GPU after using UNMAPPED mode.
1481 * We start the reset before switching to UNMAPPED to ensure that unrelated jobs
1482 * are evicted from the GPU before the switch.
1484 dev_err(kbdev->dev, "GPU bus error occurred. For this GPU version we now soft-reset as part of bus error recovery\n");
1485 reset_status = kbase_prepare_to_reset_gpu(kbdev);
1487 #endif /* KBASE_GPU_RESET_EN */
1488 /* NOTE: If GPU already powered off for suspend, we don't need to switch to unmapped */
1489 if (!kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
1490 unsigned long flags;
1492 /* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */
1493 /* AS transaction begin */
1494 mutex_lock(&kbdev->mmu_hw_mutex);
1496 /* Set the MMU into unmapped mode */
1497 spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
1498 kbase_mmu_disable(kctx);
1499 spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
1501 mutex_unlock(&kbdev->mmu_hw_mutex);
1502 /* AS transaction end */
1504 kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
1505 KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
1506 kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
1507 KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
1509 kbase_pm_context_idle(kbdev);
1512 #if KBASE_GPU_RESET_EN
1513 if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245) && reset_status)
1514 kbase_reset_gpu(kbdev);
1515 #endif /* KBASE_GPU_RESET_EN */
1517 kbasep_js_runpool_release_ctx(kbdev, kctx);
1519 atomic_dec(&kbdev->faults_pending);
1522 const char *kbase_exception_name(struct kbase_device *kbdev, u32 exception_code)
1526 switch (exception_code) {
1527 /* Non-Fault Status code */
1529 e = "NOT_STARTED/IDLE/OK";
1546 /* Job exceptions */
1548 e = "JOB_CONFIG_FAULT";
1551 e = "JOB_POWER_FAULT";
1554 e = "JOB_READ_FAULT";
1557 e = "JOB_WRITE_FAULT";
1560 e = "JOB_AFFINITY_FAULT";
1563 e = "JOB_BUS_FAULT";
1566 e = "INSTR_INVALID_PC";
1569 e = "INSTR_INVALID_ENC";
1572 e = "INSTR_TYPE_MISMATCH";
1575 e = "INSTR_OPERAND_FAULT";
1578 e = "INSTR_TLS_FAULT";
1581 e = "INSTR_BARRIER_FAULT";
1584 e = "INSTR_ALIGN_FAULT";
1587 e = "DATA_INVALID_FAULT";
1590 e = "TILE_RANGE_FAULT";
1593 e = "ADDR_RANGE_FAULT";
1596 e = "OUT_OF_MEMORY";
1598 /* GPU exceptions */
1600 e = "DELAYED_BUS_FAULT";
1603 e = "SHAREABILITY_FAULT";
1605 /* MMU exceptions */
1614 e = "TRANSLATION_FAULT";
1617 #ifdef CONFIG_MALI_GPU_MMU_AARCH64
1625 #endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
1626 e = "PERMISSION_FAULT";
1636 e = "TRANSTAB_BUS_FAULT";
1639 #ifdef CONFIG_MALI_GPU_MMU_AARCH64
1647 #endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
1650 #ifdef CONFIG_MALI_GPU_MMU_AARCH64
1659 e = "ADDRESS_SIZE_FAULT";
1669 e = "MEMORY_ATTRIBUTES_FAULT";
1670 #endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
1680 static const char *access_type_name(struct kbase_device *kbdev,
1683 switch (fault_status & AS_FAULTSTATUS_ACCESS_TYPE_MASK) {
1684 case AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC:
1685 #ifdef CONFIG_MALI_GPU_MMU_AARCH64
1689 #endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
1690 case AS_FAULTSTATUS_ACCESS_TYPE_READ:
1692 case AS_FAULTSTATUS_ACCESS_TYPE_WRITE:
1694 case AS_FAULTSTATUS_ACCESS_TYPE_EX:
1703 * The caller must ensure it has retained the ctx to prevent it from being scheduled out whilst it's being worked on.
1705 static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx,
1706 struct kbase_as *as, const char *reason_str)
1708 unsigned long flags;
1713 struct kbase_device *kbdev;
1714 struct kbasep_js_device_data *js_devdata;
1716 #if KBASE_GPU_RESET_EN
1717 bool reset_status = false;
1721 kbdev = kctx->kbdev;
1722 js_devdata = &kbdev->js_data;
1724 /* ASSERT that the context won't leave the runpool */
1725 KBASE_DEBUG_ASSERT(kbasep_js_debug_check_ctx_refcount(kbdev, kctx) > 0);
1727 /* decode the fault status */
1728 exception_type = as->fault_status & 0xFF;
1729 access_type = (as->fault_status >> 8) & 0x3;
1730 source_id = (as->fault_status >> 16);
1732 /* terminal fault, print info about the fault */
1734 "Unhandled Page fault in AS%d at VA 0x%016llX\n"
1736 "raw fault status: 0x%X\n"
1737 "decoded fault status: %s\n"
1738 "exception type 0x%X: %s\n"
1739 "access type 0x%X: %s\n"
1742 as_no, as->fault_addr,
1745 (as->fault_status & (1 << 10) ? "DECODER FAULT" : "SLAVE FAULT"),
1746 exception_type, kbase_exception_name(kbdev, exception_type),
1747 access_type, access_type_name(kbdev, as->fault_status),
1751 /* hardware counters dump fault handling */
1752 if ((kbdev->hwcnt.kctx) && (kbdev->hwcnt.kctx->as_nr == as_no) &&
1753 (kbdev->hwcnt.backend.state ==
1754 KBASE_INSTR_STATE_DUMPING)) {
1755 unsigned int num_core_groups = kbdev->gpu_props.num_core_groups;
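/* The counter dump buffer spans 2048 bytes per core group; if the fault
 * address falls inside it, mark the dump as faulted. */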
1757 if ((as->fault_addr >= kbdev->hwcnt.addr) &&
1758 (as->fault_addr < (kbdev->hwcnt.addr +
1759 (num_core_groups * 2048))))
1760 kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_FAULT;
1763 /* Stop the kctx from submitting more jobs and cause it to be scheduled
1764 * out/rescheduled - this will occur on releasing the context's refcount */
1765 spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
1766 kbasep_js_clear_submit_allowed(js_devdata, kctx);
1767 spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
1769 /* Kill any running jobs from the context. Submit is disallowed, so no more jobs from this
1770 * context can appear in the job slots from this point on */
1771 kbase_backend_jm_kill_jobs_from_kctx(kctx);
1772 /* AS transaction begin */
1773 mutex_lock(&kbdev->mmu_hw_mutex);
1774 #if KBASE_GPU_RESET_EN
1775 if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) {
1776 /* Due to H/W issue 8245 we need to reset the GPU after using UNMAPPED mode.
1777 * We start the reset before switching to UNMAPPED to ensure that unrelated jobs
1778 * are evicted from the GPU before the switch.
1780 dev_err(kbdev->dev, "Unhandled page fault. For this GPU version we now soft-reset the GPU as part of page fault recovery.");
1781 reset_status = kbase_prepare_to_reset_gpu(kbdev);
1783 #endif /* KBASE_GPU_RESET_EN */
1784 /* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */
1785 spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
1786 kbase_mmu_disable(kctx);
1787 spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
1789 mutex_unlock(&kbdev->mmu_hw_mutex);
1790 /* AS transaction end */
1791 /* Clear down the fault */
1792 kbase_mmu_hw_clear_fault(kbdev, as, kctx,
1793 KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
1794 kbase_mmu_hw_enable_fault(kbdev, as, kctx,
1795 KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
1797 #if KBASE_GPU_RESET_EN
1798 if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245) && reset_status)
1799 kbase_reset_gpu(kbdev);
1800 #endif /* KBASE_GPU_RESET_EN */
1803 void kbasep_as_do_poke(struct work_struct *work)
1805 struct kbase_as *as;
1806 struct kbase_device *kbdev;
1807 struct kbase_context *kctx;
1808 unsigned long flags;
1810 KBASE_DEBUG_ASSERT(work);
1811 as = container_of(work, struct kbase_as, poke_work);
1812 kbdev = container_of(as, struct kbase_device, as[as->number]);
1813 KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT);
1815 /* GPU power will already be active by virtue of the caller holding a JS
1816 * reference on the address space, and will not release it until this worker
1819 /* Further to the comment above, we know that while this function is running
1820 * the AS will not be released, because this workqueue is flushed before the
1821 * atom is released (in kbase_as_poking_timer_release_atom)
1823 kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as->number);
1825 /* AS transaction begin */
1826 mutex_lock(&kbdev->mmu_hw_mutex);
1827 /* Force a uTLB invalidate */
1828 kbase_mmu_hw_do_operation(kbdev, as, kctx, 0, 0,
1829 AS_COMMAND_UNLOCK, 0);
1830 mutex_unlock(&kbdev->mmu_hw_mutex);
1831 /* AS transaction end */
1833 spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
1834 if (as->poke_refcount &&
1835 !(as->poke_state & KBASE_AS_POKE_STATE_KILLING_POKE)) {
1836 /* Only queue up the timer if we need it, and we're not trying to kill it */
1837 hrtimer_start(&as->poke_timer, HR_TIMER_DELAY_MSEC(5), HRTIMER_MODE_REL);
1839 spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
1842 enum hrtimer_restart kbasep_as_poke_timer_callback(struct hrtimer *timer)
1844 struct kbase_as *as;
1847 KBASE_DEBUG_ASSERT(NULL != timer);
1848 as = container_of(timer, struct kbase_as, poke_timer);
1849 KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT);
1851 queue_work_ret = queue_work(as->poke_wq, &as->poke_work);
1852 KBASE_DEBUG_ASSERT(queue_work_ret);
1853 return HRTIMER_NORESTART;
1857 * Retain the poking timer on an atom's context (if the atom hasn't already
1858 * done so), and start the timer (if it's not already started).
1860 * This must only be called on a context that's scheduled in, and an atom
1861 * that's running on the GPU.
1863 * The caller must hold hwaccess_lock
1865 * This can be called safely from atomic context
1867 void kbase_as_poking_timer_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom)
1869 struct kbase_as *as;
1871 KBASE_DEBUG_ASSERT(kbdev);
1872 KBASE_DEBUG_ASSERT(kctx);
1873 KBASE_DEBUG_ASSERT(katom);
1874 KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
1875 lockdep_assert_held(&kbdev->hwaccess_lock);
1882 /* It's safe to work on the as/as_nr without an explicit reference,
1883 * because the caller holds the hwaccess_lock, and the atom itself
1884 * was also running and had already taken a reference */
1885 as = &kbdev->as[kctx->as_nr];
1887 if (++(as->poke_refcount) == 1) {
1888 /* First refcount for poke needed: check if not already in flight */
1889 if (!as->poke_state) {
1890 /* need to start poking */
1891 as->poke_state |= KBASE_AS_POKE_STATE_IN_FLIGHT;
1892 queue_work(as->poke_wq, &as->poke_work);
1898 * If an atom holds a poking timer, release it and wait for it to finish
1900 * This must only be called on a context that's scheduled in, and an atom
1901 * that still has a JS reference on the context
1903 * This must \b not be called from atomic context, since it can sleep.
1905 void kbase_as_poking_timer_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom)
1907 struct kbase_as *as;
1908 unsigned long flags;
1910 KBASE_DEBUG_ASSERT(kbdev);
1911 KBASE_DEBUG_ASSERT(kctx);
1912 KBASE_DEBUG_ASSERT(katom);
1913 KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
1918 as = &kbdev->as[kctx->as_nr];
1920 spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
1921 KBASE_DEBUG_ASSERT(as->poke_refcount > 0);
1922 KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT);
1924 if (--(as->poke_refcount) == 0) {
1925 as->poke_state |= KBASE_AS_POKE_STATE_KILLING_POKE;
1926 spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
1928 hrtimer_cancel(&as->poke_timer);
1929 flush_workqueue(as->poke_wq);
1931 spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
1933 /* Re-check whether it's still needed */
1934 if (as->poke_refcount) {
1936 /* Poking still needed:
1937 * - Another retain will not be starting the timer or queueing work,
1938 * because it's still marked as in-flight
1939 * - The hrtimer has finished, and has not started a new timer or
1940 * queued work because it's been marked as killing
1942 * So whatever happens now, just queue the work again */
1943 as->poke_state &= ~((kbase_as_poke_state)KBASE_AS_POKE_STATE_KILLING_POKE);
1944 queue_work_ret = queue_work(as->poke_wq, &as->poke_work);
1945 KBASE_DEBUG_ASSERT(queue_work_ret);
1947 /* It isn't - so mark it as not in flight, and not killing */
1948 as->poke_state = 0u;
1950 /* The poke associated with the atom has now finished. If this is
1951 * also the last atom on the context, then we can guarantee no more
1952 * pokes (and thus no more poking register accesses) will occur on
1953 * the context until new atoms are run */
1956 spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
1961 void kbase_mmu_interrupt_process(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_as *as)
1963 struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
1965 lockdep_assert_held(&kbdev->hwaccess_lock);
1968 dev_warn(kbdev->dev, "%s in AS%d at 0x%016llx with no context present! Spurious IRQ or SW Design Error?\n",
1969 kbase_as_has_bus_fault(as) ? "Bus error" : "Page fault",
1970 as->number, as->fault_addr);
1972 /* Since no ctx was found, the MMU must be disabled. */
1973 WARN_ON(as->current_setup.transtab);
1975 if (kbase_as_has_bus_fault(as)) {
1976 kbase_mmu_hw_clear_fault(kbdev, as, kctx,
1977 KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
1978 kbase_mmu_hw_enable_fault(kbdev, as, kctx,
1979 KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
1980 } else if (kbase_as_has_page_fault(as)) {
1981 kbase_mmu_hw_clear_fault(kbdev, as, kctx,
1982 KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
1983 kbase_mmu_hw_enable_fault(kbdev, as, kctx,
1984 KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
1987 #if KBASE_GPU_RESET_EN
1988 if (kbase_as_has_bus_fault(as) &&
1989 kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) {
1992 * Reset the GPU, like in bus_fault_worker, in case an
1993 * earlier error hasn't been properly cleared by this
1996 dev_err(kbdev->dev, "GPU bus error occurred. For this GPU version we now soft-reset as part of bus error recovery\n");
1997 reset_status = kbase_prepare_to_reset_gpu_locked(kbdev);
1999 kbase_reset_gpu_locked(kbdev);
2001 #endif /* KBASE_GPU_RESET_EN */
2006 if (kbase_as_has_bus_fault(as)) {
2008 * hw counters dumping in progress, signal the
2009 * other thread that it failed
2011 if ((kbdev->hwcnt.kctx == kctx) &&
2012 (kbdev->hwcnt.backend.state ==
2013 KBASE_INSTR_STATE_DUMPING))
2014 kbdev->hwcnt.backend.state =
2015 KBASE_INSTR_STATE_FAULT;
2018 * Stop the kctx from submitting more jobs and cause it
2019 * to be scheduled out/rescheduled when all references
2020 * to it are released
2022 kbasep_js_clear_submit_allowed(js_devdata, kctx);
2024 #ifdef CONFIG_MALI_GPU_MMU_AARCH64
2025 dev_warn(kbdev->dev,
2026 "Bus error in AS%d at VA=0x%016llx, IPA=0x%016llx\n",
2027 as->number, as->fault_addr,
2028 as->fault_extra_addr);
2030 dev_warn(kbdev->dev, "Bus error in AS%d at 0x%016llx\n",
2031 as->number, as->fault_addr);
2032 #endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
2035 * We need to switch to UNMAPPED mode - but we do this in a
2036 * worker so that we can sleep
2038 kbdev->kbase_group_error++;
2039 KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&as->work_busfault));
2040 WARN_ON(work_pending(&as->work_busfault));
2041 queue_work(as->pf_wq, &as->work_busfault);
2042 atomic_inc(&kbdev->faults_pending);
2044 kbdev->kbase_group_error++;
2045 KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&as->work_pagefault));
2046 WARN_ON(work_pending(&as->work_pagefault));
2047 queue_work(as->pf_wq, &as->work_pagefault);
2048 atomic_inc(&kbdev->faults_pending);
2052 void kbase_flush_mmu_wqs(struct kbase_device *kbdev)
2056 for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
2057 struct kbase_as *as = &kbdev->as[i];
2059 flush_workqueue(as->pf_wq);