3 * (C) COPYRIGHT ARM Limited. All rights reserved.
5 * This program is free software and is provided to you under the terms of the
6 * GNU General Public License version 2 as published by the Free Software
7 * Foundation, and any use by you of this program is subject to the terms
10 * A copy of the licence is included with the program, and can also be obtained
11 * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
12 * Boston, MA 02110-1301, USA.
21 * @file mali_kbase_mmu.c
22 * Base kernel MMU management.
26 #include <mali_kbase.h>
27 #include <mali_midg_regmap.h>
28 #include <mali_kbase_gator.h>
29 #include <mali_kbase_debug.h>
31 #define beenthere(kctx, f, a...) KBASE_LOG(1, kctx->kbdev->dev, "%s:" f, __func__, ##a)
33 #include <mali_kbase_defs.h>
34 #include <mali_kbase_hw.h>
36 #define KBASE_MMU_PAGE_ENTRIES 512
40 * - PGD: Page Directory.
41 * - PTE: Page Table Entry. A 64-bit value pointing to the next
42 * level of translation.
43 * - ATE: Address Translation Entry. A 64-bit value pointing to
44 * a 4kB physical page.
47 static void kbase_mmu_report_fault_and_kill(kbase_context *kctx, kbase_as *as);
48 static u64 lock_region(kbase_device *kbdev, u64 pfn, size_t num_pages);
50 /* Helper function to assign page table entries, ensuring the use of
51 * strd, which is required on LPAE systems.
54 static inline void page_table_entry_set(kbase_device *kbdev, u64 *pte, u64 phy)
58 #elif defined(CONFIG_ARM)
61 * In order to prevent the compiler from keeping cached copies of memory, we have to
62 * explicitly say that we have updated memory.
64 * Note: We could move the data into R0 and R1 ourselves by specifying register
65 * variables that are explicitly assigned to particular registers, but the downside of this
66 * is that we would have to assume the CPU endianness. To avoid this we use ldrd to read the
67 * data from memory into R0 and R1, which respects the CPU endianness; we then use strd to
68 * make the 64-bit assignment to the page table entry.
72 asm volatile("ldrd r0, r1, [%[ptemp]]\n\t"
73 "strd r0, r1, [%[pte]]\n\t"
75 : [ptemp] "r" (&phy), [pte] "r" (pte), "m" (phy)
78 #error "64-bit atomic write must be implemented for your architecture"
82 static void ksync_kern_vrange_gpu(phys_addr_t paddr, void *vaddr, size_t size)
84 kbase_sync_to_memory(paddr, vaddr, size);
87 static size_t make_multiple(size_t minimum, size_t multiple)
89 size_t remainder = minimum % multiple;
93 return minimum + multiple - remainder;
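/*
 * Worked example (illustrative, not part of the original source): following the
 * arithmetic above, make_multiple(1, 8) == 8, make_multiple(9, 8) == 16 and
 * make_multiple(13, 8) == 16, i.e. 'minimum' is rounded up to the next multiple
 * of 'multiple' when it is not already one.
 */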
96 static void mmu_mask_reenable(kbase_device *kbdev, kbase_context *kctx, kbase_as *as)
100 spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
101 mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), kctx);
102 mask |= ((1UL << as->number) | (1UL << (MMU_REGS_BUS_ERROR_FLAG(as->number))));
103 kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), mask, kctx);
104 spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
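/*
 * Illustrative sketch (not part of the original source): assuming the IRQ bit
 * layout used by kbase_mmu_interrupt() below (page-fault bits 0..15, bus-fault
 * bits 16..31, so that MMU_REGS_BUS_ERROR_FLAG(n) evaluates to n + 16),
 * re-enabling AS 3 would OR bits 3 and 19 back into MMU_IRQ_MASK:
 *
 *     mask |= (1UL << 3) | (1UL << 19);
 */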
107 static void page_fault_worker(struct work_struct *data)
111 size_t fault_rel_pfn;
112 kbase_as *faulting_as;
116 kbase_va_region *region;
119 faulting_as = container_of(data, kbase_as, work_pagefault);
120 fault_pfn = faulting_as->fault_addr >> PAGE_SHIFT;
121 as_no = faulting_as->number;
123 kbdev = container_of(faulting_as, kbase_device, as[as_no]);
125 /* Grab the context that was already refcounted in kbase_mmu_interrupt().
126 * Therefore, it cannot be scheduled out of this AS until we explicitly release it
128 * NOTE: NULL can be returned here if we're gracefully handling a spurious interrupt */
129 kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as_no);
132 /* Only handle this if not already suspended */
133 if (!kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
134 /* Address space has no context, terminate the work */
137 /* AS transaction begin */
138 mutex_lock(&faulting_as->transaction_mutex);
139 reg = kbase_reg_read(kbdev, MMU_AS_REG(as_no, ASn_TRANSTAB_LO), NULL);
140 reg = (reg & (~(u32) MMU_TRANSTAB_ADRMODE_MASK)) | ASn_TRANSTAB_ADRMODE_UNMAPPED;
141 kbase_reg_write(kbdev, MMU_AS_REG(as_no, ASn_TRANSTAB_LO), reg, NULL);
142 kbase_reg_write(kbdev, MMU_AS_REG(as_no, ASn_COMMAND), ASn_COMMAND_UPDATE, NULL);
143 mutex_unlock(&faulting_as->transaction_mutex);
144 /* AS transaction end */
146 mmu_mask_reenable(kbdev, NULL, faulting_as);
147 kbase_pm_context_idle(kbdev);
152 KBASE_DEBUG_ASSERT(kctx->kbdev == kbdev);
154 kbase_gpu_vm_lock(kctx);
156 /* find the region object for this VA */
157 region = kbase_region_tracker_find_region_enclosing_address(kctx, faulting_as->fault_addr);
158 if (NULL == region || (GROWABLE_FLAGS_REQUIRED != (region->flags & GROWABLE_FLAGS_MASK))) {
159 kbase_gpu_vm_unlock(kctx);
160 /* failed to find the region or mismatch of the flags */
161 kbase_mmu_report_fault_and_kill(kctx, faulting_as);
165 if ((((faulting_as->fault_status & ASn_FAULTSTATUS_ACCESS_TYPE_MASK) == ASn_FAULTSTATUS_ACCESS_TYPE_READ) && !(region->flags & KBASE_REG_GPU_RD)) || (((faulting_as->fault_status & ASn_FAULTSTATUS_ACCESS_TYPE_MASK) == ASn_FAULTSTATUS_ACCESS_TYPE_WRITE) && !(region->flags & KBASE_REG_GPU_WR)) || (((faulting_as->fault_status & ASn_FAULTSTATUS_ACCESS_TYPE_MASK) == ASn_FAULTSTATUS_ACCESS_TYPE_EX) && (region->flags & KBASE_REG_GPU_NX))) {
166 dev_warn(kbdev->dev, "Access permissions don't match: region->flags=0x%lx", region->flags);
167 kbase_gpu_vm_unlock(kctx);
168 kbase_mmu_report_fault_and_kill(kctx, faulting_as);
172 /* find the size we need to grow it by */
173 /* we know the result fits in a size_t because kbase_region_tracker_find_region_enclosing_address
174 * validates that the fault_address is within a size_t of the start_pfn */
175 fault_rel_pfn = fault_pfn - region->start_pfn;
177 if (fault_rel_pfn < kbase_reg_current_backed_size(region)) {
178 dev_warn(kbdev->dev, "Page fault in allocated region of growable TMEM: Ignoring");
179 mmu_mask_reenable(kbdev, kctx, faulting_as);
180 kbase_gpu_vm_unlock(kctx);
184 new_pages = make_multiple(fault_rel_pfn - kbase_reg_current_backed_size(region) + 1, region->extent);
185 if (new_pages + kbase_reg_current_backed_size(region) > region->nr_pages) {
186 /* cap to max vsize */
187 new_pages = region->nr_pages - kbase_reg_current_backed_size(region);
190 if (0 == new_pages) {
191 /* Duplicate of a fault we've already handled, nothing to do */
192 mmu_mask_reenable(kbdev, kctx, faulting_as);
193 kbase_gpu_vm_unlock(kctx);
197 if (MALI_ERROR_NONE == kbase_alloc_phy_pages_helper(region->alloc, new_pages)) {
199 mali_addr64 lock_addr;
200 KBASE_DEBUG_ASSERT(kbase_reg_current_backed_size(region) <= region->nr_pages);
202 /* AS transaction begin */
203 mutex_lock(&faulting_as->transaction_mutex);
205 /* Lock the VA region we're about to update */
206 lock_addr = lock_region(kbdev, faulting_as->fault_addr >> PAGE_SHIFT, new_pages);
207 kbase_reg_write(kbdev, MMU_AS_REG(as_no, ASn_LOCKADDR_LO), lock_addr & 0xFFFFFFFFUL, kctx);
208 kbase_reg_write(kbdev, MMU_AS_REG(as_no, ASn_LOCKADDR_HI), lock_addr >> 32, kctx);
209 kbase_reg_write(kbdev, MMU_AS_REG(as_no, ASn_COMMAND), ASn_COMMAND_LOCK, kctx);
210 if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3285)) {
211 kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), (1UL << as_no), NULL);
212 kbase_reg_write(kbdev, MMU_AS_REG(as_no, ASn_COMMAND), ASn_COMMAND_LOCK, kctx);
215 /* set up the new pages */
216 err = kbase_mmu_insert_pages(kctx, region->start_pfn + kbase_reg_current_backed_size(region) - new_pages, &kbase_get_phy_pages(region)[kbase_reg_current_backed_size(region) - new_pages], new_pages, region->flags);
217 if (MALI_ERROR_NONE != err) {
218 /* failed to insert pages, handle as a normal PF */
219 mutex_unlock(&faulting_as->transaction_mutex);
220 kbase_gpu_vm_unlock(kctx);
221 kbase_free_phy_pages_helper(region->alloc, new_pages);
222 /* The locked VA region will be unlocked and the cache invalidated in here */
223 kbase_mmu_report_fault_and_kill(kctx, faulting_as);
226 #ifdef CONFIG_MALI_GATOR_SUPPORT
227 kbase_trace_mali_page_fault_insert_pages(as_no, new_pages);
228 #endif /* CONFIG_MALI_GATOR_SUPPORT */
230 /* flush L2 and unlock the VA (resumes the MMU) */
231 if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6367))
232 kbase_reg_write(kbdev, MMU_AS_REG(as_no, ASn_COMMAND), ASn_COMMAND_FLUSH, kctx);
234 kbase_reg_write(kbdev, MMU_AS_REG(as_no, ASn_COMMAND), ASn_COMMAND_FLUSH_PT, kctx);
236 /* wait for the flush to complete */
237 while (kbase_reg_read(kbdev, MMU_AS_REG(as_no, ASn_STATUS), kctx) & 1)
240 if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9630)) {
241 /* Issue an UNLOCK command to ensure that valid page tables are re-read by the GPU after an update.
242 Note that the FLUSH command should already perform all the necessary actions; however, the bus logs show
243 that if multiple page faults occur within an 8-page region the MMU does not always re-read the
244 updated page table entries for later faults, or reads them only partially, and it subsequently raises the
245 page fault IRQ for the same addresses. The UNLOCK ensures that the MMU cache is flushed, so the updates
246 can be re-read. As the region is now unlocked we need to issue 2 UNLOCK commands in order to flush the
247 MMU/uTLB, see PRLAM-8812.
249 kbase_reg_write(kctx->kbdev, MMU_AS_REG(kctx->as_nr, ASn_COMMAND), ASn_COMMAND_UNLOCK, kctx);
250 kbase_reg_write(kctx->kbdev, MMU_AS_REG(kctx->as_nr, ASn_COMMAND), ASn_COMMAND_UNLOCK, kctx);
253 mutex_unlock(&faulting_as->transaction_mutex);
254 /* AS transaction end */
256 /* reenable this in the mask */
257 mmu_mask_reenable(kbdev, kctx, faulting_as);
258 kbase_gpu_vm_unlock(kctx);
260 /* failed to extend, handle as a normal PF */
261 kbase_gpu_vm_unlock(kctx);
262 kbase_mmu_report_fault_and_kill(kctx, faulting_as);
266 /* By this point, the fault was handled in some way, so release the ctx refcount */
267 kbasep_js_runpool_release_ctx(kbdev, kctx);
270 phys_addr_t kbase_mmu_alloc_pgd(kbase_context *kctx)
276 KBASE_DEBUG_ASSERT(NULL != kctx);
277 kbase_atomic_add_pages(1, &kctx->used_pages);
278 kbase_atomic_add_pages(1, &kctx->kbdev->memdev.used_pages);
280 if (MALI_ERROR_NONE != kbase_mem_allocator_alloc(kctx->pgd_allocator, 1, &pgd))
283 page = kmap(pfn_to_page(PFN_DOWN(pgd)));
287 kbase_process_page_usage_inc(kctx, 1);
289 for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++)
290 page_table_entry_set( kctx->kbdev, &page[i], ENTRY_IS_INVAL );
292 /* Clean the full page */
293 ksync_kern_vrange_gpu(pgd, page, KBASE_MMU_PAGE_ENTRIES * sizeof(u64));
294 kunmap(pfn_to_page(PFN_DOWN(pgd)));
298 kbase_mem_allocator_free(kctx->pgd_allocator, 1, &pgd, MALI_FALSE);
300 kbase_atomic_sub_pages(1, &kctx->used_pages);
301 kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages);
306 KBASE_EXPORT_TEST_API(kbase_mmu_alloc_pgd)
308 static phys_addr_t mmu_pte_to_phy_addr(u64 entry)
313 return entry & ~0xFFF;
316 static u64 mmu_phyaddr_to_pte(phys_addr_t phy)
318 return (phy & ~0xFFF) | ENTRY_IS_PTE;
321 static u64 mmu_phyaddr_to_ate(phys_addr_t phy, u64 flags)
323 return (phy & ~0xFFF) | (flags & ENTRY_FLAGS_MASK) | ENTRY_IS_ATE;
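/*
 * Illustrative example (not part of the original source): the entry type lives
 * in the low bits and the low 12 bits are masked off again when recovering the
 * physical address, so for a page-table page at physical address 0x80001000:
 *
 *     u64 pte = mmu_phyaddr_to_pte(0x80001000);      // 0x80001000 | ENTRY_IS_PTE
 *     phys_addr_t back = mmu_pte_to_phy_addr(pte);   // 0x80001000 again
 */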
326 /* Given PGD PFN for level N, return PGD PFN for level N+1 */
327 static phys_addr_t mmu_get_next_pgd(kbase_context *kctx, phys_addr_t pgd, u64 vpfn, int level)
330 phys_addr_t target_pgd;
332 KBASE_DEBUG_ASSERT(pgd);
333 KBASE_DEBUG_ASSERT(NULL != kctx);
335 lockdep_assert_held(&kctx->reg_lock);
338 * Architecture spec defines level-0 as being the top-most.
339 * This is a bit unfortunate here, but we keep the same convention.
341 vpfn >>= (3 - level) * 9;
344 page = kmap(pfn_to_page(PFN_DOWN(pgd)));
346 dev_warn(kctx->kbdev->dev, "mmu_get_next_pgd: kmap failure\n");
350 target_pgd = mmu_pte_to_phy_addr(page[vpfn]);
353 target_pgd = kbase_mmu_alloc_pgd(kctx);
355 dev_warn(kctx->kbdev->dev, "mmu_get_next_pgd: kbase_mmu_alloc_pgd failure\n");
356 kunmap(pfn_to_page(PFN_DOWN(pgd)));
360 page_table_entry_set( kctx->kbdev, &page[vpfn], mmu_phyaddr_to_pte(target_pgd) );
362 ksync_kern_vrange_gpu(pgd + (vpfn * sizeof(u64)), page + vpfn, sizeof(u64));
363 /* Rely on the caller to update the address space flags. */
366 kunmap(pfn_to_page(PFN_DOWN(pgd)));
370 static phys_addr_t mmu_get_bottom_pgd(kbase_context *kctx, u64 vpfn)
377 for (l = MIDGARD_MMU_TOPLEVEL; l < 3; l++) {
378 pgd = mmu_get_next_pgd(kctx, pgd, vpfn, l);
379 /* Handle failure condition */
381 dev_warn(kctx->kbdev->dev, "mmu_get_bottom_pgd: mmu_get_next_pgd failure\n");
389 static phys_addr_t mmu_insert_pages_recover_get_next_pgd(kbase_context *kctx, phys_addr_t pgd, u64 vpfn, int level)
392 phys_addr_t target_pgd;
394 KBASE_DEBUG_ASSERT(pgd);
395 KBASE_DEBUG_ASSERT(NULL != kctx);
397 lockdep_assert_held(&kctx->reg_lock);
400 * Architecture spec defines level-0 as being the top-most.
401 * This is a bit unfortunate here, but we keep the same convention.
403 vpfn >>= (3 - level) * 9;
406 page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd)));
407 /* kmap_atomic should NEVER fail */
408 KBASE_DEBUG_ASSERT(NULL != page);
410 target_pgd = mmu_pte_to_phy_addr(page[vpfn]);
411 /* As we are recovering from what has already been set up, we should have a target_pgd */
412 KBASE_DEBUG_ASSERT(0 != target_pgd);
418 static phys_addr_t mmu_insert_pages_recover_get_bottom_pgd(kbase_context *kctx, u64 vpfn)
425 for (l = MIDGARD_MMU_TOPLEVEL; l < 3; l++) {
426 pgd = mmu_insert_pages_recover_get_next_pgd(kctx, pgd, vpfn, l);
427 /* Should never fail */
428 KBASE_DEBUG_ASSERT(0 != pgd);
434 static void mmu_insert_pages_failure_recovery(kbase_context *kctx, u64 vpfn,
440 KBASE_DEBUG_ASSERT(NULL != kctx);
441 KBASE_DEBUG_ASSERT(0 != vpfn);
442 /* 64-bit address range is the max */
443 KBASE_DEBUG_ASSERT(vpfn <= (UINT64_MAX / PAGE_SIZE));
445 lockdep_assert_held(&kctx->reg_lock);
449 unsigned int index = vpfn & 0x1FF;
450 unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
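/*
 * Illustrative example (not part of the original source): with
 * KBASE_MMU_PAGE_ENTRIES == 512, a vpfn of 0x3FE gives index == 0x1FE and
 * count == 2, so only the last two entries of the current bottom-level table
 * fall in this iteration.
 */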
455 pgd = mmu_insert_pages_recover_get_bottom_pgd(kctx, vpfn);
456 KBASE_DEBUG_ASSERT(0 != pgd);
458 pgd_page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd)));
459 KBASE_DEBUG_ASSERT(NULL != pgd_page);
461 /* Invalidate the entries we added */
462 for (i = 0; i < count; i++)
463 page_table_entry_set(kctx->kbdev, &pgd_page[index + i],
469 ksync_kern_vrange_gpu(pgd + (index * sizeof(u64)),
470 pgd_page + index, count * sizeof(u64));
472 kunmap_atomic(pgd_page);
477 * Map KBASE_REG flags to MMU flags
479 static u64 kbase_mmu_get_mmu_flags(unsigned long flags)
483 /* store mem_attr index in bits 4:2 (the macro used ensures the value is already only 3 bits) */
484 mmu_flags = KBASE_REG_MEMATTR_VALUE(flags) << 2;
486 /* write perm if requested */
487 mmu_flags |= (flags & KBASE_REG_GPU_WR) ? ENTRY_WR_BIT : 0;
488 /* read perm if requested */
489 mmu_flags |= (flags & KBASE_REG_GPU_RD) ? ENTRY_RD_BIT : 0;
490 /* nx if requested */
491 mmu_flags |= (flags & KBASE_REG_GPU_NX) ? ENTRY_NX_BIT : 0;
493 if (flags & KBASE_REG_SHARE_BOTH) {
494 /* inner and outer shareable */
495 mmu_flags |= SHARE_BOTH_BITS;
496 } else if (flags & KBASE_REG_SHARE_IN) {
497 /* inner shareable coherency */
498 mmu_flags |= SHARE_INNER_BITS;
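/*
 * Sketch of how these flags are consumed (illustrative, mirroring the insert
 * paths below): a caller builds a leaf entry by combining the flag bits with a
 * physical address, e.g.
 *
 *     page_table_entry_set(kctx->kbdev, &pgd_page[ofs],
 *                          mmu_phyaddr_to_ate(phys[i], kbase_mmu_get_mmu_flags(flags)));
 */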
505 * Map the single page 'phys' 'nr' of times, starting at GPU PFN 'vpfn'
507 mali_error kbase_mmu_insert_single_page(kbase_context *kctx, u64 vpfn,
508 phys_addr_t phys, size_t nr,
514 /* In case the insert_single_page only partially completes we need to be
516 mali_bool recover_required = MALI_FALSE;
517 u64 recover_vpfn = vpfn;
518 size_t recover_count = 0;
520 KBASE_DEBUG_ASSERT(NULL != kctx);
521 KBASE_DEBUG_ASSERT(0 != vpfn);
522 /* 64-bit address range is the max */
523 KBASE_DEBUG_ASSERT(vpfn <= (UINT64_MAX / PAGE_SIZE));
525 lockdep_assert_held(&kctx->reg_lock);
527 /* the one entry we'll populate everywhere */
528 pte_entry = mmu_phyaddr_to_ate(phys, kbase_mmu_get_mmu_flags(flags));
532 unsigned int index = vpfn & 0x1FF;
533 unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
539 * Repeatedly calling mmu_get_bottom_pgd() is clearly
540 * suboptimal. We don't have to re-parse the whole tree
541 * each time (just cache the l0-l2 sequence).
542 * On the other hand, it's only a gain when we map more than
543 * 256 pages at once (on average). Do we really care?
545 pgd = mmu_get_bottom_pgd(kctx, vpfn);
547 dev_warn(kctx->kbdev->dev,
548 "kbase_mmu_insert_pages: "
549 "mmu_get_bottom_pgd failure\n");
550 if (recover_required) {
551 /* Invalidate the pages we have partially
553 mmu_insert_pages_failure_recovery(kctx,
557 return MALI_ERROR_FUNCTION_FAILED;
560 pgd_page = kmap(pfn_to_page(PFN_DOWN(pgd)));
562 dev_warn(kctx->kbdev->dev,
563 "kbase_mmu_insert_pages: "
565 if (recover_required) {
566 /* Invalidate the pages we have partially
568 mmu_insert_pages_failure_recovery(kctx,
572 return MALI_ERROR_OUT_OF_MEMORY;
575 for (i = 0; i < count; i++) {
576 unsigned int ofs = index + i;
577 KBASE_DEBUG_ASSERT(0 == (pgd_page[ofs] & 1UL));
578 page_table_entry_set(kctx->kbdev, &pgd_page[ofs],
585 ksync_kern_vrange_gpu(pgd + (index * sizeof(u64)),
586 pgd_page + index, count * sizeof(u64));
588 kunmap(pfn_to_page(PFN_DOWN(pgd)));
589 /* We have started modifying the page table.
590 * If further pages need inserting and fail we need to undo what
591 * has already taken place */
592 recover_required = MALI_TRUE;
593 recover_count += count;
595 return MALI_ERROR_NONE;
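/*
 * Usage sketch (illustrative; 'scratch_phys' and 'nr_pages' are hypothetical
 * names, not part of the original source): with kctx->reg_lock held, a caller
 * can back a GPU VA range with repeated mappings of one physical page:
 *
 *     if (MALI_ERROR_NONE != kbase_mmu_insert_single_page(kctx, vpfn,
 *                                                         scratch_phys,
 *                                                         nr_pages, flags))
 *         return MALI_ERROR_FUNCTION_FAILED;
 */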
599 * Map 'nr' pages pointed to by 'phys' at GPU PFN 'vpfn'
601 mali_error kbase_mmu_insert_pages(kbase_context *kctx, u64 vpfn,
602 phys_addr_t *phys, size_t nr,
608 /* In case the insert_pages only partially completes we need to be able
610 mali_bool recover_required = MALI_FALSE;
611 u64 recover_vpfn = vpfn;
612 size_t recover_count = 0;
614 KBASE_DEBUG_ASSERT(NULL != kctx);
615 KBASE_DEBUG_ASSERT(0 != vpfn);
616 /* 64-bit address range is the max */
617 KBASE_DEBUG_ASSERT(vpfn <= (UINT64_MAX / PAGE_SIZE));
619 lockdep_assert_held(&kctx->reg_lock);
621 mmu_flags = kbase_mmu_get_mmu_flags(flags);
625 unsigned int index = vpfn & 0x1FF;
626 unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
632 * Repeatedly calling mmu_get_bottom_pgd() is clearly
633 * suboptimal. We don't have to re-parse the whole tree
634 * each time (just cache the l0-l2 sequence).
635 * On the other hand, it's only a gain when we map more than
636 * 256 pages at once (on average). Do we really care?
638 pgd = mmu_get_bottom_pgd(kctx, vpfn);
640 dev_warn(kctx->kbdev->dev,
641 "kbase_mmu_insert_pages: "
642 "mmu_get_bottom_pgd failure\n");
643 if (recover_required) {
644 /* Invalidate the pages we have partially
646 mmu_insert_pages_failure_recovery(kctx,
650 return MALI_ERROR_FUNCTION_FAILED;
653 pgd_page = kmap(pfn_to_page(PFN_DOWN(pgd)));
655 dev_warn(kctx->kbdev->dev,
656 "kbase_mmu_insert_pages: "
658 if (recover_required) {
659 /* Invalidate the pages we have partially
661 mmu_insert_pages_failure_recovery(kctx,
665 return MALI_ERROR_OUT_OF_MEMORY;
668 for (i = 0; i < count; i++) {
669 unsigned int ofs = index + i;
670 KBASE_DEBUG_ASSERT(0 == (pgd_page[ofs] & 1UL));
671 page_table_entry_set(kctx->kbdev, &pgd_page[ofs],
672 mmu_phyaddr_to_ate(phys[i],
681 ksync_kern_vrange_gpu(pgd + (index * sizeof(u64)),
682 pgd_page + index, count * sizeof(u64));
684 kunmap(pfn_to_page(PFN_DOWN(pgd)));
685 /* We have started modifying the page table. If further pages
686 * need inserting and fail we need to undo what has already
688 recover_required = MALI_TRUE;
689 recover_count += count;
691 return MALI_ERROR_NONE;
694 KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages)
697 * This function is responsible for validating the MMU PTs and
698 * triggering the required flushes.
700 * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is
701 * currently scheduled into the runpool, and so potentially uses a lot of locks.
702 * These locks must be taken in the correct order with respect to others
703 * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more
706 static void kbase_mmu_flush(kbase_context *kctx, u64 vpfn, size_t nr)
709 mali_bool ctx_is_in_runpool;
711 KBASE_DEBUG_ASSERT(NULL != kctx);
715 /* We must flush if we're currently running jobs. At the very least, we need to retain the
716 * context to ensure it doesn't schedule out whilst we're trying to flush it */
717 ctx_is_in_runpool = kbasep_js_runpool_retain_ctx(kbdev, kctx);
719 if (ctx_is_in_runpool) {
720 KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
722 /* Second level check is to try to only do this when jobs are running. The refcount is
723 * a heuristic for this. */
724 if (kbdev->js_data.runpool_irq.per_as_data[kctx->as_nr].as_busy_refcount >= 2) {
725 /* Lock the VA region we're about to update */
726 u64 lock_addr = lock_region(kbdev, vpfn, nr);
727 unsigned int max_loops = KBASE_AS_FLUSH_MAX_LOOPS;
729 /* AS transaction begin */
730 mutex_lock(&kbdev->as[kctx->as_nr].transaction_mutex);
731 kbase_reg_write(kctx->kbdev, MMU_AS_REG(kctx->as_nr, ASn_LOCKADDR_LO), lock_addr & 0xFFFFFFFFUL, kctx);
732 kbase_reg_write(kctx->kbdev, MMU_AS_REG(kctx->as_nr, ASn_LOCKADDR_HI), lock_addr >> 32, kctx);
733 kbase_reg_write(kctx->kbdev, MMU_AS_REG(kctx->as_nr, ASn_COMMAND), ASn_COMMAND_LOCK, kctx);
735 /* flush L2 and unlock the VA */
736 if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6367))
737 kbase_reg_write(kctx->kbdev, MMU_AS_REG(kctx->as_nr, ASn_COMMAND), ASn_COMMAND_FLUSH, kctx);
739 kbase_reg_write(kctx->kbdev, MMU_AS_REG(kctx->as_nr, ASn_COMMAND), ASn_COMMAND_FLUSH_MEM, kctx);
741 /* wait for the flush to complete */
742 while (--max_loops && kbase_reg_read(kctx->kbdev, MMU_AS_REG(kctx->as_nr, ASn_STATUS), kctx) & ASn_STATUS_FLUSH_ACTIVE)
746 /* Flush failed to complete, assume the GPU has hung and perform a reset to recover */
747 dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover\n");
748 if (kbase_prepare_to_reset_gpu(kbdev))
749 kbase_reset_gpu(kbdev);
752 if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9630)) {
753 /* Issue an UNLOCK command to ensure that valid page tables are re-read by the GPU after an update.
754 Note that the FLUSH command should already perform all the necessary actions; however, the bus logs show
755 that if multiple page faults occur within an 8-page region the MMU does not always re-read the
756 updated page table entries for later faults, or reads them only partially, and it subsequently raises the
757 page fault IRQ for the same addresses. The UNLOCK ensures that the MMU cache is flushed, so the updates
758 can be re-read. As the region is now unlocked we need to issue 2 UNLOCK commands in order to flush the
759 MMU/uTLB, see PRLAM-8812.
761 kbase_reg_write(kctx->kbdev, MMU_AS_REG(kctx->as_nr, ASn_COMMAND), ASn_COMMAND_UNLOCK, kctx);
762 kbase_reg_write(kctx->kbdev, MMU_AS_REG(kctx->as_nr, ASn_COMMAND), ASn_COMMAND_UNLOCK, kctx);
765 mutex_unlock(&kbdev->as[kctx->as_nr].transaction_mutex);
766 /* AS transaction end */
768 kbasep_js_runpool_release_ctx(kbdev, kctx);
773 * We actually only discard the ATEs, and not the page table
774 * pages. There is a potential DoS here, as we'll leak memory by
775 * having PTEs that are potentially unused. Fixing this will require physical
776 * page accounting, so that MMU pages are part of the process allocation.
778 * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is
779 * currently scheduled into the runpool, and so potentially uses a lot of locks.
780 * These locks must be taken in the correct order with respect to others
781 * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more
784 mali_error kbase_mmu_teardown_pages(kbase_context *kctx, u64 vpfn, size_t nr)
789 size_t requested_nr = nr;
791 KBASE_DEBUG_ASSERT(NULL != kctx);
792 beenthere(kctx, "kctx %p vpfn %lx nr %zu", (void *)kctx, (unsigned long)vpfn, nr);
794 lockdep_assert_held(&kctx->reg_lock);
797 /* early out if nothing to do */
798 return MALI_ERROR_NONE;
805 unsigned int index = vpfn & 0x1FF;
806 unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
810 pgd = mmu_get_bottom_pgd(kctx, vpfn);
812 dev_warn(kbdev->dev, "kbase_mmu_teardown_pages: mmu_get_bottom_pgd failure\n");
813 return MALI_ERROR_FUNCTION_FAILED;
816 pgd_page = kmap(pfn_to_page(PFN_DOWN(pgd)));
818 dev_warn(kbdev->dev, "kbase_mmu_teardown_pages: kmap failure\n");
819 return MALI_ERROR_OUT_OF_MEMORY;
822 for (i = 0; i < count; i++) {
823 page_table_entry_set( kctx->kbdev, &pgd_page[index + i], ENTRY_IS_INVAL );
829 ksync_kern_vrange_gpu(pgd + (index * sizeof(u64)), pgd_page + index, count * sizeof(u64));
831 kunmap(pfn_to_page(PFN_DOWN(pgd)));
834 kbase_mmu_flush(kctx, vpfn, requested_nr);
835 return MALI_ERROR_NONE;
838 KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages)
841 * Update the entries for the specified number of pages pointed to by 'phys' at GPU PFN 'vpfn'.
842 * This call is triggered in response to a change of the memory attributes.
844 * @pre: The caller is responsible for validating the memory attributes.
846 * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is
847 * currently scheduled into the runpool, and so potentially uses a lot of locks.
848 * These locks must be taken in the correct order with respect to others
849 * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more
852 mali_error kbase_mmu_update_pages(kbase_context* kctx, u64 vpfn, phys_addr_t* phys, size_t nr, unsigned long flags)
857 size_t requested_nr = nr;
859 KBASE_DEBUG_ASSERT(NULL != kctx);
860 KBASE_DEBUG_ASSERT(0 != vpfn);
861 KBASE_DEBUG_ASSERT(vpfn <= (UINT64_MAX / PAGE_SIZE));
863 lockdep_assert_held(&kctx->reg_lock);
865 mmu_flags = kbase_mmu_get_mmu_flags(flags);
867 dev_warn(kctx->kbdev->dev, "kbase_mmu_update_pages(): updating page share flags "\
868 "on GPU PFN 0x%llx from phys %p, %zu pages",
875 unsigned int index = vpfn & 0x1FF;
876 size_t count = KBASE_MMU_PAGE_ENTRIES - index;
880 pgd = mmu_get_bottom_pgd(kctx, vpfn);
882 dev_warn(kctx->kbdev->dev, "mmu_get_bottom_pgd failure\n");
883 return MALI_ERROR_FUNCTION_FAILED;
886 pgd_page = kmap(pfn_to_page(PFN_DOWN(pgd)));
888 dev_warn(kctx->kbdev->dev, "kmap failure\n");
889 return MALI_ERROR_OUT_OF_MEMORY;
892 for (i = 0; i < count; i++) {
893 page_table_entry_set( kctx->kbdev, &pgd_page[index + i], mmu_phyaddr_to_ate(phys[i], mmu_flags) );
900 ksync_kern_vrange_gpu(pgd + (index * sizeof(u64)), pgd_page + index, count * sizeof(u64));
902 kunmap(pfn_to_page(PFN_DOWN(pgd)));
905 kbase_mmu_flush(kctx, vpfn, requested_nr);
907 return MALI_ERROR_NONE;
910 static int mmu_pte_is_valid(u64 pte)
912 return ((pte & 3) == ENTRY_IS_ATE);
915 /* This is a debug feature only */
916 static void mmu_check_unused(kbase_context *kctx, phys_addr_t pgd)
921 page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd)));
922 /* kmap_atomic should NEVER fail. */
923 KBASE_DEBUG_ASSERT(NULL != page);
925 for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
926 if (mmu_pte_is_valid(page[i]))
927 beenthere(kctx, "live pte %016lx", (unsigned long)page[i]);
932 static void mmu_teardown_level(kbase_context *kctx, phys_addr_t pgd, int level, int zap, u64 *pgd_page_buffer)
934 phys_addr_t target_pgd;
938 KBASE_DEBUG_ASSERT(NULL != kctx);
939 lockdep_assert_held(&kctx->reg_lock);
941 pgd_page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd)));
942 /* kmap_atomic should NEVER fail. */
943 KBASE_DEBUG_ASSERT(NULL != pgd_page);
944 /* Copy the page to our preallocated buffer so that we can minimize kmap_atomic usage */
945 memcpy(pgd_page_buffer, pgd_page, PAGE_SIZE);
946 kunmap_atomic(pgd_page);
947 pgd_page = pgd_page_buffer;
949 for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
950 target_pgd = mmu_pte_to_phy_addr(pgd_page[i]);
954 mmu_teardown_level(kctx, target_pgd, level + 1, zap, pgd_page_buffer + (PAGE_SIZE / sizeof(u64)));
957 * So target_pgd is a level-3 page.
958 * As a leaf, it is safe to free it.
959 * Unless we have live pages attached to it!
961 mmu_check_unused(kctx, target_pgd);
964 beenthere(kctx, "pte %lx level %d", (unsigned long)target_pgd, level + 1);
966 kbase_mem_allocator_free(kctx->pgd_allocator, 1, &target_pgd, MALI_TRUE);
967 kbase_process_page_usage_dec(kctx, 1 );
968 kbase_atomic_sub_pages(1, &kctx->used_pages);
969 kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages);
975 mali_error kbase_mmu_init(kbase_context *kctx)
977 KBASE_DEBUG_ASSERT(NULL != kctx);
978 KBASE_DEBUG_ASSERT(NULL == kctx->mmu_teardown_pages);
980 /* Preallocate four pages (one per MMU level) for mmu_teardown_level() to use */
981 kctx->mmu_teardown_pages = kmalloc(PAGE_SIZE * 4, GFP_KERNEL);
983 kctx->mem_attrs = (ASn_MEMATTR_IMPL_DEF_CACHE_POLICY <<
984 (ASn_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY * 8)) |
985 (ASn_MEMATTR_FORCE_TO_CACHE_ALL <<
986 (ASn_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8)) |
987 (ASn_MEMATTR_WRITE_ALLOC <<
988 (ASn_MEMATTR_INDEX_WRITE_ALLOC * 8)) |
989 0; /* The other indices are unused for now */
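/*
 * Illustrative note (not part of the original source): MEMATTR holds one 8-bit
 * attribute encoding per index, i.e. index i occupies bits [8*i + 7 : 8*i] of
 * the 64-bit value, so the encoding for index 1 is (kctx->mem_attrs >> 8) & 0xFF.
 * The 3-bit mem_attr index that kbase_mmu_get_mmu_flags() stores in an entry's
 * bits 4:2 selects one of these byte lanes.
 */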
991 if (NULL == kctx->mmu_teardown_pages)
992 return MALI_ERROR_OUT_OF_MEMORY;
994 return MALI_ERROR_NONE;
997 void kbase_mmu_term(kbase_context *kctx)
999 KBASE_DEBUG_ASSERT(NULL != kctx);
1000 KBASE_DEBUG_ASSERT(NULL != kctx->mmu_teardown_pages);
1002 kfree(kctx->mmu_teardown_pages);
1003 kctx->mmu_teardown_pages = NULL;
1006 void kbase_mmu_free_pgd(kbase_context *kctx)
1008 KBASE_DEBUG_ASSERT(NULL != kctx);
1009 KBASE_DEBUG_ASSERT(NULL != kctx->mmu_teardown_pages);
1011 lockdep_assert_held(&kctx->reg_lock);
1013 mmu_teardown_level(kctx, kctx->pgd, MIDGARD_MMU_TOPLEVEL, 1, kctx->mmu_teardown_pages);
1015 beenthere(kctx, "pgd %lx", (unsigned long)kctx->pgd);
1016 kbase_mem_allocator_free(kctx->pgd_allocator, 1, &kctx->pgd, MALI_TRUE);
1017 kbase_process_page_usage_dec(kctx, 1 );
1018 kbase_atomic_sub_pages(1, &kctx->used_pages);
1019 kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages);
1022 KBASE_EXPORT_TEST_API(kbase_mmu_free_pgd)
1024 static size_t kbasep_mmu_dump_level(kbase_context *kctx, phys_addr_t pgd, int level, char ** const buffer, size_t *size_left)
1026 phys_addr_t target_pgd;
1029 size_t size = KBASE_MMU_PAGE_ENTRIES * sizeof(u64) + sizeof(u64);
1032 KBASE_DEBUG_ASSERT(NULL != kctx);
1033 lockdep_assert_held(&kctx->reg_lock);
1035 pgd_page = kmap(pfn_to_page(PFN_DOWN(pgd)));
1037 dev_warn(kctx->kbdev->dev, "kbasep_mmu_dump_level: kmap failure\n");
1041 if (*size_left >= size) {
1042 /* A modified physical address that contains the page table level */
1043 u64 m_pgd = pgd | level;
1045 /* Put the modified physical address in the output buffer */
1046 memcpy(*buffer, &m_pgd, sizeof(m_pgd));
1047 *buffer += sizeof(m_pgd);
1049 /* Followed by the page table itself */
1050 memcpy(*buffer, pgd_page, sizeof(u64) * KBASE_MMU_PAGE_ENTRIES);
1051 *buffer += sizeof(u64) * KBASE_MMU_PAGE_ENTRIES;
1056 for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
1057 if ((pgd_page[i] & ENTRY_IS_PTE) == ENTRY_IS_PTE) {
1058 target_pgd = mmu_pte_to_phy_addr(pgd_page[i]);
1060 dump_size = kbasep_mmu_dump_level(kctx, target_pgd, level + 1, buffer, size_left);
1062 kunmap(pfn_to_page(PFN_DOWN(pgd)));
1069 kunmap(pfn_to_page(PFN_DOWN(pgd)));
1074 void *kbase_mmu_dump(kbase_context *kctx, int nr_pages)
1079 KBASE_DEBUG_ASSERT(kctx);
1081 lockdep_assert_held(&kctx->reg_lock);
1083 if (0 == nr_pages) {
1084 /* can't dump into a zero-sized buffer, early out */
1088 size_left = nr_pages * PAGE_SIZE;
1090 KBASE_DEBUG_ASSERT(0 != size_left);
1091 kaddr = vmalloc_user(size_left);
1094 u64 end_marker = 0xFFULL;
1095 char *buffer = (char *)kaddr;
1097 size_t size = kbasep_mmu_dump_level(kctx, kctx->pgd, MIDGARD_MMU_TOPLEVEL, &buffer, &size_left);
1103 /* Add on the size for the end marker */
1104 size += sizeof(u64);
1106 if (size > nr_pages * PAGE_SIZE || size_left < sizeof(u64)) {
1107 /* The buffer isn't big enough - free the memory and return failure */
1112 /* Add the end marker */
1113 memcpy(buffer, &end_marker, sizeof(u64));
1118 KBASE_EXPORT_TEST_API(kbase_mmu_dump)
1120 static u64 lock_region(kbase_device *kbdev, u64 pfn, size_t num_pages)
1124 /* can't lock a zero sized range */
1125 KBASE_DEBUG_ASSERT(num_pages);
1127 region = pfn << PAGE_SHIFT;
1129 * Given the ASSERT above, fls(num_pages) is in the range 1 .. 32, so
1133 * 10 + fls(num_pages)
1134 * results in the range (11 .. 42)
1135 * on both 32-bit and 64-bit builds.
1139 /* gracefully handle num_pages being zero */
1140 if (0 == num_pages) {
1144 region_width = 10 + fls(num_pages);
1145 if (num_pages != (1ul << (region_width - 11))) {
1146 /* not pow2, so must go up to the next pow2 */
1149 KBASE_DEBUG_ASSERT(region_width <= KBASE_LOCK_REGION_MAX_SIZE);
1150 KBASE_DEBUG_ASSERT(region_width >= KBASE_LOCK_REGION_MIN_SIZE);
1151 region |= region_width;
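/*
 * Worked example (illustrative, not part of the original source): locking 1000
 * pages gives fls(1000) == 10, so region_width starts at 20; since
 * 1000 != (1 << 9) it is not a power of two, and the elided rounding step
 * presumably bumps region_width to 21. The value returned is then
 *
 *     (pfn << PAGE_SHIFT) | 21
 *
 * i.e. the region base address with the width encoded in the low bits that the
 * page-aligned address leaves clear.
 */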
1157 static void bus_fault_worker(struct work_struct *data)
1159 kbase_as *faulting_as;
1161 kbase_context *kctx;
1162 kbase_device *kbdev;
1164 mali_bool reset_status = MALI_FALSE;
1166 faulting_as = container_of(data, kbase_as, work_busfault);
1167 as_no = faulting_as->number;
1169 kbdev = container_of(faulting_as, kbase_device, as[as_no]);
1171 /* Grab the context that was already refcounted in kbase_mmu_interrupt().
1172 * Therefore, it cannot be scheduled out of this AS until we explicitly release it
1174 * NOTE: NULL can be returned here if we're gracefully handling a spurious interrupt */
1175 kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as_no);
1177 if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) {
1178 /* Due to H/W issue 8245 we need to reset the GPU after using UNMAPPED mode.
1179 * We start the reset before switching to UNMAPPED to ensure that unrelated jobs
1180 * are evicted from the GPU before the switch.
1182 dev_err(kbdev->dev, "GPU bus error occurred. For this GPU version we now soft-reset as part of bus error recovery\n");
1183 reset_status = kbase_prepare_to_reset_gpu(kbdev);
1186 /* NOTE: If GPU already powered off for suspend, we don't need to switch to unmapped */
1187 if (!kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
1188 /* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */
1189 /* AS transaction begin */
1190 mutex_lock(&kbdev->as[as_no].transaction_mutex);
1192 reg = kbase_reg_read(kbdev, MMU_AS_REG(as_no, ASn_TRANSTAB_LO), kctx);
1194 kbase_reg_write(kbdev, MMU_AS_REG(as_no, ASn_TRANSTAB_LO), reg, kctx);
1195 kbase_reg_write(kbdev, MMU_AS_REG(as_no, ASn_COMMAND), ASn_COMMAND_UPDATE, kctx);
1197 mutex_unlock(&kbdev->as[as_no].transaction_mutex);
1198 /* AS transaction end */
1200 mmu_mask_reenable(kbdev, kctx, faulting_as);
1201 kbase_pm_context_idle(kbdev);
1204 if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245) && reset_status)
1205 kbase_reset_gpu(kbdev);
1207 /* By this point, the fault was handled in some way, so release the ctx refcount */
1209 kbasep_js_runpool_release_ctx(kbdev, kctx);
1212 void kbase_mmu_interrupt(kbase_device *kbdev, u32 irq_stat)
1214 unsigned long flags;
1215 const int num_as = 16;
1216 const int busfault_shift = 16;
1217 const int pf_shift = 0;
1218 const unsigned long mask = (1UL << num_as) - 1;
1219 kbasep_js_device_data *js_devdata;
1222 u32 bf_bits = (irq_stat >> busfault_shift) & mask; /* bus faults */
1223 /* Ignore ASes with both pf and bf */
1224 u32 pf_bits = ((irq_stat >> pf_shift) & mask) & ~bf_bits; /* page faults */
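/*
 * Worked example (illustrative, not part of the original source):
 * irq_stat == 0x00050003 decodes as bf_bits == 0x0005 (bus faults on AS0 and
 * AS2) and pf_bits == 0x0003 & ~0x0005 == 0x0002 (page fault on AS1 only;
 * AS0's page-fault bit is dropped because its bus fault takes precedence).
 */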
1226 KBASE_DEBUG_ASSERT(NULL != kbdev);
1228 js_devdata = &kbdev->js_data;
1230 /* remember current mask */
1231 spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
1232 new_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL);
1233 /* mask interrupts for now */
1234 kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0, NULL);
1235 spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
1238 /* the while logic ensures we have a bit set, no need to check for not-found here */
1239 int as_no = ffs(bf_bits) - 1;
1240 kbase_as *as = &kbdev->as[as_no];
1241 kbase_context *kctx;
1243 /* Refcount the kctx ASAP - it shouldn't disappear anyway, since Bus/Page faults
1244 * _should_ only occur whilst jobs are running, and a job causing the Bus/Page fault
1245 * shouldn't complete until the MMU is updated */
1246 kctx = kbasep_js_runpool_lookup_ctx(kbdev, as_no);
1248 /* mark as handled */
1249 bf_bits &= ~(1UL << as_no);
1251 /* find faulting address & status */
1252 as->fault_addr = ((u64)kbase_reg_read(kbdev, MMU_AS_REG(as_no, ASn_FAULTADDRESS_HI), kctx) << 32) |
1253 kbase_reg_read(kbdev, MMU_AS_REG(as_no, ASn_FAULTADDRESS_LO), kctx);
1254 as->fault_status = kbase_reg_read(kbdev, MMU_AS_REG(as_no, ASn_FAULTSTATUS), kctx);
1256 /* Clear the internal JM mask first before clearing the internal MMU mask */
1257 kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 1UL << MMU_REGS_BUS_ERROR_FLAG(as_no), kctx);
1260 /* hw counters dumping in progress, signal the other thread that it failed */
1261 if ((kbdev->hwcnt.kctx == kctx) && (kbdev->hwcnt.state == KBASE_INSTR_STATE_DUMPING))
1262 kbdev->hwcnt.state = KBASE_INSTR_STATE_FAULT;
1264 /* Stop the kctx from submitting more jobs and cause it to be scheduled
1265 * out/rescheduled when all references to it are released */
1266 spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
1267 kbasep_js_clear_submit_allowed(js_devdata, kctx);
1268 spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
1270 dev_warn(kbdev->dev, "Bus error in AS%d at 0x%016llx\n", as_no, as->fault_addr);
1272 dev_warn(kbdev->dev, "Bus error in AS%d at 0x%016llx with no context present! " "Spurious IRQ or SW Design Error?\n", as_no, as->fault_addr);
1275 /* remove the queued BFs from the mask */
1276 new_mask &= ~(1UL << (as_no + num_as));
1278 /* We need to switch to UNMAPPED mode - but we do this in a worker so that we can sleep */
1279 KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&as->work_busfault));
1280 INIT_WORK(&as->work_busfault, bus_fault_worker);
1281 queue_work(as->pf_wq, &as->work_busfault);
1285 * pf_bits is non-zero if we have at least one AS with a page fault and no bus fault.
1286 * Handle the PFs in our worker thread.
1289 /* the while logic ensures we have a bit set, no need to check for not-found here */
1290 int as_no = ffs(pf_bits) - 1;
1291 kbase_as *as = &kbdev->as[as_no];
1292 kbase_context *kctx;
1294 /* Refcount the kctx ASAP - it shouldn't disappear anyway, since Bus/Page faults
1295 * _should_ only occur whilst jobs are running, and a job causing the Bus/Page fault
1296 * shouldn't complete until the MMU is updated */
1297 kctx = kbasep_js_runpool_lookup_ctx(kbdev, as_no);
1299 /* mark as handled */
1300 pf_bits &= ~(1UL << as_no);
1302 /* find faulting address & status */
1303 as->fault_addr = ((u64)kbase_reg_read(kbdev, MMU_AS_REG(as_no, ASn_FAULTADDRESS_HI), kctx) << 32) |
1304 kbase_reg_read(kbdev, MMU_AS_REG(as_no, ASn_FAULTADDRESS_LO), kctx);
1305 as->fault_status = kbase_reg_read(kbdev, MMU_AS_REG(as_no, ASn_FAULTSTATUS), kctx);
1307 /* Clear the internal JM mask first before clearing the internal MMU mask */
1308 kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 1UL << MMU_REGS_PAGE_FAULT_FLAG(as_no), kctx);
1311 dev_warn(kbdev->dev, "Page fault in AS%d at 0x%016llx with no context present! " "Spurious IRQ or SW Design Error?\n", as_no, as->fault_addr);
1313 /* remove the queued PFs from the mask */
1314 new_mask &= ~((1UL << as_no) | (1UL << (as_no + num_as)));
1316 /* queue work pending for this AS */
1317 KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&as->work_pagefault));
1318 INIT_WORK(&as->work_pagefault, page_fault_worker);
1319 queue_work(as->pf_wq, &as->work_pagefault);
1322 /* reenable interrupts */
1323 spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
1324 tmp = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL);
1326 kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), new_mask, NULL);
1327 spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
1330 KBASE_EXPORT_TEST_API(kbase_mmu_interrupt)
1332 const char *kbase_exception_name(u32 exception_code)
1336 switch (exception_code) {
1337 /* Non-Fault Status code */
1339 e = "NOT_STARTED/IDLE/OK";
1356 /* Job exceptions */
1358 e = "JOB_CONFIG_FAULT";
1361 e = "JOB_POWER_FAULT";
1364 e = "JOB_READ_FAULT";
1367 e = "JOB_WRITE_FAULT";
1370 e = "JOB_AFFINITY_FAULT";
1373 e = "JOB_BUS_FAULT";
1376 e = "INSTR_INVALID_PC";
1379 e = "INSTR_INVALID_ENC";
1382 e = "INSTR_TYPE_MISMATCH";
1385 e = "INSTR_OPERAND_FAULT";
1388 e = "INSTR_TLS_FAULT";
1391 e = "INSTR_BARRIER_FAULT";
1394 e = "INSTR_ALIGN_FAULT";
1397 e = "DATA_INVALID_FAULT";
1400 e = "TILE_RANGE_FAULT";
1403 e = "ADDR_RANGE_FAULT";
1406 e = "OUT_OF_MEMORY";
1408 /* GPU exceptions */
1410 e = "DELAYED_BUS_FAULT";
1413 e = "SHAREABILITY_FAULT";
1415 /* MMU exceptions */
1424 e = "TRANSLATION_FAULT";
1427 e = "PERMISSION_FAULT";
1437 e = "TRANSTAB_BUS_FAULT";
1451 * The caller must ensure it has retained the ctx to prevent it from being scheduled out whilst it's being worked on.
1453 static void kbase_mmu_report_fault_and_kill(kbase_context *kctx, kbase_as *as)
1455 unsigned long flags;
1461 kbase_device *kbdev;
1462 kbasep_js_device_data *js_devdata;
1463 mali_bool reset_status = MALI_FALSE;
1464 static const char * const access_type_names[] = { "RESERVED", "EXECUTE", "READ", "WRITE" };
1466 KBASE_DEBUG_ASSERT(as);
1467 KBASE_DEBUG_ASSERT(kctx);
1470 kbdev = kctx->kbdev;
1471 js_devdata = &kbdev->js_data;
1473 /* ASSERT that the context won't leave the runpool */
1474 KBASE_DEBUG_ASSERT(kbasep_js_debug_check_ctx_refcount(kbdev, kctx) > 0);
1476 /* decode the fault status */
1477 exception_type = as->fault_status & 0xFF;
1478 access_type = (as->fault_status >> 8) & 0x3;
1479 source_id = (as->fault_status >> 16);
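/*
 * Worked example (illustrative, not part of the original source): a
 * fault_status of 0x000002C1 decodes as exception_type == 0xC1,
 * access_type == (0x2C1 >> 8) & 0x3 == 2 ("READ" in access_type_names) and
 * source_id == 0.
 */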
1481 /* terminal fault, print info about the fault */
1482 dev_err(kbdev->dev, "Unhandled Page fault in AS%d at VA 0x%016llX\n"
1483 "raw fault status 0x%X\n"
1484 "decoded fault status: %s\n"
1485 "exception type 0x%X: %s\n"
1486 "access type 0x%X: %s\n"
1488 as_no, as->fault_addr,
1490 (as->fault_status & (1 << 10) ? "DECODER FAULT" : "SLAVE FAULT"),
1491 exception_type, kbase_exception_name(exception_type),
1492 access_type, access_type_names[access_type],
1495 /* hardware counters dump fault handling */
1496 if ((kbdev->hwcnt.kctx) && (kbdev->hwcnt.kctx->as_nr == as_no) && (kbdev->hwcnt.state == KBASE_INSTR_STATE_DUMPING)) {
1497 unsigned int num_core_groups = kbdev->gpu_props.num_core_groups;
1498 if ((as->fault_addr >= kbdev->hwcnt.addr) && (as->fault_addr < (kbdev->hwcnt.addr + (num_core_groups * 2048))))
1499 kbdev->hwcnt.state = KBASE_INSTR_STATE_FAULT;
1502 /* Stop the kctx from submitting more jobs and cause it to be scheduled
1503 * out/rescheduled - this will occur on releasing the context's refcount */
1504 spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
1505 kbasep_js_clear_submit_allowed(js_devdata, kctx);
1506 spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
1508 /* Kill any running jobs from the context. Submit is disallowed, so no more jobs from this
1509 * context can appear in the job slots from this point on */
1510 kbase_job_kill_jobs_from_context(kctx);
1511 /* AS transaction begin */
1512 mutex_lock(&as->transaction_mutex);
1514 if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) {
1515 /* Due to H/W issue 8245 we need to reset the GPU after using UNMAPPED mode.
1516 * We start the reset before switching to UNMAPPED to ensure that unrelated jobs
1517 * are evicted from the GPU before the switch.
1519 dev_err(kbdev->dev, "Unhandled page fault. For this GPU version we now soft-reset the GPU as part of page fault recovery.");
1520 reset_status = kbase_prepare_to_reset_gpu(kbdev);
1523 /* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */
1524 reg = kbase_reg_read(kbdev, MMU_AS_REG(as_no, ASn_TRANSTAB_LO), kctx);
1526 kbase_reg_write(kbdev, MMU_AS_REG(as_no, ASn_TRANSTAB_LO), reg, kctx);
1527 kbase_reg_write(kbdev, MMU_AS_REG(as_no, ASn_COMMAND), ASn_COMMAND_UPDATE, kctx);
1529 mutex_unlock(&as->transaction_mutex);
1530 /* AS transaction end */
1531 mmu_mask_reenable(kbdev, kctx, as);
1533 if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245) && reset_status)
1534 kbase_reset_gpu(kbdev);
1537 void kbasep_as_do_poke(struct work_struct *work)
1540 kbase_device *kbdev;
1541 unsigned long flags;
1543 KBASE_DEBUG_ASSERT(work);
1544 as = container_of(work, kbase_as, poke_work);
1545 kbdev = container_of(as, kbase_device, as[as->number]);
1546 KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT);
1548 /* GPU power will already be active by virtue of the caller holding a JS
1549 * reference on the address space, and will not release it until this worker
1552 /* AS transaction begin */
1553 mutex_lock(&as->transaction_mutex);
1554 /* Force a uTLB invalidate */
1555 kbase_reg_write(kbdev, MMU_AS_REG(as->number, ASn_COMMAND), ASn_COMMAND_UNLOCK, NULL);
1556 mutex_unlock(&as->transaction_mutex);
1557 /* AS transaction end */
1559 spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags);
1560 if (as->poke_refcount &&
1561 !(as->poke_state & KBASE_AS_POKE_STATE_KILLING_POKE)) {
1562 /* Only queue up the timer if we need it, and we're not trying to kill it */
1563 hrtimer_start(&as->poke_timer, HR_TIMER_DELAY_MSEC(5), HRTIMER_MODE_REL);
1565 spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags);
1569 enum hrtimer_restart kbasep_as_poke_timer_callback(struct hrtimer *timer)
1574 KBASE_DEBUG_ASSERT(NULL != timer);
1575 as = container_of(timer, kbase_as, poke_timer);
1576 KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT);
1578 queue_work_ret = queue_work(as->poke_wq, &as->poke_work);
1579 KBASE_DEBUG_ASSERT(queue_work_ret);
1580 return HRTIMER_NORESTART;
1584 * Retain the poking timer on an atom's context (if the atom hasn't already
1585 * done so), and start the timer (if it's not already started).
1587 * This must only be called on a context that's scheduled in, and an atom
1588 * that's running on the GPU.
1590 * The caller must hold kbasep_js_device_data::runpool_irq::lock
1592 * This can be called safely from atomic context
1594 void kbase_as_poking_timer_retain_atom(kbase_device *kbdev, kbase_context *kctx, kbase_jd_atom *katom)
1597 KBASE_DEBUG_ASSERT(kbdev);
1598 KBASE_DEBUG_ASSERT(kctx);
1599 KBASE_DEBUG_ASSERT(katom);
1600 KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
1601 lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
1608 /* It's safe to work on the as/as_nr without an explicit reference,
1609 * because the caller holds the runpool_irq lock, and the atom itself
1610 * was also running and had already taken a reference */
1611 as = &kbdev->as[kctx->as_nr];
1613 if (++(as->poke_refcount) == 1) {
1614 /* First refcount for poke needed: check if not already in flight */
1615 if (!as->poke_state) {
1616 /* need to start poking */
1617 as->poke_state |= KBASE_AS_POKE_STATE_IN_FLIGHT;
1618 queue_work(as->poke_wq, &as->poke_work);
1624 * If an atom holds a poking timer, release it and wait for it to finish
1626 * This must only be called on a context that's scheduled in, and an atom
1627 * that still has a JS reference on the context
1629 * This must \b not be called from atomic context, since it can sleep.
1631 void kbase_as_poking_timer_release_atom(kbase_device *kbdev, kbase_context *kctx, kbase_jd_atom *katom)
1634 unsigned long flags;
1636 KBASE_DEBUG_ASSERT(kbdev);
1637 KBASE_DEBUG_ASSERT(kctx);
1638 KBASE_DEBUG_ASSERT(katom);
1639 KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
1644 as = &kbdev->as[kctx->as_nr];
1646 spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags);
1647 KBASE_DEBUG_ASSERT(as->poke_refcount > 0);
1648 KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT);
1650 if (--(as->poke_refcount) == 0) {
1651 as->poke_state |= KBASE_AS_POKE_STATE_KILLING_POKE;
1652 spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags);
1654 hrtimer_cancel(&as->poke_timer);
1655 flush_workqueue(as->poke_wq);
1657 spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags);
1659 /* Re-check whether it's still needed */
1660 if (as->poke_refcount) {
1662 /* Poking still needed:
1663 * - Another retain will not be starting the timer or queueing work,
1664 * because it's still marked as in-flight
1665 * - The hrtimer has finished, and has not started a new timer or
1666 * queued work because it's been marked as killing
1668 * So whatever happens now, just queue the work again */
1669 as->poke_state &= ~((kbase_as_poke_state)KBASE_AS_POKE_STATE_KILLING_POKE);
1670 queue_work_ret = queue_work(as->poke_wq, &as->poke_work);
1671 KBASE_DEBUG_ASSERT(queue_work_ret);
1673 /* It isn't - so mark it as not in flight, and not killing */
1674 as->poke_state = 0u;
1676 /* The poke associated with the atom has now finished. If this is
1677 * also the last atom on the context, then we can guarantee no more
1678 * pokes (and thus no more poking register accesses) will occur on
1679 * the context until new atoms are run */
1682 spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags);