mali_760_driver: rk_ext on arm_release_ver, from r5p0-02dev0.
[firefly-linux-kernel-4.4.55.git] drivers/gpu/arm/midgard/mali_kbase_mmu.c
1 /*
2  *
3  * (C) COPYRIGHT ARM Limited. All rights reserved.
4  *
5  * This program is free software and is provided to you under the terms of the
6  * GNU General Public License version 2 as published by the Free Software
7  * Foundation, and any use by you of this program is subject to the terms
8  * of such GNU licence.
9  *
10  * A copy of the licence is included with the program, and can also be obtained
11  * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
12  * Boston, MA  02110-1301, USA.
13  *
14  */
15
16
17
18
19
20 /**
21  * @file mali_kbase_mmu.c
22  * Base kernel MMU management.
23  */
24
25 /* #define DEBUG    1 */
26 #include <linux/dma-mapping.h>
27 #include <mali_kbase.h>
28 #include <mali_midg_regmap.h>
29 #include <mali_kbase_gator.h>
30 #include <mali_kbase_debug.h>
31
32 #define beenthere(kctx, f, a...)  dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a)
33
34 #include <mali_kbase_defs.h>
35 #include <mali_kbase_hw.h>
36 #include <mali_kbase_mmu_hw.h>
37
38 #define KBASE_MMU_PAGE_ENTRIES 512
39
40 /*
41  * Definitions:
42  * - PGD: Page Directory.
43  * - PTE: Page Table Entry. A 64bit value pointing to the next
44  *        level of translation
45  * - ATE: Address Translation Entry. A 64bit value pointing to
46  *        a 4kB physical page.
47  */
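/*
 * Illustrative note (a sketch based on the walk code below, not part of the
 * original sources): the table is walked from MIDGARD_MMU_TOPLEVEL down to
 * level 3, and each level indexes 512 (2^9) entries, so a GPU virtual PFN is
 * decomposed as index = (vpfn >> ((3 - level) * 9)) & 0x1FF.
 *
 * Worked example for vpfn = 0x12345:
 *   level 0 index = (0x12345 >> 27) & 0x1FF = 0x000
 *   level 1 index = (0x12345 >> 18) & 0x1FF = 0x000
 *   level 2 index = (0x12345 >>  9) & 0x1FF = 0x091
 *   level 3 index = (0x12345 >>  0) & 0x1FF = 0x145
 */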
48
49 static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx,
50                 struct kbase_as *as, const char *reason_str);
51
52
53 /* Helper function to assign page table entries, ensuring the use of strd,
54  * which is required on LPAE systems.
55  */
56
57 static inline void page_table_entry_set(struct kbase_device *kbdev, u64 *pte, u64 phy)
58 {
59 #ifdef CONFIG_64BIT
60         *pte = phy;
61 #elif defined(CONFIG_ARM)
62         /*
63          * To prevent the compiler from keeping cached copies of memory, we have
64          * to explicitly tell it that we have updated memory.
65          *
66          * Note: we could move the data into R0 and R1 ourselves by declaring
67          * register variables with explicit register assignments, but the downside
68          * is that we would have to assume the CPU endianness. To avoid this we use
69          * ldrd to read the data from memory into R0 and R1, which respects the CPU
70          * endianness, and then strd to make the 64-bit assignment to the page
71          * table entry.
72          *
73          */
74
75         asm     volatile("ldrd r0, r1, [%[ptemp]]\n\t"
76                                 "strd r0, r1, [%[pte]]\n\t"
77                                 : "=m" (*pte)
78                                 : [ptemp] "r" (&phy), [pte] "r" (pte), "m" (phy)
79                                 : "r0", "r1");
80 #else
81 #error "64-bit atomic write must be implemented for your architecture"
82 #endif
83 }
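/*
 * Rationale (a sketch inferred from the strd requirement and the #error above,
 * not stated explicitly in the original sources): the GPU may walk these
 * tables while the CPU updates them, so each 64-bit entry should be written
 * with a single 64-bit store and never be observable half-written. On 64-bit
 * kernels a plain aligned u64 assignment is normally a single store; on 32-bit
 * ARM LPAE the ldrd/strd pair above provides the 64-bit store, and the "=m"
 * constraint tells the compiler that memory has been updated. Any other 32-bit
 * architecture must supply its own 64-bit atomic write, hence the #error.
 */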
84
85 static size_t make_multiple(size_t minimum, size_t multiple)
86 {
87         size_t remainder = minimum % multiple;
88         if (remainder == 0)
89                 return minimum;
90         else
91                 return minimum + multiple - remainder;
92 }
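/*
 * Examples: make_multiple(5, 4) == 8, make_multiple(8, 4) == 8,
 * make_multiple(1, 64) == 64, i.e. 'minimum' rounded up to the next
 * multiple of 'multiple'.
 */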
93
94 static void page_fault_worker(struct work_struct *data)
95 {
96         u64 fault_pfn;
97         u32 fault_access;
98         size_t new_pages;
99         size_t fault_rel_pfn;
100         struct kbase_as *faulting_as;
101         int as_no;
102         struct kbase_context *kctx;
103         struct kbase_device *kbdev;
104         struct kbase_va_region *region;
105         mali_error err;
106
107         faulting_as = container_of(data, struct kbase_as, work_pagefault);
108         fault_pfn = faulting_as->fault_addr >> PAGE_SHIFT;
109         as_no = faulting_as->number;
110
111         kbdev = container_of(faulting_as, struct kbase_device, as[as_no]);
112
113         /* Grab the context that was already refcounted in kbase_mmu_interrupt().
114          * Therefore, it cannot be scheduled out of this AS until we explicitly release it
115          *
116          * NOTE: NULL can be returned here if we're gracefully handling a spurious interrupt */
117         kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as_no);
118
119         if (kctx == NULL) {
120                 /* Only handle this if not already suspended */
121                 if (!kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
122                         struct kbase_mmu_setup *current_setup = &faulting_as->current_setup;
123
124                         /* Address space has no context, terminate the work */
125
126                         /* AS transaction begin */
127                         mutex_lock(&faulting_as->transaction_mutex);
128
129                         /* Switch to unmapped mode */
130                         current_setup->transtab &= ~(u64)MMU_TRANSTAB_ADRMODE_MASK;
131                         current_setup->transtab |= AS_TRANSTAB_ADRMODE_UNMAPPED;
132
133                         /* Apply new address space settings */
134                         kbase_mmu_hw_configure(kbdev, faulting_as, kctx);
135
136                         mutex_unlock(&faulting_as->transaction_mutex);
137                         /* AS transaction end */
138
139                         kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
140                                         KBASE_MMU_FAULT_TYPE_PAGE);
141                         kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
142                                         KBASE_MMU_FAULT_TYPE_PAGE);
143                         kbase_pm_context_idle(kbdev);
144                 }
145                 return;
146         }
147
148         KBASE_DEBUG_ASSERT(kctx->kbdev == kbdev);
149
150         kbase_gpu_vm_lock(kctx);
151
152         region = kbase_region_tracker_find_region_enclosing_address(kctx, faulting_as->fault_addr);
153         if (NULL == region || region->flags & KBASE_REG_FREE) {
154                 kbase_gpu_vm_unlock(kctx);
155                 kbase_mmu_report_fault_and_kill(kctx, faulting_as,
156                                 "Memory is not mapped on the GPU");
157                 goto fault_done;
158         }
159
160         fault_access = faulting_as->fault_status & AS_FAULTSTATUS_ACCESS_TYPE_MASK;
161         if (((fault_access == AS_FAULTSTATUS_ACCESS_TYPE_READ) &&
162                         !(region->flags & KBASE_REG_GPU_RD)) ||
163                         ((fault_access == AS_FAULTSTATUS_ACCESS_TYPE_WRITE) &&
164                         !(region->flags & KBASE_REG_GPU_WR)) ||
165                         ((fault_access == AS_FAULTSTATUS_ACCESS_TYPE_EX) &&
166                         (region->flags & KBASE_REG_GPU_NX))) {
167                 dev_warn(kbdev->dev, "Access permissions don't match: region->flags=0x%lx", region->flags);
168                 kbase_gpu_vm_unlock(kctx);
169                 kbase_mmu_report_fault_and_kill(kctx, faulting_as,
170                                 "Access permissions mismatch");
171                 goto fault_done;
172         }
173
174         if (!(region->flags & GROWABLE_FLAGS_REQUIRED)) {
175                 kbase_gpu_vm_unlock(kctx);
176                 kbase_mmu_report_fault_and_kill(kctx, faulting_as,
177                                 "Memory is not growable");
178                 goto fault_done;
179         }
180
181         /* find the size we need to grow it by */
182         /* we know the result fits in a size_t because kbase_region_tracker_find_region_enclosing_address
183          * has validated that the fault address is within a size_t of the start_pfn */
184         fault_rel_pfn = fault_pfn - region->start_pfn;
185
186         if (fault_rel_pfn < kbase_reg_current_backed_size(region)) {
187                 dev_dbg(kbdev->dev, "Page fault @ 0x%llx in allocated region 0x%llx-0x%llx of growable TMEM: Ignoring",
188                                 faulting_as->fault_addr, region->start_pfn,
189                                 region->start_pfn +
190                                 kbase_reg_current_backed_size(region));
191
192                 kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
193                                 KBASE_MMU_FAULT_TYPE_PAGE);
194                 /* [1] in case another page fault occurred while we were
195                  * handling this (duplicate) page fault, we must ensure we
196                  * don't lose the other fault as a result of clearing the
197                  * MMU IRQ. Therefore, after clearing the MMU IRQ we send an
198                  * UNLOCK command that will retry any stalled memory
199                  * transaction (which should cause the other page fault to be
200                  * raised again).
201                  */
202                 kbase_mmu_hw_do_operation(kbdev, faulting_as, 0, 0, 0,
203                                 AS_COMMAND_UNLOCK, 1);
204                 kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
205                                 KBASE_MMU_FAULT_TYPE_PAGE);
206                 kbase_gpu_vm_unlock(kctx);
207
208                 goto fault_done;
209         }
210
211         new_pages = make_multiple(fault_rel_pfn -
212                         kbase_reg_current_backed_size(region) + 1,
213                         region->extent);
214
215         /* cap to max vsize */
216         if (new_pages + kbase_reg_current_backed_size(region) >
217                         region->nr_pages)
218                 new_pages = region->nr_pages -
219                                 kbase_reg_current_backed_size(region);
220
221         if (0 == new_pages) {
222                 /* Duplicate of a fault we've already handled, nothing to do */
223                 kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
224                                 KBASE_MMU_FAULT_TYPE_PAGE);
225                 /* See comment [1] about UNLOCK usage */
226                 kbase_mmu_hw_do_operation(kbdev, faulting_as, 0, 0, 0,
227                                 AS_COMMAND_UNLOCK, 1);
228                 kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
229                                 KBASE_MMU_FAULT_TYPE_PAGE);
230                 kbase_gpu_vm_unlock(kctx);
231                 goto fault_done;
232         }
233
234         if (MALI_ERROR_NONE == kbase_alloc_phy_pages_helper(region->alloc, new_pages)) {
235                 u32 op;
236
237                 /* alloc success */
238                 KBASE_DEBUG_ASSERT(kbase_reg_current_backed_size(region) <= region->nr_pages);
239
240                 /* AS transaction begin */
241                 mutex_lock(&faulting_as->transaction_mutex);
242
243                 /* set up the new pages */
244                 err = kbase_mmu_insert_pages(kctx, region->start_pfn + kbase_reg_current_backed_size(region) - new_pages, &kbase_get_phy_pages(region)[kbase_reg_current_backed_size(region) - new_pages], new_pages, region->flags);
245                 if (MALI_ERROR_NONE != err) {
246                         /* failed to insert pages, handle as a normal PF */
247                         mutex_unlock(&faulting_as->transaction_mutex);
248                         kbase_free_phy_pages_helper(region->alloc, new_pages);
249                         kbase_gpu_vm_unlock(kctx);
250                         /* The locked VA region will be unlocked and the cache invalidated in here */
251                         kbase_mmu_report_fault_and_kill(kctx, faulting_as,
252                                         "Page table update failure");
253                         goto fault_done;
254                 }
255 #ifdef CONFIG_MALI_GATOR_SUPPORT
256                 kbase_trace_mali_page_fault_insert_pages(as_no, new_pages);
257 #endif                          /* CONFIG_MALI_GATOR_SUPPORT */
258
259                 /* flush L2 and unlock the VA (resumes the MMU) */
260                 if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6367))
261                         op = AS_COMMAND_FLUSH;
262                 else
263                         op = AS_COMMAND_FLUSH_PT;
264
265                 /* clear MMU interrupt - this needs to be done after updating
266                  * the page tables but before issuing a FLUSH command. The
267                  * FLUSH cmd has a side effect that it restarts stalled memory
268                  * transactions in other address spaces which may cause
269                  * another fault to occur. If we didn't clear the interrupt at
270                  * this stage a new IRQ might not be raised when the GPU finds
271                  * a MMU IRQ is already pending.
272                  */
273                 kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
274                                          KBASE_MMU_FAULT_TYPE_PAGE);
275
276                 kbase_mmu_hw_do_operation(kbdev, faulting_as, kctx,
277                                           faulting_as->fault_addr >> PAGE_SHIFT,
278                                           new_pages,
279                                           op, 1);
280
281                 mutex_unlock(&faulting_as->transaction_mutex);
282                 /* AS transaction end */
283
284                 /* reenable this in the mask */
285                 kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
286                                          KBASE_MMU_FAULT_TYPE_PAGE);
287                 kbase_gpu_vm_unlock(kctx);
288         } else {
289                 /* failed to extend, handle as a normal PF */
290                 kbase_gpu_vm_unlock(kctx);
291                 kbase_mmu_report_fault_and_kill(kctx, faulting_as,
292                                 "Page allocation failure");
293         }
294
295 fault_done:
296         /*
297          * By this point, the fault was handled in some way,
298          * so release the ctx refcount
299          */
300         kbasep_js_runpool_release_ctx(kbdev, kctx);
301 }
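/*
 * Worked example of the growth computation above (illustrative values): if a
 * region currently has 100 backed pages, region->extent is 64 and the fault
 * hits relative PFN 130, then new_pages = make_multiple(130 - 100 + 1, 64)
 * = make_multiple(31, 64) = 64, subject to the cap that keeps the backed size
 * from exceeding region->nr_pages.
 */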
302
303 phys_addr_t kbase_mmu_alloc_pgd(struct kbase_context *kctx)
304 {
305         phys_addr_t pgd;
306         u64 *page;
307         int i;
308         struct page *p;
309
310         KBASE_DEBUG_ASSERT(NULL != kctx);
311         kbase_atomic_add_pages(1, &kctx->used_pages);
312         kbase_atomic_add_pages(1, &kctx->kbdev->memdev.used_pages);
313
314         if (MALI_ERROR_NONE != kbase_mem_allocator_alloc(kctx->pgd_allocator, 1, &pgd))
315                 goto sub_pages;
316
317         p = pfn_to_page(PFN_DOWN(pgd));
318         page = kmap(p);
319         if (NULL == page)
320                 goto alloc_free;
321
322         kbase_process_page_usage_inc(kctx, 1);
323
324         for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++)
325                 page_table_entry_set(kctx->kbdev, &page[i], ENTRY_IS_INVAL);
326
327         /* Clean the full page */
328         dma_sync_single_for_device(kctx->kbdev->dev,
329                                    kbase_dma_addr(p),
330                                    PAGE_SIZE,
331                                    DMA_TO_DEVICE);
332         kunmap(p);
333         return pgd;
334
335 alloc_free:
336         kbase_mem_allocator_free(kctx->pgd_allocator, 1, &pgd, MALI_FALSE);
337 sub_pages:
338         kbase_atomic_sub_pages(1, &kctx->used_pages);
339         kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages);
340
341         return 0;
342 }
343
344 KBASE_EXPORT_TEST_API(kbase_mmu_alloc_pgd)
345
346 static phys_addr_t mmu_pte_to_phy_addr(u64 entry)
347 {
348         if (!(entry & 1))
349                 return 0;
350
351         return entry & ~0xFFF;
352 }
353
354 static u64 mmu_phyaddr_to_pte(phys_addr_t phy)
355 {
356         return (phy & ~0xFFF) | ENTRY_IS_PTE;
357 }
358
359 static u64 mmu_phyaddr_to_ate(phys_addr_t phy, u64 flags)
360 {
361         return (phy & ~0xFFF) | (flags & ENTRY_FLAGS_MASK) | ENTRY_IS_ATE;
362 }
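/*
 * Entry layout assumed by the helpers above (the ENTRY_IS_* and ENTRY_*_BIT
 * values are defined elsewhere and not visible in this file): bits [1:0]
 * encode the entry type, the physical address occupies the bits above bit 11
 * (hence the ~0xFFF masks in both directions), and the remaining bits carry
 * the attribute/permission flags selected by ENTRY_FLAGS_MASK.
 */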
363
364 /* Given PGD PFN for level N, return PGD PFN for level N+1 */
365 static phys_addr_t mmu_get_next_pgd(struct kbase_context *kctx, phys_addr_t pgd, u64 vpfn, int level)
366 {
367         u64 *page;
368         phys_addr_t target_pgd;
369         struct page *p;
370
371         KBASE_DEBUG_ASSERT(pgd);
372         KBASE_DEBUG_ASSERT(NULL != kctx);
373
374         lockdep_assert_held(&kctx->reg_lock);
375
376         /*
377          * Architecture spec defines level-0 as being the top-most.
378          * This is a bit unfortunate here, but we keep the same convention.
379          */
380         vpfn >>= (3 - level) * 9;
381         vpfn &= 0x1FF;
382
383         p = pfn_to_page(PFN_DOWN(pgd));
384         page = kmap(p);
385         if (NULL == page) {
386                 dev_warn(kctx->kbdev->dev, "mmu_get_next_pgd: kmap failure\n");
387                 return 0;
388         }
389
390         target_pgd = mmu_pte_to_phy_addr(page[vpfn]);
391
392         if (!target_pgd) {
393                 target_pgd = kbase_mmu_alloc_pgd(kctx);
394                 if (!target_pgd) {
395                         dev_warn(kctx->kbdev->dev, "mmu_get_next_pgd: kbase_mmu_alloc_pgd failure\n");
396                         kunmap(p);
397                         return 0;
398                 }
399
400                 page_table_entry_set(kctx->kbdev, &page[vpfn],
401                                 mmu_phyaddr_to_pte(target_pgd));
402
403                 dma_sync_single_for_device(kctx->kbdev->dev,
404                                            kbase_dma_addr(p),
405                                            PAGE_SIZE,
406                                            DMA_TO_DEVICE);
407                 /* Rely on the caller to update the address space flags. */
408         }
409
410         kunmap(p);
411         return target_pgd;
412 }
413
414 static phys_addr_t mmu_get_bottom_pgd(struct kbase_context *kctx, u64 vpfn)
415 {
416         phys_addr_t pgd;
417         int l;
418
419         pgd = kctx->pgd;
420
421         for (l = MIDGARD_MMU_TOPLEVEL; l < 3; l++) {
422                 pgd = mmu_get_next_pgd(kctx, pgd, vpfn, l);
423                 /* Handle failure condition */
424                 if (!pgd) {
425                         dev_warn(kctx->kbdev->dev, "mmu_get_bottom_pgd: mmu_get_next_pgd failure\n");
426                         return 0;
427                 }
428         }
429
430         return pgd;
431 }
432
433 static phys_addr_t mmu_insert_pages_recover_get_next_pgd(struct kbase_context *kctx, phys_addr_t pgd, u64 vpfn, int level)
434 {
435         u64 *page;
436         phys_addr_t target_pgd;
437
438         KBASE_DEBUG_ASSERT(pgd);
439         KBASE_DEBUG_ASSERT(NULL != kctx);
440
441         lockdep_assert_held(&kctx->reg_lock);
442
443         /*
444          * Architecture spec defines level-0 as being the top-most.
445          * This is a bit unfortunate here, but we keep the same convention.
446          */
447         vpfn >>= (3 - level) * 9;
448         vpfn &= 0x1FF;
449
450         page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd)));
451         /* kmap_atomic should NEVER fail */
452         KBASE_DEBUG_ASSERT(NULL != page);
453
454         target_pgd = mmu_pte_to_phy_addr(page[vpfn]);
455         /* As we are recovering from what has already been set up, we should have a target_pgd */
456         KBASE_DEBUG_ASSERT(0 != target_pgd);
457
458         kunmap_atomic(page);
459         return target_pgd;
460 }
461
462 static phys_addr_t mmu_insert_pages_recover_get_bottom_pgd(struct kbase_context *kctx, u64 vpfn)
463 {
464         phys_addr_t pgd;
465         int l;
466
467         pgd = kctx->pgd;
468
469         for (l = MIDGARD_MMU_TOPLEVEL; l < 3; l++) {
470                 pgd = mmu_insert_pages_recover_get_next_pgd(kctx, pgd, vpfn, l);
471                 /* Should never fail */
472                 KBASE_DEBUG_ASSERT(0 != pgd);
473         }
474
475         return pgd;
476 }
477
478 static void mmu_insert_pages_failure_recovery(struct kbase_context *kctx, u64 vpfn,
479                                               size_t nr)
480 {
481         phys_addr_t pgd;
482         u64 *pgd_page;
483
484         KBASE_DEBUG_ASSERT(NULL != kctx);
485         KBASE_DEBUG_ASSERT(0 != vpfn);
486         /* 64-bit address range is the max */
487         KBASE_DEBUG_ASSERT(vpfn <= (UINT64_MAX / PAGE_SIZE));
488
489         lockdep_assert_held(&kctx->reg_lock);
490
491         while (nr) {
492                 unsigned int i;
493                 unsigned int index = vpfn & 0x1FF;
494                 unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
495                 struct page *p;
496
497                 if (count > nr)
498                         count = nr;
499
500                 pgd = mmu_insert_pages_recover_get_bottom_pgd(kctx, vpfn);
501                 KBASE_DEBUG_ASSERT(0 != pgd);
502
503                 p = pfn_to_page(PFN_DOWN(pgd));
504
505                 pgd_page = kmap_atomic(p);
506                 KBASE_DEBUG_ASSERT(NULL != pgd_page);
507
508                 /* Invalidate the entries we added */
509                 for (i = 0; i < count; i++)
510                         page_table_entry_set(kctx->kbdev, &pgd_page[index + i],
511                                              ENTRY_IS_INVAL);
512
513                 vpfn += count;
514                 nr -= count;
515
516                 dma_sync_single_for_device(kctx->kbdev->dev,
517                                            kbase_dma_addr(p),
518                                            PAGE_SIZE, DMA_TO_DEVICE);
519                 kunmap_atomic(pgd_page);
520         }
521 }
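/*
 * Note on the recovery path above: it only walks table levels that were
 * already created (hence the "should never fail" asserts) and re-invalidates
 * exactly the entries the failed insert had written, using the recover_vpfn
 * and recover_count values tracked by the callers below.
 */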
522
523 /**
524  * Map KBASE_REG flags to MMU flags
525  */
526 static u64 kbase_mmu_get_mmu_flags(unsigned long flags)
527 {
528         u64 mmu_flags;
529
530         /* store mem_attr index in bits 4:2 (the macro called ensures it is only 3 bits) */
531         mmu_flags = KBASE_REG_MEMATTR_VALUE(flags) << 2;
532
533         /* write perm if requested */
534         mmu_flags |= (flags & KBASE_REG_GPU_WR) ? ENTRY_WR_BIT : 0;
535         /* read perm if requested */
536         mmu_flags |= (flags & KBASE_REG_GPU_RD) ? ENTRY_RD_BIT : 0;
537         /* nx if requested */
538         mmu_flags |= (flags & KBASE_REG_GPU_NX) ? ENTRY_NX_BIT : 0;
539
540         if (flags & KBASE_REG_SHARE_BOTH) {
541                 /* inner and outer shareable */
542                 mmu_flags |= SHARE_BOTH_BITS;
543         } else if (flags & KBASE_REG_SHARE_IN) {
544                 /* inner shareable coherency */
545                 mmu_flags |= SHARE_INNER_BITS;
546         }
547
548         return mmu_flags;
549 }
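/*
 * Illustrative example (the flag bit positions themselves are defined
 * elsewhere): a region created with KBASE_REG_GPU_RD | KBASE_REG_GPU_WR and
 * no KBASE_REG_SHARE_* flag produces an ATE with the read and write
 * permission bits set, the region's 3-bit memory-attribute index placed in
 * bits 4:2, and no shareability bits.
 */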
550
551 /*
552  * Map the single page 'phys' 'nr' of times, starting at GPU PFN 'vpfn'
553  */
554 mali_error kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
555                                         phys_addr_t phys, size_t nr,
556                                         unsigned long flags)
557 {
558         phys_addr_t pgd;
559         u64 *pgd_page;
560         u64 pte_entry;
561         /* In case the insert_single_page only partially completes we need to be
562          * able to recover */
563         mali_bool recover_required = MALI_FALSE;
564         u64 recover_vpfn = vpfn;
565         size_t recover_count = 0;
566
567         KBASE_DEBUG_ASSERT(NULL != kctx);
568         KBASE_DEBUG_ASSERT(0 != vpfn);
569         /* 64-bit address range is the max */
570         KBASE_DEBUG_ASSERT(vpfn <= (UINT64_MAX / PAGE_SIZE));
571
572         lockdep_assert_held(&kctx->reg_lock);
573
574         /* the one entry we'll populate everywhere */
575         pte_entry = mmu_phyaddr_to_ate(phys, kbase_mmu_get_mmu_flags(flags));
576
577         while (nr) {
578                 unsigned int i;
579                 unsigned int index = vpfn & 0x1FF;
580                 unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
581                 struct page *p;
582
583                 if (count > nr)
584                         count = nr;
585
586                 /*
587                  * Repeatedly calling mmu_get_bottom_pgd() is clearly
588                  * suboptimal. We don't have to re-parse the whole tree
589                  * each time (just cache the l0-l2 sequence).
590                  * On the other hand, it's only a gain when we map more than
591                  * 256 pages at once (on average). Do we really care?
592                  */
593                 pgd = mmu_get_bottom_pgd(kctx, vpfn);
594                 if (!pgd) {
595                         dev_warn(kctx->kbdev->dev,
596                                                "kbase_mmu_insert_single_page: "
597                                                "mmu_get_bottom_pgd failure\n");
598                         if (recover_required) {
599                                 /* Invalidate the pages we have partially
600                                  * completed */
601                                 mmu_insert_pages_failure_recovery(kctx,
602                                                                   recover_vpfn,
603                                                                   recover_count);
604                         }
605                         return MALI_ERROR_FUNCTION_FAILED;
606                 }
607
608                 p = pfn_to_page(PFN_DOWN(pgd));
609                 pgd_page = kmap(p);
610                 if (!pgd_page) {
611                         dev_warn(kctx->kbdev->dev,
612                                                "kbase_mmu_insert_single_page: "
613                                                "kmap failure\n");
614                         if (recover_required) {
615                                 /* Invalidate the pages we have partially
616                                  * completed */
617                                 mmu_insert_pages_failure_recovery(kctx,
618                                                                   recover_vpfn,
619                                                                   recover_count);
620                         }
621                         return MALI_ERROR_OUT_OF_MEMORY;
622                 }
623
624                 for (i = 0; i < count; i++) {
625                         unsigned int ofs = index + i;
626                         KBASE_DEBUG_ASSERT(0 == (pgd_page[ofs] & 1UL));
627                         page_table_entry_set(kctx->kbdev, &pgd_page[ofs],
628                                              pte_entry);
629                 }
630
631                 vpfn += count;
632                 nr -= count;
633
634                 dma_sync_single_for_device(kctx->kbdev->dev,
635                                            kbase_dma_addr(p) +
636                                            (index * sizeof(u64)),
637                                            count * sizeof(u64),
638                                            DMA_TO_DEVICE);
639
640
641                 kunmap(p);
642                 /* We have started modifying the page table.
643                  * If further pages need inserting and fail we need to undo what
644                  * has already taken place */
645                 recover_required = MALI_TRUE;
646                 recover_count += count;
647         }
648         return MALI_ERROR_NONE;
649 }
650
651 /*
652  * Map 'nr' pages pointed to by 'phys' at GPU PFN 'vpfn'
653  */
654 mali_error kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn,
655                                   phys_addr_t *phys, size_t nr,
656                                   unsigned long flags)
657 {
658         phys_addr_t pgd;
659         u64 *pgd_page;
660         u64 mmu_flags = 0;
661         /* In case the insert_pages only partially completes we need to be able
662          * to recover */
663         mali_bool recover_required = MALI_FALSE;
664         u64 recover_vpfn = vpfn;
665         size_t recover_count = 0;
666
667         KBASE_DEBUG_ASSERT(NULL != kctx);
668         KBASE_DEBUG_ASSERT(0 != vpfn);
669         /* 64-bit address range is the max */
670         KBASE_DEBUG_ASSERT(vpfn <= (UINT64_MAX / PAGE_SIZE));
671
672         lockdep_assert_held(&kctx->reg_lock);
673
674         mmu_flags = kbase_mmu_get_mmu_flags(flags);
675
676         while (nr) {
677                 unsigned int i;
678                 unsigned int index = vpfn & 0x1FF;
679                 unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
680                 struct page *p;
681
682                 if (count > nr)
683                         count = nr;
684
685                 /*
686                  * Repeatedly calling mmu_get_bottom_pgd() is clearly
687                  * suboptimal. We don't have to re-parse the whole tree
688                  * each time (just cache the l0-l2 sequence).
689                  * On the other hand, it's only a gain when we map more than
690                  * 256 pages at once (on average). Do we really care?
691                  */
692                 pgd = mmu_get_bottom_pgd(kctx, vpfn);
693                 if (!pgd) {
694                         dev_warn(kctx->kbdev->dev,
695                                                "kbase_mmu_insert_pages: "
696                                                "mmu_get_bottom_pgd failure\n");
697                         if (recover_required) {
698                                 /* Invalidate the pages we have partially
699                                  * completed */
700                                 mmu_insert_pages_failure_recovery(kctx,
701                                                                   recover_vpfn,
702                                                                   recover_count);
703                         }
704                         return MALI_ERROR_FUNCTION_FAILED;
705                 }
706
707                 p = pfn_to_page(PFN_DOWN(pgd));
708                 pgd_page = kmap(p);
709                 if (!pgd_page) {
710                         dev_warn(kctx->kbdev->dev,
711                                                "kbase_mmu_insert_pages: "
712                                                "kmap failure\n");
713                         if (recover_required) {
714                                 /* Invalidate the pages we have partially
715                                  * completed */
716                                 mmu_insert_pages_failure_recovery(kctx,
717                                                                   recover_vpfn,
718                                                                   recover_count);
719                         }
720                         return MALI_ERROR_OUT_OF_MEMORY;
721                 }
722
723                 for (i = 0; i < count; i++) {
724                         unsigned int ofs = index + i;
725                         KBASE_DEBUG_ASSERT(0 == (pgd_page[ofs] & 1UL));
726                         page_table_entry_set(kctx->kbdev, &pgd_page[ofs],
727                                              mmu_phyaddr_to_ate(phys[i],
728                                                                 mmu_flags)
729                                              );
730                 }
731
732                 phys += count;
733                 vpfn += count;
734                 nr -= count;
735
736                 dma_sync_single_for_device(kctx->kbdev->dev,
737                                            kbase_dma_addr(p) +
738                                            (index * sizeof(u64)),
739                                            count * sizeof(u64),
740                                            DMA_TO_DEVICE);
741
742                 kunmap(p);
743                 /* We have started modifying the page table. If further pages
744                  * need inserting and fail we need to undo what has already
745                  * taken place */
746                 recover_required = MALI_TRUE;
747                 recover_count += count;
748         }
749         return MALI_ERROR_NONE;
750 }
751
752 KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages)
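/*
 * Note on the cache maintenance in the two insert paths above: only the range
 * of entries actually written is synced for the device (starting at
 * index * sizeof(u64) into the page, for count * sizeof(u64) bytes), whereas
 * a freshly allocated PGD is synced as a whole page in kbase_mmu_alloc_pgd().
 */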
753
754 /**
755  * This function is responsible for validating the MMU PTs and
756  * triggering the required flushes.
757  *
758  * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is
759  * currently scheduled into the runpool, and so potentially uses a lot of locks.
760  * These locks must be taken in the correct order with respect to others
761  * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more
762  * information.
763  */
764 static void kbase_mmu_flush(struct kbase_context *kctx, u64 vpfn, size_t nr)
765 {
766         struct kbase_device *kbdev;
767         mali_bool ctx_is_in_runpool;
768
769         KBASE_DEBUG_ASSERT(NULL != kctx);
770
771         kbdev = kctx->kbdev;
772
773         /* We must flush if we're currently running jobs. At the very least, we need to retain the
774          * context to ensure it doesn't schedule out whilst we're trying to flush it */
775         ctx_is_in_runpool = kbasep_js_runpool_retain_ctx(kbdev, kctx);
776
777         if (ctx_is_in_runpool) {
778                 KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
779
780                 /* Second level check is to try to only do this when jobs are running. The refcount is
781                  * a heuristic for this. */
782                 if (kbdev->js_data.runpool_irq.per_as_data[kctx->as_nr].as_busy_refcount >= 2) {
783                         int ret;
784                         u32 op;
785
786                         /* AS transaction begin */
787                         mutex_lock(&kbdev->as[kctx->as_nr].transaction_mutex);
788
789                         if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6367))
790                                 op = AS_COMMAND_FLUSH;
791                         else
792                                 op = AS_COMMAND_FLUSH_MEM;
793
794                         ret = kbase_mmu_hw_do_operation(kbdev,
795                                                         &kbdev->as[kctx->as_nr],
796                                                         kctx, vpfn, nr,
797                                                         op, 0);
798 #if KBASE_GPU_RESET_EN
799                         if (ret) {
800                                 /* Flush failed to complete, assume the GPU has hung and perform a reset to recover */
801                                 dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover\n");
802                                 if (kbase_prepare_to_reset_gpu(kbdev))
803                                         kbase_reset_gpu(kbdev);
804                         }
805 #endif /* KBASE_GPU_RESET_EN */
806
807                         mutex_unlock(&kbdev->as[kctx->as_nr].transaction_mutex);
808                         /* AS transaction end */
809                 }
810                 kbasep_js_runpool_release_ctx(kbdev, kctx);
811         }
812 }
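/*
 * Note: the operation issued above is AS_COMMAND_FLUSH_MEM, unless hardware
 * issue 6367 requires falling back to the legacy AS_COMMAND_FLUSH; compare
 * with page_fault_worker(), which makes the same choice between FLUSH and
 * FLUSH_PT.
 */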
813
814 /*
815  * We actually only discard the ATE, and not the page table
816  * pages. There is a potential DoS here, as we'll leak memory by
817  * having PTEs that are potentially unused.  Will require physical
818  * page accounting, so MMU pages are part of the process allocation.
819  *
820  * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is
821  * currently scheduled into the runpool, and so potentially uses a lot of locks.
822  * These locks must be taken in the correct order with respect to others
823  * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more
824  * information.
825  */
826 mali_error kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr)
827 {
828         phys_addr_t pgd;
829         u64 *pgd_page;
830         struct kbase_device *kbdev;
831         size_t requested_nr = nr;
832
833         KBASE_DEBUG_ASSERT(NULL != kctx);
834         beenthere(kctx, "kctx %p vpfn %lx nr %zd", (void *)kctx, (unsigned long)vpfn, nr);
835
836         lockdep_assert_held(&kctx->reg_lock);
837
838         if (0 == nr) {
839                 /* early out if nothing to do */
840                 return MALI_ERROR_NONE;
841         }
842
843         kbdev = kctx->kbdev;
844
845         while (nr) {
846                 unsigned int i;
847                 unsigned int index = vpfn & 0x1FF;
848                 unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
849                 struct page *p;
850                 if (count > nr)
851                         count = nr;
852
853                 pgd = mmu_get_bottom_pgd(kctx, vpfn);
854                 if (!pgd) {
855                         dev_warn(kbdev->dev, "kbase_mmu_teardown_pages: mmu_get_bottom_pgd failure\n");
856                         return MALI_ERROR_FUNCTION_FAILED;
857                 }
858
859                 p = pfn_to_page(PFN_DOWN(pgd));
860                 pgd_page = kmap(p);
861                 if (!pgd_page) {
862                         dev_warn(kbdev->dev, "kbase_mmu_teardown_pages: kmap failure\n");
863                         return MALI_ERROR_OUT_OF_MEMORY;
864                 }
865
866                 for (i = 0; i < count; i++) {
867                         page_table_entry_set(kctx->kbdev, &pgd_page[index + i], ENTRY_IS_INVAL);
868                 }
869
870                 vpfn += count;
871                 nr -= count;
872
873                 dma_sync_single_for_device(kctx->kbdev->dev,
874                                            kbase_dma_addr(p) +
875                                            (index * sizeof(u64)),
876                                            count * sizeof(u64),
877                                            DMA_TO_DEVICE);
878
879                 kunmap(p);
880         }
881
882         kbase_mmu_flush(kctx, vpfn, requested_nr);
883         return MALI_ERROR_NONE;
884 }
885
886 KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages)
887
888 /**
889  * Update the entries for the specified number of pages pointed to by 'phys' at GPU PFN 'vpfn'.
890  * This call is triggered in response to a change of the memory attributes.
891  *
892  * @pre : The caller is responsible for validating the memory attributes
893  *
894  * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is
895  * currently scheduled into the runpool, and so potentially uses a lot of locks.
896  * These locks must be taken in the correct order with respect to others
897  * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more
898  * information.
899  */
900 mali_error kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, phys_addr_t* phys, size_t nr, unsigned long flags)
901 {
902         phys_addr_t pgd;
903         u64* pgd_page;
904         u64 mmu_flags = 0;
905         size_t requested_nr = nr;
906
907         KBASE_DEBUG_ASSERT(NULL != kctx);
908         KBASE_DEBUG_ASSERT(0 != vpfn);
909         KBASE_DEBUG_ASSERT(vpfn <= (UINT64_MAX / PAGE_SIZE));
910
911         lockdep_assert_held(&kctx->reg_lock);
912
913         mmu_flags = kbase_mmu_get_mmu_flags(flags);
914
915         dev_warn(kctx->kbdev->dev, "kbase_mmu_update_pages(): updating page share flags "
916                         "on GPU PFN 0x%llx from phys %p, %zu pages",
917                         vpfn, phys, nr);
918
919
920         while (nr) {
921                 unsigned int i;
922                 unsigned int index = vpfn & 0x1FF;
923                 size_t count = KBASE_MMU_PAGE_ENTRIES - index;
924                 struct page *p;
925
926                 if (count > nr)
927                         count = nr;
928
929                 pgd = mmu_get_bottom_pgd(kctx, vpfn);
930                 if (!pgd) {
931                         dev_warn(kctx->kbdev->dev, "mmu_get_bottom_pgd failure\n");
932                         return MALI_ERROR_FUNCTION_FAILED;
933                 }
934
935                 p = pfn_to_page(PFN_DOWN(pgd));
936                 pgd_page = kmap(p);
937                 if (!pgd_page) {
938                         dev_warn(kctx->kbdev->dev, "kmap failure\n");
939                         return MALI_ERROR_OUT_OF_MEMORY;
940                 }
941
942                 for (i = 0; i < count; i++) {
943                         page_table_entry_set(kctx->kbdev, &pgd_page[index + i],  mmu_phyaddr_to_ate(phys[i], mmu_flags));
944                 }
945
946                 phys += count;
947                 vpfn += count;
948                 nr -= count;
949
950                 dma_sync_single_for_device(kctx->kbdev->dev,
951                                            kbase_dma_addr(p) +
952                                            (index * sizeof(u64)),
953                                            count * sizeof(u64),
954                                            DMA_TO_DEVICE);
955
956                 kunmap(p);
957         }
958
959         kbase_mmu_flush(kctx, vpfn, requested_nr);
960
961         return MALI_ERROR_NONE;
962 }
963
964 static int mmu_pte_is_valid(u64 pte)
965 {
966         return ((pte & 3) == ENTRY_IS_ATE);
967 }
968
969 /* This is a debug feature only */
970 static void mmu_check_unused(struct kbase_context *kctx, phys_addr_t pgd)
971 {
972         u64 *page;
973         int i;
974
975         page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd)));
976         /* kmap_atomic should NEVER fail. */
977         KBASE_DEBUG_ASSERT(NULL != page);
978
979         for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
980                 if (mmu_pte_is_valid(page[i]))
981                         beenthere(kctx, "live pte %016lx", (unsigned long)page[i]);
982         }
983         kunmap_atomic(page);
984 }
985
986 static void mmu_teardown_level(struct kbase_context *kctx, phys_addr_t pgd, int level, int zap, u64 *pgd_page_buffer)
987 {
988         phys_addr_t target_pgd;
989         u64 *pgd_page;
990         int i;
991
992         KBASE_DEBUG_ASSERT(NULL != kctx);
993         lockdep_assert_held(&kctx->reg_lock);
994
995         pgd_page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd)));
996         /* kmap_atomic should NEVER fail. */
997         KBASE_DEBUG_ASSERT(NULL != pgd_page);
998         /* Copy the page to our preallocated buffer so that we can minimize kmap_atomic usage */
999         memcpy(pgd_page_buffer, pgd_page, PAGE_SIZE);
1000         kunmap_atomic(pgd_page);
1001         pgd_page = pgd_page_buffer;
1002
1003         for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
1004                 target_pgd = mmu_pte_to_phy_addr(pgd_page[i]);
1005
1006                 if (target_pgd) {
1007                         if (level < 2) {
1008                                 mmu_teardown_level(kctx, target_pgd, level + 1, zap, pgd_page_buffer + (PAGE_SIZE / sizeof(u64)));
1009                         } else {
1010                                 /*
1011                                  * So target_pgd is a level-3 page.
1012                                  * As a leaf, it is safe to free it,
1013                                  * unless we have live pages attached to it!
1014                                  */
1015                                 mmu_check_unused(kctx, target_pgd);
1016                         }
1017
1018                         beenthere(kctx, "pte %lx level %d", (unsigned long)target_pgd, level + 1);
1019                         if (zap) {
1020                                 kbase_mem_allocator_free(kctx->pgd_allocator, 1, &target_pgd, MALI_TRUE);
1021                                 kbase_process_page_usage_dec(kctx, 1);
1022                                 kbase_atomic_sub_pages(1, &kctx->used_pages);
1023                                 kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages);
1024                         }
1025                 }
1026         }
1027 }
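/*
 * Note on the buffer handling above: each recursion level of
 * mmu_teardown_level() consumes one PAGE_SIZE slice of the caller-supplied
 * buffer (pgd_page_buffer + PAGE_SIZE / sizeof(u64) advances by a full page
 * of u64 entries). kbase_mmu_init() preallocates one page per MMU level
 * (four in total), which covers the maximum depth of this recursion.
 */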
1028
1029 mali_error kbase_mmu_init(struct kbase_context *kctx)
1030 {
1031         KBASE_DEBUG_ASSERT(NULL != kctx);
1032         KBASE_DEBUG_ASSERT(NULL == kctx->mmu_teardown_pages);
1033
1034         /* Preallocate four pages (one per MMU level) for mmu_teardown_level to use */
1035         kctx->mmu_teardown_pages = kmalloc(PAGE_SIZE * 4, GFP_KERNEL);
1036
1037         kctx->mem_attrs = (AS_MEMATTR_IMPL_DEF_CACHE_POLICY <<
1038                            (AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY * 8)) |
1039                           (AS_MEMATTR_FORCE_TO_CACHE_ALL    <<
1040                            (AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8)) |
1041                           (AS_MEMATTR_WRITE_ALLOC           <<
1042                            (AS_MEMATTR_INDEX_WRITE_ALLOC * 8)) |
1043                           0; /* The other indices are unused for now */
1044
1045         if (NULL == kctx->mmu_teardown_pages)
1046                 return MALI_ERROR_OUT_OF_MEMORY;
1047
1048         return MALI_ERROR_NONE;
1049 }
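/*
 * A sketch of the mem_attrs encoding above (the attribute values and index
 * macros are defined elsewhere): the 64-bit mem_attrs value is treated as
 * eight one-byte slots, and the attribute for index I is placed in byte I,
 * which is what the "<< (AS_MEMATTR_INDEX_* * 8)" shifts express.
 */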
1050
1051 void kbase_mmu_term(struct kbase_context *kctx)
1052 {
1053         KBASE_DEBUG_ASSERT(NULL != kctx);
1054         KBASE_DEBUG_ASSERT(NULL != kctx->mmu_teardown_pages);
1055
1056         kfree(kctx->mmu_teardown_pages);
1057         kctx->mmu_teardown_pages = NULL;
1058 }
1059
1060 void kbase_mmu_free_pgd(struct kbase_context *kctx)
1061 {
1062         KBASE_DEBUG_ASSERT(NULL != kctx);
1063         KBASE_DEBUG_ASSERT(NULL != kctx->mmu_teardown_pages);
1064
1065         lockdep_assert_held(&kctx->reg_lock);
1066
1067         mmu_teardown_level(kctx, kctx->pgd, MIDGARD_MMU_TOPLEVEL, 1, kctx->mmu_teardown_pages);
1068
1069         beenthere(kctx, "pgd %lx", (unsigned long)kctx->pgd);
1070         kbase_mem_allocator_free(kctx->pgd_allocator, 1, &kctx->pgd, MALI_TRUE);
1071         kbase_process_page_usage_dec(kctx, 1);
1072         kbase_atomic_sub_pages(1, &kctx->used_pages);
1073         kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages);
1074 }
1075
1076 KBASE_EXPORT_TEST_API(kbase_mmu_free_pgd)
1077
1078 static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, int level, char ** const buffer, size_t *size_left)
1079 {
1080         phys_addr_t target_pgd;
1081         u64 *pgd_page;
1082         int i;
1083         size_t size = KBASE_MMU_PAGE_ENTRIES * sizeof(u64) + sizeof(u64);
1084         size_t dump_size;
1085
1086         KBASE_DEBUG_ASSERT(NULL != kctx);
1087         lockdep_assert_held(&kctx->reg_lock);
1088
1089         pgd_page = kmap(pfn_to_page(PFN_DOWN(pgd)));
1090         if (!pgd_page) {
1091                 dev_warn(kctx->kbdev->dev, "kbasep_mmu_dump_level: kmap failure\n");
1092                 return 0;
1093         }
1094
1095         if (*size_left >= size) {
1096                 /* A modified physical address that contains the page table level */
1097                 u64 m_pgd = pgd | level;
1098
1099                 /* Put the modified physical address in the output buffer */
1100                 memcpy(*buffer, &m_pgd, sizeof(m_pgd));
1101                 *buffer += sizeof(m_pgd);
1102
1103                 /* Followed by the page table itself */
1104                 memcpy(*buffer, pgd_page, sizeof(u64) * KBASE_MMU_PAGE_ENTRIES);
1105                 *buffer += sizeof(u64) * KBASE_MMU_PAGE_ENTRIES;
1106
1107                 *size_left -= size;
1108         }
1109
1110         for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
1111                 if ((pgd_page[i] & ENTRY_IS_PTE) == ENTRY_IS_PTE) {
1112                         target_pgd = mmu_pte_to_phy_addr(pgd_page[i]);
1113
1114                         dump_size = kbasep_mmu_dump_level(kctx, target_pgd, level + 1, buffer, size_left);
1115                         if (!dump_size) {
1116                                 kunmap(pfn_to_page(PFN_DOWN(pgd)));
1117                                 return 0;
1118                         }
1119                         size += dump_size;
1120                 }
1121         }
1122
1123         kunmap(pfn_to_page(PFN_DOWN(pgd)));
1124
1125         return size;
1126 }
1127
1128 void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages)
1129 {
1130         void *kaddr;
1131         size_t size_left;
1132
1133         KBASE_DEBUG_ASSERT(kctx);
1134
1135         lockdep_assert_held(&kctx->reg_lock);
1136
1137         if (0 == nr_pages) {
1138                 /* can't dump into a zero-sized buffer, early out */
1139                 return NULL;
1140         }
1141
1142         size_left = nr_pages * PAGE_SIZE;
1143
1144         KBASE_DEBUG_ASSERT(0 != size_left);
1145         kaddr = vmalloc_user(size_left);
1146
1147         if (kaddr) {
1148                 u64 end_marker = 0xFFULL;
1149                 char *buffer = (char *)kaddr;
1150
1151                 size_t size = kbasep_mmu_dump_level(kctx, kctx->pgd, MIDGARD_MMU_TOPLEVEL, &buffer, &size_left);
1152                 if (!size) {
1153                         vfree(kaddr);
1154                         return NULL;
1155                 }
1156
1157                 /* Add on the size for the end marker */
1158                 size += sizeof(u64);
1159
1160                 if (size > nr_pages * PAGE_SIZE || size_left < sizeof(u64)) {
1161                         /* The buffer isn't big enough - free the memory and return failure */
1162                         vfree(kaddr);
1163                         return NULL;
1164                 }
1165
1166                 /* Add the end marker */
1167                 memcpy(buffer, &end_marker, sizeof(u64));
1168         }
1169
1170         return kaddr;
1171 }
1172 KBASE_EXPORT_TEST_API(kbase_mmu_dump)
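/*
 * Resulting dump layout (as produced by the two functions above): for each
 * page table page visited, an 8-byte word holding that page's physical
 * address OR'd with its level, followed by the page's 512 8-byte entries;
 * the whole dump is terminated by the 0xFF end marker written by
 * kbase_mmu_dump().
 */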
1173
1174 static void bus_fault_worker(struct work_struct *data)
1175 {
1176         struct kbase_as *faulting_as;
1177         int as_no;
1178         struct kbase_context *kctx;
1179         struct kbase_device *kbdev;
1180 #if KBASE_GPU_RESET_EN
1181         mali_bool reset_status = MALI_FALSE;
1182 #endif /* KBASE_GPU_RESET_EN */
1183
1184         faulting_as = container_of(data, struct kbase_as, work_busfault);
1185
1186         as_no = faulting_as->number;
1187
1188         kbdev = container_of(faulting_as, struct kbase_device, as[as_no]);
1189
1190         /* Grab the context that was already refcounted in kbase_mmu_interrupt().
1191          * Therefore, it cannot be scheduled out of this AS until we explicitly release it
1192          *
1193          * NOTE: NULL can be returned here if we're gracefully handling a spurious interrupt */
1194         kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as_no);
1195 #if KBASE_GPU_RESET_EN
1196         if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) {
1197                 /* Due to H/W issue 8245 we need to reset the GPU after using UNMAPPED mode.
1198                  * We start the reset before switching to UNMAPPED to ensure that unrelated jobs
1199                  * are evicted from the GPU before the switch.
1200                  */
1201                 dev_err(kbdev->dev, "GPU bus error occurred. For this GPU version we now soft-reset as part of bus error recovery\n");
1202                 reset_status = kbase_prepare_to_reset_gpu(kbdev);
1203         }
1204 #endif /* KBASE_GPU_RESET_EN */
1205         /* NOTE: If GPU already powered off for suspend, we don't need to switch to unmapped */
1206         if (!kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
1207                 struct kbase_mmu_setup *current_setup = &faulting_as->current_setup;
1208
1209                 /* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */
1210                 /* AS transaction begin */
1211                 mutex_lock(&kbdev->as[as_no].transaction_mutex);
1212
1213                 /* Set the MMU into unmapped mode */
1214                 current_setup->transtab &= ~(u64)MMU_TRANSTAB_ADRMODE_MASK;
1215                 current_setup->transtab |= AS_TRANSTAB_ADRMODE_UNMAPPED;
1216
1217                 /* Apply the new settings */
1218                 kbase_mmu_hw_configure(kbdev, faulting_as, kctx);
1219
1220                 mutex_unlock(&kbdev->as[as_no].transaction_mutex);
1221                 /* AS transaction end */
1222
1223                 kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
1224                                          KBASE_MMU_FAULT_TYPE_BUS);
1225                 kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
1226                                          KBASE_MMU_FAULT_TYPE_BUS);
1227
1228                 kbase_pm_context_idle(kbdev);
1229         }
1230 #if KBASE_GPU_RESET_EN
1231         if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245) && reset_status)
1232                 kbase_reset_gpu(kbdev);
1233 #endif /* KBASE_GPU_RESET_EN */
1234         /* By this point, the fault was handled in some way, so release the ctx refcount */
1235         if (kctx != NULL)
1236                 kbasep_js_runpool_release_ctx(kbdev, kctx);
1237 }
1238
1239 const char *kbase_exception_name(u32 exception_code)
1240 {
1241         const char *e;
1242
1243         switch (exception_code) {
1244                 /* Non-Fault Status code */
1245         case 0x00:
1246                 e = "NOT_STARTED/IDLE/OK";
1247                 break;
1248         case 0x01:
1249                 e = "DONE";
1250                 break;
1251         case 0x02:
1252                 e = "INTERRUPTED";
1253                 break;
1254         case 0x03:
1255                 e = "STOPPED";
1256                 break;
1257         case 0x04:
1258                 e = "TERMINATED";
1259                 break;
1260         case 0x08:
1261                 e = "ACTIVE";
1262                 break;
1263                 /* Job exceptions */
1264         case 0x40:
1265                 e = "JOB_CONFIG_FAULT";
1266                 break;
1267         case 0x41:
1268                 e = "JOB_POWER_FAULT";
1269                 break;
1270         case 0x42:
1271                 e = "JOB_READ_FAULT";
1272                 break;
1273         case 0x43:
1274                 e = "JOB_WRITE_FAULT";
1275                 break;
1276         case 0x44:
1277                 e = "JOB_AFFINITY_FAULT";
1278                 break;
1279         case 0x48:
1280                 e = "JOB_BUS_FAULT";
1281                 break;
1282         case 0x50:
1283                 e = "INSTR_INVALID_PC";
1284                 break;
1285         case 0x51:
1286                 e = "INSTR_INVALID_ENC";
1287                 break;
1288         case 0x52:
1289                 e = "INSTR_TYPE_MISMATCH";
1290                 break;
1291         case 0x53:
1292                 e = "INSTR_OPERAND_FAULT";
1293                 break;
1294         case 0x54:
1295                 e = "INSTR_TLS_FAULT";
1296                 break;
1297         case 0x55:
1298                 e = "INSTR_BARRIER_FAULT";
1299                 break;
1300         case 0x56:
1301                 e = "INSTR_ALIGN_FAULT";
1302                 break;
1303         case 0x58:
1304                 e = "DATA_INVALID_FAULT";
1305                 break;
1306         case 0x59:
1307                 e = "TILE_RANGE_FAULT";
1308                 break;
1309         case 0x5A:
1310                 e = "ADDR_RANGE_FAULT";
1311                 break;
1312         case 0x60:
1313                 e = "OUT_OF_MEMORY";
1314                 break;
1315                 /* GPU exceptions */
1316         case 0x80:
1317                 e = "DELAYED_BUS_FAULT";
1318                 break;
1319         case 0x88:
1320                 e = "SHAREABILITY_FAULT";
1321                 break;
1322                 /* MMU exceptions */
1323         case 0xC0:
1324         case 0xC1:
1325         case 0xC2:
1326         case 0xC3:
1327         case 0xC4:
1328         case 0xC5:
1329         case 0xC6:
1330         case 0xC7:
1331                 e = "TRANSLATION_FAULT";
1332                 break;
1333         case 0xC8:
1334                 e = "PERMISSION_FAULT";
1335                 break;
1336         case 0xD0:
1337         case 0xD1:
1338         case 0xD2:
1339         case 0xD3:
1340         case 0xD4:
1341         case 0xD5:
1342         case 0xD6:
1343         case 0xD7:
1344                 e = "TRANSTAB_BUS_FAULT";
1345                 break;
1346         case 0xD8:
1347                 e = "ACCESS_FLAG";
1348                 break;
1349         default:
1350                 e = "UNKNOWN";
1351                 break;
1352         }
1353
1354         return e;
1355 }
1356
1357 /**
1358  * The caller must ensure it has retained the ctx to prevent it from being scheduled out whilst it is being worked on.
1359  */
1360 static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx,
1361                 struct kbase_as *as, const char *reason_str)
1362 {
1363         unsigned long flags;
1364         int exception_type;
1365         int access_type;
1366         int source_id;
1367         int as_no;
1368         struct kbase_device *kbdev;
1369         struct kbase_mmu_setup *current_setup;
1370         struct kbasep_js_device_data *js_devdata;
1371
1372 #if KBASE_GPU_RESET_EN
1373         mali_bool reset_status = MALI_FALSE;
1374 #endif
1375         static const char * const access_type_names[] = { "RESERVED", "EXECUTE", "READ", "WRITE" };
1376
1377         as_no = as->number;
1378         kbdev = kctx->kbdev;
1379         js_devdata = &kbdev->js_data;
1380
1381         /* ASSERT that the context won't leave the runpool */
1382         KBASE_DEBUG_ASSERT(kbasep_js_debug_check_ctx_refcount(kbdev, kctx) > 0);
1383
1384         /* decode the fault status */
1385         exception_type = as->fault_status & 0xFF;
1386         access_type = (as->fault_status >> 8) & 0x3;
1387         source_id = (as->fault_status >> 16);
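
        /*
         * Fault status layout as decoded by the statements above (assuming a
         * 32-bit status word; the remaining bits are not decoded here):
         *
         *   bits [7:0]   exception type (see the exception-name decoder above)
         *   bits [9:8]   access type: RESERVED/EXECUTE/READ/WRITE
         *   bit  [10]    1 = decoder fault, 0 = slave fault (printed below)
         *   bits [31:16] source id
         */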
1388
1389         /* terminal fault, print info about the fault */
1390         dev_err(kbdev->dev,
1391                 "Unhandled Page fault in AS%d at VA 0x%016llX\n"
1392                 "Reason: %s\n"
1393                 "raw fault status 0x%X\n"
1394                 "decoded fault status: %s\n"
1395                 "exception type 0x%X: %s\n"
1396                 "access type 0x%X: %s\n"
1397                 "source id 0x%X\n",
1398                 as_no, as->fault_addr,
1399                 reason_str,
1400                 as->fault_status,
1401                 (as->fault_status & (1 << 10) ? "DECODER FAULT" : "SLAVE FAULT"),
1402                 exception_type, kbase_exception_name(exception_type),
1403                 access_type, access_type_names[access_type],
1404                 source_id);
1405
1406         /* hardware counters dump fault handling */
1407         if ((kbdev->hwcnt.kctx) && (kbdev->hwcnt.kctx->as_nr == as_no) && (kbdev->hwcnt.state == KBASE_INSTR_STATE_DUMPING)) {
1408                 unsigned int num_core_groups = kbdev->gpu_props.num_core_groups;
1409                 if ((as->fault_addr >= kbdev->hwcnt.addr) && (as->fault_addr < (kbdev->hwcnt.addr + (num_core_groups * 2048))))
1410                         kbdev->hwcnt.state = KBASE_INSTR_STATE_FAULT;
1411         }
1412
1413         /* Stop the kctx from submitting more jobs and cause it to be scheduled
1414          * out/rescheduled - this will occur on releasing the context's refcount */
1415         spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
1416         kbasep_js_clear_submit_allowed(js_devdata, kctx);
1417         spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
1418
1419         /* Kill any running jobs from the context. Submit is disallowed, so no more jobs from this
1420          * context can appear in the job slots from this point on */
1421         kbase_job_kill_jobs_from_context(kctx);
1422         /* AS transaction begin */
1423         mutex_lock(&as->transaction_mutex);
1424 #if KBASE_GPU_RESET_EN
1425         if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) {
1426                 /* Due to H/W issue 8245 we need to reset the GPU after using UNMAPPED mode.
1427                  * We start the reset before switching to UNMAPPED to ensure that unrelated jobs
1428                  * are evicted from the GPU before the switch.
1429                  */
1430                 dev_err(kbdev->dev, "Unhandled page fault. For this GPU version we now soft-reset the GPU as part of page fault recovery.");
1431                 reset_status = kbase_prepare_to_reset_gpu(kbdev);
1432         }
1433 #endif /* KBASE_GPU_RESET_EN */
1434         /* Switch to UNMAPPED mode; this will abort all jobs and stop any hw counter dumping */
1435         current_setup = &as->current_setup;
1436
1437         current_setup->transtab &= ~(u64)MMU_TRANSTAB_ADRMODE_MASK;
1438         current_setup->transtab |= AS_TRANSTAB_ADRMODE_UNMAPPED;
1439
1440         /* Apply the new address space setting */
1441         kbase_mmu_hw_configure(kbdev, as, kctx);
1442
1443         mutex_unlock(&as->transaction_mutex);
1444         /* AS transaction end */
1445
1446         /* Clear down the fault */
1447         kbase_mmu_hw_clear_fault(kbdev, as, kctx, KBASE_MMU_FAULT_TYPE_PAGE);
1448         kbase_mmu_hw_enable_fault(kbdev, as, kctx, KBASE_MMU_FAULT_TYPE_PAGE);
1449
1450 #if KBASE_GPU_RESET_EN
1451         if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245) && reset_status)
1452                 kbase_reset_gpu(kbdev);
1453 #endif /* KBASE_GPU_RESET_EN */
1454 }
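
/*
 * Recovery sequence used above, summarised for reference (the code itself is
 * authoritative): further submission from the faulting context is disallowed,
 * its running jobs are killed, the address space is switched to UNMAPPED mode
 * under the AS transaction mutex, the page fault is then cleared and the fault
 * interrupt re-enabled, and, only on GPUs affected by BASE_HW_ISSUE_8245, the
 * GPU is soft-reset as part of the recovery.
 */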
1455
1456 void kbasep_as_do_poke(struct work_struct *work)
1457 {
1458         struct kbase_as *as;
1459         struct kbase_device *kbdev;
1460         struct kbase_context *kctx;
1461         unsigned long flags;
1462
1463         KBASE_DEBUG_ASSERT(work);
1464         as = container_of(work, struct kbase_as, poke_work);
1465         kbdev = container_of(as, struct kbase_device, as[as->number]);
1466         KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT);
1467
1468         /* GPU power will already be active by virtue of the caller holding a JS
1469          * reference on the address space, and the caller will not release that
1470          * reference until this worker has finished */
1471
1472         /* Further to the comment above, we know that the AS will not be released
1473          * while this function is running, because this workqueue is flushed before
1474          * the atom is released (in kbase_as_poking_timer_release_atom)
1475          */
1476         kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as->number);
1477
1478         /* AS transaction begin */
1479         mutex_lock(&as->transaction_mutex);
1480         /* Force a uTLB invalidate */
1481         kbase_mmu_hw_do_operation(kbdev, as, kctx, 0, 0,
1482                                   AS_COMMAND_UNLOCK, 0);
1483         mutex_unlock(&as->transaction_mutex);
1484         /* AS transaction end */
1485
1486         spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags);
1487         if (as->poke_refcount &&
1488                 !(as->poke_state & KBASE_AS_POKE_STATE_KILLING_POKE)) {
1489                 /* Only queue up the timer if we need it, and we're not trying to kill it */
1490                 hrtimer_start(&as->poke_timer, HR_TIMER_DELAY_MSEC(5), HRTIMER_MODE_REL);
1491         }
1492         spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags);
1493
1494 }
1495
1496 enum hrtimer_restart kbasep_as_poke_timer_callback(struct hrtimer *timer)
1497 {
1498         struct kbase_as *as;
1499         int queue_work_ret;
1500
1501         KBASE_DEBUG_ASSERT(NULL != timer);
1502         as = container_of(timer, struct kbase_as, poke_timer);
1503         KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT);
1504
1505         queue_work_ret = queue_work(as->poke_wq, &as->poke_work);
1506         KBASE_DEBUG_ASSERT(queue_work_ret);
1507         return HRTIMER_NORESTART;
1508 }
1509
1510 /**
1511  * Retain the poking timer on an atom's context (if the atom hasn't already
1512  * done so), and start the timer (if it's not already started).
1513  *
1514  * This must only be called on a context that's scheduled in, and an atom
1515  * that's running on the GPU.
1516  *
1517  * The caller must hold kbasep_js_device_data::runpool_irq::lock
1518  *
1519  * This can be called safely from atomic context
1520  */
1521 void kbase_as_poking_timer_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom)
1522 {
1523         struct kbase_as *as;
1524         KBASE_DEBUG_ASSERT(kbdev);
1525         KBASE_DEBUG_ASSERT(kctx);
1526         KBASE_DEBUG_ASSERT(katom);
1527         KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
1528         lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
1529
1530         if (katom->poking)
1531                 return;
1532
1533         katom->poking = 1;
1534
1535         /* It's safe to work on the as/as_nr without an explicit reference,
1536          * because the caller holds the runpool_irq lock, and the atom itself
1537          * was also running and had already taken a reference  */
1538         as = &kbdev->as[kctx->as_nr];
1539
1540         if (++(as->poke_refcount) == 1) {
1541                 /* First refcount for poke needed: check if not already in flight */
1542                 if (!as->poke_state) {
1543                         /* need to start poking */
1544                         as->poke_state |= KBASE_AS_POKE_STATE_IN_FLIGHT;
1545                         queue_work(as->poke_wq, &as->poke_work);
1546                 }
1547         }
1548 }
1549
1550 /**
1551  * If an atom holds a poking timer, release it and wait for it to finish
1552  *
1553  * This must only be called on a context that's scheduled in, and an atom
1554  * that still has a JS reference on the context
1555  *
1556  * This must \b not be called from atomic context, since it can sleep.
1557  */
1558 void kbase_as_poking_timer_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom)
1559 {
1560         struct kbase_as *as;
1561         unsigned long flags;
1562
1563         KBASE_DEBUG_ASSERT(kbdev);
1564         KBASE_DEBUG_ASSERT(kctx);
1565         KBASE_DEBUG_ASSERT(katom);
1566         KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
1567
1568         if (!katom->poking)
1569                 return;
1570
1571         as = &kbdev->as[kctx->as_nr];
1572
1573         spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags);
1574         KBASE_DEBUG_ASSERT(as->poke_refcount > 0);
1575         KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT);
1576
1577         if (--(as->poke_refcount) == 0) {
1578                 as->poke_state |= KBASE_AS_POKE_STATE_KILLING_POKE;
1579                 spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags);
1580
1581                 hrtimer_cancel(&as->poke_timer);
1582                 flush_workqueue(as->poke_wq);
1583
1584                 spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags);
1585
1586                 /* Re-check whether it's still needed */
1587                 if (as->poke_refcount) {
1588                         int queue_work_ret;
1589                         /* Poking still needed:
1590                          * - Another retain will not be starting the timer or queueing work,
1591                          * because it's still marked as in-flight
1592                          * - The hrtimer has finished, and has not started a new timer or
1593                          * queued work because it's been marked as killing
1594                          *
1595                          * So whatever happens now, just queue the work again */
1596                         as->poke_state &= ~((kbase_as_poke_state)KBASE_AS_POKE_STATE_KILLING_POKE);
1597                         queue_work_ret = queue_work(as->poke_wq, &as->poke_work);
1598                         KBASE_DEBUG_ASSERT(queue_work_ret);
1599                 } else {
1600                         /* It isn't - so mark it as not in flight, and not killing */
1601                         as->poke_state = 0u;
1602
1603                         /* The poke associated with the atom has now finished. If this is
1604                          * also the last atom on the context, then we can guarantee no more
1605                          * pokes (and thus no more poking register accesses) will occur on
1606                          * the context until new atoms are run */
1607                 }
1608         }
1609         spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags);
1610
1611         katom->poking = 0;
1612 }
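
/*
 * Poking-timer lifecycle, as implemented by the four functions above (a
 * reference summary only):
 *
 *   kbase_as_poking_timer_retain_atom()  : poke_refcount++; on the first
 *       reference (and if no poke is already in flight), set
 *       KBASE_AS_POKE_STATE_IN_FLIGHT and queue poke_work.
 *   kbasep_as_do_poke()                  : issue AS_COMMAND_UNLOCK to force a
 *       uTLB invalidate, then re-arm the 5 ms hrtimer while the refcount is
 *       non-zero and KILLING_POKE is not set.
 *   kbasep_as_poke_timer_callback()      : re-queue poke_work.
 *   kbase_as_poking_timer_release_atom() : poke_refcount--; on the last
 *       reference, set KILLING_POKE, cancel the timer, flush the workqueue,
 *       then either re-queue the work (if the AS was retained again in the
 *       meantime) or clear poke_state entirely.
 */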
1613
1614 void kbase_mmu_interrupt_process(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_as *as)
1615 {
1616         struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
1617         unsigned long flags;
1618
1619         if (kctx == NULL) {
1620                 dev_warn(kbdev->dev, "%s in AS%d at 0x%016llx with no context present! Spurious IRQ or SW design error?\n",
1621                                  kbase_as_has_bus_fault(as) ? "Bus error" : "Page fault",
1622                                  as->number, as->fault_addr);
1623         }
1624
1625         if (kbase_as_has_bus_fault(as)) {
1626                 if (kctx) {
1627                         /*
1628                          * hw counters dumping in progress, signal the
1629                          * other thread that it failed
1630                          */
1631                         if ((kbdev->hwcnt.kctx == kctx) &&
1632                             (kbdev->hwcnt.state == KBASE_INSTR_STATE_DUMPING))
1633                                 kbdev->hwcnt.state = KBASE_INSTR_STATE_FAULT;
1634
1635                         /*
1636                          * Stop the kctx from submitting more jobs and cause it
1637                          * to be scheduled out/rescheduled when all references
1638                          * to it are released
1639                          */
1640                         spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
1641                         kbasep_js_clear_submit_allowed(js_devdata, kctx);
1642                         spin_unlock_irqrestore(&js_devdata->runpool_irq.lock,
1643                                                flags);
1644
1645                         dev_warn(kbdev->dev, "Bus error in AS%d at 0x%016llx\n",
1646                                          as->number, as->fault_addr);
1647                 }
1648
1649                 /*
1650                  * We need to switch to UNMAPPED mode - but we do this in a
1651                  * worker so that we can sleep
1652                  */
1653                 kbdev->kbase_group_error++;
1654                 KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&as->work_busfault));
1655                 INIT_WORK(&as->work_busfault, bus_fault_worker);
1656                 queue_work(as->pf_wq, &as->work_busfault);
1657         } else {
1658                 kbdev->kbase_group_error++;
1659                 KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&as->work_pagefault));
1660                 INIT_WORK(&as->work_pagefault, page_fault_worker);
1661                 queue_work(as->pf_wq, &as->work_pagefault);
1662         }
1663 }
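
/*
 * Note on the dispatch above: both work items run on as->pf_wq, i.e. in
 * process context, which is what allows fault handling such as
 * kbase_mmu_report_fault_and_kill() above to sleep, take
 * as->transaction_mutex and switch the address space to UNMAPPED mode, as the
 * in-line comment above explains.
 */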