ARM: add coherent dma ops

[firefly-linux-kernel-4.4.55.git] / arch / arm / mm / dma-mapping.c
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c

index c2cdf6500f75dc5a1ab5a688ea305099e8d30057..7d772c0a93f231ca6de67d6a0467b4f4bac0a917 100644 (file)
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -73,11 +73,18 @@ static dma_addr_t arm_dma_map_page(struct device *dev, struct page *page,
              unsigned long offset, size_t size, enum dma_data_direction dir,
              struct dma_attrs *attrs)
  {
-       if (!arch_is_coherent() && !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
+       if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
                 __dma_page_cpu_to_dev(page, offset, size, dir);
         return pfn_to_dma(dev, page_to_pfn(page)) + offset;
  }
  
+/*
+ * arm_coherent_dma_map_page - map_page callback used by arm_coherent_dma_ops
+ *
+ * Translates @page plus @offset into a DMA address for @dev. Unlike
+ * arm_dma_map_page(), no __dma_page_cpu_to_dev() call is made here; @size,
+ * @dir and @attrs are part of the callback signature but are not used.
+ */
+static dma_addr_t arm_coherent_dma_map_page(struct device *dev, struct page *page,
+            unsigned long offset, size_t size, enum dma_data_direction dir,
+            struct dma_attrs *attrs)
+{
+       unsigned long pfn = page_to_pfn(page);
+       dma_addr_t page_base = pfn_to_dma(dev, pfn);
+
+       return page_base + offset;
+}
+
  /**
   * arm_dma_unmap_page - unmap a buffer previously mapped through dma_map_page()
   * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
@@ -96,7 +103,7 @@ static void arm_dma_unmap_page(struct device *dev, dma_addr_t handle,
                 size_t size, enum dma_data_direction dir,
                 struct dma_attrs *attrs)
  {
-       if (!arch_is_coherent() && !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
+       if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
                 __dma_page_dev_to_cpu(pfn_to_page(dma_to_pfn(dev, handle)),
                                       handle & ~PAGE_MASK, size, dir);
  }
@@ -106,8 +113,7 @@ static void arm_dma_sync_single_for_cpu(struct device *dev,
  {
         unsigned int offset = handle & (PAGE_SIZE - 1);
         struct page *page = pfn_to_page(dma_to_pfn(dev, handle-offset));
-       if (!arch_is_coherent())
-               __dma_page_dev_to_cpu(page, offset, size, dir);
+       __dma_page_dev_to_cpu(page, offset, size, dir);
  }
  
  static void arm_dma_sync_single_for_device(struct device *dev,
@@ -115,8 +121,7 @@ static void arm_dma_sync_single_for_device(struct device *dev,
  {
         unsigned int offset = handle & (PAGE_SIZE - 1);
         struct page *page = pfn_to_page(dma_to_pfn(dev, handle-offset));
-       if (!arch_is_coherent())
-               __dma_page_cpu_to_dev(page, offset, size, dir);
+       __dma_page_cpu_to_dev(page, offset, size, dir);
  }
  
  static int arm_dma_set_mask(struct device *dev, u64 dma_mask);
@@ -138,6 +143,22 @@ struct dma_map_ops arm_dma_ops = {
  };
  EXPORT_SYMBOL(arm_dma_ops);
  
+static void *arm_coherent_dma_alloc(struct device *dev, size_t size,
+       dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs);
+static void arm_coherent_dma_free(struct device *dev, size_t size, void *cpu_addr,
+                                 dma_addr_t handle, struct dma_attrs *attrs); /* both are defined further down; declared here for the table below */
+
+struct dma_map_ops arm_coherent_dma_ops = { /* coherent counterpart of arm_dma_ops */
+       .alloc                  = arm_coherent_dma_alloc, /* calls __dma_alloc() with is_coherent = true */
+       .free                   = arm_coherent_dma_free, /* calls __arm_dma_free() with is_coherent = true */
+       .mmap                   = arm_dma_mmap,
+       .get_sgtable            = arm_dma_get_sgtable,
+       .map_page               = arm_coherent_dma_map_page, /* address translation only */
+       .map_sg                 = arm_dma_map_sg,
+       .set_dma_mask           = arm_dma_set_mask,
+}; /* no unmap_page, unmap_sg or sync_* callbacks are installed */
+EXPORT_SYMBOL(arm_coherent_dma_ops);
+
  static u64 get_coherent_dma_mask(struct device *dev)
  {
         u64 mask = (u64)arm_dma_limit;
@@ -267,17 +288,19 @@ static void __dma_free_remap(void *cpu_addr, size_t size)
         vunmap(cpu_addr);
  }
  
+#define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K
+
  struct dma_pool { /* bookkeeping for the preallocated atomic coherent pool */
         size_t size; /* pool size in bytes */
         spinlock_t lock; /* held while searching and updating bitmap */
         unsigned long *bitmap; /* one bit per pool page; a set bit marks an allocated page */
         unsigned long nr_pages; /* size >> PAGE_SHIFT */
         void *vaddr; /* kernel virtual address of the first pool page; NULL until initialised */
-       struct page *page;
+       struct page **pages; /* struct page pointer for every pool page, indexed by page number */
  };
  
  static struct dma_pool atomic_pool = { /* the single pool used for atomic coherent allocations */
-       .size = SZ_256K,
+       .size = DEFAULT_DMA_COHERENT_POOL_SIZE, /* may be replaced by init_dma_coherent_pool_size() while still at this default */
  };
  
  static int __init early_coherent_pool(char *p)
@@ -287,6 +310,21 @@ static int __init early_coherent_pool(char *p)
  }
  early_param("coherent_pool", early_coherent_pool);
  
+/*
+ * Let architecture code choose the size of the atomic coherent pool.
+ *
+ * @size: requested pool size in bytes
+ *
+ * Must be called before the pool has been set up: a non-NULL
+ * atomic_pool.vaddr triggers BUG_ON(). The request is applied only while
+ * the pool size still equals DEFAULT_DMA_COHERENT_POOL_SIZE, i.e. it has
+ * not been changed by the kernel command line parameter.
+ */
+void __init init_dma_coherent_pool_size(unsigned long size)
+{
+       struct dma_pool *pool = &atomic_pool;
+
+       /* Catch any attempt to set the pool size too late. */
+       BUG_ON(pool->vaddr);
+
+       /* A size that already differs from the default is left alone. */
+       if (pool->size != DEFAULT_DMA_COHERENT_POOL_SIZE)
+               return;
+
+       pool->size = size;
+}
+
  /*
   * Initialise the coherent pool for atomic allocations.
   */
@@ -297,6 +335,7 @@ static int __init atomic_pool_init(void)
         unsigned long nr_pages = pool->size >> PAGE_SHIFT;
         unsigned long *bitmap;
         struct page *page;
+       struct page **pages;
         void *ptr;
         int bitmap_size = BITS_TO_LONGS(nr_pages) * sizeof(long);
  
@@ -304,21 +343,33 @@ static int __init atomic_pool_init(void)
         if (!bitmap)
                 goto no_bitmap;
  
+       pages = kzalloc(nr_pages * sizeof(struct page *), GFP_KERNEL);
+       if (!pages)
+               goto no_pages;
+
         if (IS_ENABLED(CONFIG_CMA))
                 ptr = __alloc_from_contiguous(NULL, pool->size, prot, &page);
         else
                 ptr = __alloc_remap_buffer(NULL, pool->size, GFP_KERNEL, prot,
                                            &page, NULL);
         if (ptr) {
+               int i;
+
+               for (i = 0; i < nr_pages; i++)
+                       pages[i] = page + i;
+
                 spin_lock_init(&pool->lock);
                 pool->vaddr = ptr;
-               pool->page = page;
+               pool->pages = pages;
                 pool->bitmap = bitmap;
                 pool->nr_pages = nr_pages;
                 pr_info("DMA: preallocated %u KiB pool for atomic coherent allocations\n",
                        (unsigned)pool->size / 1024);
                 return 0;
         }
+
+       kfree(pages);
+no_pages:
         kfree(bitmap);
  no_bitmap:
         pr_err("DMA: failed to allocate %u KiB pool for atomic coherent allocation\n",
@@ -358,7 +409,7 @@ void __init dma_contiguous_remap(void)
                 if (end > arm_lowmem_limit)
                         end = arm_lowmem_limit;
                 if (start >= end)
-                       return;
+                       continue;
  
                 map.pfn = __phys_to_pfn(start);
                 map.virtual = __phys_to_virt(start);
@@ -423,7 +474,7 @@ static void *__alloc_from_pool(size_t size, struct page **ret_page)
         unsigned int pageno;
         unsigned long flags;
         void *ptr = NULL;
-       size_t align;
+       unsigned long align_mask;
  
         if (!pool->vaddr) {
                 WARN(1, "coherent pool not initialised!\n");
@@ -435,34 +486,52 @@ static void *__alloc_from_pool(size_t size, struct page **ret_page)
          * small, so align them to their order in pages, minimum is a page
          * size. This helps reduce fragmentation of the DMA space.
          */
-       align = PAGE_SIZE << get_order(size);
+       align_mask = (1 << get_order(size)) - 1;
  
         spin_lock_irqsave(&pool->lock, flags);
         pageno = bitmap_find_next_zero_area(pool->bitmap, pool->nr_pages,
-                                           0, count, (1 << align) - 1);
+                                           0, count, align_mask);
         if (pageno < pool->nr_pages) {
                 bitmap_set(pool->bitmap, pageno, count);
                 ptr = pool->vaddr + PAGE_SIZE * pageno;
-               *ret_page = pool->page + pageno;
+               *ret_page = pool->pages[pageno];
+       } else {
+               pr_err_once("ERROR: %u KiB atomic DMA coherent pool is too small!\n"
+                           "Please increase it with coherent_pool= kernel parameter!\n",
+                           (unsigned)pool->size / 1024);
         }
         spin_unlock_irqrestore(&pool->lock, flags);
  
         return ptr;
  }
  
+/*
+ * Check whether the buffer [start, start + size) belongs to the atomic pool.
+ *
+ * Returns true only when the whole range lies inside the pool's virtual
+ * address window. A range that starts inside the pool but runs past its
+ * end triggers a WARN and is reported as not being part of the pool.
+ */
+static bool __in_atomic_pool(void *start, size_t size)
+{
+       struct dma_pool *pool = &atomic_pool;
+       void *pool_start = pool->vaddr;
+       void *pool_end = pool_start + pool->size;
+       void *end = start + size;
+       bool starts_inside = start >= pool_start && start < pool_end;
+
+       if (!starts_inside)
+               return false;
+
+       if (end > pool_end) {
+               WARN(1, "Wrong coherent size(%p-%p) from atomic pool(%p-%p)\n",
+                    start, end - 1, pool_start, pool_end - 1);
+               return false;
+       }
+
+       return true;
+}
+
  static int __free_from_pool(void *start, size_t size)
  {
         struct dma_pool *pool = &atomic_pool;
         unsigned long pageno, count;
         unsigned long flags;
  
-       if (start < pool->vaddr || start > pool->vaddr + pool->size)
-               return 0;
-
-       if (start + size > pool->vaddr + pool->size) {
-               WARN(1, "freeing wrong coherent size from pool\n");
+       if (!__in_atomic_pool(start, size))
                 return 0;
-       }
  
         pageno = (start - pool->vaddr) >> PAGE_SHIFT;
         count = size >> PAGE_SHIFT;
@@ -538,7 +607,7 @@ static void *__alloc_simple_buffer(struct device *dev, size_t size, gfp_t gfp,
  
  
  static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
-                        gfp_t gfp, pgprot_t prot, const void *caller)
+                        gfp_t gfp, pgprot_t prot, bool is_coherent, const void *caller)
  {
         u64 mask = get_coherent_dma_mask(dev);
         struct page *page;
@@ -571,7 +640,7 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
         *handle = DMA_ERROR_CODE;
         size = PAGE_ALIGN(size);
  
-       if (arch_is_coherent() || nommu())
+       if (is_coherent || nommu())
                 addr = __alloc_simple_buffer(dev, size, gfp, &page);
         else if (gfp & GFP_ATOMIC)
                 addr = __alloc_from_pool(size, &page);
@@ -599,7 +668,20 @@ void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
         if (dma_alloc_from_coherent(dev, size, handle, &memory))
                 return memory;
  
-       return __dma_alloc(dev, size, handle, gfp, prot,
+       return __dma_alloc(dev, size, handle, gfp, prot, false,
+                          __builtin_return_address(0));
+}
+
+static void *arm_coherent_dma_alloc(struct device *dev, size_t size,
+       dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs)
+{ /* .alloc callback of arm_coherent_dma_ops */
+       pgprot_t prot = __get_dma_pgprot(attrs, pgprot_kernel);
+       void *memory;
+
+       if (dma_alloc_from_coherent(dev, size, handle, &memory)) /* non-zero means the request was handled there */
+               return memory;
+
+       return __dma_alloc(dev, size, handle, gfp, prot, true, /* is_coherent = true */
                            __builtin_return_address(0));
  }
  
@@ -636,8 +718,9 @@ int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma,
  /*
   * Free a buffer as defined by the above mapping.
   */
-void arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
-                 dma_addr_t handle, struct dma_attrs *attrs)
+static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
+                          dma_addr_t handle, struct dma_attrs *attrs,
+                          bool is_coherent)
  {
         struct page *page = pfn_to_page(dma_to_pfn(dev, handle));
  
@@ -646,14 +729,14 @@ void arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
  
         size = PAGE_ALIGN(size);
  
-       if (arch_is_coherent() || nommu()) {
+       if (is_coherent || nommu()) {
                 __dma_free_buffer(page, size);
+       } else if (__free_from_pool(cpu_addr, size)) {
+               return;
         } else if (!IS_ENABLED(CONFIG_CMA)) {
                 __dma_free_remap(cpu_addr, size);
                 __dma_free_buffer(page, size);
         } else {
-               if (__free_from_pool(cpu_addr, size))
-                       return;
                 /*
                  * Non-atomic allocations cannot be freed with IRQs disabled
                  */
@@ -662,6 +745,18 @@ void arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
         }
  }
  
+void arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
+                 dma_addr_t handle, struct dma_attrs *attrs)
+{ /* public free entry point; thin wrapper around __arm_dma_free() */
+       __arm_dma_free(dev, size, cpu_addr, handle, attrs, false); /* is_coherent = false */
+}
+
+static void arm_coherent_dma_free(struct device *dev, size_t size, void *cpu_addr,
+                                 dma_addr_t handle, struct dma_attrs *attrs)
+{ /* .free callback of arm_coherent_dma_ops; thin wrapper around __arm_dma_free() */
+       __arm_dma_free(dev, size, cpu_addr, handle, attrs, true); /* is_coherent = true */
+}
+
  int arm_dma_get_sgtable(struct device *dev, struct sg_table *sgt,
                  void *cpu_addr, dma_addr_t handle, size_t size,
                  struct dma_attrs *attrs)
@@ -1090,10 +1185,22 @@ static int __iommu_remove_mapping(struct device *dev, dma_addr_t iova, size_t si
         return 0;
  }
  
+/*
+ * Return the slot of the atomic pool's page array that corresponds to
+ * @addr. @addr is treated as an offset from the pool's base virtual
+ * address; no range check is done here.
+ */
+static struct page **__atomic_get_pages(void *addr)
+{
+       struct dma_pool *pool = &atomic_pool;
+       int page_idx = (addr - pool->vaddr) >> PAGE_SHIFT;
+
+       return &pool->pages[page_idx];
+}
+
  static struct page **__iommu_get_pages(void *cpu_addr, struct dma_attrs *attrs)
  {
         struct vm_struct *area;
  
+       if (__in_atomic_pool(cpu_addr, PAGE_SIZE))
+               return __atomic_get_pages(cpu_addr);
+
         if (dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs))
                 return cpu_addr;
  
@@ -1103,6 +1210,34 @@ static struct page **__iommu_get_pages(void *cpu_addr, struct dma_attrs *attrs)
         return NULL;
  }
  
+/*
+ * Allocate @size bytes from the atomic pool and create an IOMMU mapping
+ * for them on behalf of @dev.
+ *
+ * Returns the CPU address of the buffer and stores the value returned by
+ * __iommu_create_mapping() in @handle. Returns NULL when the pool cannot
+ * satisfy the request or when the mapping fails; in the latter case the
+ * pool memory is released again.
+ */
+static void *__iommu_alloc_atomic(struct device *dev, size_t size,
+                                 dma_addr_t *handle)
+{
+       struct page *page;
+       void *addr;
+
+       addr = __alloc_from_pool(size, &page);
+       if (!addr)
+               return NULL;
+
+       /*
+        * Pass the pool's per-page array instead of &page: @size may cover
+        * several pages, while &page is an array of exactly one entry.
+        * The first entry is the same page either way.
+        */
+       *handle = __iommu_create_mapping(dev, __atomic_get_pages(addr), size);
+       if (*handle == DMA_ERROR_CODE) {
+               __free_from_pool(addr, size);
+               return NULL;
+       }
+
+       return addr;
+}
+
+/*
+ * Undo __iommu_alloc_atomic(): remove the IOMMU mapping at @handle and
+ * give the buffer back to the atomic pool.
+ *
+ * @pages must point into the atomic pool's page array.
+ * NOTE(review): this is what __iommu_get_pages() returns for addresses
+ * inside the pool; confirm the caller obtains @pages that way.
+ */
+static void __iommu_free_atomic(struct device *dev, struct page **pages,
+                               dma_addr_t handle, size_t size)
+{
+       struct dma_pool *pool = &atomic_pool;
+       /*
+        * __free_from_pool() range-checks against the pool's own virtual
+        * addresses, so rebuild that address from the slot index.
+        * page_address(pages[0]) is only the same address when the pool was
+        * not remapped; otherwise the free is rejected and the pages leak.
+        */
+       void *cpu_addr = pool->vaddr + (pages - pool->pages) * PAGE_SIZE;
+
+       __iommu_remove_mapping(dev, handle, size);
+       __free_from_pool(cpu_addr, size);
+}
+
  static void *arm_iommu_alloc_attrs(struct device *dev, size_t size,
             dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs)
  {
@@ -1113,6 +1248,9 @@ static void *arm_iommu_alloc_attrs(struct device *dev, size_t size,
         *handle = DMA_ERROR_CODE;
         size = PAGE_ALIGN(size);
  
+       if (gfp & GFP_ATOMIC)
+               return __iommu_alloc_atomic(dev, size, handle);
+
         pages = __iommu_alloc_buffer(dev, size, gfp);
         if (!pages)
                 return NULL;
@@ -1179,6 +1317,11 @@ void arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
                 return;
         }
  
+       if (__in_atomic_pool(cpu_addr, size)) {
+               __iommu_free_atomic(dev, pages, handle, size);
+               return;
+       }
+
         if (!dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs)) {
                 unmap_kernel_range((unsigned long)cpu_addr, size);
                 vunmap(cpu_addr);