X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=drivers%2Fiommu%2Fdma-iommu.c;h=ab5680b6cb976a4c3f09adce7d81f21d7c6e7e4f;hb=c71a0bf1fdd48a97be1524f2d450575a590c07db;hp=3a20db4f8604f77d7bd374c9b9278f65a8c2cb5c;hpb=787b121db159641de9d0bfd11dc8c3f0ff8697d3;p=firefly-linux-kernel-4.4.55.git diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 3a20db4f8604..ab5680b6cb97 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -21,10 +21,13 @@ #include #include +#include #include #include #include #include +#include +#include int iommu_dma_init(void) { @@ -65,7 +68,8 @@ void iommu_put_dma_cookie(struct iommu_domain *domain) if (!iovad) return; - put_iova_domain(iovad); + if (iovad->granule) + put_iova_domain(iovad); kfree(iovad); domain->iova_cookie = NULL; } @@ -191,6 +195,7 @@ static struct page **__iommu_dma_alloc_pages(unsigned int count, gfp_t gfp) { struct page **pages; unsigned int i = 0, array_size = count * sizeof(*pages); + unsigned int order = MAX_ORDER; if (array_size <= PAGE_SIZE) pages = kzalloc(array_size, GFP_KERNEL); @@ -204,14 +209,15 @@ static struct page **__iommu_dma_alloc_pages(unsigned int count, gfp_t gfp) while (count) { struct page *page = NULL; - int j, order = __fls(count); + int j; /* * Higher-order allocations are a convenience rather * than a necessity, hence using __GFP_NORETRY until * falling back to single-page allocations. */ - for (order = min(order, MAX_ORDER); order > 0; order--) { + for (order = min_t(unsigned int, order, __fls(count)); + order > 0; order--) { page = alloc_pages(gfp | __GFP_NORETRY, order); if (!page) continue; @@ -384,26 +390,58 @@ void iommu_dma_unmap_page(struct device *dev, dma_addr_t handle, size_t size, /* * Prepare a successfully-mapped scatterlist to give back to the caller. - * Handling IOVA concatenation can come later, if needed + * + * At this point the segments are already laid out by iommu_dma_map_sg() to + * avoid individually crossing any boundaries, so we merely need to check a + * segment's start address to avoid concatenating across one. */ static int __finalise_sg(struct device *dev, struct scatterlist *sg, int nents, dma_addr_t dma_addr) { - struct scatterlist *s; - int i; + struct scatterlist *s, *cur = sg; + unsigned long seg_mask = dma_get_seg_boundary(dev); + unsigned int cur_len = 0, max_len = dma_get_max_seg_size(dev); + int i, count = 0; for_each_sg(sg, s, nents, i) { - /* Un-swizzling the fields here, hence the naming mismatch */ - unsigned int s_offset = sg_dma_address(s); + /* Restore this segment's original unaligned fields first */ + unsigned int s_iova_off = sg_dma_address(s); unsigned int s_length = sg_dma_len(s); - unsigned int s_dma_len = s->length; + unsigned int s_iova_len = s->length; - s->offset = s_offset; + s->offset += s_iova_off; s->length = s_length; - sg_dma_address(s) = dma_addr + s_offset; - dma_addr += s_dma_len; + sg_dma_address(s) = DMA_ERROR_CODE; + sg_dma_len(s) = 0; + + /* + * Now fill in the real DMA data. If... + * - there is a valid output segment to append to + * - and this segment starts on an IOVA page boundary + * - but doesn't fall at a segment boundary + * - and wouldn't make the resulting output segment too long + */ + if (cur_len && !s_iova_off && (dma_addr & seg_mask) && + (cur_len + s_length <= max_len)) { + /* ...then concatenate it with the previous one */ + cur_len += s_length; + } else { + /* Otherwise start the next output segment */ + if (i > 0) + cur = sg_next(cur); + cur_len = s_length; + count++; + + sg_dma_address(cur) = dma_addr + s_iova_off; + } + + sg_dma_len(cur) = cur_len; + dma_addr += s_iova_len; + + if (s_length + s_iova_off < s_iova_len) + cur_len = 0; } - return i; + return count; } /* @@ -417,7 +455,7 @@ static void __invalidate_sg(struct scatterlist *sg, int nents) for_each_sg(sg, s, nents, i) { if (sg_dma_address(s) != DMA_ERROR_CODE) - s->offset = sg_dma_address(s); + s->offset += sg_dma_address(s); if (sg_dma_len(s)) s->length = sg_dma_len(s); sg_dma_address(s) = DMA_ERROR_CODE; @@ -441,34 +479,40 @@ int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, struct scatterlist *s, *prev = NULL; dma_addr_t dma_addr; size_t iova_len = 0; + unsigned long mask = dma_get_seg_boundary(dev); int i; /* * Work out how much IOVA space we need, and align the segments to * IOVA granules for the IOMMU driver to handle. With some clever * trickery we can modify the list in-place, but reversibly, by - * hiding the original data in the as-yet-unused DMA fields. + * stashing the unaligned parts in the as-yet-unused DMA fields. */ for_each_sg(sg, s, nents, i) { - size_t s_offset = iova_offset(iovad, s->offset); + size_t s_iova_off = iova_offset(iovad, s->offset); size_t s_length = s->length; + size_t pad_len = (mask - iova_len + 1) & mask; - sg_dma_address(s) = s->offset; + sg_dma_address(s) = s_iova_off; sg_dma_len(s) = s_length; - s->offset -= s_offset; - s_length = iova_align(iovad, s_length + s_offset); + s->offset -= s_iova_off; + s_length = iova_align(iovad, s_length + s_iova_off); s->length = s_length; /* - * The simple way to avoid the rare case of a segment - * crossing the boundary mask is to pad the previous one - * to end at a naturally-aligned IOVA for this one's size, - * at the cost of potentially over-allocating a little. + * Due to the alignment of our single IOVA allocation, we can + * depend on these assumptions about the segment boundary mask: + * - If mask size >= IOVA size, then the IOVA range cannot + * possibly fall across a boundary, so we don't care. + * - If mask size < IOVA size, then the IOVA range must start + * exactly on a boundary, therefore we can lay things out + * based purely on segment lengths without needing to know + * the actual addresses beforehand. + * - The mask must be a power of 2, so pad_len == 0 if + * iova_len == 0, thus we cannot dereference prev the first + * time through here (i.e. before it has a meaningful value). */ - if (prev) { - size_t pad_len = roundup_pow_of_two(s_length); - - pad_len = (pad_len - iova_len) & (pad_len - 1); + if (pad_len && pad_len < s_length - 1) { prev->length += pad_len; iova_len += pad_len; }