Merge "iommu/io-pgtable-arm: Support SMMU coherent page tables"

This commit is contained in:
Linux Build Service Account 2016-10-06 12:25:40 -07:00 committed by Gerrit - the friendly Code Review server
commit ec3cc3d97c
2 changed files with 63 additions and 213 deletions

View file

@ -49,6 +49,17 @@ static pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot,
return prot;
}
/*
 * Build the IOMMU protection flags for a mapping.
 *
 * @attrs:    DMA attributes supplied by the caller
 * @prot:     base protection flags to extend
 * @coherent: whether the mapping is for an I/O-coherent device
 *
 * IOMMU_NOEXEC is added unless DMA_ATTR_EXEC_MAPPING is set in @attrs, and
 * IOMMU_CACHE is added when @coherent is true.  Returns the combined flags.
 */
static int __get_iommu_pgprot(struct dma_attrs *attrs, int prot,
			      bool coherent)
{
	int flags = prot;

	flags |= dma_get_attr(DMA_ATTR_EXEC_MAPPING, attrs) ? 0 : IOMMU_NOEXEC;
	flags |= coherent ? IOMMU_CACHE : 0;

	return flags;
}
static struct gen_pool *atomic_pool;
#define NO_KERNEL_MAPPING_DUMMY 0x2222
#define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K
@ -1153,7 +1164,7 @@ static int arm_dma_set_mask(struct device *dev, u64 dma_mask)
/* IOMMU */
static void __dma_clear_buffer(struct page *page, size_t size,
struct dma_attrs *attrs)
struct dma_attrs *attrs, bool is_coherent)
{
/*
* Ensure that the allocated pages are zeroed, and that any data
@ -1162,7 +1173,8 @@ static void __dma_clear_buffer(struct page *page, size_t size,
void *ptr = page_address(page);
if (!dma_get_attr(DMA_ATTR_SKIP_ZEROING, attrs))
memset(ptr, 0, size);
dmac_flush_range(ptr, ptr + size);
if (!is_coherent)
dmac_flush_range(ptr, ptr + size);
}
static inline dma_addr_t __alloc_iova(struct dma_iommu_mapping *mapping,
@ -1212,6 +1224,7 @@ static struct page **__iommu_alloc_buffer(struct device *dev, size_t size,
size_t count = size >> PAGE_SHIFT;
size_t array_size = count * sizeof(struct page *);
int i = 0;
bool is_coherent = is_device_dma_coherent(dev);
if (array_size <= PAGE_SIZE)
pages = kzalloc(array_size, gfp);
@ -1228,7 +1241,7 @@ static struct page **__iommu_alloc_buffer(struct device *dev, size_t size,
if (!page)
goto error;
__dma_clear_buffer(page, size, attrs);
__dma_clear_buffer(page, size, attrs, is_coherent);
for (i = 0; i < count; i++)
pages[i] = page + i;
@ -1257,7 +1270,8 @@ static struct page **__iommu_alloc_buffer(struct device *dev, size_t size,
pages[i + j] = pages[i] + j;
}
__dma_clear_buffer(pages[i], PAGE_SIZE << order, attrs);
__dma_clear_buffer(pages[i], PAGE_SIZE << order, attrs,
is_coherent);
i += 1 << order;
count -= 1 << order;
}
@ -1322,9 +1336,8 @@ static dma_addr_t __iommu_create_mapping(struct device *dev,
dma_addr = __alloc_iova(mapping, size);
if (dma_addr == DMA_ERROR_CODE)
return dma_addr;
if (!dma_get_attr(DMA_ATTR_EXEC_MAPPING, attrs))
prot |= IOMMU_NOEXEC;
prot = __get_iommu_pgprot(attrs, prot,
is_device_dma_coherent(dev));
iova = dma_addr;
for (i = 0; i < count; ) {
@ -1404,6 +1417,7 @@ static void *__iommu_alloc_atomic(struct device *dev, size_t size,
size_t array_size = count * sizeof(struct page *);
int i;
void *addr;
bool coherent = is_device_dma_coherent(dev);
if (array_size <= PAGE_SIZE)
pages = kzalloc(array_size, gfp);
@ -1413,7 +1427,13 @@ static void *__iommu_alloc_atomic(struct device *dev, size_t size,
if (!pages)
return NULL;
addr = __alloc_from_pool(size, &page, gfp);
if (coherent) {
page = alloc_pages(gfp, get_order(size));
addr = page ? page_address(page) : NULL;
} else {
addr = __alloc_from_pool(size, &page, gfp);
}
if (!addr)
goto err_free;
@ -1428,7 +1448,10 @@ static void *__iommu_alloc_atomic(struct device *dev, size_t size,
return addr;
err_mapping:
__free_from_pool(addr, size);
if (coherent)
__free_pages(page, get_order(size));
else
__free_from_pool(addr, size);
err_free:
kvfree(pages);
return NULL;
@ -1444,7 +1467,8 @@ static void __iommu_free_atomic(struct device *dev, void *cpu_addr,
static void *arm_iommu_alloc_attrs(struct device *dev, size_t size,
dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs)
{
pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, false);
bool coherent = is_device_dma_coherent(dev);
pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, coherent);
struct page **pages;
void *addr = NULL;
@ -1495,8 +1519,10 @@ static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
unsigned long uaddr = vma->vm_start;
unsigned long usize = vma->vm_end - vma->vm_start;
struct page **pages = __iommu_get_pages(cpu_addr, attrs);
bool coherent = is_device_dma_coherent(dev);
vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot, false);
vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot,
coherent);
if (!pages)
return -ENXIO;
@ -1577,121 +1603,6 @@ static int __dma_direction_to_prot(enum dma_data_direction dir)
return prot;
}
/*
 * Map a part of the scatter-gather list into contiguous io address space
 *
 * @dev:         device whose archdata mapping provides the IOMMU domain
 * @sg:          first scatterlist entry of the chunk
 * @size:        number of bytes to map (rounded up to a page multiple here)
 * @handle:      out: base IOVA of the chunk, DMA_ERROR_CODE on failure
 * @dir:         DMA transfer direction
 * @attrs:       DMA attributes
 * @is_coherent: when true, no CPU cache maintenance is performed
 *
 * Returns 0 on success, -ENOMEM if no IOVA range could be allocated, or the
 * negative value returned by iommu_map().  On failure everything mapped so
 * far is unmapped and the IOVA range is released.
 */
static int __map_sg_chunk(struct device *dev, struct scatterlist *sg,
			  size_t size, dma_addr_t *handle,
			  enum dma_data_direction dir, struct dma_attrs *attrs,
			  bool is_coherent)
{
	struct dma_iommu_mapping *mapping = dev->archdata.mapping;
	dma_addr_t iova, iova_base;
	int ret = 0;
	unsigned int count;
	struct scatterlist *s;
	bool skip_sync;
	int prot;

	size = PAGE_ALIGN(size);
	*handle = DMA_ERROR_CODE;

	iova_base = iova = __alloc_iova(mapping, size);
	if (iova == DMA_ERROR_CODE)
		return -ENOMEM;

	/*
	 * Both values depend only on dir/attrs/is_coherent, so compute them
	 * once instead of once per scatterlist entry.
	 */
	skip_sync = is_coherent ||
		    dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs);

	prot = __dma_direction_to_prot(dir);
	if (!dma_get_attr(DMA_ATTR_EXEC_MAPPING, attrs))
		prot |= IOMMU_NOEXEC;

	/* count tracks how many pages have been mapped so far */
	for (count = 0, s = sg; count < (size >> PAGE_SHIFT); s = sg_next(s)) {
		phys_addr_t phys = page_to_phys(sg_page(s));
		unsigned int len = PAGE_ALIGN(s->offset + s->length);

		if (!skip_sync)
			__dma_page_cpu_to_dev(sg_page(s), s->offset, s->length,
					      dir);

		ret = iommu_map(mapping->domain, iova, phys, len, prot);
		if (ret < 0)
			goto fail;

		count += len >> PAGE_SHIFT;
		iova += len;
	}

	*handle = iova_base;
	return 0;

fail:
	/* undo the pages mapped before the failing entry, then drop the IOVA */
	iommu_unmap(mapping->domain, iova_base, count * PAGE_SIZE);
	__free_iova(mapping, iova_base, size);
	return ret;
}
/*
 * Map a scatterlist through the IOMMU, merging consecutive entries into
 * chunks that are each mapped with one __map_sg_chunk() call.
 *
 * The resulting DMA address/length pairs are written into the leading
 * entries of @sg (tracked by the 'dma' cursor), one per chunk.
 *
 * Returns the number of chunks mapped (count + 1), or 0 if any chunk failed
 * to map, in which case the chunks already recorded are removed again.
 */
static int __iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents,
enum dma_data_direction dir, struct dma_attrs *attrs,
bool is_coherent)
{
/* s: entry being examined; dma: entry receiving results; start: chunk head */
struct scatterlist *s = sg, *dma = sg, *start = sg;
int i, count = 0;
/* offset/size describe the chunk currently being accumulated */
unsigned int offset = s->offset;
unsigned int size = s->offset + s->length;
unsigned int max = dma_get_max_seg_size(dev);
for (i = 1; i < nents; i++) {
s = sg_next(s);
/* reset the entry's DMA fields before deciding whether it is merged */
s->dma_address = DMA_ERROR_CODE;
s->dma_length = 0;
/*
 * Close the current chunk when the new entry does not start at offset 0,
 * when the accumulated size is not page aligned, or when adding the entry
 * would exceed the device's maximum segment size.
 */
if (s->offset || (size & ~PAGE_MASK)
|| size + s->length > max) {
if (__map_sg_chunk(dev, start, size, &dma->dma_address,
dir, attrs, is_coherent) < 0)
goto bad_mapping;
/* chunk was mapped from its page base; add back the leading offset */
dma->dma_address += offset;
dma->dma_length = size - offset;
/* begin a new chunk at the current entry */
size = offset = s->offset;
start = s;
dma = sg_next(dma);
count += 1;
}
size += s->length;
}
/* map whatever is left in the final, still-open chunk */
if (__map_sg_chunk(dev, start, size, &dma->dma_address, dir, attrs,
is_coherent) < 0)
goto bad_mapping;
dma->dma_address += offset;
dma->dma_length = size - offset;
return count+1;
bad_mapping:
/* tear down the 'count' chunks that were successfully recorded */
for_each_sg(sg, s, count, i)
__iommu_remove_mapping(dev, sg_dma_address(s), sg_dma_len(s));
return 0;
}
/**
* arm_coherent_iommu_map_sg - map a set of SG buffers for streaming mode DMA
* @dev: valid struct device pointer
* @sg: list of buffers
* @nents: number of buffers to map
* @dir: DMA transfer direction
*
* Map a set of i/o coherent buffers described by scatterlist in streaming
* mode for DMA. The scatter gather list elements are merged together (if
* possible) and tagged with the appropriate dma address and length. They are
* obtained via sg_dma_{address,length}.
*/
int arm_coherent_iommu_map_sg(struct device *dev, struct scatterlist *sg,
int nents, enum dma_data_direction dir, struct dma_attrs *attrs)
{
return __iommu_map_sg(dev, sg, nents, dir, attrs, true);
}
/**
* arm_iommu_map_sg - map a set of SG buffers for streaming mode DMA
* @dev: valid struct device pointer
@ -1722,9 +1633,8 @@ int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg,
dev_err(dev, "Couldn't allocate iova for sg %p\n", sg);
return 0;
}
if (!dma_get_attr(DMA_ATTR_EXEC_MAPPING, attrs))
prot |= IOMMU_NOEXEC;
prot = __get_iommu_pgprot(attrs, prot,
is_device_dma_coherent(dev));
ret = iommu_map_sg(mapping->domain, iova, sg, nents, prot);
if (ret != total_length) {
@ -1741,40 +1651,6 @@ int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg,
return nents;
}
/*
 * Unmap every entry of a scatterlist.
 *
 * For each of the @nents entries, the IOMMU mapping is removed when the
 * entry has a non-zero DMA length.  Unless @is_coherent is true or
 * DMA_ATTR_SKIP_CPU_SYNC is set in @attrs, the entry's page range is then
 * passed to __dma_page_dev_to_cpu().
 */
static void __iommu_unmap_sg(struct device *dev, struct scatterlist *sg,
		int nents, enum dma_data_direction dir, struct dma_attrs *attrs,
		bool is_coherent)
{
	struct scatterlist *entry;
	int idx;

	for_each_sg(sg, entry, nents, idx) {
		if (sg_dma_len(entry) != 0)
			__iommu_remove_mapping(dev, sg_dma_address(entry),
					       sg_dma_len(entry));

		/* no CPU-side sync needed for this entry */
		if (is_coherent ||
		    dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
			continue;

		__dma_page_dev_to_cpu(sg_page(entry), entry->offset,
				      entry->length, dir);
	}
}
/**
* arm_coherent_iommu_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg
* @dev: valid struct device pointer
* @sg: list of buffers
* @nents: number of buffers to unmap (same as was passed to dma_map_sg)
* @dir: DMA transfer direction (same as was passed to dma_map_sg)
*
* Unmap a set of streaming mode DMA translations. Again, CPU access
* rules concerning calls here are the same as for dma_unmap_single().
*/
void arm_coherent_iommu_unmap_sg(struct device *dev, struct scatterlist *sg,
int nents, enum dma_data_direction dir, struct dma_attrs *attrs)
{
__iommu_unmap_sg(dev, sg, nents, dir, attrs, true);
}
/**
* arm_iommu_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg
* @dev: valid struct device pointer
@ -1812,6 +1688,9 @@ void arm_iommu_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
struct scatterlist *s;
int i;
if (is_device_dma_coherent(dev))
return;
for_each_sg(sg, s, nents, i)
__dma_page_dev_to_cpu(sg_page(s), s->offset, s->length, dir);
@ -1830,6 +1709,9 @@ void arm_iommu_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
struct scatterlist *s;
int i;
if (is_device_dma_coherent(dev))
return;
for_each_sg(sg, s, nents, i)
__dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir);
}
@ -1858,8 +1740,8 @@ static dma_addr_t arm_coherent_iommu_map_page(struct device *dev,
return dma_addr;
prot = __dma_direction_to_prot(dir);
if (!dma_get_attr(DMA_ATTR_EXEC_MAPPING, attrs))
prot |= IOMMU_NOEXEC;
prot = __get_iommu_pgprot(attrs, prot,
is_device_dma_coherent(dev));
ret = iommu_map(mapping->domain, dma_addr, page_to_phys(page), len,
prot);
@ -1886,37 +1768,13 @@ static dma_addr_t arm_iommu_map_page(struct device *dev, struct page *page,
unsigned long offset, size_t size, enum dma_data_direction dir,
struct dma_attrs *attrs)
{
if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
if (!is_device_dma_coherent(dev) &&
!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
__dma_page_cpu_to_dev(page, offset, size, dir);
return arm_coherent_iommu_map_page(dev, page, offset, size, dir, attrs);
}
/**
 * arm_coherent_iommu_unmap_page
 * @dev: valid struct device pointer
 * @handle: DMA address of buffer
 * @size: size of buffer (same as passed to dma_map_page)
 * @dir: DMA transfer direction (same as passed to dma_map_page)
 * @attrs: DMA attributes (not used by this function)
 *
 * Coherent IOMMU aware version of arm_dma_unmap_page(): removes the IOMMU
 * mapping covering the buffer and releases its IOVA range.  Nothing is done
 * when the page-aligned part of @handle is zero.
 */
static void arm_coherent_iommu_unmap_page(struct device *dev, dma_addr_t handle,
		size_t size, enum dma_data_direction dir,
		struct dma_attrs *attrs)
{
	struct dma_iommu_mapping *mapping = dev->archdata.mapping;
	/* split the handle into its page base and the offset within the page */
	dma_addr_t page_base = handle & PAGE_MASK;
	int in_page_off = handle & ~PAGE_MASK;
	int span = PAGE_ALIGN(size + in_page_off);

	if (page_base) {
		iommu_unmap(mapping->domain, page_base, span);
		__free_iova(mapping, page_base, span);
	}
}
/**
* arm_iommu_unmap_page
* @dev: valid struct device pointer
@ -1940,7 +1798,8 @@ static void arm_iommu_unmap_page(struct device *dev, dma_addr_t handle,
if (!iova)
return;
if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
if (!(is_device_dma_coherent(dev) ||
dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)))
__dma_page_dev_to_cpu(page, offset, size, dir);
iommu_unmap(mapping->domain, iova, len);
@ -1959,7 +1818,8 @@ static void arm_iommu_sync_single_for_cpu(struct device *dev,
if (!iova)
return;
__dma_page_dev_to_cpu(page, offset, size, dir);
if (!is_device_dma_coherent(dev))
__dma_page_dev_to_cpu(page, offset, size, dir);
}
static void arm_iommu_sync_single_for_device(struct device *dev,
@ -1974,7 +1834,8 @@ static void arm_iommu_sync_single_for_device(struct device *dev,
if (!iova)
return;
__dma_page_cpu_to_dev(page, offset, size, dir);
if (!is_device_dma_coherent(dev))
__dma_page_cpu_to_dev(page, offset, size, dir);
}
static int arm_iommu_dma_supported(struct device *dev, u64 mask)
@ -2016,22 +1877,6 @@ const struct dma_map_ops iommu_ops = {
.mapping_error = arm_iommu_mapping_error,
};
/*
 * DMA mapping operations table wiring the arm_coherent_iommu_* streaming
 * callbacks together with the shared IOMMU alloc/free/mmap helpers.
 */
const struct dma_map_ops iommu_coherent_ops = {
	/* buffer allocation and user-space mapping */
	.alloc		= arm_iommu_alloc_attrs,
	.free		= arm_iommu_free_attrs,
	.mmap		= arm_iommu_mmap_attrs,
	.get_sgtable	= arm_iommu_get_sgtable,

	/* streaming mappings */
	.map_page	= arm_coherent_iommu_map_page,
	.unmap_page	= arm_coherent_iommu_unmap_page,
	.map_sg		= arm_coherent_iommu_map_sg,
	.unmap_sg	= arm_coherent_iommu_unmap_sg,

	/* mask handling */
	.set_dma_mask	= arm_dma_set_mask,
	.dma_supported	= arm_iommu_dma_supported,
};
/**
* arm_iommu_create_mapping
* @bus: pointer to the bus holding the client device (for IOMMU calls)

View file

@ -935,9 +935,14 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
return NULL;
/* TCR */
reg = (ARM_LPAE_TCR_SH_IS << ARM_LPAE_TCR_SH0_SHIFT) |
(ARM_LPAE_TCR_RGN_NC << ARM_LPAE_TCR_IRGN0_SHIFT) |
(ARM_LPAE_TCR_RGN_NC << ARM_LPAE_TCR_ORGN0_SHIFT);
if (cfg->iommu_dev && cfg->iommu_dev->archdata.dma_coherent)
reg = (ARM_LPAE_TCR_SH_OS << ARM_LPAE_TCR_SH0_SHIFT) |
(ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_IRGN0_SHIFT) |
(ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_ORGN0_SHIFT);
else
reg = (ARM_LPAE_TCR_SH_IS << ARM_LPAE_TCR_SH0_SHIFT) |
(ARM_LPAE_TCR_RGN_NC << ARM_LPAE_TCR_IRGN0_SHIFT) |
(ARM_LPAE_TCR_RGN_NC << ARM_LPAE_TCR_ORGN0_SHIFT);
switch (1 << data->pg_shift) {
case SZ_4K: