From b39c609072d556dc39117e162cd396f7f4aea660 Mon Sep 17 00:00:00 2001 From: Jordan Crouse Date: Wed, 10 May 2017 15:22:09 -0600 Subject: [PATCH 1/4] iommu/arm-smmu: Correctly calculate and use the TTBR1 pagetable Due to an egregious misunderstanding of the specification, it was thought that to do the TTBR1 matching correctly the sign extension bit needed to match the input address space. This is incorrect - the TTBR1 range is determined by the size of the TTBR1 space (in our case, the same as the input address space). For example, if the input address size is 36 bits, the effective ranges of the pagetables are: TTBR0: 0x00000000_00000000 - 0x0000000f_ffffffff TTBR1: 0xfffffff0_00000000 - 0xffffffff_ffffffff For its part, the sign extension bit should be set based on the upstream bus size. If the device has an upstream bus size (UBS) of 49 bits then the sign extension bit is assumed by design to be 48; otherwise the driver needs to pick the highest available bit and reduce the input address space by 1. Because the client driver shouldn't need to know the upstream bus size, the convention is to use a fully sign-extended unsigned long address for TTBR1 mappings. If the sign extension bit is set lower than the upstream bus size, some implementations assume that the bits above the sign extension bit need to be zero, which breaks the convention. Setting the sign extension bit correctly for the upstream bus size ensures that sign extension always works. The hardware will match an address to TTBR1 if all the bits between the sign extension bit and the input address size are set to 1. We emulate this behavior in software when looking up a pagetable for a software operation. Change-Id: Ic0dedbad80c72f11bc8a7e6792f0e3c2f58bc674 Signed-off-by: Jordan Crouse --- drivers/iommu/arm-smmu.c | 31 +++++++++++++++++++++++++++---- drivers/iommu/io-pgtable-arm.c | 31 ++++++++++++++++++++----------- drivers/iommu/io-pgtable.h | 1 + 3 files changed, 48 insertions(+), 15 deletions(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 065379c1397f..5f2a8bb3e3a3 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -367,6 +367,8 @@ struct arm_smmu_device { u32 num_mapping_groups; DECLARE_BITMAP(smr_map, ARM_SMMU_MAX_SMRS); + u32 ubs; + unsigned long va_size; unsigned long ipa_size; unsigned long pa_size; @@ -1754,6 +1756,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, { int irq, start, ret = 0; unsigned long ias, oas; + int sep = 0; struct io_pgtable_ops *pgtbl_ops; enum io_pgtable_fmt fmt; struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); @@ -1795,9 +1798,27 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, start = smmu->num_s2_context_banks; ias = smmu->va_size; oas = smmu->ipa_size; - if (IS_ENABLED(CONFIG_64BIT)) + if (IS_ENABLED(CONFIG_64BIT)) { fmt = ARM_64_LPAE_S1; - else + + if (quirks & IO_PGTABLE_QUIRK_ARM_TTBR1) { + + /* + * When the UBS id is 5 we know that the bus + * size is 49 bits and that bit 48 is the fixed + * sign extension bit.
For any other bus size + * we need to specify the sign extension bit + * and adjust the input size accordingly + */ + + if (smmu->ubs == 5) { + sep = 48; + } else { + sep = ias - 1; + ias--; + } + } + } else fmt = ARM_32_LPAE_S1; break; case ARM_SMMU_DOMAIN_NESTED: @@ -1859,6 +1880,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, .pgsize_bitmap = arm_smmu_ops.pgsize_bitmap, .ias = ias, .oas = oas, + .sep = sep, .tlb = &arm_smmu_gather_ops, .iommu_dev = smmu->dev, .iova_base = domain->geometry.aperture_start, @@ -3888,8 +3910,9 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) smmu->va_size = smmu->ipa_size; size = SZ_4K | SZ_2M | SZ_1G; } else { - size = (id >> ID2_UBS_SHIFT) & ID2_UBS_MASK; - smmu->va_size = arm_smmu_id_size_to_bits(size); + smmu->ubs = (id >> ID2_UBS_SHIFT) & ID2_UBS_MASK; + + smmu->va_size = arm_smmu_id_size_to_bits(smmu->ubs); #ifndef CONFIG_64BIT smmu->va_size = min(32UL, smmu->va_size); #endif diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 2d2583c78bdb..7651545e3f2e 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -550,9 +550,18 @@ static inline arm_lpae_iopte *arm_lpae_get_table( { struct io_pgtable_cfg *cfg = &data->iop.cfg; - return ((cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1) && - (iova & (1UL << (cfg->ias - 1)))) ? - data->pgd[1] : data->pgd[0]; + /* + * iovas for TTBR1 will have all the bits set between the input address + * region and the sign extension bit + */ + if (unlikely(cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)) { + unsigned long mask = GENMASK(cfg->sep, cfg->ias); + + if ((iova & mask) == mask) + return data->pgd[1]; + } + + return data->pgd[0]; } static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova, @@ -1089,26 +1098,26 @@ static u64 arm64_lpae_setup_ttbr1(struct io_pgtable_cfg *cfg, /* Set T1SZ */ reg |= (64ULL - cfg->ias) << ARM_LPAE_TCR_T1SZ_SHIFT; - /* Set the SEP bit based on the size */ - switch (cfg->ias) { - case 32: + switch (cfg->sep) { + case 31: reg |= (ARM_LPAE_TCR_SEP_31 << ARM_LPAE_TCR_SEP_SHIFT); break; - case 36: + case 35: reg |= (ARM_LPAE_TCR_SEP_35 << ARM_LPAE_TCR_SEP_SHIFT); break; - case 40: + case 39: reg |= (ARM_LPAE_TCR_SEP_39 << ARM_LPAE_TCR_SEP_SHIFT); break; - case 42: + case 41: reg |= (ARM_LPAE_TCR_SEP_41 << ARM_LPAE_TCR_SEP_SHIFT); break; - case 44: + case 43: reg |= (ARM_LPAE_TCR_SEP_43 << ARM_LPAE_TCR_SEP_SHIFT); break; - case 48: + case 47: reg |= (ARM_LPAE_TCR_SEP_47 << ARM_LPAE_TCR_SEP_SHIFT); break; + case 48: default: reg |= (ARM_LPAE_TCR_SEP_UPSTREAM << ARM_LPAE_TCR_SEP_SHIFT); break; diff --git a/drivers/iommu/io-pgtable.h b/drivers/iommu/io-pgtable.h index 2cf213514221..0326bb6a4afa 100644 --- a/drivers/iommu/io-pgtable.h +++ b/drivers/iommu/io-pgtable.h @@ -67,6 +67,7 @@ struct io_pgtable_cfg { unsigned long pgsize_bitmap; unsigned int ias; unsigned int oas; + int sep; const struct iommu_gather_ops *tlb; struct device *iommu_dev; dma_addr_t iova_base; From 855e0a4c1c6b5257c2407c6f1f6210d2be5534f2 Mon Sep 17 00:00:00 2001 From: Sushmita Susheelendra Date: Fri, 12 May 2017 08:12:21 -0600 Subject: [PATCH 2/4] drm/msm: Set the TTBR1 range for a 36-bit address space Define a 36-bit address space for TTBR1 which is used for kernel side GPU buffer objects. 
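As a rough sketch (illustrative only, not part of this patch; the helper names below are made up), the window programmed here follows from the 36-bit input address size described in the previous patch:

/*
 * Illustrative sketch: derive the sign-extended TTBR1 window for a
 * given input address size. With ias = 36 this yields the
 * va_start/va_end values used below.
 */
static inline unsigned long long ttbr1_va_start(unsigned int ias)
{
	/* all bits from bit (ias) upwards set, low ias bits clear */
	return ~((1ULL << ias) - 1);	/* 0xfffffff000000000 for ias = 36 */
}

static inline unsigned long long ttbr1_va_end(void)
{
	return ~0ULL;			/* 0xffffffffffffffff */
}

The GPU driver only hands fully sign-extended addresses to the SMMU driver; it never needs to know the upstream bus size, since the SMMU driver picks the sign extension bit itself.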
Change-Id: I1c4eaee0fd92236793621c7d3dba1700e56fefd2 Signed-off-by: Sushmita Susheelendra --- drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index 3caa460aa5ba..f70e67766d19 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -1408,8 +1408,8 @@ struct msm_gpu *a5xx_gpu_init(struct drm_device *dev) * Set the user domain range to fall into the TTBR1 region for global * objects */ - a5xx_config.va_start = 0x800000000; - a5xx_config.va_end = 0x8ffffffff; + a5xx_config.va_start = 0xfffffff000000000ULL; + a5xx_config.va_end = 0xffffffffffffffffULL; a5xx_config.secure_va_start = SECURE_VA_START; a5xx_config.secure_va_end = SECURE_VA_START + SECURE_VA_SIZE - 1; From 9f47a21e5606a5c23601ca168cfa349c803c2a57 Mon Sep 17 00:00:00 2001 From: Sushmita Susheelendra Date: Fri, 12 May 2017 13:32:51 -0600 Subject: [PATCH 3/4] iommu/arm-smmu: Change virtual address size limit to 39 bits Restrict the virtual address size to 39 bits to allow a maximum of 3 pagetable levels: with a 4K granule each pagetable level resolves 9 bits of virtual address on top of the 12-bit page offset, so 12 + 3 * 9 = 39 bits is the largest size that still fits in three levels. Change-Id: I264f23a2e16cc7599bdad8a161854bcf6e24dd4a Signed-off-by: Sushmita Susheelendra --- drivers/iommu/arm-smmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 5f2a8bb3e3a3..1cc552a2d525 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -3916,7 +3916,7 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) #ifndef CONFIG_64BIT smmu->va_size = min(32UL, smmu->va_size); #endif - smmu->va_size = min(36UL, smmu->va_size); + smmu->va_size = min(39UL, smmu->va_size); size = 0; if (id & ID2_PTFS_4K) size |= SZ_4K | SZ_2M | SZ_1G; From 9c0d1dc8c0ad47acfc0d3c2e3ea534c65e3ba3b7 Mon Sep 17 00:00:00 2001 From: Sushmita Susheelendra Date: Sat, 1 Apr 2017 16:17:33 -0600 Subject: [PATCH 4/4] drm/msm: Separate locking of buffer resources from struct_mutex Buffer-object-specific resources such as pages, domains, and the sg list need not be protected with struct_mutex; they can be protected with a per-buffer-object lock instead. This simplifies locking and makes it easier to avoid potential recursive locking scenarios for SVM involving mmap_sem and struct_mutex. It also removes unnecessary serialization when creating buffer objects, and between buffer object creation and GPU command submission.
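As an illustrative summary (not part of the diff; the callers shown are only examples), the resulting split looks roughly like this:

/*
 * Rough sketch of which lock guards what after this change:
 *
 *   msm_obj->lock      - pages, sgt, vaddr and per-object iova domains
 *   priv->vram.lock    - drm_mm allocations in the VRAM carveout
 *   aspace->lock       - drm_mm (iova) allocations in an address space
 *   dev->struct_mutex  - the shared inactive_list and other
 *                        device-global state
 *
 * A caller that does not hold struct_mutex can now allocate directly:
 */
bo = msm_gem_new(drm, size, MSM_BO_UNCACHED);

/*
 * while a path that already holds struct_mutex (for example the
 * crashdumper) uses the _locked variant, which adds the new object to
 * the inactive list without taking struct_mutex again:
 */
bo = msm_gem_new_locked(drm, CRASHDUMP_BO_SIZE, MSM_BO_UNCACHED);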
Change-Id: I40cb437d0186c3d9aac365c9baba0aa4792f0aa1 Signed-off-by: Sushmita Susheelendra --- drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 2 - drivers/gpu/drm/msm/adreno/a5xx_power.c | 2 - drivers/gpu/drm/msm/adreno/a5xx_preempt.c | 2 - drivers/gpu/drm/msm/adreno/a5xx_snapshot.c | 7 +- drivers/gpu/drm/msm/adreno/adreno_gpu.c | 2 - drivers/gpu/drm/msm/dsi/dsi_host.c | 5 +- drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c | 2 +- drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c | 2 - drivers/gpu/drm/msm/msm_drv.c | 3 +- drivers/gpu/drm/msm/msm_drv.h | 6 +- drivers/gpu/drm/msm/msm_fbdev.c | 6 +- drivers/gpu/drm/msm/msm_gem.c | 198 ++++++++++++--------- drivers/gpu/drm/msm/msm_gem.h | 2 + drivers/gpu/drm/msm/msm_gem_submit.c | 6 +- drivers/gpu/drm/msm/msm_gem_vma.c | 20 ++- drivers/gpu/drm/msm/msm_gpu.c | 7 +- drivers/gpu/drm/msm/msm_rd.c | 2 +- drivers/gpu/drm/msm/msm_ringbuffer.c | 5 +- 18 files changed, 150 insertions(+), 129 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index f70e67766d19..8a136fef86f1 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -481,10 +481,8 @@ static struct drm_gem_object *a5xx_ucode_load_bo(struct msm_gpu *gpu, struct drm_gem_object *bo; void *ptr; - mutex_lock(&drm->struct_mutex); bo = msm_gem_new(drm, fw->size - 4, MSM_BO_UNCACHED | MSM_BO_GPU_READONLY); - mutex_unlock(&drm->struct_mutex); if (IS_ERR(bo)) return bo; diff --git a/drivers/gpu/drm/msm/adreno/a5xx_power.c b/drivers/gpu/drm/msm/adreno/a5xx_power.c index e04feaadefb9..0025922540df 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_power.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_power.c @@ -458,10 +458,8 @@ void a5xx_gpmu_ucode_init(struct msm_gpu *gpu) */ bosize = (cmds_size + (cmds_size / TYPE4_MAX_PAYLOAD) + 1) << 2; - mutex_lock(&drm->struct_mutex); a5xx_gpu->gpmu_bo = msm_gem_new(drm, bosize, MSM_BO_UNCACHED | MSM_BO_GPU_READONLY); - mutex_unlock(&drm->struct_mutex); if (IS_ERR(a5xx_gpu->gpmu_bo)) goto err; diff --git a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c index 648494c75abc..57046089434c 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c @@ -24,9 +24,7 @@ static void *alloc_kernel_bo(struct drm_device *drm, struct msm_gpu *gpu, void *ptr; int ret; - mutex_lock(&drm->struct_mutex); _bo = msm_gem_new(drm, size, flags); - mutex_unlock(&drm->struct_mutex); if (IS_ERR(_bo)) return _bo; diff --git a/drivers/gpu/drm/msm/adreno/a5xx_snapshot.c b/drivers/gpu/drm/msm/adreno/a5xx_snapshot.c index 690e6f546e60..c2773cb325d5 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_snapshot.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_snapshot.c @@ -217,18 +217,19 @@ static int crashdump_init(struct msm_gpu *gpu, struct crashdump *crashdump) struct drm_device *drm = gpu->dev; int ret = -ENOMEM; - crashdump->bo = msm_gem_new(drm, CRASHDUMP_BO_SIZE, MSM_BO_UNCACHED); + crashdump->bo = msm_gem_new_locked(drm, CRASHDUMP_BO_SIZE, + MSM_BO_UNCACHED); if (IS_ERR(crashdump->bo)) { ret = PTR_ERR(crashdump->bo); crashdump->bo = NULL; return ret; } - crashdump->ptr = msm_gem_vaddr_locked(crashdump->bo); + crashdump->ptr = msm_gem_vaddr(crashdump->bo); if (!crashdump->ptr) goto out; - ret = msm_gem_get_iova_locked(crashdump->bo, gpu->aspace, + ret = msm_gem_get_iova(crashdump->bo, gpu->aspace, &crashdump->iova); out: diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index d3cb497411c4..81fa37ee9671 100644 --- 
a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -563,10 +563,8 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, } } - mutex_lock(&drm->struct_mutex); adreno_gpu->memptrs_bo = msm_gem_new(drm, sizeof(*adreno_gpu->memptrs), MSM_BO_UNCACHED); - mutex_unlock(&drm->struct_mutex); if (IS_ERR(adreno_gpu->memptrs_bo)) { ret = PTR_ERR(adreno_gpu->memptrs_bo); adreno_gpu->memptrs_bo = NULL; diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c index 4580a6e3c877..e2b8deda46c2 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_host.c +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c @@ -838,22 +838,19 @@ static int dsi_tx_buf_alloc(struct msm_dsi_host *msm_host, int size) int ret; u64 iova; - mutex_lock(&dev->struct_mutex); msm_host->tx_gem_obj = msm_gem_new(dev, size, MSM_BO_UNCACHED); if (IS_ERR(msm_host->tx_gem_obj)) { ret = PTR_ERR(msm_host->tx_gem_obj); pr_err("%s: failed to allocate gem, %d\n", __func__, ret); msm_host->tx_gem_obj = NULL; - mutex_unlock(&dev->struct_mutex); return ret; } - ret = msm_gem_get_iova_locked(msm_host->tx_gem_obj, 0, &iova); + ret = msm_gem_get_iova(msm_host->tx_gem_obj, NULL, &iova); if (ret) { pr_err("%s: failed to get iova, %d\n", __func__, ret); return ret; } - mutex_unlock(&dev->struct_mutex); if (iova & 0x07) { pr_err("%s: buf NOT 8 bytes aligned\n", __func__); diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c index caad2be87ae4..e5f42fe983c1 100644 --- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c +++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c @@ -392,7 +392,7 @@ static void update_cursor(struct drm_crtc *crtc) if (next_bo) { /* take a obj ref + iova ref when we start scanning out: */ drm_gem_object_reference(next_bo); - msm_gem_get_iova_locked(next_bo, mdp4_kms->aspace, + msm_gem_get_iova(next_bo, mdp4_kms->aspace, &iova); /* enable cursor: */ diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c index 119221eacb43..40509434a913 100644 --- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c +++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c @@ -536,9 +536,7 @@ struct msm_kms *mdp4_kms_init(struct drm_device *dev) goto fail; } - mutex_lock(&dev->struct_mutex); mdp4_kms->blank_cursor_bo = msm_gem_new(dev, SZ_16K, MSM_BO_WC); - mutex_unlock(&dev->struct_mutex); if (IS_ERR(mdp4_kms->blank_cursor_bo)) { ret = PTR_ERR(mdp4_kms->blank_cursor_bo); dev_err(dev->dev, "could not allocate blank-cursor bo: %d\n", ret); diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 665f8db0e04f..b9503564cdd6 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -322,6 +322,7 @@ static int msm_init_vram(struct drm_device *dev) priv->vram.size = size; drm_mm_init(&priv->vram.mm, 0, (size >> PAGE_SHIFT) - 1); + spin_lock_init(&priv->vram.lock); dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs); dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); @@ -631,12 +632,10 @@ static void msm_postclose(struct drm_device *dev, struct drm_file *file) if (priv->gpu) msm_gpu_cleanup_counters(priv->gpu, ctx); - mutex_lock(&dev->struct_mutex); if (ctx && ctx->aspace && ctx->aspace != priv->gpu->aspace) { ctx->aspace->mmu->funcs->detach(ctx->aspace->mmu); msm_gem_address_space_put(ctx->aspace); } - mutex_unlock(&dev->struct_mutex); kfree(ctx); } diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index 94f429cb83a7..54a3568ca11f 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ 
b/drivers/gpu/drm/msm/msm_drv.h @@ -341,6 +341,7 @@ struct msm_drm_private { * and position mm_node->start is in # of pages: */ struct drm_mm mm; + spinlock_t lock; /* Protects drm_mm node allocation/removal */ } vram; struct msm_vblank_ctrl vblank_ctrl; @@ -431,8 +432,6 @@ int msm_gem_mmap_obj(struct drm_gem_object *obj, int msm_gem_mmap(struct file *filp, struct vm_area_struct *vma); int msm_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf); uint64_t msm_gem_mmap_offset(struct drm_gem_object *obj); -int msm_gem_get_iova_locked(struct drm_gem_object *obj, - struct msm_gem_address_space *aspace, uint64_t *iova); int msm_gem_get_iova(struct drm_gem_object *obj, struct msm_gem_address_space *aspace, uint64_t *iova); uint64_t msm_gem_iova(struct drm_gem_object *obj, @@ -453,7 +452,6 @@ struct drm_gem_object *msm_gem_prime_import_sg_table(struct drm_device *dev, struct dma_buf_attachment *attach, struct sg_table *sg); int msm_gem_prime_pin(struct drm_gem_object *obj); void msm_gem_prime_unpin(struct drm_gem_object *obj); -void *msm_gem_vaddr_locked(struct drm_gem_object *obj); void *msm_gem_vaddr(struct drm_gem_object *obj); int msm_gem_queue_inactive_cb(struct drm_gem_object *obj, struct msm_fence_cb *cb); @@ -468,6 +466,8 @@ int msm_gem_new_handle(struct drm_device *dev, struct drm_file *file, uint32_t size, uint32_t flags, uint32_t *handle); struct drm_gem_object *msm_gem_new(struct drm_device *dev, uint32_t size, uint32_t flags); +struct drm_gem_object *msm_gem_new_locked(struct drm_device *dev, + uint32_t size, uint32_t flags); struct drm_gem_object *msm_gem_import(struct drm_device *dev, uint32_t size, struct sg_table *sgt, u32 flags); void msm_gem_sync(struct drm_gem_object *obj, u32 op); diff --git a/drivers/gpu/drm/msm/msm_fbdev.c b/drivers/gpu/drm/msm/msm_fbdev.c index cf127250c0d0..28b98cc1433c 100644 --- a/drivers/gpu/drm/msm/msm_fbdev.c +++ b/drivers/gpu/drm/msm/msm_fbdev.c @@ -104,10 +104,8 @@ static int msm_fbdev_create(struct drm_fb_helper *helper, /* allocate backing bo */ size = mode_cmd.pitches[0] * mode_cmd.height; DBG("allocating %d bytes for fb %d", size, dev->primary->index); - mutex_lock(&dev->struct_mutex); fbdev->bo = msm_gem_new(dev, size, MSM_BO_SCANOUT | MSM_BO_WC | MSM_BO_STOLEN); - mutex_unlock(&dev->struct_mutex); if (IS_ERR(fbdev->bo)) { ret = PTR_ERR(fbdev->bo); fbdev->bo = NULL; @@ -133,7 +131,7 @@ static int msm_fbdev_create(struct drm_fb_helper *helper, * in panic (ie. lock-safe, etc) we could avoid pinning the * buffer now: */ - ret = msm_gem_get_iova_locked(fbdev->bo, 0, &paddr); + ret = msm_gem_get_iova(fbdev->bo, 0, &paddr); if (ret) { dev_err(dev->dev, "failed to get buffer obj iova: %d\n", ret); goto fail_unlock; @@ -163,7 +161,7 @@ static int msm_fbdev_create(struct drm_fb_helper *helper, /* FIXME: Verify paddr < 32 bits? 
*/ dev->mode_config.fb_base = lower_32_bits(paddr); - fbi->screen_base = msm_gem_vaddr_locked(fbdev->bo); + fbi->screen_base = msm_gem_vaddr(fbdev->bo); fbi->screen_size = fbdev->bo->size; fbi->fix.smem_start = lower_32_bits(paddr); fbi->fix.smem_len = fbdev->bo->size; diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 4bee797da746..e258277eb21a 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -63,8 +63,7 @@ static bool use_pages(struct drm_gem_object *obj) } /* allocate pages from VRAM carveout, used when no IOMMU: */ -static struct page **get_pages_vram(struct drm_gem_object *obj, - int npages) +static struct page **get_pages_vram(struct drm_gem_object *obj, int npages) { struct msm_gem_object *msm_obj = to_msm_bo(obj); struct msm_drm_private *priv = obj->dev->dev_private; @@ -76,8 +75,10 @@ static struct page **get_pages_vram(struct drm_gem_object *obj, if (!p) return ERR_PTR(-ENOMEM); + spin_lock(&priv->vram.lock); ret = drm_mm_insert_node(&priv->vram.mm, msm_obj->vram_node, npages, 0, DRM_MM_SEARCH_DEFAULT); + spin_unlock(&priv->vram.lock); if (ret) { drm_free_large(p); return ERR_PTR(ret); @@ -92,7 +93,6 @@ static struct page **get_pages_vram(struct drm_gem_object *obj, return p; } -/* called with dev->struct_mutex held */ static struct page **get_pages(struct drm_gem_object *obj) { struct msm_gem_object *msm_obj = to_msm_bo(obj); @@ -147,6 +147,18 @@ static struct page **get_pages(struct drm_gem_object *obj) return msm_obj->pages; } +static void put_pages_vram(struct drm_gem_object *obj) +{ + struct msm_gem_object *msm_obj = to_msm_bo(obj); + struct msm_drm_private *priv = obj->dev->dev_private; + + spin_lock(&priv->vram.lock); + drm_mm_remove_node(msm_obj->vram_node); + spin_unlock(&priv->vram.lock); + + drm_free_large(msm_obj->pages); +} + static void put_pages(struct drm_gem_object *obj) { struct msm_gem_object *msm_obj = to_msm_bo(obj); @@ -160,12 +172,10 @@ static void put_pages(struct drm_gem_object *obj) sg_free_table(msm_obj->sgt); kfree(msm_obj->sgt); - if (use_pages(obj)) { + if (use_pages(obj)) drm_gem_put_pages(obj, msm_obj->pages, true, false); - } else { - drm_mm_remove_node(msm_obj->vram_node); - drm_free_large(msm_obj->pages); - } + else + put_pages_vram(obj); msm_obj->pages = NULL; } @@ -173,11 +183,12 @@ static void put_pages(struct drm_gem_object *obj) struct page **msm_gem_get_pages(struct drm_gem_object *obj) { - struct drm_device *dev = obj->dev; + struct msm_gem_object *msm_obj = to_msm_bo(obj); struct page **p; - mutex_lock(&dev->struct_mutex); + + mutex_lock(&msm_obj->lock); p = get_pages(obj); - mutex_unlock(&dev->struct_mutex); + mutex_unlock(&msm_obj->lock); return p; } @@ -237,16 +248,17 @@ int msm_gem_mmap(struct file *filp, struct vm_area_struct *vma) int msm_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { struct drm_gem_object *obj = vma->vm_private_data; - struct drm_device *dev = obj->dev; + struct msm_gem_object *msm_obj = to_msm_bo(obj); struct page **pages; unsigned long pfn; pgoff_t pgoff; int ret; - /* Make sure we don't parallel update on a fault, nor move or remove - * something from beneath our feet + /* + * vm_ops.open and close get and put a reference on obj. + * So, we dont need to hold one here. 
*/ - ret = mutex_lock_interruptible(&dev->struct_mutex); + ret = mutex_lock_interruptible(&msm_obj->lock); if (ret) goto out; @@ -269,7 +281,7 @@ int msm_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) ret = vm_insert_mixed(vma, (unsigned long)vmf->virtual_address, pfn); out_unlock: - mutex_unlock(&dev->struct_mutex); + mutex_unlock(&msm_obj->lock); out: switch (ret) { case -EAGAIN: @@ -293,9 +305,10 @@ out: static uint64_t mmap_offset(struct drm_gem_object *obj) { struct drm_device *dev = obj->dev; + struct msm_gem_object *msm_obj = to_msm_bo(obj); int ret; - WARN_ON(!mutex_is_locked(&dev->struct_mutex)); + WARN_ON(!mutex_is_locked(&msm_obj->lock)); /* Make it mmapable */ ret = drm_gem_create_mmap_offset(obj); @@ -311,9 +324,11 @@ static uint64_t mmap_offset(struct drm_gem_object *obj) uint64_t msm_gem_mmap_offset(struct drm_gem_object *obj) { uint64_t offset; - mutex_lock(&obj->dev->struct_mutex); + struct msm_gem_object *msm_obj = to_msm_bo(obj); + + mutex_lock(&msm_obj->lock); offset = mmap_offset(obj); - mutex_unlock(&obj->dev->struct_mutex); + mutex_unlock(&msm_obj->lock); return offset; } @@ -325,14 +340,14 @@ static void obj_remove_domain(struct msm_gem_vma *domain) } } +/* Called with msm_obj->lock locked */ static void put_iova(struct drm_gem_object *obj) { - struct drm_device *dev = obj->dev; struct msm_gem_object *msm_obj = to_msm_bo(obj); struct msm_gem_vma *domain, *tmp; - WARN_ON(!mutex_is_locked(&dev->struct_mutex)); + WARN_ON(!mutex_is_locked(&msm_obj->lock)); list_for_each_entry_safe(domain, tmp, &msm_obj->domains, list) { if (iommu_present(&platform_bus_type)) { @@ -378,14 +393,8 @@ static struct msm_gem_vma *obj_get_domain(struct drm_gem_object *obj, #define IOMMU_PRIV 0 #endif -/* should be called under struct_mutex.. although it can be called - * from atomic context without struct_mutex to acquire an extra - * iova ref if you know one is already held. - * - * That means when I do eventually need to add support for unpinning - * the refcnt counter needs to be atomic_t. - */ -int msm_gem_get_iova_locked(struct drm_gem_object *obj, +/* A reference to obj must be held before calling this function. */ +int msm_gem_get_iova(struct drm_gem_object *obj, struct msm_gem_address_space *aspace, uint64_t *iova) { struct msm_gem_object *msm_obj = to_msm_bo(obj); @@ -393,13 +402,18 @@ int msm_gem_get_iova_locked(struct drm_gem_object *obj, struct msm_gem_vma *domain; int ret = 0; + mutex_lock(&msm_obj->lock); + if (!iommu_present(&platform_bus_type)) { pages = get_pages(obj); - if (IS_ERR(pages)) + if (IS_ERR(pages)) { + mutex_unlock(&msm_obj->lock); return PTR_ERR(pages); + } *iova = (uint64_t) physaddr(obj); + mutex_unlock(&msm_obj->lock); return 0; } @@ -407,12 +421,15 @@ int msm_gem_get_iova_locked(struct drm_gem_object *obj, if (!domain) { domain = obj_add_domain(obj, aspace); - if (IS_ERR(domain)) + if (IS_ERR(domain)) { + mutex_unlock(&msm_obj->lock); return PTR_ERR(domain); + } pages = get_pages(obj); if (IS_ERR(pages)) { obj_remove_domain(domain); + mutex_unlock(&msm_obj->lock); return PTR_ERR(pages); } @@ -425,26 +442,8 @@ int msm_gem_get_iova_locked(struct drm_gem_object *obj, else obj_remove_domain(domain); - return ret; -} - -/* get iova, taking a reference. 
Should have a matching put */ -int msm_gem_get_iova(struct drm_gem_object *obj, - struct msm_gem_address_space *aspace, uint64_t *iova) -{ - struct msm_gem_vma *domain; - int ret; - - domain = obj_get_domain(obj, aspace); - if (domain) { - *iova = domain->iova; - return 0; - } - - mutex_lock(&obj->dev->struct_mutex); - ret = msm_gem_get_iova_locked(obj, aspace, iova); - mutex_unlock(&obj->dev->struct_mutex); - return ret; + mutex_unlock(&msm_obj->lock); + return 0; } /* get iova without taking a reference, used in places where you have @@ -453,11 +452,17 @@ int msm_gem_get_iova(struct drm_gem_object *obj, uint64_t msm_gem_iova(struct drm_gem_object *obj, struct msm_gem_address_space *aspace) { - struct msm_gem_vma *domain = obj_get_domain(obj, aspace); + struct msm_gem_object *msm_obj = to_msm_bo(obj); + struct msm_gem_vma *domain; + uint64_t iova; + mutex_lock(&msm_obj->lock); + domain = obj_get_domain(obj, aspace); WARN_ON(!domain); + iova = domain ? domain->iova : 0; + mutex_unlock(&msm_obj->lock); - return domain ? domain->iova : 0; + return iova; } void msm_gem_put_iova(struct drm_gem_object *obj, @@ -501,27 +506,23 @@ fail: return ret; } -void *msm_gem_vaddr_locked(struct drm_gem_object *obj) +void *msm_gem_vaddr(struct drm_gem_object *obj) { struct msm_gem_object *msm_obj = to_msm_bo(obj); - WARN_ON(!mutex_is_locked(&obj->dev->struct_mutex)); + + mutex_lock(&msm_obj->lock); if (!msm_obj->vaddr) { struct page **pages = get_pages(obj); - if (IS_ERR(pages)) + if (IS_ERR(pages)) { + mutex_unlock(&msm_obj->lock); return ERR_CAST(pages); + } msm_obj->vaddr = vmap(pages, obj->size >> PAGE_SHIFT, VM_MAP, pgprot_writecombine(PAGE_KERNEL)); } - return msm_obj->vaddr; -} + mutex_unlock(&msm_obj->lock); -void *msm_gem_vaddr(struct drm_gem_object *obj) -{ - void *ret; - mutex_lock(&obj->dev->struct_mutex); - ret = msm_gem_vaddr_locked(obj); - mutex_unlock(&obj->dev->struct_mutex); - return ret; + return msm_obj->vaddr; } /* setup callback for when bo is no longer busy.. 
@@ -659,16 +660,16 @@ void msm_gem_free_object(struct drm_gem_object *obj) /* object should not be on active list: */ WARN_ON(is_active(msm_obj)); - list_del(&msm_obj->mm_list); + mutex_lock(&msm_obj->lock); + put_iova(obj); if (obj->import_attach) { if (msm_obj->vaddr) dma_buf_vunmap(obj->import_attach->dmabuf, msm_obj->vaddr); - /* Don't drop the pages for imported dmabuf, as they are not * ours, just free the array we allocated: */ @@ -685,6 +686,7 @@ void msm_gem_free_object(struct drm_gem_object *obj) reservation_object_fini(msm_obj->resv); drm_gem_object_release(obj); + mutex_unlock(&msm_obj->lock); kfree(msm_obj); } @@ -696,14 +698,8 @@ int msm_gem_new_handle(struct drm_device *dev, struct drm_file *file, struct drm_gem_object *obj; int ret; - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - return ret; - obj = msm_gem_new(dev, size, flags); - mutex_unlock(&dev->struct_mutex); - if (IS_ERR(obj)) return PTR_ERR(obj); @@ -715,9 +711,23 @@ int msm_gem_new_handle(struct drm_device *dev, struct drm_file *file, return ret; } +static inline void msm_gem_add_to_inactive_list(struct msm_gem_object *msm_obj, + struct drm_device *dev, bool struct_mutex_locked) +{ + struct msm_drm_private *priv = dev->dev_private; + + if (struct_mutex_locked) { + list_add_tail(&msm_obj->mm_list, &priv->inactive_list); + } else { + mutex_lock(&dev->struct_mutex); + list_add_tail(&msm_obj->mm_list, &priv->inactive_list); + mutex_unlock(&dev->struct_mutex); + } +} + static int msm_gem_new_impl(struct drm_device *dev, - uint32_t size, uint32_t flags, - struct drm_gem_object **obj) + uint32_t size, uint32_t flags, struct drm_gem_object **obj, + bool struct_mutex_locked) { struct msm_drm_private *priv = dev->dev_private; struct msm_gem_object *msm_obj; @@ -746,6 +756,8 @@ static int msm_gem_new_impl(struct drm_device *dev, if (!msm_obj) return -ENOMEM; + mutex_init(&msm_obj->lock); + if (use_vram) { struct msm_gem_vma *domain = obj_add_domain(&msm_obj->base, 0); @@ -761,21 +773,19 @@ static int msm_gem_new_impl(struct drm_device *dev, INIT_LIST_HEAD(&msm_obj->submit_entry); INIT_LIST_HEAD(&msm_obj->domains); - list_add_tail(&msm_obj->mm_list, &priv->inactive_list); + msm_gem_add_to_inactive_list(msm_obj, dev, struct_mutex_locked); *obj = &msm_obj->base; return 0; } -struct drm_gem_object *msm_gem_new(struct drm_device *dev, - uint32_t size, uint32_t flags) +static struct drm_gem_object *_msm_gem_new(struct drm_device *dev, + uint32_t size, uint32_t flags, bool struct_mutex_locked) { struct drm_gem_object *obj = NULL; int ret; - WARN_ON(!mutex_is_locked(&dev->struct_mutex)); - size = PAGE_ALIGN(size); /* @@ -785,7 +795,7 @@ struct drm_gem_object *msm_gem_new(struct drm_device *dev, if (!size) return ERR_PTR(-EINVAL); - ret = msm_gem_new_impl(dev, size, flags, &obj); + ret = msm_gem_new_impl(dev, size, flags, &obj, struct_mutex_locked); if (ret) goto fail; @@ -801,11 +811,23 @@ struct drm_gem_object *msm_gem_new(struct drm_device *dev, fail: if (obj) - drm_gem_object_unreference(obj); + drm_gem_object_unreference_unlocked(obj); return ERR_PTR(ret); } +struct drm_gem_object *msm_gem_new_locked(struct drm_device *dev, + uint32_t size, uint32_t flags) +{ + return _msm_gem_new(dev, size, flags, true); +} + +struct drm_gem_object *msm_gem_new(struct drm_device *dev, + uint32_t size, uint32_t flags) +{ + return _msm_gem_new(dev, size, flags, false); +} + struct drm_gem_object *msm_gem_import(struct drm_device *dev, uint32_t size, struct sg_table *sgt, u32 flags) { @@ -821,9 +843,7 @@ struct drm_gem_object 
*msm_gem_import(struct drm_device *dev, size = PAGE_ALIGN(size); - mutex_lock(&dev->struct_mutex); - ret = msm_gem_new_impl(dev, size, MSM_BO_WC, &obj); - mutex_unlock(&dev->struct_mutex); + ret = msm_gem_new_impl(dev, size, MSM_BO_WC, &obj, false); if (ret) goto fail; @@ -833,9 +853,11 @@ struct drm_gem_object *msm_gem_import(struct drm_device *dev, npages = size / PAGE_SIZE; msm_obj = to_msm_bo(obj); + mutex_lock(&msm_obj->lock); msm_obj->sgt = sgt; msm_obj->pages = drm_malloc_ab(npages, sizeof(struct page *)); if (!msm_obj->pages) { + mutex_unlock(&msm_obj->lock); ret = -ENOMEM; goto fail; } @@ -844,8 +866,12 @@ struct drm_gem_object *msm_gem_import(struct drm_device *dev, msm_obj->flags |= flags; ret = drm_prime_sg_to_page_addr_arrays(sgt, msm_obj->pages, NULL, npages); - if (ret) + if (ret) { + mutex_unlock(&msm_obj->lock); goto fail; + } + + mutex_unlock(&msm_obj->lock); return obj; diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h index 41b4b5a4fd66..eb850952f1f5 100644 --- a/drivers/gpu/drm/msm/msm_gem.h +++ b/drivers/gpu/drm/msm/msm_gem.h @@ -31,6 +31,7 @@ struct msm_gem_address_space { struct msm_mmu *mmu; struct kref kref; struct drm_mm mm; + spinlock_t lock; /* Protects drm_mm node allocation/removal */ u64 va_len; }; @@ -80,6 +81,7 @@ struct msm_gem_object { * an IOMMU. Also used for stolen/splashscreen buffer. */ struct drm_mm_node *vram_node; + struct mutex lock; /* Protects resources associated with bo */ }; #define to_msm_bo(x) container_of(x, struct msm_gem_object, base) diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 12cc28acec18..612e8e77b782 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -215,7 +215,7 @@ retry: /* if locking succeeded, pin bo: */ - ret = msm_gem_get_iova_locked(&msm_obj->base, aspace, &iova); + ret = msm_gem_get_iova(&msm_obj->base, aspace, &iova); /* this would break the logic in the fail path.. there is no * reason for this to happen, but just to be on the safe side @@ -303,7 +303,7 @@ static int submit_reloc(struct msm_gem_submit *submit, struct msm_gem_object *ob /* For now, just map the entire thing. Eventually we probably * to do it page-by-page, w/ kmap() if not vmap()d.. 
*/ - ptr = msm_gem_vaddr_locked(&obj->base); + ptr = msm_gem_vaddr(&obj->base); if (IS_ERR(ptr)) { ret = PTR_ERR(ptr); @@ -466,7 +466,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, if (submit_cmd.type == MSM_SUBMIT_CMD_PROFILE_BUF) { submit->profile_buf_iova = submit->cmd[i].iova; submit->profile_buf_vaddr = - msm_gem_vaddr_locked(&msm_obj->base); + msm_gem_vaddr(&msm_obj->base); } if (submit->valid) diff --git a/drivers/gpu/drm/msm/msm_gem_vma.c b/drivers/gpu/drm/msm/msm_gem_vma.c index 47f7436854fb..12e3c0f7c101 100644 --- a/drivers/gpu/drm/msm/msm_gem_vma.c +++ b/drivers/gpu/drm/msm/msm_gem_vma.c @@ -52,6 +52,7 @@ msm_gem_address_space_new(struct msm_mmu *mmu, const char *name, if (!aspace) return ERR_PTR(-ENOMEM); + spin_lock_init(&aspace->lock); aspace->name = name; aspace->mmu = mmu; @@ -77,14 +78,19 @@ static int allocate_iova(struct msm_gem_address_space *aspace, if (!aspace->va_len) return 0; - if (WARN_ON(drm_mm_node_allocated(&vma->node))) - return 0; - for_each_sg(sgt->sgl, sg, sgt->nents, i) size += sg->length + sg->offset; - ret = drm_mm_insert_node(&aspace->mm, &vma->node, size >> PAGE_SHIFT, - 0, DRM_MM_SEARCH_DEFAULT); + spin_lock(&aspace->lock); + + if (WARN_ON(drm_mm_node_allocated(&vma->node))) { + spin_unlock(&aspace->lock); + return 0; + } + ret = drm_mm_insert_node(&aspace->mm, &vma->node, + size >> PAGE_SHIFT, 0, DRM_MM_SEARCH_DEFAULT); + + spin_unlock(&aspace->lock); if (!ret && iova) *iova = vma->node.start << PAGE_SHIFT; @@ -110,8 +116,10 @@ int msm_gem_map_vma(struct msm_gem_address_space *aspace, flags, priv); if (ret) { + spin_lock(&aspace->lock); if (drm_mm_node_allocated(&vma->node)) drm_mm_remove_node(&vma->node); + spin_unlock(&aspace->lock); return ret; } @@ -130,8 +138,10 @@ void msm_gem_unmap_vma(struct msm_gem_address_space *aspace, aspace->mmu->funcs->unmap(aspace->mmu, vma->iova, sgt, priv); + spin_lock(&aspace->lock); if (drm_mm_node_allocated(&vma->node)) drm_mm_remove_node(&vma->node); + spin_unlock(&aspace->lock); vma->iova = 0; diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index bef6e81de82a..81bab9cc22af 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -580,8 +580,7 @@ int msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) /* ring takes a reference to the bo and iova: */ drm_gem_object_reference(&msm_obj->base); - msm_gem_get_iova_locked(&msm_obj->base, - aspace, &iova); + msm_gem_get_iova(&msm_obj->base, aspace, &iova); } if (submit->bos[i].flags & MSM_SUBMIT_BO_READ) @@ -890,10 +889,8 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, /* Create ringbuffer(s): */ for (i = 0; i < nr_rings; i++) { - mutex_lock(&drm->struct_mutex); - gpu->rb[i] = msm_ringbuffer_new(gpu, i); - mutex_unlock(&drm->struct_mutex); + gpu->rb[i] = msm_ringbuffer_new(gpu, i); if (IS_ERR(gpu->rb[i])) { ret = PTR_ERR(gpu->rb[i]); gpu->rb[i] = NULL; diff --git a/drivers/gpu/drm/msm/msm_rd.c b/drivers/gpu/drm/msm/msm_rd.c index 6dbb516cd4dc..2d112f24a902 100644 --- a/drivers/gpu/drm/msm/msm_rd.c +++ b/drivers/gpu/drm/msm/msm_rd.c @@ -310,7 +310,7 @@ void msm_rd_dump_submit(struct msm_gem_submit *submit) uint64_t iova = submit->cmd[i].iova; uint32_t szd = submit->cmd[i].size; /* in dwords */ struct msm_gem_object *obj = submit->bos[idx].obj; - const char *buf = msm_gem_vaddr_locked(&obj->base); + const char *buf = msm_gem_vaddr(&obj->base); buf += iova - submit->bos[idx].iova; diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c 
b/drivers/gpu/drm/msm/msm_ringbuffer.c index 14a16c4578d9..382c71bb0ebe 100644 --- a/drivers/gpu/drm/msm/msm_ringbuffer.c +++ b/drivers/gpu/drm/msm/msm_ringbuffer.c @@ -34,14 +34,15 @@ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id) ring->gpu = gpu; ring->id = id; - ring->bo = msm_gem_new(gpu->dev, MSM_GPU_RINGBUFFER_SZ, MSM_BO_WC); + ring->bo = msm_gem_new(gpu->dev, MSM_GPU_RINGBUFFER_SZ, + MSM_BO_WC); if (IS_ERR(ring->bo)) { ret = PTR_ERR(ring->bo); ring->bo = NULL; goto fail; } - ring->start = msm_gem_vaddr_locked(ring->bo); + ring->start = msm_gem_vaddr(ring->bo); ring->end = ring->start + (MSM_GPU_RINGBUFFER_SZ >> 2); ring->next = ring->start; ring->cur = ring->start;