Message ID | 1343390619-20456-3-git-send-email-m.szyprowski@samsung.com |
---|---|
State | New |
Hi Marek, I looked at the patch and have a few minor points and a question. I should have raised them on an earlier version; sorry about that. Anyway, apart from the points below, it looks good to me. On Fri, Jul 27, 2012 at 02:03:39PM +0200, Marek Szyprowski wrote: > This patch changes dma-mapping subsystem to use generic vmalloc areas > for all consistent dma allocations. This increases the total size limit > of the consistent allocations and removes platform hacks and a lot of > duplicated code. > > Atomic allocations are served from special pool preallocated on boot, > because vmalloc areas cannot be reliably created in atomic context. > > Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com> > Reviewed-by: Kyungmin Park <kyungmin.park@samsung.com> > --- > Documentation/kernel-parameters.txt | 2 +- > arch/arm/include/asm/dma-mapping.h | 2 +- > arch/arm/mm/dma-mapping.c | 511 +++++++++++++---------------------- > arch/arm/mm/mm.h | 3 + > include/linux/vmalloc.h | 1 + > mm/vmalloc.c | 10 +- > 6 files changed, 197 insertions(+), 332 deletions(-) > > diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt > index a92c5eb..4ee28f3 100644 > --- a/Documentation/kernel-parameters.txt > +++ b/Documentation/kernel-parameters.txt > @@ -526,7 +526,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted. > > coherent_pool=nn[KMG] [ARM,KNL] > Sets the size of memory pool for coherent, atomic dma > - allocations if Contiguous Memory Allocator (CMA) is used. > + allocations, by default set to 256K. > > code_bytes [X86] How many bytes of object code to print > in an oops report. > diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h > index bbef15d..80777d87 100644 > --- a/arch/arm/include/asm/dma-mapping.h > +++ b/arch/arm/include/asm/dma-mapping.h > @@ -226,7 +226,7 @@ static inline int dma_mmap_writecombine(struct device *dev, struct vm_area_struc > * DMA region above it's default value of 2MB. It must be called before the > * memory allocator is initialised, i.e. before any core_initcall.
> */ > -extern void __init init_consistent_dma_size(unsigned long size); > +static inline void init_consistent_dma_size(unsigned long size) { } > > /* > * For SA-1111, IXP425, and ADI systems the dma-mapping functions are "magic" > diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c > index 655878b..4d750a6 100644 > --- a/arch/arm/mm/dma-mapping.c > +++ b/arch/arm/mm/dma-mapping.c > @@ -22,6 +22,7 @@ > #include <linux/memblock.h> > #include <linux/slab.h> > #include <linux/iommu.h> > +#include <linux/io.h> > #include <linux/vmalloc.h> > > #include <asm/memory.h> > @@ -217,115 +218,70 @@ static void __dma_free_buffer(struct page *page, size_t size) > } > > #ifdef CONFIG_MMU > - > -#define CONSISTENT_OFFSET(x) (((unsigned long)(x) - consistent_base) >> PAGE_SHIFT) > -#define CONSISTENT_PTE_INDEX(x) (((unsigned long)(x) - consistent_base) >> PMD_SHIFT) > - > -/* > - * These are the page tables (2MB each) covering uncached, DMA consistent allocations > - */ > -static pte_t **consistent_pte; > - > -#define DEFAULT_CONSISTENT_DMA_SIZE SZ_2M > - > -static unsigned long consistent_base = CONSISTENT_END - DEFAULT_CONSISTENT_DMA_SIZE; > - > -void __init init_consistent_dma_size(unsigned long size) > -{ > - unsigned long base = CONSISTENT_END - ALIGN(size, SZ_2M); > - > - BUG_ON(consistent_pte); /* Check we're called before DMA region init */ > - BUG_ON(base < VMALLOC_END); > - > - /* Grow region to accommodate specified size */ > - if (base < consistent_base) > - consistent_base = base; > -} > - > -#include "vmregion.h" > - > -static struct arm_vmregion_head consistent_head = { > - .vm_lock = __SPIN_LOCK_UNLOCKED(&consistent_head.vm_lock), > - .vm_list = LIST_HEAD_INIT(consistent_head.vm_list), > - .vm_end = CONSISTENT_END, > -}; > - > #ifdef CONFIG_HUGETLB_PAGE > #error ARM Coherent DMA allocator does not (yet) support huge TLB > #endif > > -/* > - * Initialise the consistent memory allocation. 
> - */ > -static int __init consistent_init(void) > -{ > - int ret = 0; > - pgd_t *pgd; > - pud_t *pud; > - pmd_t *pmd; > - pte_t *pte; > - int i = 0; > - unsigned long base = consistent_base; > - unsigned long num_ptes = (CONSISTENT_END - base) >> PMD_SHIFT; > - > - if (IS_ENABLED(CONFIG_CMA) && !IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)) > - return 0; > - > - consistent_pte = kmalloc(num_ptes * sizeof(pte_t), GFP_KERNEL); > - if (!consistent_pte) { > - pr_err("%s: no memory\n", __func__); > - return -ENOMEM; > - } > - > - pr_debug("DMA memory: 0x%08lx - 0x%08lx:\n", base, CONSISTENT_END); > - consistent_head.vm_start = base; > - > - do { > - pgd = pgd_offset(&init_mm, base); > - > - pud = pud_alloc(&init_mm, pgd, base); > - if (!pud) { > - pr_err("%s: no pud tables\n", __func__); > - ret = -ENOMEM; > - break; > - } > - > - pmd = pmd_alloc(&init_mm, pud, base); > - if (!pmd) { > - pr_err("%s: no pmd tables\n", __func__); > - ret = -ENOMEM; > - break; > - } > - WARN_ON(!pmd_none(*pmd)); > - > - pte = pte_alloc_kernel(pmd, base); > - if (!pte) { > - pr_err("%s: no pte tables\n", __func__); > - ret = -ENOMEM; > - break; > - } > - > - consistent_pte[i++] = pte; > - base += PMD_SIZE; > - } while (base < CONSISTENT_END); > - > - return ret; > -} > -core_initcall(consistent_init); > - > static void *__alloc_from_contiguous(struct device *dev, size_t size, > pgprot_t prot, struct page **ret_page); > > -static struct arm_vmregion_head coherent_head = { > - .vm_lock = __SPIN_LOCK_UNLOCKED(&coherent_head.vm_lock), > - .vm_list = LIST_HEAD_INIT(coherent_head.vm_list), > +static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp, > + pgprot_t prot, struct page **ret_page, > + const void *caller); > + > +static void * > +__dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot, > + const void *caller) > +{ > + struct vm_struct *area; > + unsigned long addr; > + > + /* > + * DMA allocation can be mapped to user space, so lets > + * set VM_USERMAP flags too. > + */ > + area = get_vm_area_caller(size, VM_ARM_DMA_CONSISTENT | VM_USERMAP, > + caller); > + if (!area) > + return NULL; > + addr = (unsigned long)area->addr; > + area->phys_addr = __pfn_to_phys(page_to_pfn(page)); > + > + if (ioremap_page_range(addr, addr + size, area->phys_addr, prot)) { > + vunmap((void *)addr); > + return NULL; > + } > + return (void *)addr; > +} > + > +static void __dma_free_remap(void *cpu_addr, size_t size) > +{ > + struct vm_struct *area = find_vm_area(cpu_addr); > + if (!area || !(area->flags & (VM_ARM_DMA_CONSISTENT | VM_USERMAP))) { This isn't an exact check; it also passes when only one of the two flags is set. It should be: (area->flags & (VM_ARM_DMA_CONSISTENT | VM_USERMAP)) != (VM_ARM_DMA_CONSISTENT | VM_USERMAP) > + WARN(1, "%s: trying to free invalid coherent area: %p\n", > + __func__, cpu_addr); Do we need __func__ here? WARN will already show it in the call stack.
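Something like this untested sketch of the hunk above is what I have in mind, with both points applied (exact mask test, and __func__ dropped since WARN prints the backtrace anyway):

        static void __dma_free_remap(void *cpu_addr, size_t size)
        {
                struct vm_struct *area = find_vm_area(cpu_addr);

                /* require *both* flags; the OR-ed test above also passes
                 * for areas that carry only one of them */
                if (!area || (area->flags & (VM_ARM_DMA_CONSISTENT | VM_USERMAP)) !=
                                (VM_ARM_DMA_CONSISTENT | VM_USERMAP)) {
                        WARN(1, "trying to free invalid coherent area: %p\n",
                             cpu_addr);
                        return;
                }

                unmap_kernel_range((unsigned long)cpu_addr, size);
                vunmap(cpu_addr);
        }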
> + return; > + } > + unmap_kernel_range((unsigned long)cpu_addr, size); > + vunmap(cpu_addr); > +} > + > +struct dma_pool { > + size_t size; > + spinlock_t lock; > + unsigned long *bitmap; > + unsigned long nr_pages; > + void *vaddr; > + struct page *page; > }; > > -static size_t coherent_pool_size = DEFAULT_CONSISTENT_DMA_SIZE / 8; > +static struct dma_pool atomic_pool = { > + .size = SZ_256K, > +}; > > static int __init early_coherent_pool(char *p) > { > - coherent_pool_size = memparse(p, &p); > + atomic_pool.size = memparse(p, &p); > return 0; > } > early_param("coherent_pool", early_coherent_pool); > @@ -333,32 +289,45 @@ early_param("coherent_pool", early_coherent_pool); > /* > * Initialise the coherent pool for atomic allocations. > */ > -static int __init coherent_init(void) > +static int __init atomic_pool_init(void) > { > + struct dma_pool *pool = &atomic_pool; > pgprot_t prot = pgprot_dmacoherent(pgprot_kernel); > - size_t size = coherent_pool_size; > + unsigned long nr_pages = pool->size >> PAGE_SHIFT; > + unsigned long *bitmap; > struct page *page; > void *ptr; > + int bitmap_size = BITS_TO_LONGS(nr_pages) * sizeof(long); > > - if (!IS_ENABLED(CONFIG_CMA)) > - return 0; > + bitmap = kzalloc(bitmap_size, GFP_KERNEL); > + if (!bitmap) > + goto no_bitmap; > > - ptr = __alloc_from_contiguous(NULL, size, prot, &page); > + if (IS_ENABLED(CONFIG_CMA)) > + ptr = __alloc_from_contiguous(NULL, pool->size, prot, &page); > + else > + ptr = __alloc_remap_buffer(NULL, pool->size, GFP_KERNEL, prot, > + &page, NULL); > if (ptr) { > - coherent_head.vm_start = (unsigned long) ptr; > - coherent_head.vm_end = (unsigned long) ptr + size; > - printk(KERN_INFO "DMA: preallocated %u KiB pool for atomic coherent allocations\n", > - (unsigned)size / 1024); > + spin_lock_init(&pool->lock); > + pool->vaddr = ptr; > + pool->page = page; > + pool->bitmap = bitmap; > + pool->nr_pages = nr_pages; > + pr_info("DMA: preallocated %u KiB pool for atomic coherent allocations\n", > + (unsigned)pool->size / 1024); > return 0; > } > - printk(KERN_ERR "DMA: failed to allocate %u KiB pool for atomic coherent allocation\n", > - (unsigned)size / 1024); > + kfree(bitmap); > +no_bitmap: > + pr_err("DMA: failed to allocate %u KiB pool for atomic coherent allocation\n", > + (unsigned)pool->size / 1024); > return -ENOMEM; > } > /* > * CMA is activated by core_initcall, so we must be called after it. > */ > -postcore_initcall(coherent_init); > +postcore_initcall(atomic_pool_init); > > struct dma_contig_early_reserve { > phys_addr_t base; > @@ -406,112 +375,6 @@ void __init dma_contiguous_remap(void) > } > } > > -static void * > -__dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot, > - const void *caller) > -{ > - struct arm_vmregion *c; > - size_t align; > - int bit; > - > - if (!consistent_pte) { > - pr_err("%s: not initialised\n", __func__); > - dump_stack(); > - return NULL; > - } > - > - /* > - * Align the virtual region allocation - maximum alignment is > - * a section size, minimum is a page size. This helps reduce > - * fragmentation of the DMA space, and also prevents allocations > - * smaller than a section from crossing a section boundary. > - */ > - bit = fls(size - 1); > - if (bit > SECTION_SHIFT) > - bit = SECTION_SHIFT; > - align = 1 << bit; > - > - /* > - * Allocate a virtual address in the consistent mapping region. 
> - */ > - c = arm_vmregion_alloc(&consistent_head, align, size, > - gfp & ~(__GFP_DMA | __GFP_HIGHMEM), caller); > - if (c) { > - pte_t *pte; > - int idx = CONSISTENT_PTE_INDEX(c->vm_start); > - u32 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1); > - > - pte = consistent_pte[idx] + off; > - c->priv = page; > - > - do { > - BUG_ON(!pte_none(*pte)); > - > - set_pte_ext(pte, mk_pte(page, prot), 0); > - page++; > - pte++; > - off++; > - if (off >= PTRS_PER_PTE) { > - off = 0; > - pte = consistent_pte[++idx]; > - } > - } while (size -= PAGE_SIZE); > - > - dsb(); > - > - return (void *)c->vm_start; > - } > - return NULL; > -} > - > -static void __dma_free_remap(void *cpu_addr, size_t size) > -{ > - struct arm_vmregion *c; > - unsigned long addr; > - pte_t *ptep; > - int idx; > - u32 off; > - > - c = arm_vmregion_find_remove(&consistent_head, (unsigned long)cpu_addr); > - if (!c) { > - pr_err("%s: trying to free invalid coherent area: %p\n", > - __func__, cpu_addr); > - dump_stack(); > - return; > - } > - > - if ((c->vm_end - c->vm_start) != size) { > - pr_err("%s: freeing wrong coherent size (%ld != %d)\n", > - __func__, c->vm_end - c->vm_start, size); > - dump_stack(); > - size = c->vm_end - c->vm_start; > - } > - > - idx = CONSISTENT_PTE_INDEX(c->vm_start); > - off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1); > - ptep = consistent_pte[idx] + off; > - addr = c->vm_start; > - do { > - pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep); > - > - ptep++; > - addr += PAGE_SIZE; > - off++; > - if (off >= PTRS_PER_PTE) { > - off = 0; > - ptep = consistent_pte[++idx]; > - } > - > - if (pte_none(pte) || !pte_present(pte)) > - pr_crit("%s: bad page in kernel page table\n", > - __func__); > - } while (size -= PAGE_SIZE); > - > - flush_tlb_kernel_range(c->vm_start, c->vm_end); > - > - arm_vmregion_free(&consistent_head, c); > -} > - > static int __dma_update_pte(pte_t *pte, pgtable_t token, unsigned long addr, > void *data) > { > @@ -552,16 +415,17 @@ static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp, > return ptr; > } > > -static void *__alloc_from_pool(struct device *dev, size_t size, > - struct page **ret_page, const void *caller) > +static void *__alloc_from_pool(size_t size, struct page **ret_page) > { > - struct arm_vmregion *c; > + struct dma_pool *pool = &atomic_pool; > + unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; > + unsigned int pageno; > + unsigned long flags; > + void *ptr = NULL; > size_t align; > > - if (!coherent_head.vm_start) { > - printk(KERN_ERR "%s: coherent pool not initialised!\n", > - __func__); > - dump_stack(); > + if (!pool->vaddr) { > + WARN(1, "%s: coherent pool not initialised!\n", __func__); Ditto. > return NULL; > } > > @@ -571,35 +435,41 @@ static void *__alloc_from_pool(struct device *dev, size_t size, > * size. This helps reduce fragmentation of the DMA space. 
> */ > align = PAGE_SIZE << get_order(size); > - c = arm_vmregion_alloc(&coherent_head, align, size, 0, caller); > - if (c) { > - void *ptr = (void *)c->vm_start; > - struct page *page = virt_to_page(ptr); > - *ret_page = page; > - return ptr; > + > + spin_lock_irqsave(&pool->lock, flags); > + pageno = bitmap_find_next_zero_area(pool->bitmap, pool->nr_pages, > + 0, count, (1 << align) - 1); > + if (pageno < pool->nr_pages) { > + bitmap_set(pool->bitmap, pageno, count); > + ptr = pool->vaddr + PAGE_SIZE * pageno; > + *ret_page = pool->page + pageno; > } > - return NULL; > + spin_unlock_irqrestore(&pool->lock, flags); > + > + return ptr; > } > > -static int __free_from_pool(void *cpu_addr, size_t size) > +static int __free_from_pool(void *start, size_t size) > { > - unsigned long start = (unsigned long)cpu_addr; > - unsigned long end = start + size; > - struct arm_vmregion *c; > + struct dma_pool *pool = &atomic_pool; > + unsigned long pageno, count; > + unsigned long flags; > > - if (start < coherent_head.vm_start || end > coherent_head.vm_end) > + if (start < pool->vaddr || start > pool->vaddr + pool->size) > return 0; > > - c = arm_vmregion_find_remove(&coherent_head, (unsigned long)start); > - > - if ((c->vm_end - c->vm_start) != size) { > - printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n", > - __func__, c->vm_end - c->vm_start, size); > - dump_stack(); > - size = c->vm_end - c->vm_start; > + if (start + size > pool->vaddr + pool->size) { > + WARN(1, "%s: freeing wrong coherent size from pool\n", __func__); Ditto > + return 0; > } > > - arm_vmregion_free(&coherent_head, c); > + pageno = (start - pool->vaddr) >> PAGE_SHIFT; > + count = size >> PAGE_SHIFT; > + > + spin_lock_irqsave(&pool->lock, flags); > + bitmap_clear(pool->bitmap, pageno, count); > + spin_unlock_irqrestore(&pool->lock, flags); > + > return 1; > } > > @@ -644,7 +514,7 @@ static inline pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot) > > #define __get_dma_pgprot(attrs, prot) __pgprot(0) > #define __alloc_remap_buffer(dev, size, gfp, prot, ret, c) NULL > -#define __alloc_from_pool(dev, size, ret_page, c) NULL > +#define __alloc_from_pool(size, ret_page) NULL > #define __alloc_from_contiguous(dev, size, prot, ret) NULL > #define __free_from_pool(cpu_addr, size) 0 > #define __free_from_contiguous(dev, page, size) do { } while (0) > @@ -702,10 +572,10 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, > > if (arch_is_coherent() || nommu()) > addr = __alloc_simple_buffer(dev, size, gfp, &page); > + else if (gfp & GFP_ATOMIC) > + addr = __alloc_from_pool(size, &page); > else if (!IS_ENABLED(CONFIG_CMA)) > addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, caller); > - else if (gfp & GFP_ATOMIC) > - addr = __alloc_from_pool(dev, size, &page, caller); > else > addr = __alloc_from_contiguous(dev, size, prot, &page); > > @@ -741,16 +611,22 @@ int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma, > { > int ret = -ENXIO; > #ifdef CONFIG_MMU > + unsigned long user_count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; > + unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; > unsigned long pfn = dma_to_pfn(dev, dma_addr); > + unsigned long off = vma->vm_pgoff; > + > vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot); > > if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret)) > return ret; > > - ret = remap_pfn_range(vma, vma->vm_start, > - pfn + vma->vm_pgoff, > - vma->vm_end - vma->vm_start, > - vma->vm_page_prot); > + if (off < count && 
user_count <= (count - off)) { > + ret = remap_pfn_range(vma, vma->vm_start, > + pfn + off, > + user_count << PAGE_SHIFT, > + vma->vm_page_prot); > + } What's this? How does this piece relate to this patch? I can't see the connection. A nitpick, too: the naming is rather awkward; IMHO nr_vma_pages and nr_pages would be better (see the sketch at the end of this mail). Anyway, is it a bug fix or a cleanup? I think it should be a separate patch, unless I've missed something. > #endif /* CONFIG_MMU */ > > return ret; > @@ -998,9 +874,6 @@ static int arm_dma_set_mask(struct device *dev, u64 dma_mask) > > static int __init dma_debug_do_init(void) > { > -#ifdef CONFIG_MMU > - arm_vmregion_create_proc("dma-mappings", &consistent_head); > -#endif > dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); > return 0; > } > @@ -1117,61 +990,32 @@ static int __iommu_free_buffer(struct device *dev, struct page **pages, size_t s > * Create a CPU mapping for a specified pages > */ > static void * > -__iommu_alloc_remap(struct page **pages, size_t size, gfp_t gfp, pgprot_t prot) > +__iommu_alloc_remap(struct page **pages, size_t size, gfp_t gfp, pgprot_t prot, > + const void *caller) > { > - struct arm_vmregion *c; > - size_t align; > - size_t count = size >> PAGE_SHIFT; > - int bit; > + unsigned int i, nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; > + struct vm_struct *area; > + unsigned long p; > > - if (!consistent_pte[0]) { > - pr_err("%s: not initialised\n", __func__); > - dump_stack(); > + area = get_vm_area_caller(size, VM_ARM_DMA_CONSISTENT | VM_USERMAP, > + caller); > + if (!area) > return NULL; > - } > - > - /* > - * Align the virtual region allocation - maximum alignment is > - * a section size, minimum is a page size. This helps reduce > - * fragmentation of the DMA space, and also prevents allocations > - * smaller than a section from crossing a section boundary. > - */ > - bit = fls(size - 1); > - if (bit > SECTION_SHIFT) > - bit = SECTION_SHIFT; > - align = 1 << bit; > - > - /* > - * Allocate a virtual address in the consistent mapping region.
> - */ > - c = arm_vmregion_alloc(&consistent_head, align, size, > - gfp & ~(__GFP_DMA | __GFP_HIGHMEM), NULL); > - if (c) { > - pte_t *pte; > - int idx = CONSISTENT_PTE_INDEX(c->vm_start); > - int i = 0; > - u32 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1); > - > - pte = consistent_pte[idx] + off; > - c->priv = pages; > - > - do { > - BUG_ON(!pte_none(*pte)); > - > - set_pte_ext(pte, mk_pte(pages[i], prot), 0); > - pte++; > - off++; > - i++; > - if (off >= PTRS_PER_PTE) { > - off = 0; > - pte = consistent_pte[++idx]; > - } > - } while (i < count); > > - dsb(); > + area->pages = pages; > + area->nr_pages = nr_pages; > + p = (unsigned long)area->addr; > > - return (void *)c->vm_start; > + for (i = 0; i < nr_pages; i++) { > + phys_addr_t phys = __pfn_to_phys(page_to_pfn(pages[i])); > + if (ioremap_page_range(p, p + PAGE_SIZE, phys, prot)) > + goto err; > + p += PAGE_SIZE; > } > + return area->addr; > +err: > + unmap_kernel_range((unsigned long)area->addr, size); > + vunmap(area->addr); > return NULL; > } > > @@ -1230,6 +1074,16 @@ static int __iommu_remove_mapping(struct device *dev, dma_addr_t iova, size_t si > return 0; > } > > +static struct page **__iommu_get_pages(void *cpu_addr) > +{ > + struct vm_struct *area; > + > + area = find_vm_area(cpu_addr); > + if (area && (area->flags & VM_ARM_DMA_CONSISTENT)) > + return area->pages; > + return NULL; > +} > + > static void *arm_iommu_alloc_attrs(struct device *dev, size_t size, > dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs) > { > @@ -1248,7 +1102,8 @@ static void *arm_iommu_alloc_attrs(struct device *dev, size_t size, > if (*handle == DMA_ERROR_CODE) > goto err_buffer; > > - addr = __iommu_alloc_remap(pages, size, gfp, prot); > + addr = __iommu_alloc_remap(pages, size, gfp, prot, > + __builtin_return_address(0)); > if (!addr) > goto err_mapping; > > @@ -1265,31 +1120,25 @@ static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma, > void *cpu_addr, dma_addr_t dma_addr, size_t size, > struct dma_attrs *attrs) > { > - struct arm_vmregion *c; > + unsigned long uaddr = vma->vm_start; > + unsigned long usize = vma->vm_end - vma->vm_start; > + struct page **pages = __iommu_get_pages(cpu_addr); > > vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot); > - c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr); > > - if (c) { > - struct page **pages = c->priv; > + if (!pages) > + return -ENXIO; > > - unsigned long uaddr = vma->vm_start; > - unsigned long usize = vma->vm_end - vma->vm_start; > - int i = 0; > + do { > + int ret = vm_insert_page(vma, uaddr, *pages++); > + if (ret) { > + pr_err("Remapping memory failed: %d\n", ret); > + return ret; > + } > + uaddr += PAGE_SIZE; > + usize -= PAGE_SIZE; > + } while (usize > 0); > > - do { > - int ret; > - > - ret = vm_insert_page(vma, uaddr, pages[i++]); > - if (ret) { > - pr_err("Remapping memory, error: %d\n", ret); > - return ret; > - } > - > - uaddr += PAGE_SIZE; > - usize -= PAGE_SIZE; > - } while (usize > 0); > - } > return 0; > } > > @@ -1300,16 +1149,20 @@ static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma, > void arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, > dma_addr_t handle, struct dma_attrs *attrs) > { > - struct arm_vmregion *c; > + struct page **pages = __iommu_get_pages(cpu_addr); > size = PAGE_ALIGN(size); > > - c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr); > - if (c) { > - struct page **pages = c->priv; > - __dma_free_remap(cpu_addr, size); > - 
__iommu_remove_mapping(dev, handle, size); > - __iommu_free_buffer(dev, pages, size); > + if (!pages) { > + WARN(1, "%s: trying to free invalid coherent area: %p\n", > + __func__, cpu_addr); Remove __func__ here as well. > + return; > + } > + > + unmap_kernel_range((unsigned long)cpu_addr, size); > + vunmap(cpu_addr); > + > + __iommu_remove_mapping(dev, handle, size); > + __iommu_free_buffer(dev, pages, size); > } > > /* > diff --git a/arch/arm/mm/mm.h b/arch/arm/mm/mm.h > index 2e8a1ef..6776160 100644 > --- a/arch/arm/mm/mm.h > +++ b/arch/arm/mm/mm.h > @@ -59,6 +59,9 @@ extern void __flush_dcache_page(struct address_space *mapping, struct page *page > #define VM_ARM_MTYPE(mt) ((mt) << 20) > #define VM_ARM_MTYPE_MASK (0x1f << 20) > > +/* consistent regions used by dma_alloc_attrs() */ > +#define VM_ARM_DMA_CONSISTENT 0x20000000 > + > #endif > > #ifdef CONFIG_ZONE_DMA > diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h > index 2e28f4d..6071e91 100644 > --- a/include/linux/vmalloc.h > +++ b/include/linux/vmalloc.h > @@ -93,6 +93,7 @@ extern struct vm_struct *__get_vm_area_caller(unsigned long size, > unsigned long start, unsigned long end, > const void *caller); > extern struct vm_struct *remove_vm_area(const void *addr); > +extern struct vm_struct *find_vm_area(const void *addr); > > extern int map_vm_area(struct vm_struct *area, pgprot_t prot, > struct page ***pages); > diff --git a/mm/vmalloc.c b/mm/vmalloc.c > index 11308f0..65fc4dc 100644 > --- a/mm/vmalloc.c > +++ b/mm/vmalloc.c > @@ -1403,7 +1403,15 @@ struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags, > -1, GFP_KERNEL, caller); > } > > -static struct vm_struct *find_vm_area(const void *addr) > +/** > + * find_vm_area - find a continuous kernel virtual area > + * @addr: base address > + * > + * Search for the kernel VM area starting at @addr, and return it. > + * It is up to the caller to do all required locking to keep the returned > + * pointer valid. > + */ > +struct vm_struct *find_vm_area(const void *addr) > { > struct vmap_area *va; > > -- > 1.7.1.569.g6f426
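Coming back to the arm_dma_mmap() nitpick above, here is an untested sketch of that bounds check with the naming I have in mind (nr_vma_pages and nr_pages are only suggestions):

        unsigned long nr_vma_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
        unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
        unsigned long pfn = dma_to_pfn(dev, dma_addr);
        unsigned long off = vma->vm_pgoff;

        /* reject mappings that start or end beyond the allocated buffer */
        if (off < nr_pages && nr_vma_pages <= (nr_pages - off)) {
                ret = remap_pfn_range(vma, vma->vm_start,
                                      pfn + off,
                                      nr_vma_pages << PAGE_SHIFT,
                                      vma->vm_page_prot);
        }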