diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 0fa47ddf4c4662..f7ac0e663976e5 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -597,7 +597,18 @@ placement constraint by the physical address range of memory allocations. A value of 0 disables CMA altogether. For more information, see - include/linux/dma-contiguous.h + kernel/dma/contiguous.c + + cma_pernuma=nn[MG] + [ARM64,KNL] + Sets the size of kernel per-numa memory area for + contiguous memory allocations. A value of 0 disables + per-numa CMA altogether. And If this option is not + specificed, the default value is 0. + With per-numa CMA enabled, DMA users on node nid will + first try to allocate buffer from the pernuma area + which is located in node nid, if the allocation fails, + they will fallback to the global default memory area. cmo_free_hint= [PPC] Format: { yes | no } Specify whether pages are marked as being inactive diff --git a/Documentation/core-api/dma-api.rst b/Documentation/core-api/dma-api.rst index 3b3abbbb4b9a6f..ea0413276ddb70 100644 --- a/Documentation/core-api/dma-api.rst +++ b/Documentation/core-api/dma-api.rst @@ -516,48 +516,56 @@ routines, e.g.::: } -Part II - Advanced dma usage ----------------------------- +Part II - Non-coherent DMA allocations +-------------------------------------- -Warning: These pieces of the DMA API should not be used in the -majority of cases, since they cater for unlikely corner cases that -don't belong in usual drivers. +These APIs allow to allocate pages in the kernel direct mapping that are +guaranteed to be DMA addressable. This means that unlike dma_alloc_coherent, +virt_to_page can be called on the resulting address, and the resulting +struct page can be used for everything a struct page is suitable for. -If you don't understand how cache line coherency works between a -processor and an I/O device, you should not be using this part of the -API at all. +If you don't understand how cache line coherency works between a processor and +an I/O device, you should not be using this part of the API. :: void * - dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle, - gfp_t flag, unsigned long attrs) + dma_alloc_noncoherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, enum dma_data_direction dir, + gfp_t gfp) -Identical to dma_alloc_coherent() except that when the -DMA_ATTR_NON_CONSISTENT flags is passed in the attrs argument, the -platform will choose to return either consistent or non-consistent memory -as it sees fit. By using this API, you are guaranteeing to the platform -that you have all the correct and necessary sync points for this memory -in the driver should it choose to return non-consistent memory. +This routine allocates a region of bytes of consistent memory. It +returns a pointer to the allocated region (in the processor's virtual address +space) or NULL if the allocation failed. The returned memory may or may not +be in the kernels direct mapping. Drivers must not call virt_to_page on +the returned memory region. -Note: where the platform can return consistent memory, it will -guarantee that the sync points become nops. +It also returns a which may be cast to an unsigned integer the +same width as the bus and given to the device as the DMA address base of +the region. + +The dir parameter specified if data is read and/or written by the device, +see dma_map_single() for details. + +The gfp parameter allows the caller to specify the ``GFP_`` flags (see +kmalloc()) for the allocation, but rejects flags used to specify a memory +zone such as GFP_DMA or GFP_HIGHMEM. -Warning: Handling non-consistent memory is a real pain. You should -only use this API if you positively know your driver will be -required to work on one of the rare (usually non-PCI) architectures -that simply cannot make consistent memory. +Before giving the memory to the device, dma_sync_single_for_device() needs +to be called, and before reading memory written by the device, +dma_sync_single_for_cpu(), just like for streaming DMA mappings that are +reused. :: void - dma_free_attrs(struct device *dev, size_t size, void *cpu_addr, - dma_addr_t dma_handle, unsigned long attrs) + dma_free_noncoherent(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t dma_handle, enum dma_data_direction dir) -Free memory allocated by the dma_alloc_attrs(). All common -parameters must be identical to those otherwise passed to dma_free_coherent, -and the attrs argument must be identical to the attrs passed to -dma_alloc_attrs(). +Free a region of memory previously allocated using dma_alloc_noncoherent(). +dev, size and dma_handle and dir must all be the same as those passed into +dma_alloc_noncoherent(). cpu_addr must be the virtual address returned by +the dma_alloc_noncoherent(). :: @@ -575,41 +583,6 @@ memory or doing partial flushes. into the width returned by this call. It will also always be a power of two for easy alignment. -:: - - void - dma_cache_sync(struct device *dev, void *vaddr, size_t size, - enum dma_data_direction direction) - -Do a partial sync of memory that was allocated by dma_alloc_attrs() with -the DMA_ATTR_NON_CONSISTENT flag starting at virtual address vaddr and -continuing on for size. Again, you *must* observe the cache line -boundaries when doing this. - -:: - - int - dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr, - dma_addr_t device_addr, size_t size); - -Declare region of memory to be handed out by dma_alloc_coherent() when -it's asked for coherent memory for this device. - -phys_addr is the CPU physical address to which the memory is currently -assigned (this will be ioremapped so the CPU can access the region). - -device_addr is the DMA address the device needs to be programmed -with to actually address this memory (this will be handed out as the -dma_addr_t in dma_alloc_coherent()). - -size is the size of the area (must be multiples of PAGE_SIZE). - -As a simplification for the platforms, only *one* such region of -memory may be declared per device. - -For reasons of efficiency, most platforms choose to track the declared -region only at the granularity of a page. For smaller allocations, -you should use the dma_pool() API. Part III - Debug drivers use of the DMA-API ------------------------------------------- diff --git a/Documentation/core-api/dma-attributes.rst b/Documentation/core-api/dma-attributes.rst index 29dcbe8826e85e..1887d92e8e9269 100644 --- a/Documentation/core-api/dma-attributes.rst +++ b/Documentation/core-api/dma-attributes.rst @@ -25,14 +25,6 @@ Since it is optional for platforms to implement DMA_ATTR_WRITE_COMBINE, those that do not will simply ignore the attribute and exhibit default behavior. -DMA_ATTR_NON_CONSISTENT ------------------------ - -DMA_ATTR_NON_CONSISTENT lets the platform to choose to return either -consistent or non-consistent memory as it sees fit. By using this API, -you are guaranteeing to the platform that you have all the correct and -necessary sync points for this memory in the driver. - DMA_ATTR_NO_KERNEL_MAPPING -------------------------- diff --git a/MAINTAINERS b/MAINTAINERS index 42fd3e3a252c28..0f39d1ca7770db 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5216,7 +5216,7 @@ T: git git://git.infradead.org/users/hch/dma-mapping.git F: include/asm-generic/dma-mapping.h F: include/linux/dma-direct.h F: include/linux/dma-mapping.h -F: include/linux/dma-noncoherent.h +F: include/linux/dma-map-ops.h F: kernel/dma/ DMA-BUF HEAPS FRAMEWORK diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c index 81037907268d5c..d84b19aa8e9da7 100644 --- a/arch/alpha/kernel/pci_iommu.c +++ b/arch/alpha/kernel/pci_iommu.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include @@ -141,12 +141,7 @@ iommu_arena_find_pages(struct device *dev, struct pci_iommu_arena *arena, unsigned long boundary_size; base = arena->dma_base >> PAGE_SHIFT; - if (dev) { - boundary_size = dma_get_seg_boundary(dev) + 1; - boundary_size >>= PAGE_SHIFT; - } else { - boundary_size = 1UL << (32 - PAGE_SHIFT); - } + boundary_size = dma_get_seg_boundary_nr_pages(dev, PAGE_SHIFT); /* Search forward for the first mask-aligned sequence of N free ptes */ ptes = arena->ptes; @@ -957,5 +952,7 @@ const struct dma_map_ops alpha_pci_ops = { .dma_supported = alpha_pci_supported, .mmap = dma_common_mmap, .get_sgtable = dma_common_get_sgtable, + .alloc_pages = dma_common_alloc_pages, + .free_pages = dma_common_free_pages, }; EXPORT_SYMBOL(alpha_pci_ops); diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c index e947572a521ec0..517988e60cfc4d 100644 --- a/arch/arc/mm/dma.c +++ b/arch/arc/mm/dma.c @@ -3,7 +3,7 @@ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) */ -#include +#include #include #include diff --git a/arch/arm/common/dmabounce.c b/arch/arm/common/dmabounce.c index f4b719bde76367..7996c04393d501 100644 --- a/arch/arm/common/dmabounce.c +++ b/arch/arm/common/dmabounce.c @@ -24,7 +24,8 @@ #include #include #include -#include +#include +#include #include #include #include diff --git a/arch/arm/include/asm/dma-contiguous.h b/arch/arm/include/asm/dma-contiguous.h deleted file mode 100644 index d785187a6f8ac1..00000000000000 --- a/arch/arm/include/asm/dma-contiguous.h +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef ASMARM_DMA_CONTIGUOUS_H -#define ASMARM_DMA_CONTIGUOUS_H - -#ifdef __KERNEL__ -#ifdef CONFIG_DMA_CMA - -#include - -void dma_contiguous_early_fixup(phys_addr_t base, unsigned long size); - -#endif -#endif - -#endif diff --git a/arch/arm/include/asm/dma-direct.h b/arch/arm/include/asm/dma-direct.h index 7c3001a6a775bf..77fcb7ee5ec907 100644 --- a/arch/arm/include/asm/dma-direct.h +++ b/arch/arm/include/asm/dma-direct.h @@ -2,13 +2,44 @@ #ifndef ASM_ARM_DMA_DIRECT_H #define ASM_ARM_DMA_DIRECT_H 1 -static inline dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) +#include + +/* + * dma_to_pfn/pfn_to_dma/virt_to_dma are architecture private + * functions used internally by the DMA-mapping API to provide DMA + * addresses. They must not be used by drivers. + */ +static inline dma_addr_t pfn_to_dma(struct device *dev, unsigned long pfn) +{ + if (dev && dev->dma_range_map) + pfn = PFN_DOWN(translate_phys_to_dma(dev, PFN_PHYS(pfn))); + return (dma_addr_t)__pfn_to_bus(pfn); +} + +static inline unsigned long dma_to_pfn(struct device *dev, dma_addr_t addr) +{ + unsigned long pfn = __bus_to_pfn(addr); + + if (dev && dev->dma_range_map) + pfn = PFN_DOWN(translate_dma_to_phys(dev, PFN_PHYS(pfn))); + return pfn; +} + +static inline dma_addr_t virt_to_dma(struct device *dev, void *addr) +{ + if (dev) + return pfn_to_dma(dev, virt_to_pfn(addr)); + + return (dma_addr_t)__virt_to_bus((unsigned long)(addr)); +} + +static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) { unsigned int offset = paddr & ~PAGE_MASK; return pfn_to_dma(dev, __phys_to_pfn(paddr)) + offset; } -static inline phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dev_addr) +static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dev_addr) { unsigned int offset = dev_addr & ~PAGE_MASK; return __pfn_to_phys(dma_to_pfn(dev, dev_addr)) + offset; diff --git a/arch/arm/include/asm/dma-iommu.h b/arch/arm/include/asm/dma-iommu.h index 86405cc81385c8..fe9ef6f79e9cfe 100644 --- a/arch/arm/include/asm/dma-iommu.h +++ b/arch/arm/include/asm/dma-iommu.h @@ -6,7 +6,6 @@ #include #include -#include #include struct dma_iommu_mapping { diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h index bdd80ddbca3451..77082246a5e121 100644 --- a/arch/arm/include/asm/dma-mapping.h +++ b/arch/arm/include/asm/dma-mapping.h @@ -6,9 +6,6 @@ #include #include -#include - -#include #include #include @@ -23,74 +20,6 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) return NULL; } -#ifdef __arch_page_to_dma -#error Please update to __arch_pfn_to_dma -#endif - -/* - * dma_to_pfn/pfn_to_dma/dma_to_virt/virt_to_dma are architecture private - * functions used internally by the DMA-mapping API to provide DMA - * addresses. They must not be used by drivers. - */ -#ifndef __arch_pfn_to_dma -static inline dma_addr_t pfn_to_dma(struct device *dev, unsigned long pfn) -{ - if (dev) - pfn -= dev->dma_pfn_offset; - return (dma_addr_t)__pfn_to_bus(pfn); -} - -static inline unsigned long dma_to_pfn(struct device *dev, dma_addr_t addr) -{ - unsigned long pfn = __bus_to_pfn(addr); - - if (dev) - pfn += dev->dma_pfn_offset; - - return pfn; -} - -static inline void *dma_to_virt(struct device *dev, dma_addr_t addr) -{ - if (dev) { - unsigned long pfn = dma_to_pfn(dev, addr); - - return phys_to_virt(__pfn_to_phys(pfn)); - } - - return (void *)__bus_to_virt((unsigned long)addr); -} - -static inline dma_addr_t virt_to_dma(struct device *dev, void *addr) -{ - if (dev) - return pfn_to_dma(dev, virt_to_pfn(addr)); - - return (dma_addr_t)__virt_to_bus((unsigned long)(addr)); -} - -#else -static inline dma_addr_t pfn_to_dma(struct device *dev, unsigned long pfn) -{ - return __arch_pfn_to_dma(dev, pfn); -} - -static inline unsigned long dma_to_pfn(struct device *dev, dma_addr_t addr) -{ - return __arch_dma_to_pfn(dev, addr); -} - -static inline void *dma_to_virt(struct device *dev, dma_addr_t addr) -{ - return __arch_dma_to_virt(dev, addr); -} - -static inline dma_addr_t virt_to_dma(struct device *dev, void *addr) -{ - return __arch_virt_to_dma(dev, addr); -} -#endif - /** * arm_dma_alloc - allocate consistent memory for DMA * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices diff --git a/arch/arm/mach-davinci/devices-da8xx.c b/arch/arm/mach-davinci/devices-da8xx.c index feb206bdf6e172..bb368938fc4921 100644 --- a/arch/arm/mach-davinci/devices-da8xx.c +++ b/arch/arm/mach-davinci/devices-da8xx.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include #include @@ -884,6 +884,7 @@ early_param("rproc_mem", early_rproc_mem); void __init da8xx_rproc_reserve_cma(void) { + struct cma *cma; int ret; if (!rproc_base || !rproc_size) { @@ -897,13 +898,16 @@ void __init da8xx_rproc_reserve_cma(void) pr_info("%s: reserving 0x%lx @ 0x%lx...\n", __func__, rproc_size, (unsigned long)rproc_base); - ret = dma_declare_contiguous(&da8xx_dsp.dev, rproc_size, rproc_base, 0); - if (ret) - pr_err("%s: dma_declare_contiguous failed %d\n", __func__, ret); - else - rproc_mem_inited = true; + ret = dma_contiguous_reserve_area(rproc_size, rproc_base, 0, &cma, + true); + if (ret) { + pr_err("%s: dma_contiguous_reserve_area failed %d\n", + __func__, ret); + return; + } + da8xx_dsp.dev.cma_area = cma; + rproc_mem_inited = true; } - #else void __init da8xx_rproc_reserve_cma(void) diff --git a/arch/arm/mach-highbank/highbank.c b/arch/arm/mach-highbank/highbank.c index 56bf29523c657c..db607955a7e451 100644 --- a/arch/arm/mach-highbank/highbank.c +++ b/arch/arm/mach-highbank/highbank.c @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm/mach-imx/mach-imx27_visstrim_m10.c b/arch/arm/mach-imx/mach-imx27_visstrim_m10.c index 3da4c0920198b6..a329e50928b649 100644 --- a/arch/arm/mach-imx/mach-imx27_visstrim_m10.c +++ b/arch/arm/mach-imx/mach-imx27_visstrim_m10.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm/mach-imx/mach-mx31moboard.c b/arch/arm/mach-imx/mach-mx31moboard.c index 96845a4eaf57ea..7f780ad2d45925 100644 --- a/arch/arm/mach-imx/mach-mx31moboard.c +++ b/arch/arm/mach-imx/mach-mx31moboard.c @@ -4,7 +4,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/arm/mach-ixp4xx/common.c b/arch/arm/mach-ixp4xx/common.c index 184262d660baf5..000f672a94c971 100644 --- a/arch/arm/mach-ixp4xx/common.c +++ b/arch/arm/mach-ixp4xx/common.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/arm/mach-keystone/keystone.c b/arch/arm/mach-keystone/keystone.c index 638808c4e12247..09a65c2dfd7327 100644 --- a/arch/arm/mach-keystone/keystone.c +++ b/arch/arm/mach-keystone/keystone.c @@ -8,6 +8,7 @@ */ #include #include +#include #include #include #include @@ -24,8 +25,7 @@ #include "keystone.h" -static unsigned long keystone_dma_pfn_offset __read_mostly; - +#ifdef CONFIG_ARM_LPAE static int keystone_platform_notifier(struct notifier_block *nb, unsigned long event, void *data) { @@ -38,9 +38,12 @@ static int keystone_platform_notifier(struct notifier_block *nb, return NOTIFY_BAD; if (!dev->of_node) { - dev->dma_pfn_offset = keystone_dma_pfn_offset; - dev_err(dev, "set dma_pfn_offset%08lx\n", - dev->dma_pfn_offset); + int ret = dma_direct_set_offset(dev, KEYSTONE_HIGH_PHYS_START, + KEYSTONE_LOW_PHYS_START, + KEYSTONE_HIGH_PHYS_SIZE); + dev_err(dev, "set dma_offset%08llx%s\n", + KEYSTONE_HIGH_PHYS_START - KEYSTONE_LOW_PHYS_START, + ret ? " failed" : ""); } return NOTIFY_OK; } @@ -48,14 +51,14 @@ static int keystone_platform_notifier(struct notifier_block *nb, static struct notifier_block platform_nb = { .notifier_call = keystone_platform_notifier, }; +#endif /* CONFIG_ARM_LPAE */ static void __init keystone_init(void) { - if (PHYS_OFFSET >= KEYSTONE_HIGH_PHYS_START) { - keystone_dma_pfn_offset = PFN_DOWN(KEYSTONE_HIGH_PHYS_START - - KEYSTONE_LOW_PHYS_START); +#ifdef CONFIG_ARM_LPAE + if (PHYS_OFFSET >= KEYSTONE_HIGH_PHYS_START) bus_register_notifier(&platform_bus_type, &platform_nb); - } +#endif keystone_pm_runtime_init(); } diff --git a/arch/arm/mach-mvebu/coherency.c b/arch/arm/mach-mvebu/coherency.c index 8f8748a0c84f44..49e3c8d20c2fa1 100644 --- a/arch/arm/mach-mvebu/coherency.c +++ b/arch/arm/mach-mvebu/coherency.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm/mach-omap1/include/mach/memory.h b/arch/arm/mach-omap1/include/mach/memory.h index 1142560e0078f5..36bc0000cb6ab8 100644 --- a/arch/arm/mach-omap1/include/mach/memory.h +++ b/arch/arm/mach-omap1/include/mach/memory.h @@ -14,42 +14,11 @@ * OMAP-1510 bus address is translated into a Local Bus address if the * OMAP bus type is lbus. We do the address translation based on the * device overriding the defaults used in the dma-mapping API. - * Note that the is_lbus_device() test is not very efficient on 1510 - * because of the strncmp(). */ -#if defined(CONFIG_ARCH_OMAP15XX) && !defined(__ASSEMBLER__) /* * OMAP-1510 Local Bus address offset */ #define OMAP1510_LB_OFFSET UL(0x30000000) -#define virt_to_lbus(x) ((x) - PAGE_OFFSET + OMAP1510_LB_OFFSET) -#define lbus_to_virt(x) ((x) - OMAP1510_LB_OFFSET + PAGE_OFFSET) -#define is_lbus_device(dev) (cpu_is_omap15xx() && dev && (strncmp(dev_name(dev), "ohci", 4) == 0)) - -#define __arch_pfn_to_dma(dev, pfn) \ - ({ dma_addr_t __dma = __pfn_to_phys(pfn); \ - if (is_lbus_device(dev)) \ - __dma = __dma - PHYS_OFFSET + OMAP1510_LB_OFFSET; \ - __dma; }) - -#define __arch_dma_to_pfn(dev, addr) \ - ({ dma_addr_t __dma = addr; \ - if (is_lbus_device(dev)) \ - __dma += PHYS_OFFSET - OMAP1510_LB_OFFSET; \ - __phys_to_pfn(__dma); \ - }) - -#define __arch_dma_to_virt(dev, addr) ({ (void *) (is_lbus_device(dev) ? \ - lbus_to_virt(addr) : \ - __phys_to_virt(addr)); }) - -#define __arch_virt_to_dma(dev, addr) ({ unsigned long __addr = (unsigned long)(addr); \ - (dma_addr_t) (is_lbus_device(dev) ? \ - virt_to_lbus(__addr) : \ - __virt_to_phys(__addr)); }) - -#endif /* CONFIG_ARCH_OMAP15XX */ - #endif diff --git a/arch/arm/mach-omap1/usb.c b/arch/arm/mach-omap1/usb.c index d8e9bbda8f7bdd..ba8566204ea9f4 100644 --- a/arch/arm/mach-omap1/usb.c +++ b/arch/arm/mach-omap1/usb.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -542,6 +543,25 @@ static u32 __init omap1_usb2_init(unsigned nwires, unsigned alt_pingroup) /* ULPD_APLL_CTRL */ #define APLL_NDPLL_SWITCH (1 << 0) +static int omap_1510_usb_ohci_notifier(struct notifier_block *nb, + unsigned long event, void *data) +{ + struct device *dev = data; + + if (event != BUS_NOTIFY_ADD_DEVICE) + return NOTIFY_DONE; + + if (strncmp(dev_name(dev), "ohci", 4) == 0 && + dma_direct_set_offset(dev, PHYS_OFFSET, OMAP1510_LB_OFFSET, + (u64)-1)) + WARN_ONCE(1, "failed to set DMA offset\n"); + return NOTIFY_OK; +} + +static struct notifier_block omap_1510_usb_ohci_nb = { + .notifier_call = omap_1510_usb_ohci_notifier, +}; + static void __init omap_1510_usb_init(struct omap_usb_config *config) { unsigned int val; @@ -600,6 +620,8 @@ static void __init omap_1510_usb_init(struct omap_usb_config *config) if (config->register_host) { int status; + bus_register_notifier(&platform_bus_type, + &omap_1510_usb_ohci_nb); ohci_device.dev.platform_data = config; status = platform_device_register(&ohci_device); if (status) diff --git a/arch/arm/mach-shmobile/setup-rcar-gen2.c b/arch/arm/mach-shmobile/setup-rcar-gen2.c index c42ff8c314c82d..e00f5b3b929362 100644 --- a/arch/arm/mach-shmobile/setup-rcar-gen2.c +++ b/arch/arm/mach-shmobile/setup-rcar-gen2.c @@ -9,7 +9,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/arm/mm/dma-mapping-nommu.c b/arch/arm/mm/dma-mapping-nommu.c index 287ef898a55e11..6bfd2b8845050b 100644 --- a/arch/arm/mm/dma-mapping-nommu.c +++ b/arch/arm/mm/dma-mapping-nommu.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -176,6 +177,8 @@ static void arm_nommu_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist const struct dma_map_ops arm_nommu_dma_ops = { .alloc = arm_nommu_dma_alloc, .free = arm_nommu_dma_free, + .alloc_pages = dma_direct_alloc_pages, + .free_pages = dma_direct_free_pages, .mmap = arm_nommu_dma_mmap, .map_page = arm_nommu_dma_map_page, .unmap_page = arm_nommu_dma_unmap_page, diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 8a8949174b1c06..c4b8df2ad32850 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -15,9 +15,7 @@ #include #include #include -#include -#include -#include +#include #include #include #include @@ -35,7 +33,6 @@ #include #include #include -#include #include #include "dma.h" @@ -199,6 +196,8 @@ static int arm_dma_supported(struct device *dev, u64 mask) const struct dma_map_ops arm_dma_ops = { .alloc = arm_dma_alloc, .free = arm_dma_free, + .alloc_pages = dma_direct_alloc_pages, + .free_pages = dma_direct_free_pages, .mmap = arm_dma_mmap, .get_sgtable = arm_dma_get_sgtable, .map_page = arm_dma_map_page, @@ -226,6 +225,8 @@ static int arm_coherent_dma_mmap(struct device *dev, struct vm_area_struct *vma, const struct dma_map_ops arm_coherent_dma_ops = { .alloc = arm_coherent_dma_alloc, .free = arm_coherent_dma_free, + .alloc_pages = dma_direct_alloc_pages, + .free_pages = dma_direct_free_pages, .mmap = arm_coherent_dma_mmap, .get_sgtable = arm_dma_get_sgtable, .map_page = arm_coherent_dma_map_page, diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 45f9d5ec23604e..d57112a276f5ac 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c index d3ef975a0965ad..467fa225c3d0ed 100644 --- a/arch/arm/xen/mm.c +++ b/arch/arm/xen/mm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only #include #include -#include +#include #include #include #include diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 6c45350e33aa5a..93e87b2875567e 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -6,7 +6,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index f0bf86d8162272..a53c1e0fb0179e 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -21,8 +21,7 @@ #include #include #include -#include -#include +#include #include #include #include @@ -429,6 +428,8 @@ void __init bootmem_init(void) arm64_hugetlb_cma_reserve(); #endif + dma_pernuma_cma_reserve(); + /* * sparse_init() tries to allocate memory from memblock, so must be * done after the fixed reservations diff --git a/arch/c6x/mm/dma-coherent.c b/arch/c6x/mm/dma-coherent.c index a5909091cb1424..03df07a831fc99 100644 --- a/arch/c6x/mm/dma-coherent.c +++ b/arch/c6x/mm/dma-coherent.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/csky/kernel/setup.c b/arch/csky/kernel/setup.c index 0481f4e34538ea..e4cab16056d606 100644 --- a/arch/csky/kernel/setup.c +++ b/arch/csky/kernel/setup.c @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/csky/mm/dma-mapping.c b/arch/csky/mm/dma-mapping.c index 8f6571ae27c867..c3a775a7e8f9d8 100644 --- a/arch/csky/mm/dma-mapping.c +++ b/arch/csky/mm/dma-mapping.c @@ -2,9 +2,7 @@ // Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd. #include -#include -#include -#include +#include #include #include #include diff --git a/arch/hexagon/kernel/dma.c b/arch/hexagon/kernel/dma.c index 25f388d9cfcc36..00b9a81075dd54 100644 --- a/arch/hexagon/kernel/dma.c +++ b/arch/hexagon/kernel/dma.c @@ -5,7 +5,7 @@ * Copyright (c) 2010-2012, The Linux Foundation. All rights reserved. */ -#include +#include #include #include #include diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 996c410f21521b..f11a8ebfe5c226 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -8,6 +8,7 @@ menu "Processor type and features" config IA64 bool + select ARCH_HAS_DMA_MARK_CLEAN select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO select ACPI @@ -32,8 +33,6 @@ config IA64 select TTY select HAVE_ARCH_TRACEHOOK select HAVE_VIRT_CPU_ACCOUNTING - select DMA_NONCOHERENT_MMAP - select ARCH_HAS_SYNC_DMA_FOR_CPU select VIRT_TO_BUS select GENERIC_IRQ_PROBE select GENERIC_PENDING_IRQ if SMP diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c index 656a4888c300b5..9148ddbf02e501 100644 --- a/arch/ia64/hp/common/sba_iommu.c +++ b/arch/ia64/hp/common/sba_iommu.c @@ -33,7 +33,7 @@ #include /* hweight64() */ #include #include -#include +#include #include #include @@ -485,8 +485,7 @@ sba_search_bitmap(struct ioc *ioc, struct device *dev, ASSERT(((unsigned long) ioc->res_hint & (sizeof(unsigned long) - 1UL)) == 0); ASSERT(res_ptr < res_end); - boundary_size = (unsigned long long)dma_get_seg_boundary(dev) + 1; - boundary_size = ALIGN(boundary_size, 1ULL << iovp_shift) >> iovp_shift; + boundary_size = dma_get_seg_boundary_nr_pages(dev, iovp_shift); BUG_ON(ioc->ibase & ~iovp_mask); shift = ioc->ibase >> iovp_shift; @@ -2071,6 +2070,8 @@ static const struct dma_map_ops sba_dma_ops = { .dma_supported = sba_dma_supported, .mmap = dma_common_mmap, .get_sgtable = dma_common_get_sgtable, + .alloc_pages = dma_common_alloc_pages, + .free_pages = dma_common_free_pages, }; static int __init diff --git a/arch/ia64/kernel/dma-mapping.c b/arch/ia64/kernel/dma-mapping.c index 09ef9ce9988d1f..cd0c166bfbc23e 100644 --- a/arch/ia64/kernel/dma-mapping.c +++ b/arch/ia64/kernel/dma-mapping.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -#include +#include #include /* Set this to 1 if there is a HW IOMMU in the system */ @@ -7,15 +7,3 @@ int iommu_detected __read_mostly; const struct dma_map_ops *dma_ops; EXPORT_SYMBOL(dma_ops); - -void *arch_dma_alloc(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) -{ - return dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs); -} - -void arch_dma_free(struct device *dev, size_t size, void *cpu_addr, - dma_addr_t dma_addr, unsigned long attrs) -{ - dma_direct_free_pages(dev, size, cpu_addr, dma_addr, attrs); -} diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 8e7b8c6c576eee..d8686bf3ae2f68 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -8,7 +8,7 @@ #include #include -#include +#include #include #include #include @@ -73,8 +73,7 @@ __ia64_sync_icache_dcache (pte_t pte) * DMA can be marked as "clean" so that lazy_mmu_prot_update() doesn't have to * flush them when they get mapped into an executable vm-area. */ -void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, - enum dma_data_direction dir) +void arch_dma_mark_clean(phys_addr_t paddr, size_t size) { unsigned long pfn = PHYS_PFN(paddr); diff --git a/arch/m68k/kernel/dma.c b/arch/m68k/kernel/dma.c index b1ca3522eccc2f..1c1b875fadc180 100644 --- a/arch/m68k/kernel/dma.c +++ b/arch/m68k/kernel/dma.c @@ -6,7 +6,7 @@ #undef DEBUG -#include +#include #include #include #include diff --git a/arch/microblaze/kernel/dma.c b/arch/microblaze/kernel/dma.c index d7bebd04247b72..04d091ade4172b 100644 --- a/arch/microblaze/kernel/dma.c +++ b/arch/microblaze/kernel/dma.c @@ -8,9 +8,8 @@ */ #include -#include +#include #include -#include #include #include #include diff --git a/arch/microblaze/mm/consistent.c b/arch/microblaze/mm/consistent.c index e09b66e43cb63f..81dffe43b18c1e 100644 --- a/arch/microblaze/mm/consistent.c +++ b/arch/microblaze/mm/consistent.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c index 0902c459c38524..45da639bd22ca6 100644 --- a/arch/microblaze/mm/init.c +++ b/arch/microblaze/mm/init.c @@ -7,7 +7,7 @@ * for more details. */ -#include +#include #include #include #include diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 440614dc9de296..c695d103bf6d48 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -1136,7 +1136,6 @@ config DMA_NONCOHERENT select ARCH_HAS_SYNC_DMA_FOR_DEVICE select ARCH_HAS_DMA_SET_UNCACHED select DMA_NONCOHERENT_MMAP - select DMA_NONCOHERENT_CACHE_SYNC select NEED_DMA_MAP_STATE config SYS_HAS_EARLY_PRINTK diff --git a/arch/mips/bmips/dma.c b/arch/mips/bmips/dma.c index df56bf4179e347..49061b870680b9 100644 --- a/arch/mips/bmips/dma.c +++ b/arch/mips/bmips/dma.c @@ -40,7 +40,7 @@ static struct bmips_dma_range *bmips_dma_ranges; #define FLUSH_RAC 0x100 -dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t pa) +dma_addr_t phys_to_dma(struct device *dev, phys_addr_t pa) { struct bmips_dma_range *r; @@ -52,7 +52,7 @@ dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t pa) return pa; } -phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dma_addr) +phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dma_addr) { struct bmips_dma_range *r; diff --git a/arch/mips/cavium-octeon/dma-octeon.c b/arch/mips/cavium-octeon/dma-octeon.c index ad1aecc4b40182..df70308db0e697 100644 --- a/arch/mips/cavium-octeon/dma-octeon.c +++ b/arch/mips/cavium-octeon/dma-octeon.c @@ -168,7 +168,7 @@ void __init octeon_pci_dma_init(void) } #endif /* CONFIG_PCI */ -dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) +dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) { #ifdef CONFIG_PCI if (dev && dev_is_pci(dev)) @@ -177,7 +177,7 @@ dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) return paddr; } -phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr) +phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) { #ifdef CONFIG_PCI if (dev && dev_is_pci(dev)) diff --git a/arch/mips/include/asm/dma-direct.h b/arch/mips/include/asm/dma-direct.h index 14e352651ce946..9a640118316c9d 100644 --- a/arch/mips/include/asm/dma-direct.h +++ b/arch/mips/include/asm/dma-direct.h @@ -2,7 +2,7 @@ #ifndef _MIPS_DMA_DIRECT_H #define _MIPS_DMA_DIRECT_H 1 -dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr); -phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr); +dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr); +phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr); #endif /* _MIPS_DMA_DIRECT_H */ diff --git a/arch/mips/include/asm/jazzdma.h b/arch/mips/include/asm/jazzdma.h index d13f940022d5f9..c831da7fa89803 100644 --- a/arch/mips/include/asm/jazzdma.h +++ b/arch/mips/include/asm/jazzdma.h @@ -10,8 +10,6 @@ */ extern unsigned long vdma_alloc(unsigned long paddr, unsigned long size); extern int vdma_free(unsigned long laddr); -extern int vdma_remap(unsigned long laddr, unsigned long paddr, - unsigned long size); extern unsigned long vdma_phys2log(unsigned long paddr); extern unsigned long vdma_log2phys(unsigned long laddr); extern void vdma_stats(void); /* for debugging only */ diff --git a/arch/mips/jazz/jazzdma.c b/arch/mips/jazz/jazzdma.c index 014773f0bfcd74..461457b289828a 100644 --- a/arch/mips/jazz/jazzdma.c +++ b/arch/mips/jazz/jazzdma.c @@ -16,8 +16,7 @@ #include #include #include -#include -#include +#include #include #include #include @@ -209,76 +208,6 @@ int vdma_free(unsigned long laddr) EXPORT_SYMBOL(vdma_free); -/* - * Map certain page(s) to another physical address. - * Caller must have allocated the page(s) before. - */ -int vdma_remap(unsigned long laddr, unsigned long paddr, unsigned long size) -{ - int first, pages; - - if (laddr > 0xffffff) { - if (vdma_debug) - printk - ("vdma_map: Invalid logical address: %08lx\n", - laddr); - return -EINVAL; /* invalid logical address */ - } - if (paddr > 0x1fffffff) { - if (vdma_debug) - printk - ("vdma_map: Invalid physical address: %08lx\n", - paddr); - return -EINVAL; /* invalid physical address */ - } - - pages = (((paddr & (VDMA_PAGESIZE - 1)) + size) >> 12) + 1; - first = laddr >> 12; - if (vdma_debug) - printk("vdma_remap: first=%x, pages=%x\n", first, pages); - if (first + pages > VDMA_PGTBL_ENTRIES) { - if (vdma_debug) - printk("vdma_alloc: Invalid size: %08lx\n", size); - return -EINVAL; - } - - paddr &= ~(VDMA_PAGESIZE - 1); - while (pages > 0 && first < VDMA_PGTBL_ENTRIES) { - if (pgtbl[first].owner != laddr) { - if (vdma_debug) - printk("Trying to remap other's pages.\n"); - return -EPERM; /* not owner */ - } - pgtbl[first].frame = paddr; - paddr += VDMA_PAGESIZE; - first++; - pages--; - } - - /* - * Update translation table - */ - r4030_write_reg32(JAZZ_R4030_TRSTBL_INV, 0); - - if (vdma_debug > 2) { - int i; - pages = (((paddr & (VDMA_PAGESIZE - 1)) + size) >> 12) + 1; - first = laddr >> 12; - printk("LADDR: "); - for (i = first; i < first + pages; i++) - printk("%08x ", i << 12); - printk("\nPADDR: "); - for (i = first; i < first + pages; i++) - printk("%08x ", pgtbl[i].frame); - printk("\nOWNER: "); - for (i = first; i < first + pages; i++) - printk("%08x ", pgtbl[i].owner); - printk("\n"); - } - - return 0; -} - /* * Translate a physical address to a logical address. * This will return the logical address of the first @@ -562,26 +491,34 @@ int vdma_get_enable(int channel) static void *jazz_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) { + struct page *page; void *ret; - ret = dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs); - if (!ret) - return NULL; + if (attrs & DMA_ATTR_NO_WARN) + gfp |= __GFP_NOWARN; - *dma_handle = vdma_alloc(virt_to_phys(ret), size); - if (*dma_handle == DMA_MAPPING_ERROR) { - dma_direct_free_pages(dev, size, ret, *dma_handle, attrs); + size = PAGE_ALIGN(size); + page = alloc_pages(gfp, get_order(size)); + if (!page) return NULL; - } - - return ret; + ret = page_address(page); + memset(ret, 0, size); + *dma_handle = vdma_alloc(virt_to_phys(ret), size); + if (*dma_handle == DMA_MAPPING_ERROR) + goto out_free_pages; + arch_dma_prep_coherent(page, size); + return (void *)(UNCAC_BASE + __pa(ret)); + +out_free_pages: + __free_pages(page, get_order(size)); + return NULL; } static void jazz_dma_free(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, unsigned long attrs) { vdma_free(dma_handle); - dma_direct_free_pages(dev, size, vaddr, dma_handle, attrs); + __free_pages(virt_to_page(vaddr), get_order(size)); } static dma_addr_t jazz_dma_map_page(struct device *dev, struct page *page, @@ -678,9 +615,9 @@ const struct dma_map_ops jazz_dma_ops = { .sync_single_for_device = jazz_dma_sync_single_for_device, .sync_sg_for_cpu = jazz_dma_sync_sg_for_cpu, .sync_sg_for_device = jazz_dma_sync_sg_for_device, - .dma_supported = dma_direct_supported, - .cache_sync = arch_dma_cache_sync, .mmap = dma_common_mmap, .get_sgtable = dma_common_get_sgtable, + .alloc_pages = dma_common_alloc_pages, + .free_pages = dma_common_free_pages, }; EXPORT_SYMBOL(jazz_dma_ops); diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index 335bd188b8b459..ee8636ccded295 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mips/loongson2ef/fuloong-2e/dma.c b/arch/mips/loongson2ef/fuloong-2e/dma.c index e122292bf6660a..cea167d8aba8db 100644 --- a/arch/mips/loongson2ef/fuloong-2e/dma.c +++ b/arch/mips/loongson2ef/fuloong-2e/dma.c @@ -1,12 +1,12 @@ // SPDX-License-Identifier: GPL-2.0 #include -dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) +dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) { return paddr | 0x80000000; } -phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dma_addr) +phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dma_addr) { return dma_addr & 0x7fffffff; } diff --git a/arch/mips/loongson2ef/lemote-2f/dma.c b/arch/mips/loongson2ef/lemote-2f/dma.c index abf0e39d7e4696..3c9e994563578c 100644 --- a/arch/mips/loongson2ef/lemote-2f/dma.c +++ b/arch/mips/loongson2ef/lemote-2f/dma.c @@ -1,12 +1,12 @@ // SPDX-License-Identifier: GPL-2.0 #include -dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) +dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) { return paddr | 0x80000000; } -phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dma_addr) +phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dma_addr) { if (dma_addr > 0x8fffffff) return dma_addr; diff --git a/arch/mips/loongson64/dma.c b/arch/mips/loongson64/dma.c index dbfe6e82fddd1c..364f2f27c8723f 100644 --- a/arch/mips/loongson64/dma.c +++ b/arch/mips/loongson64/dma.c @@ -4,7 +4,7 @@ #include #include -dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) +dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) { /* We extract 2bit node id (bit 44~47, only bit 44~45 used now) from * Loongson-3's 48bit address space and embed it into 40bit */ @@ -13,7 +13,7 @@ dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) return ((nid << 44) ^ paddr) | (nid << node_id_offset); } -phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr) +phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) { /* We extract 2bit node id (bit 44~47, only bit 44~45 used now) from * Loongson-3's 48bit address space and embed it into 40bit */ diff --git a/arch/mips/mm/dma-noncoherent.c b/arch/mips/mm/dma-noncoherent.c index 563c2c0d0c8193..38d3d9143b47fb 100644 --- a/arch/mips/mm/dma-noncoherent.c +++ b/arch/mips/mm/dma-noncoherent.c @@ -5,8 +5,7 @@ * swiped from i386, and cloned for MIPS by Geert, polished by Ralf. */ #include -#include -#include +#include #include #include @@ -55,22 +54,34 @@ void *arch_dma_set_uncached(void *addr, size_t size) return (void *)(__pa(addr) + UNCAC_BASE); } -static inline void dma_sync_virt(void *addr, size_t size, +static inline void dma_sync_virt_for_device(void *addr, size_t size, enum dma_data_direction dir) { switch (dir) { case DMA_TO_DEVICE: dma_cache_wback((unsigned long)addr, size); break; - case DMA_FROM_DEVICE: dma_cache_inv((unsigned long)addr, size); break; - case DMA_BIDIRECTIONAL: dma_cache_wback_inv((unsigned long)addr, size); break; + default: + BUG(); + } +} +static inline void dma_sync_virt_for_cpu(void *addr, size_t size, + enum dma_data_direction dir) +{ + switch (dir) { + case DMA_TO_DEVICE: + break; + case DMA_FROM_DEVICE: + case DMA_BIDIRECTIONAL: + dma_cache_inv((unsigned long)addr, size); + break; default: BUG(); } @@ -82,7 +93,7 @@ static inline void dma_sync_virt(void *addr, size_t size, * configured then the bulk of this loop gets optimized out. */ static inline void dma_sync_phys(phys_addr_t paddr, size_t size, - enum dma_data_direction dir) + enum dma_data_direction dir, bool for_device) { struct page *page = pfn_to_page(paddr >> PAGE_SHIFT); unsigned long offset = paddr & ~PAGE_MASK; @@ -90,18 +101,20 @@ static inline void dma_sync_phys(phys_addr_t paddr, size_t size, do { size_t len = left; + void *addr; if (PageHighMem(page)) { - void *addr; - if (offset + len > PAGE_SIZE) len = PAGE_SIZE - offset; + } + + addr = kmap_atomic(page); + if (for_device) + dma_sync_virt_for_device(addr + offset, len, dir); + else + dma_sync_virt_for_cpu(addr + offset, len, dir); + kunmap_atomic(addr); - addr = kmap_atomic(page); - dma_sync_virt(addr + offset, len, dir); - kunmap_atomic(addr); - } else - dma_sync_virt(page_address(page) + offset, size, dir); offset = 0; page++; left -= len; @@ -111,7 +124,7 @@ static inline void dma_sync_phys(phys_addr_t paddr, size_t size, void arch_sync_dma_for_device(phys_addr_t paddr, size_t size, enum dma_data_direction dir) { - dma_sync_phys(paddr, size, dir); + dma_sync_phys(paddr, size, dir, true); } #ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU @@ -119,18 +132,10 @@ void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, enum dma_data_direction dir) { if (cpu_needs_post_dma_flush()) - dma_sync_phys(paddr, size, dir); + dma_sync_phys(paddr, size, dir, false); } #endif -void arch_dma_cache_sync(struct device *dev, void *vaddr, size_t size, - enum dma_data_direction direction) -{ - BUG_ON(direction == DMA_NONE); - - dma_sync_virt(vaddr, size, direction); -} - #ifdef CONFIG_DMA_PERDEV_COHERENT void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, const struct iommu_ops *iommu, bool coherent) diff --git a/arch/mips/pci/pci-ar2315.c b/arch/mips/pci/pci-ar2315.c index 490953f515282a..cef4a47ab06311 100644 --- a/arch/mips/pci/pci-ar2315.c +++ b/arch/mips/pci/pci-ar2315.c @@ -170,12 +170,12 @@ static inline dma_addr_t ar2315_dev_offset(struct device *dev) return 0; } -dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) +dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) { return paddr + ar2315_dev_offset(dev); } -phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dma_addr) +phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dma_addr) { return dma_addr - ar2315_dev_offset(dev); } diff --git a/arch/mips/pci/pci-xtalk-bridge.c b/arch/mips/pci/pci-xtalk-bridge.c index 9b3cc775c55e05..50f7d42cca5a78 100644 --- a/arch/mips/pci/pci-xtalk-bridge.c +++ b/arch/mips/pci/pci-xtalk-bridge.c @@ -25,7 +25,7 @@ /* * Common phys<->dma mapping for platforms using pci xtalk bridge */ -dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) +dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) { struct pci_dev *pdev = to_pci_dev(dev); struct bridge_controller *bc = BRIDGE_CONTROLLER(pdev->bus); @@ -33,7 +33,7 @@ dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) return bc->baddr + paddr; } -phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dma_addr) +phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dma_addr) { return dma_addr & ~(0xffUL << 56); } diff --git a/arch/mips/sgi-ip32/ip32-dma.c b/arch/mips/sgi-ip32/ip32-dma.c index fa7b17cb53853e..20c6da9d76bc5e 100644 --- a/arch/mips/sgi-ip32/ip32-dma.c +++ b/arch/mips/sgi-ip32/ip32-dma.c @@ -18,7 +18,7 @@ #define RAM_OFFSET_MASK 0x3fffffffUL -dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) +dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) { dma_addr_t dma_addr = paddr & RAM_OFFSET_MASK; @@ -27,7 +27,7 @@ dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) return dma_addr; } -phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dma_addr) +phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dma_addr) { phys_addr_t paddr = dma_addr & RAM_OFFSET_MASK; diff --git a/arch/nds32/kernel/dma.c b/arch/nds32/kernel/dma.c index 69d762182d49bf..2ac8e6c82a61a5 100644 --- a/arch/nds32/kernel/dma.c +++ b/arch/nds32/kernel/dma.c @@ -3,7 +3,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/openrisc/kernel/dma.c b/arch/openrisc/kernel/dma.c index 345727638d52da..1b16d97e7da7f9 100644 --- a/arch/openrisc/kernel/dma.c +++ b/arch/openrisc/kernel/dma.c @@ -13,7 +13,7 @@ * DMA mapping callbacks... */ -#include +#include #include #include diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index cd4afe1e7a6c77..d9a50f7a666c02 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -195,7 +195,6 @@ config PA11 depends on PA7000 || PA7100LC || PA7200 || PA7300LC select ARCH_HAS_SYNC_DMA_FOR_CPU select ARCH_HAS_SYNC_DMA_FOR_DEVICE - select DMA_NONCOHERENT_CACHE_SYNC config PREFETCH def_bool y diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c index a5f3e50fe97619..80fa0650736ba7 100644 --- a/arch/parisc/kernel/drivers.c +++ b/arch/parisc/kernel/drivers.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c index 38c68e131bbe2a..36610a5c029fc2 100644 --- a/arch/parisc/kernel/pci-dma.c +++ b/arch/parisc/kernel/pci-dma.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include /* for DMA_CHUNK_SIZE */ @@ -454,9 +454,3 @@ void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, { flush_kernel_dcache_range((unsigned long)phys_to_virt(paddr), size); } - -void arch_dma_cache_sync(struct device *dev, void *vaddr, size_t size, - enum dma_data_direction direction) -{ - flush_kernel_dcache_range((unsigned long)vaddr, size); -} diff --git a/arch/powerpc/include/asm/dma-direct.h b/arch/powerpc/include/asm/dma-direct.h index abc154d784b078..128304cbee1d87 100644 --- a/arch/powerpc/include/asm/dma-direct.h +++ b/arch/powerpc/include/asm/dma-direct.h @@ -2,12 +2,12 @@ #ifndef ASM_POWERPC_DMA_DIRECT_H #define ASM_POWERPC_DMA_DIRECT_H 1 -static inline dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) +static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) { return paddr + dev->archdata.dma_offset; } -static inline phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr) +static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) { return daddr - dev->archdata.dma_offset; } diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index 5032f1593299ef..deef7c94d7b61d 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h index 63ed7e3b0ba3e4..6436f0b41539e3 100644 --- a/arch/powerpc/include/asm/pci.h +++ b/arch/powerpc/include/asm/pci.h @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c index 9053fc9d20c723..a1c74419401844 100644 --- a/arch/powerpc/kernel/dma-iommu.c +++ b/arch/powerpc/kernel/dma-iommu.c @@ -138,4 +138,6 @@ const struct dma_map_ops dma_iommu_ops = { .get_required_mask = dma_iommu_get_required_mask, .mmap = dma_common_mmap, .get_sgtable = dma_common_get_sgtable, + .alloc_pages = dma_common_alloc_pages, + .free_pages = dma_common_free_pages, }; diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 9704f3f76e63ea..5b69a6a72a0e21 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -172,7 +172,6 @@ static unsigned long iommu_range_alloc(struct device *dev, int largealloc = npages > 15; int pass = 0; unsigned long align_mask; - unsigned long boundary_size; unsigned long flags; unsigned int pool_nr; struct iommu_pool *pool; @@ -236,15 +235,9 @@ static unsigned long iommu_range_alloc(struct device *dev, } } - if (dev) - boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, - 1 << tbl->it_page_shift); - else - boundary_size = ALIGN(1UL << 32, 1 << tbl->it_page_shift); - /* 4GB boundary for iseries_hv_alloc and iseries_hv_map */ - n = iommu_area_alloc(tbl->it_map, limit, start, npages, tbl->it_offset, - boundary_size >> tbl->it_page_shift, align_mask); + dma_get_seg_boundary_nr_pages(dev, tbl->it_page_shift), + align_mask); if (n == -1) { if (likely(pass == 0)) { /* First try the pool from the start */ diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c index 5ab4f868e919b8..30260b5d146da3 100644 --- a/arch/powerpc/mm/dma-noncoherent.c +++ b/arch/powerpc/mm/dma-noncoherent.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c index 3542b7bd6a4689..c62aaa29a9d5ee 100644 --- a/arch/powerpc/platforms/ps3/system-bus.c +++ b/arch/powerpc/platforms/ps3/system-bus.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include @@ -696,6 +696,8 @@ static const struct dma_map_ops ps3_sb_dma_ops = { .unmap_page = ps3_unmap_page, .mmap = dma_common_mmap, .get_sgtable = dma_common_get_sgtable, + .alloc_pages = dma_common_alloc_pages, + .free_pages = dma_common_free_pages, }; static const struct dma_map_ops ps3_ioc0_dma_ops = { @@ -708,6 +710,8 @@ static const struct dma_map_ops ps3_ioc0_dma_ops = { .unmap_page = ps3_unmap_page, .mmap = dma_common_mmap, .get_sgtable = dma_common_get_sgtable, + .alloc_pages = dma_common_alloc_pages, + .free_pages = dma_common_free_pages, }; /** diff --git a/arch/powerpc/platforms/pseries/ibmebus.c b/arch/powerpc/platforms/pseries/ibmebus.c index a6f101c958e84b..8c6e509f696756 100644 --- a/arch/powerpc/platforms/pseries/ibmebus.c +++ b/arch/powerpc/platforms/pseries/ibmebus.c @@ -40,7 +40,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c index 0487b26f6f1af3..b2797cfe4e2b08 100644 --- a/arch/powerpc/platforms/pseries/vio.c +++ b/arch/powerpc/platforms/pseries/vio.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include @@ -608,6 +608,8 @@ static const struct dma_map_ops vio_dma_mapping_ops = { .get_required_mask = dma_iommu_get_required_mask, .mmap = dma_common_mmap, .get_sgtable = dma_common_get_sgtable, + .alloc_pages = dma_common_alloc_pages, + .free_pages = dma_common_free_pages, }; /** diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index d44e522c569baf..c64a95ae830f81 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -37,7 +37,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index 64b1399a73f04d..ebc9a49523aa31 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include #include @@ -261,13 +261,11 @@ static unsigned long __dma_alloc_iommu(struct device *dev, unsigned long start, int size) { struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); - unsigned long boundary_size; - boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, - PAGE_SIZE) >> PAGE_SHIFT; return iommu_area_alloc(zdev->iommu_bitmap, zdev->iommu_pages, start, size, zdev->start_dma >> PAGE_SHIFT, - boundary_size, 0); + dma_get_seg_boundary_nr_pages(dev, PAGE_SHIFT), + 0); } static dma_addr_t dma_alloc_address(struct device *dev, int size) @@ -670,6 +668,8 @@ const struct dma_map_ops s390_pci_dma_ops = { .unmap_page = s390_dma_unmap_pages, .mmap = dma_common_mmap, .get_sgtable = dma_common_get_sgtable, + .alloc_pages = dma_common_alloc_pages, + .free_pages = dma_common_free_pages, /* dma_supported is unconditionally true without a callback */ }; EXPORT_SYMBOL_GPL(s390_pci_dma_ops); diff --git a/arch/sh/boards/mach-ap325rxa/setup.c b/arch/sh/boards/mach-ap325rxa/setup.c index 665cad452798bb..bac8a058ebd7cd 100644 --- a/arch/sh/boards/mach-ap325rxa/setup.c +++ b/arch/sh/boards/mach-ap325rxa/setup.c @@ -13,6 +13,7 @@ #include +#include #include #include #include diff --git a/arch/sh/boards/mach-ecovec24/setup.c b/arch/sh/boards/mach-ecovec24/setup.c index dd427bac5cdeb7..bab91a99124e1f 100644 --- a/arch/sh/boards/mach-ecovec24/setup.c +++ b/arch/sh/boards/mach-ecovec24/setup.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include diff --git a/arch/sh/boards/mach-kfr2r09/setup.c b/arch/sh/boards/mach-kfr2r09/setup.c index 96538ba3aa323f..eeb5ce341efdd7 100644 --- a/arch/sh/boards/mach-kfr2r09/setup.c +++ b/arch/sh/boards/mach-kfr2r09/setup.c @@ -14,7 +14,6 @@ #include #include -#include #include #include #include @@ -33,6 +32,7 @@ #include #include #include +#include #include diff --git a/arch/sh/boards/mach-migor/setup.c b/arch/sh/boards/mach-migor/setup.c index 9ed369dad62df7..6703a2122c0d6b 100644 --- a/arch/sh/boards/mach-migor/setup.c +++ b/arch/sh/boards/mach-migor/setup.c @@ -5,7 +5,7 @@ * Copyright (C) 2008 Magnus Damm */ #include -#include +#include #include #include #include diff --git a/arch/sh/boards/mach-se/7724/setup.c b/arch/sh/boards/mach-se/7724/setup.c index 32f5dd9448894a..8d6541ba01865b 100644 --- a/arch/sh/boards/mach-se/7724/setup.c +++ b/arch/sh/boards/mach-se/7724/setup.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include diff --git a/arch/sh/drivers/pci/fixups-dreamcast.c b/arch/sh/drivers/pci/fixups-dreamcast.c index 7be8694c0d1310..41e4daee8f043f 100644 --- a/arch/sh/drivers/pci/fixups-dreamcast.c +++ b/arch/sh/drivers/pci/fixups-dreamcast.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/sh/drivers/pci/pci.c b/arch/sh/drivers/pci/pci.c index 6ab0b7377f6634..a3903304f33faa 100644 --- a/arch/sh/drivers/pci/pci.c +++ b/arch/sh/drivers/pci/pci.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/sh/drivers/pci/pcie-sh7786.c b/arch/sh/drivers/pci/pcie-sh7786.c index e0b568aaa7014c..4468289ab2cac7 100644 --- a/arch/sh/drivers/pci/pcie-sh7786.c +++ b/arch/sh/drivers/pci/pcie-sh7786.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -31,6 +32,8 @@ struct sh7786_pcie_port { static struct sh7786_pcie_port *sh7786_pcie_ports; static unsigned int nr_ports; static unsigned long dma_pfn_offset; +size_t memsize; +u64 memstart; static struct sh7786_pcie_hwops { int (*core_init)(void); @@ -301,7 +304,6 @@ static int __init pcie_init(struct sh7786_pcie_port *port) struct pci_channel *chan = port->hose; unsigned int data; phys_addr_t memstart, memend; - size_t memsize; int ret, i, win; /* Begin initialization */ @@ -368,8 +370,6 @@ static int __init pcie_init(struct sh7786_pcie_port *port) memstart = ALIGN_DOWN(memstart, memsize); memsize = roundup_pow_of_two(memend - memstart); - dma_pfn_offset = memstart >> PAGE_SHIFT; - /* * If there's more than 512MB of memory, we need to roll over to * LAR1/LAMR1. @@ -487,7 +487,8 @@ int pcibios_map_platform_irq(const struct pci_dev *pdev, u8 slot, u8 pin) void pcibios_bus_add_device(struct pci_dev *pdev) { - pdev->dev.dma_pfn_offset = dma_pfn_offset; + dma_direct_set_offset(&pdev->dev, __pa(memory_start), + __pa(memory_start) - memstart, memsize); } static int __init sh7786_pcie_core_init(void) diff --git a/arch/sh/kernel/dma-coherent.c b/arch/sh/kernel/dma-coherent.c index cd46a9825e3c59..6a44c0e7ba40ef 100644 --- a/arch/sh/kernel/dma-coherent.c +++ b/arch/sh/kernel/dma-coherent.c @@ -3,7 +3,7 @@ * Copyright (C) 2004 - 2007 Paul Mundt */ #include -#include +#include #include #include diff --git a/arch/sparc/kernel/iommu-common.c b/arch/sparc/kernel/iommu-common.c index 59cb1669132261..23ca75f092770e 100644 --- a/arch/sparc/kernel/iommu-common.c +++ b/arch/sparc/kernel/iommu-common.c @@ -166,13 +166,6 @@ unsigned long iommu_tbl_range_alloc(struct device *dev, } } - if (dev) - boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, - 1 << iommu->table_shift); - else - boundary_size = ALIGN(1ULL << 32, 1 << iommu->table_shift); - - boundary_size = boundary_size >> iommu->table_shift; /* * if the skip_span_boundary_check had been set during init, we set * things up so that iommu_is_span_boundary() merely checks if the @@ -181,6 +174,9 @@ unsigned long iommu_tbl_range_alloc(struct device *dev, if ((iommu->flags & IOMMU_NO_SPAN_BOUND) != 0) { shift = 0; boundary_size = iommu->poolsize * iommu->nr_pools; + } else { + boundary_size = dma_get_seg_boundary_nr_pages(dev, + iommu->table_shift); } n = iommu_area_alloc(iommu->map, limit, start, npages, shift, boundary_size, align_mask); diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c index 4ae7388b1bff47..a034f571d8695e 100644 --- a/arch/sparc/kernel/iommu.c +++ b/arch/sparc/kernel/iommu.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include #include @@ -472,8 +472,7 @@ static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist, outs->dma_length = 0; max_seg_size = dma_get_max_seg_size(dev); - seg_boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, - IO_PAGE_SIZE) >> IO_PAGE_SHIFT; + seg_boundary_size = dma_get_seg_boundary_nr_pages(dev, IO_PAGE_SHIFT); base_shift = iommu->tbl.table_map_base >> IO_PAGE_SHIFT; for_each_sg(sglist, s, nelems, i) { unsigned long paddr, npages, entry, out_entry = 0, slen; diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c index d6874c9b639f8d..8e1d72a1675949 100644 --- a/arch/sparc/kernel/ioport.c +++ b/arch/sparc/kernel/ioport.c @@ -38,7 +38,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c index 14b93c5564e357..9de57e88f7a177 100644 --- a/arch/sparc/kernel/pci_sun4v.c +++ b/arch/sparc/kernel/pci_sun4v.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -508,8 +509,7 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist, iommu_batch_start(dev, prot, ~0UL); max_seg_size = dma_get_max_seg_size(dev); - seg_boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, - IO_PAGE_SIZE) >> IO_PAGE_SHIFT; + seg_boundary_size = dma_get_seg_boundary_nr_pages(dev, IO_PAGE_SHIFT); mask = *dev->dma_mask; if (!iommu_use_atu(iommu, mask)) diff --git a/arch/sparc/mm/io-unit.c b/arch/sparc/mm/io-unit.c index 430a47a1b6aeff..bf3e6d2fe5d949 100644 --- a/arch/sparc/mm/io-unit.c +++ b/arch/sparc/mm/io-unit.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/sparc/mm/iommu.c b/arch/sparc/mm/iommu.c index 3a388b1c5d4bc9..0c0342e5b10d52 100644 --- a/arch/sparc/mm/iommu.c +++ b/arch/sparc/mm/iommu.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index fed67eafcaccc2..bb1654fe0ce74c 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -8,10 +8,8 @@ */ #include -#include #include #include -#include extern int iommu_merge; extern int panic_on_overflow; diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c index e89031e9c84761..9ac696487b134b 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -96,8 +97,7 @@ static unsigned long alloc_iommu(struct device *dev, int size, base_index = ALIGN(iommu_bus_base & dma_get_seg_boundary(dev), PAGE_SIZE) >> PAGE_SHIFT; - boundary_size = ALIGN((u64)dma_get_seg_boundary(dev) + 1, - PAGE_SIZE) >> PAGE_SHIFT; + boundary_size = dma_get_seg_boundary_nr_pages(dev, PAGE_SHIFT); spin_lock_irqsave(&iommu_bitmap_lock, flags); offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, next_bit, @@ -468,7 +468,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, { void *vaddr; - vaddr = dma_direct_alloc_pages(dev, size, dma_addr, flag, attrs); + vaddr = dma_direct_alloc(dev, size, dma_addr, flag, attrs); if (!vaddr || !force_iommu || dev->coherent_dma_mask <= DMA_BIT_MASK(24)) return vaddr; @@ -480,7 +480,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, goto out_free; return vaddr; out_free: - dma_direct_free_pages(dev, size, vaddr, *dma_addr, attrs); + dma_direct_free(dev, size, vaddr, *dma_addr, attrs); return NULL; } @@ -490,7 +490,7 @@ gart_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_addr, unsigned long attrs) { gart_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL, 0); - dma_direct_free_pages(dev, size, vaddr, dma_addr, attrs); + dma_direct_free(dev, size, vaddr, dma_addr, attrs); } static int no_agp; @@ -678,6 +678,8 @@ static const struct dma_map_ops gart_dma_ops = { .get_sgtable = dma_common_get_sgtable, .dma_supported = dma_direct_supported, .get_required_mask = dma_direct_get_required_mask, + .alloc_pages = dma_direct_alloc_pages, + .free_pages = dma_direct_free_pages, }; static void gart_iommu_shutdown(void) diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 5dcedad21dffa2..de234e7a8962eb 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 +#include #include -#include #include #include #include diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index b16caee53bea14..84f581c91db457 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -7,6 +7,7 @@ */ #include #include +#include #include #include #include @@ -20,6 +21,7 @@ #include #include #include +#include #include diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c index 11c0e80b9ed4e1..5701d5ba3df4ba 100644 --- a/arch/x86/pci/sta2x11-fixup.c +++ b/arch/x86/pci/sta2x11-fixup.c @@ -132,7 +132,7 @@ static void sta2x11_map_ep(struct pci_dev *pdev) struct sta2x11_instance *instance = sta2x11_pdev_to_instance(pdev); struct device *dev = &pdev->dev; u32 amba_base, max_amba_addr; - int i; + int i, ret; if (!instance) return; @@ -140,7 +140,9 @@ static void sta2x11_map_ep(struct pci_dev *pdev) pci_read_config_dword(pdev, AHB_BASE(0), &amba_base); max_amba_addr = amba_base + STA2X11_AMBA_SIZE - 1; - dev->dma_pfn_offset = PFN_DOWN(-amba_base); + ret = dma_direct_set_offset(dev, 0, amba_base, STA2X11_AMBA_SIZE); + if (ret) + dev_err(dev, "sta2x11: could not set DMA offset\n"); dev->bus_dma_limit = max_amba_addr; pci_set_consistent_dma_mask(pdev, max_amba_addr); diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c index 33293ce01d8db5..19ae3e4fe4e98e 100644 --- a/arch/x86/xen/pci-swiotlb-xen.c +++ b/arch/x86/xen/pci-swiotlb-xen.c @@ -2,7 +2,7 @@ /* Glue code to lib/swiotlb-xen.c */ -#include +#include #include #include diff --git a/arch/xtensa/kernel/pci-dma.c b/arch/xtensa/kernel/pci-dma.c index 17c4384f849558..94955caa4488c8 100644 --- a/arch/xtensa/kernel/pci-dma.c +++ b/arch/xtensa/kernel/pci-dma.c @@ -11,8 +11,7 @@ * Joe Taylor */ -#include -#include +#include #include #include #include diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c index ad9d59d93f39a1..c6fc83efee0c94 100644 --- a/arch/xtensa/mm/init.c +++ b/arch/xtensa/mm/init.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index 6f89c16f45f3a1..9929ff50c0c095 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -18,6 +18,7 @@ #include #include #include +#include #define IORT_TYPE_MASK(type) (1 << (type)) #define IORT_MSI_TYPE (1 << ACPI_IORT_NODE_ITS_GROUP) @@ -1178,8 +1179,9 @@ void iort_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size) *dma_addr = dmaaddr; *dma_size = size; - dev->dma_pfn_offset = PFN_DOWN(offset); - dev_dbg(dev, "dma_pfn_offset(%#08llx)\n", offset); + ret = dma_direct_set_offset(dev, dmaaddr + offset, dmaaddr, size); + + dev_dbg(dev, "dma_offset(%#08llx)%s\n", offset, ret ? " failed!" : ""); } static void __init acpi_iort_register_irq(int hwirq, const char *name, diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 684c726828e1cb..a896e5e87c935d 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/base/core.c b/drivers/base/core.c index 3b9404921da908..b919e6d01d9a01 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -1798,6 +1798,8 @@ static void device_release(struct kobject *kobj) */ devres_release_all(dev); + kfree(dev->dma_range_map); + if (dev->release) dev->release(dev); else if (dev->type && dev->type->release) diff --git a/drivers/base/dd.c b/drivers/base/dd.c index b52d69eb4e71b3..b42229b74fd696 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/dma-buf/heaps/cma_heap.c b/drivers/dma-buf/heaps/cma_heap.c index 626cf7fd033afd..e55384dc115ba2 100644 --- a/drivers/dma-buf/heaps/cma_heap.c +++ b/drivers/dma-buf/heaps/cma_heap.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c index 020cb15a4d8fcc..9811c40956e54d 100644 --- a/drivers/firewire/ohci.c +++ b/drivers/firewire/ohci.c @@ -674,17 +674,16 @@ static void ar_context_link_page(struct ar_context *ctx, unsigned int index) static void ar_context_release(struct ar_context *ctx) { + struct device *dev = ctx->ohci->card.device; unsigned int i; vunmap(ctx->buffer); - for (i = 0; i < AR_BUFFERS; i++) - if (ctx->pages[i]) { - dma_unmap_page(ctx->ohci->card.device, - ar_buffer_bus(ctx, i), - PAGE_SIZE, DMA_FROM_DEVICE); - __free_page(ctx->pages[i]); - } + for (i = 0; i < AR_BUFFERS; i++) { + if (ctx->pages[i]) + dma_free_pages(dev, PAGE_SIZE, ctx->pages[i], + ar_buffer_bus(ctx, i), DMA_FROM_DEVICE); + } } static void ar_context_abort(struct ar_context *ctx, const char *error_msg) @@ -970,6 +969,7 @@ static void ar_context_tasklet(unsigned long data) static int ar_context_init(struct ar_context *ctx, struct fw_ohci *ohci, unsigned int descriptors_offset, u32 regs) { + struct device *dev = ohci->card.device; unsigned int i; dma_addr_t dma_addr; struct page *pages[AR_BUFFERS + AR_WRAPAROUND_PAGES]; @@ -980,17 +980,13 @@ static int ar_context_init(struct ar_context *ctx, struct fw_ohci *ohci, tasklet_init(&ctx->tasklet, ar_context_tasklet, (unsigned long)ctx); for (i = 0; i < AR_BUFFERS; i++) { - ctx->pages[i] = alloc_page(GFP_KERNEL | GFP_DMA32); + ctx->pages[i] = dma_alloc_pages(dev, PAGE_SIZE, &dma_addr, + DMA_FROM_DEVICE, GFP_KERNEL); if (!ctx->pages[i]) goto out_of_memory; - dma_addr = dma_map_page(ohci->card.device, ctx->pages[i], - 0, PAGE_SIZE, DMA_FROM_DEVICE); - if (dma_mapping_error(ohci->card.device, dma_addr)) { - __free_page(ctx->pages[i]); - ctx->pages[i] = NULL; - goto out_of_memory; - } set_page_private(ctx->pages[i], dma_addr); + dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, + DMA_FROM_DEVICE); } for (i = 0; i < AR_BUFFERS; i++) diff --git a/drivers/gpu/drm/exynos/exynos_drm_dma.c b/drivers/gpu/drm/exynos/exynos_drm_dma.c index 5887f7f52f96e3..0644936afee26a 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_dma.c +++ b/drivers/gpu/drm/exynos/exynos_drm_dma.c @@ -5,7 +5,7 @@ // Author: Andrzej Hajda #include -#include +#include #include #include diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c index 1716a023bca032..7777f19c9d38ff 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gem.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c @@ -42,8 +42,6 @@ static int exynos_drm_alloc_buf(struct exynos_drm_gem *exynos_gem, bool kvmap) if (exynos_gem->flags & EXYNOS_BO_WC || !(exynos_gem->flags & EXYNOS_BO_CACHABLE)) attr |= DMA_ATTR_WRITE_COMBINE; - else - attr |= DMA_ATTR_NON_CONSISTENT; /* FBDev emulation requires kernel mapping */ if (!kvmap) diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index ec602113be78cd..04be4cfcccc185 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -4,6 +4,7 @@ * Author: Rob Clark */ +#include #include #include #include diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c index 985f2990ab0dda..13d4d7ac0697b4 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c @@ -594,8 +594,7 @@ gk20a_instmem_new(struct nvkm_device *device, int index, nvkm_info(&imem->base.subdev, "using IOMMU\n"); } else { - imem->attrs = DMA_ATTR_NON_CONSISTENT | - DMA_ATTR_WEAK_ORDERING | + imem->attrs = DMA_ATTR_WEAK_ORDERING | DMA_ATTR_WRITE_COMBINE; nvkm_info(&imem->base.subdev, "using DMA API\n"); diff --git a/drivers/gpu/drm/sun4i/sun4i_backend.c b/drivers/gpu/drm/sun4i/sun4i_backend.c index 2f26f85ef53843..77497b45f9a28d 100644 --- a/drivers/gpu/drm/sun4i/sun4i_backend.c +++ b/drivers/gpu/drm/sun4i/sun4i_backend.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -810,8 +811,13 @@ static int sun4i_backend_bind(struct device *dev, struct device *master, * because of an old DT, we need to set the DMA offset by hand * on our device since the RAM mapping is at 0 for the DMA bus, * unlike the CPU. + * + * XXX(hch): this has no business in a driver and needs to move + * to the device tree. */ - drm->dev->dma_pfn_offset = PHYS_PFN_OFFSET; + ret = dma_direct_set_offset(drm->dev, PHYS_OFFSET, 0, SZ_4G); + if (ret) + return ret; } backend->engine.node = dev->of_node; diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 4b1b02c80f556d..b9cf59443843b5 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include @@ -28,7 +28,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index cd6e3c70ebb31c..0cbcd3fc3e7e8d 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -10,9 +10,8 @@ #include #include -#include +#include #include -#include #include #include #include @@ -572,6 +571,7 @@ static struct page **__iommu_dma_alloc_pages(struct device *dev, * @size: Size of buffer in bytes * @dma_handle: Out argument for allocated DMA handle * @gfp: Allocation flags + * @prot: pgprot_t to use for the remapped mapping * @attrs: DMA attributes for this allocation * * If @size is less than PAGE_SIZE, then a full CPU page will be allocated, @@ -580,14 +580,14 @@ static struct page **__iommu_dma_alloc_pages(struct device *dev, * Return: Mapped virtual address, or NULL on failure. */ static void *iommu_dma_alloc_remap(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) + dma_addr_t *dma_handle, gfp_t gfp, pgprot_t prot, + unsigned long attrs) { struct iommu_domain *domain = iommu_get_dma_domain(dev); struct iommu_dma_cookie *cookie = domain->iova_cookie; struct iova_domain *iovad = &cookie->iovad; bool coherent = dev_is_dma_coherent(dev); int ioprot = dma_info_to_prot(DMA_BIDIRECTIONAL, coherent, attrs); - pgprot_t prot = dma_pgprot(dev, PAGE_KERNEL, attrs); unsigned int count, min_size, alloc_sizes = domain->pgsize_bitmap; struct page **pages; struct sg_table sgt; @@ -1030,8 +1030,10 @@ static void *iommu_dma_alloc(struct device *dev, size_t size, gfp |= __GFP_ZERO; if (IS_ENABLED(CONFIG_DMA_REMAP) && gfpflags_allow_blocking(gfp) && - !(attrs & DMA_ATTR_FORCE_CONTIGUOUS)) - return iommu_dma_alloc_remap(dev, size, handle, gfp, attrs); + !(attrs & DMA_ATTR_FORCE_CONTIGUOUS)) { + return iommu_dma_alloc_remap(dev, size, handle, gfp, + dma_pgprot(dev, PAGE_KERNEL, attrs), attrs); + } if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && !gfpflags_allow_blocking(gfp) && !coherent) @@ -1052,6 +1054,34 @@ static void *iommu_dma_alloc(struct device *dev, size_t size, return cpu_addr; } +#ifdef CONFIG_DMA_REMAP +static void *iommu_dma_alloc_noncoherent(struct device *dev, size_t size, + dma_addr_t *handle, enum dma_data_direction dir, gfp_t gfp) +{ + if (!gfpflags_allow_blocking(gfp)) { + struct page *page; + + page = dma_common_alloc_pages(dev, size, handle, dir, gfp); + if (!page) + return NULL; + return page_address(page); + } + + return iommu_dma_alloc_remap(dev, size, handle, gfp | __GFP_ZERO, + PAGE_KERNEL, 0); +} + +static void iommu_dma_free_noncoherent(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t handle, enum dma_data_direction dir) +{ + __iommu_dma_unmap(dev, handle, size); + __iommu_dma_free(dev, size, cpu_addr); +} +#else +#define iommu_dma_alloc_noncoherent NULL +#define iommu_dma_free_noncoherent NULL +#endif /* CONFIG_DMA_REMAP */ + static int iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs) @@ -1120,6 +1150,10 @@ static unsigned long iommu_dma_get_merge_boundary(struct device *dev) static const struct dma_map_ops iommu_dma_ops = { .alloc = iommu_dma_alloc, .free = iommu_dma_free, + .alloc_pages = dma_common_alloc_pages, + .free_pages = dma_common_free_pages, + .alloc_noncoherent = iommu_dma_alloc_noncoherent, + .free_noncoherent = iommu_dma_free_noncoherent, .mmap = iommu_dma_mmap, .get_sgtable = iommu_dma_get_sgtable, .map_page = iommu_dma_map_page, diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 2e2ba294bb152c..8651f6d4dfa032 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include @@ -37,7 +37,7 @@ #include #include #include -#include +#include #include #include #include @@ -3747,6 +3747,8 @@ static const struct dma_map_ops intel_dma_ops = { .dma_supported = dma_direct_supported, .mmap = dma_common_mmap, .get_sgtable = dma_common_get_sgtable, + .alloc_pages = dma_common_alloc_pages, + .free_pages = dma_common_free_pages, .get_required_mask = intel_get_required_mask, }; @@ -3814,7 +3816,7 @@ bounce_map_single(struct device *dev, phys_addr_t paddr, size_t size, */ if (!IS_ALIGNED(paddr | size, VTD_PAGE_SIZE)) { tlb_addr = swiotlb_tbl_map_single(dev, - __phys_to_dma(dev, io_tlb_start), + phys_to_dma_unencrypted(dev, io_tlb_start), paddr, size, aligned_size, dir, attrs); if (tlb_addr == DMA_MAPPING_ERROR) { goto swiotlb_error; @@ -4000,6 +4002,8 @@ static const struct dma_map_ops bounce_dma_ops = { .sync_sg_for_device = bounce_sync_sg_for_device, .map_resource = bounce_map_resource, .unmap_resource = bounce_unmap_resource, + .alloc_pages = dma_common_alloc_pages, + .free_pages = dma_common_free_pages, .dma_supported = dma_direct_supported, }; diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 534810b6be770e..a7a9bc08dcd115 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -728,11 +728,6 @@ arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg) if (cfg->oas > ARM_LPAE_MAX_ADDR_BITS) return NULL; - if (!selftest_running && cfg->iommu_dev->dma_pfn_offset) { - dev_err(cfg->iommu_dev, "Cannot accommodate DMA offset for IOMMU page tables\n"); - return NULL; - } - data = kmalloc(sizeof(*data), GFP_KERNEL); if (!data) return NULL; diff --git a/drivers/media/platform/exynos4-is/fimc-is.c b/drivers/media/platform/exynos4-is/fimc-is.c index 019bb47df915e7..32ab01e89196d2 100644 --- a/drivers/media/platform/exynos4-is/fimc-is.c +++ b/drivers/media/platform/exynos4-is/fimc-is.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/media/platform/sunxi/sun4i-csi/sun4i_csi.c b/drivers/media/platform/sunxi/sun4i-csi/sun4i_csi.c index d226ecadff8e52..eb15c8c725ca0b 100644 --- a/drivers/media/platform/sunxi/sun4i-csi/sun4i_csi.c +++ b/drivers/media/platform/sunxi/sun4i-csi/sun4i_csi.c @@ -7,6 +7,7 @@ */ #include +#include #include #include #include @@ -182,8 +183,14 @@ static int sun4i_csi_probe(struct platform_device *pdev) if (ret) return ret; } else { + /* + * XXX(hch): this has no business in a driver and needs to move + * to the device tree. + */ #ifdef PHYS_PFN_OFFSET - csi->dev->dma_pfn_offset = PHYS_PFN_OFFSET; + ret = dma_direct_set_offset(csi->dev, PHYS_OFFSET, 0, SZ_4G); + if (ret) + return ret; #endif } diff --git a/drivers/media/platform/sunxi/sun6i-csi/sun6i_csi.c b/drivers/media/platform/sunxi/sun6i-csi/sun6i_csi.c index 28e89340fed981..e69e14379fc6e4 100644 --- a/drivers/media/platform/sunxi/sun6i-csi/sun6i_csi.c +++ b/drivers/media/platform/sunxi/sun6i-csi/sun6i_csi.c @@ -899,8 +899,15 @@ static int sun6i_csi_probe(struct platform_device *pdev) return -ENOMEM; sdev->dev = &pdev->dev; - /* The DMA bus has the memory mapped at 0 */ - sdev->dev->dma_pfn_offset = PHYS_OFFSET >> PAGE_SHIFT; + /* + * The DMA bus has the memory mapped at 0. + * + * XXX(hch): this has no business in a driver and needs to move + * to the device tree. + */ + ret = dma_direct_set_offset(sdev->dev, PHYS_OFFSET, 0, SZ_4G); + if (ret) + return ret; ret = sun6i_csi_resource_request(sdev, pdev); if (ret) diff --git a/drivers/misc/mic/Kconfig b/drivers/misc/mic/Kconfig index b9bb086785db48..8a7c2c5711d5f4 100644 --- a/drivers/misc/mic/Kconfig +++ b/drivers/misc/mic/Kconfig @@ -35,6 +35,7 @@ config SCIF_BUS config VOP_BUS tristate "VOP Bus Driver" + depends on HAS_DMA select DMA_OPS help This option is selected by any driver which registers a diff --git a/drivers/misc/mic/bus/mic_bus.c b/drivers/misc/mic/bus/mic_bus.c index ed9a8351c3bfd1..a08cb29692a8fe 100644 --- a/drivers/misc/mic/bus/mic_bus.c +++ b/drivers/misc/mic/bus/mic_bus.c @@ -9,6 +9,7 @@ * This implementation is very similar to the the virtio bus driver * implementation @ drivers/virtio/virtio.c */ +#include #include #include #include diff --git a/drivers/misc/mic/bus/scif_bus.c b/drivers/misc/mic/bus/scif_bus.c index ae84109649d0f3..ad7c3604f1510f 100644 --- a/drivers/misc/mic/bus/scif_bus.c +++ b/drivers/misc/mic/bus/scif_bus.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include "scif_bus.h" diff --git a/drivers/misc/mic/bus/scif_bus.h b/drivers/misc/mic/bus/scif_bus.h index 642cd43bcabc6d..4981eb56f87979 100644 --- a/drivers/misc/mic/bus/scif_bus.h +++ b/drivers/misc/mic/bus/scif_bus.h @@ -12,7 +12,7 @@ * Everything a scif driver needs to work with any particular scif * hardware abstraction layer. */ -#include +#include #include #include "../common/mic_dev.h" diff --git a/drivers/misc/mic/bus/vop_bus.c b/drivers/misc/mic/bus/vop_bus.c index 3c865534868a0e..6935ddca1bd5b1 100644 --- a/drivers/misc/mic/bus/vop_bus.c +++ b/drivers/misc/mic/bus/vop_bus.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include "vop_bus.h" diff --git a/drivers/misc/mic/host/mic_boot.c b/drivers/misc/mic/host/mic_boot.c index fb5b3989753daf..8cb85b8b3e199b 100644 --- a/drivers/misc/mic/host/mic_boot.c +++ b/drivers/misc/mic/host/mic_boot.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include "../bus/scif_bus.h" diff --git a/drivers/net/ethernet/amd/au1000_eth.c b/drivers/net/ethernet/amd/au1000_eth.c index 75dbd221dc594b..19e195420e2434 100644 --- a/drivers/net/ethernet/amd/au1000_eth.c +++ b/drivers/net/ethernet/amd/au1000_eth.c @@ -1131,10 +1131,9 @@ static int au1000_probe(struct platform_device *pdev) /* Allocate the data buffers * Snooping works fine with eth on all au1xxx */ - aup->vaddr = (u32)dma_alloc_attrs(&pdev->dev, MAX_BUF_SIZE * + aup->vaddr = (u32)dma_alloc_coherent(&pdev->dev, MAX_BUF_SIZE * (NUM_TX_BUFFS + NUM_RX_BUFFS), - &aup->dma_addr, 0, - DMA_ATTR_NON_CONSISTENT); + &aup->dma_addr, 0); if (!aup->vaddr) { dev_err(&pdev->dev, "failed to allocate data buffers\n"); err = -ENOMEM; @@ -1310,9 +1309,8 @@ static int au1000_probe(struct platform_device *pdev) err_remap2: iounmap(aup->mac); err_remap1: - dma_free_attrs(&pdev->dev, MAX_BUF_SIZE * (NUM_TX_BUFFS + NUM_RX_BUFFS), - (void *)aup->vaddr, aup->dma_addr, - DMA_ATTR_NON_CONSISTENT); + dma_free_coherent(&pdev->dev, MAX_BUF_SIZE * (NUM_TX_BUFFS + NUM_RX_BUFFS), + (void *)aup->vaddr, aup->dma_addr); err_vaddr: free_netdev(dev); err_alloc: @@ -1344,9 +1342,8 @@ static int au1000_remove(struct platform_device *pdev) if (aup->tx_db_inuse[i]) au1000_ReleaseDB(aup, aup->tx_db_inuse[i]); - dma_free_attrs(&pdev->dev, MAX_BUF_SIZE * (NUM_TX_BUFFS + NUM_RX_BUFFS), - (void *)aup->vaddr, aup->dma_addr, - DMA_ATTR_NON_CONSISTENT); + dma_free_coherent(&pdev->dev, MAX_BUF_SIZE * (NUM_TX_BUFFS + NUM_RX_BUFFS), + (void *)aup->vaddr, aup->dma_addr); iounmap(aup->macdma); iounmap(aup->mac); diff --git a/drivers/net/ethernet/i825xx/lasi_82596.c b/drivers/net/ethernet/i825xx/lasi_82596.c index aec7e98bcc853a..96c6f4f36904ed 100644 --- a/drivers/net/ethernet/i825xx/lasi_82596.c +++ b/drivers/net/ethernet/i825xx/lasi_82596.c @@ -96,23 +96,14 @@ #define OPT_SWAP_PORT 0x0001 /* Need to wordswp on the MPU port */ -#define LIB82596_DMA_ATTR DMA_ATTR_NON_CONSISTENT - -#define DMA_WBACK(ndev, addr, len) \ - do { dma_cache_sync((ndev)->dev.parent, (void *)addr, len, DMA_TO_DEVICE); } while (0) - -#define DMA_INV(ndev, addr, len) \ - do { dma_cache_sync((ndev)->dev.parent, (void *)addr, len, DMA_FROM_DEVICE); } while (0) - -#define DMA_WBACK_INV(ndev, addr, len) \ - do { dma_cache_sync((ndev)->dev.parent, (void *)addr, len, DMA_BIDIRECTIONAL); } while (0) - #define SYSBUS 0x0000006c /* big endian CPU, 82596 "big" endian mode */ #define SWAP32(x) (((u32)(x)<<16) | ((((u32)(x)))>>16)) #define SWAP16(x) (x) +#define NONCOHERENT_DMA 1 + #include "lib82596.c" MODULE_AUTHOR("Richard Hirst"); @@ -155,7 +146,7 @@ lan_init_chip(struct parisc_device *dev) { struct net_device *netdevice; struct i596_private *lp; - int retval; + int retval = -ENOMEM; int i; if (!dev->irq) { @@ -186,12 +177,22 @@ lan_init_chip(struct parisc_device *dev) lp = netdev_priv(netdevice); lp->options = dev->id.sversion == 0x72 ? OPT_SWAP_PORT : 0; + lp->dma = dma_alloc_noncoherent(&dev->dev, + sizeof(struct i596_dma), &lp->dma_addr, + DMA_BIDIRECTIONAL, GFP_KERNEL); + if (!lp->dma) + goto out_free_netdev; retval = i82596_probe(netdevice); - if (retval) { - free_netdev(netdevice); - return -ENODEV; - } + if (retval) + goto out_free_dma; + return 0; + +out_free_dma: + dma_free_noncoherent(&dev->dev, sizeof(struct i596_dma), + lp->dma, lp->dma_addr, DMA_BIDIRECTIONAL); +out_free_netdev: + free_netdev(netdevice); return retval; } @@ -201,8 +202,8 @@ static int __exit lan_remove_chip(struct parisc_device *pdev) struct i596_private *lp = netdev_priv(dev); unregister_netdev (dev); - dma_free_attrs(&pdev->dev, sizeof(struct i596_private), lp->dma, - lp->dma_addr, LIB82596_DMA_ATTR); + dma_free_noncoherent(&pdev->dev, sizeof(struct i596_private), lp->dma, + lp->dma_addr, DMA_BIDIRECTIONAL); free_netdev (dev); return 0; } diff --git a/drivers/net/ethernet/i825xx/lib82596.c b/drivers/net/ethernet/i825xx/lib82596.c index b03757e169e475..ca2fb303fcc6f6 100644 --- a/drivers/net/ethernet/i825xx/lib82596.c +++ b/drivers/net/ethernet/i825xx/lib82596.c @@ -365,13 +365,44 @@ static int max_cmd_backlog = TX_RING_SIZE-1; static void i596_poll_controller(struct net_device *dev); #endif +static inline dma_addr_t virt_to_dma(struct i596_private *lp, volatile void *v) +{ + return lp->dma_addr + ((unsigned long)v - (unsigned long)lp->dma); +} + +#ifdef NONCOHERENT_DMA +static inline void dma_sync_dev(struct net_device *ndev, volatile void *addr, + size_t len) +{ + dma_sync_single_for_device(ndev->dev.parent, + virt_to_dma(netdev_priv(ndev), addr), len, + DMA_BIDIRECTIONAL); +} + +static inline void dma_sync_cpu(struct net_device *ndev, volatile void *addr, + size_t len) +{ + dma_sync_single_for_cpu(ndev->dev.parent, + virt_to_dma(netdev_priv(ndev), addr), len, + DMA_BIDIRECTIONAL); +} +#else +static inline void dma_sync_dev(struct net_device *ndev, volatile void *addr, + size_t len) +{ +} +static inline void dma_sync_cpu(struct net_device *ndev, volatile void *addr, + size_t len) +{ +} +#endif /* NONCOHERENT_DMA */ static inline int wait_istat(struct net_device *dev, struct i596_dma *dma, int delcnt, char *str) { - DMA_INV(dev, &(dma->iscp), sizeof(struct i596_iscp)); + dma_sync_cpu(dev, &(dma->iscp), sizeof(struct i596_iscp)); while (--delcnt && dma->iscp.stat) { udelay(10); - DMA_INV(dev, &(dma->iscp), sizeof(struct i596_iscp)); + dma_sync_cpu(dev, &(dma->iscp), sizeof(struct i596_iscp)); } if (!delcnt) { printk(KERN_ERR "%s: %s, iscp.stat %04x, didn't clear\n", @@ -384,10 +415,10 @@ static inline int wait_istat(struct net_device *dev, struct i596_dma *dma, int d static inline int wait_cmd(struct net_device *dev, struct i596_dma *dma, int delcnt, char *str) { - DMA_INV(dev, &(dma->scb), sizeof(struct i596_scb)); + dma_sync_cpu(dev, &(dma->scb), sizeof(struct i596_scb)); while (--delcnt && dma->scb.command) { udelay(10); - DMA_INV(dev, &(dma->scb), sizeof(struct i596_scb)); + dma_sync_cpu(dev, &(dma->scb), sizeof(struct i596_scb)); } if (!delcnt) { printk(KERN_ERR "%s: %s, status %4.4x, cmd %4.4x.\n", @@ -451,12 +482,9 @@ static void i596_display_data(struct net_device *dev) SWAP32(rbd->b_data), SWAP16(rbd->size)); rbd = rbd->v_next; } while (rbd != lp->rbd_head); - DMA_INV(dev, dma, sizeof(struct i596_dma)); + dma_sync_cpu(dev, dma, sizeof(struct i596_dma)); } - -#define virt_to_dma(lp, v) ((lp)->dma_addr + (dma_addr_t)((unsigned long)(v)-(unsigned long)((lp)->dma))) - static inline int init_rx_bufs(struct net_device *dev) { struct i596_private *lp = netdev_priv(dev); @@ -508,7 +536,7 @@ static inline int init_rx_bufs(struct net_device *dev) rfd->b_next = SWAP32(virt_to_dma(lp, dma->rfds)); rfd->cmd = SWAP16(CMD_EOL|CMD_FLEX); - DMA_WBACK_INV(dev, dma, sizeof(struct i596_dma)); + dma_sync_dev(dev, dma, sizeof(struct i596_dma)); return 0; } @@ -547,7 +575,7 @@ static void rebuild_rx_bufs(struct net_device *dev) lp->rbd_head = dma->rbds; dma->rfds[0].rbd = SWAP32(virt_to_dma(lp, dma->rbds)); - DMA_WBACK_INV(dev, dma, sizeof(struct i596_dma)); + dma_sync_dev(dev, dma, sizeof(struct i596_dma)); } @@ -575,9 +603,9 @@ static int init_i596_mem(struct net_device *dev) DEB(DEB_INIT, printk(KERN_DEBUG "%s: starting i82596.\n", dev->name)); - DMA_WBACK(dev, &(dma->scp), sizeof(struct i596_scp)); - DMA_WBACK(dev, &(dma->iscp), sizeof(struct i596_iscp)); - DMA_WBACK(dev, &(dma->scb), sizeof(struct i596_scb)); + dma_sync_dev(dev, &(dma->scp), sizeof(struct i596_scp)); + dma_sync_dev(dev, &(dma->iscp), sizeof(struct i596_iscp)); + dma_sync_dev(dev, &(dma->scb), sizeof(struct i596_scb)); mpu_port(dev, PORT_ALTSCP, virt_to_dma(lp, &dma->scp)); ca(dev); @@ -596,24 +624,24 @@ static int init_i596_mem(struct net_device *dev) rebuild_rx_bufs(dev); dma->scb.command = 0; - DMA_WBACK(dev, &(dma->scb), sizeof(struct i596_scb)); + dma_sync_dev(dev, &(dma->scb), sizeof(struct i596_scb)); DEB(DEB_INIT, printk(KERN_DEBUG "%s: queuing CmdConfigure\n", dev->name)); memcpy(dma->cf_cmd.i596_config, init_setup, 14); dma->cf_cmd.cmd.command = SWAP16(CmdConfigure); - DMA_WBACK(dev, &(dma->cf_cmd), sizeof(struct cf_cmd)); + dma_sync_dev(dev, &(dma->cf_cmd), sizeof(struct cf_cmd)); i596_add_cmd(dev, &dma->cf_cmd.cmd); DEB(DEB_INIT, printk(KERN_DEBUG "%s: queuing CmdSASetup\n", dev->name)); memcpy(dma->sa_cmd.eth_addr, dev->dev_addr, ETH_ALEN); dma->sa_cmd.cmd.command = SWAP16(CmdSASetup); - DMA_WBACK(dev, &(dma->sa_cmd), sizeof(struct sa_cmd)); + dma_sync_dev(dev, &(dma->sa_cmd), sizeof(struct sa_cmd)); i596_add_cmd(dev, &dma->sa_cmd.cmd); DEB(DEB_INIT, printk(KERN_DEBUG "%s: queuing CmdTDR\n", dev->name)); dma->tdr_cmd.cmd.command = SWAP16(CmdTDR); - DMA_WBACK(dev, &(dma->tdr_cmd), sizeof(struct tdr_cmd)); + dma_sync_dev(dev, &(dma->tdr_cmd), sizeof(struct tdr_cmd)); i596_add_cmd(dev, &dma->tdr_cmd.cmd); spin_lock_irqsave (&lp->lock, flags); @@ -625,7 +653,7 @@ static int init_i596_mem(struct net_device *dev) DEB(DEB_INIT, printk(KERN_DEBUG "%s: Issuing RX_START\n", dev->name)); dma->scb.command = SWAP16(RX_START); dma->scb.rfd = SWAP32(virt_to_dma(lp, dma->rfds)); - DMA_WBACK(dev, &(dma->scb), sizeof(struct i596_scb)); + dma_sync_dev(dev, &(dma->scb), sizeof(struct i596_scb)); ca(dev); @@ -659,13 +687,13 @@ static inline int i596_rx(struct net_device *dev) rfd = lp->rfd_head; /* Ref next frame to check */ - DMA_INV(dev, rfd, sizeof(struct i596_rfd)); + dma_sync_cpu(dev, rfd, sizeof(struct i596_rfd)); while (rfd->stat & SWAP16(STAT_C)) { /* Loop while complete frames */ if (rfd->rbd == I596_NULL) rbd = NULL; else if (rfd->rbd == lp->rbd_head->b_addr) { rbd = lp->rbd_head; - DMA_INV(dev, rbd, sizeof(struct i596_rbd)); + dma_sync_cpu(dev, rbd, sizeof(struct i596_rbd)); } else { printk(KERN_ERR "%s: rbd chain broken!\n", dev->name); /* XXX Now what? */ @@ -713,7 +741,7 @@ static inline int i596_rx(struct net_device *dev) DMA_FROM_DEVICE); rbd->v_data = newskb->data; rbd->b_data = SWAP32(dma_addr); - DMA_WBACK_INV(dev, rbd, sizeof(struct i596_rbd)); + dma_sync_dev(dev, rbd, sizeof(struct i596_rbd)); } else { skb = netdev_alloc_skb_ip_align(dev, pkt_len); } @@ -765,7 +793,7 @@ static inline int i596_rx(struct net_device *dev) if (rbd != NULL && (rbd->count & SWAP16(0x4000))) { rbd->count = 0; lp->rbd_head = rbd->v_next; - DMA_WBACK_INV(dev, rbd, sizeof(struct i596_rbd)); + dma_sync_dev(dev, rbd, sizeof(struct i596_rbd)); } /* Tidy the frame descriptor, marking it as end of list */ @@ -779,14 +807,14 @@ static inline int i596_rx(struct net_device *dev) lp->dma->scb.rfd = rfd->b_next; lp->rfd_head = rfd->v_next; - DMA_WBACK_INV(dev, rfd, sizeof(struct i596_rfd)); + dma_sync_dev(dev, rfd, sizeof(struct i596_rfd)); /* Remove end-of-list from old end descriptor */ rfd->v_prev->cmd = SWAP16(CMD_FLEX); - DMA_WBACK_INV(dev, rfd->v_prev, sizeof(struct i596_rfd)); + dma_sync_dev(dev, rfd->v_prev, sizeof(struct i596_rfd)); rfd = lp->rfd_head; - DMA_INV(dev, rfd, sizeof(struct i596_rfd)); + dma_sync_cpu(dev, rfd, sizeof(struct i596_rfd)); } DEB(DEB_RXFRAME, printk(KERN_DEBUG "frames %d\n", frames)); @@ -827,12 +855,12 @@ static inline void i596_cleanup_cmd(struct net_device *dev, struct i596_private ptr->v_next = NULL; ptr->b_next = I596_NULL; } - DMA_WBACK_INV(dev, ptr, sizeof(struct i596_cmd)); + dma_sync_dev(dev, ptr, sizeof(struct i596_cmd)); } wait_cmd(dev, lp->dma, 100, "i596_cleanup_cmd timed out"); lp->dma->scb.cmd = I596_NULL; - DMA_WBACK(dev, &(lp->dma->scb), sizeof(struct i596_scb)); + dma_sync_dev(dev, &(lp->dma->scb), sizeof(struct i596_scb)); } @@ -850,7 +878,7 @@ static inline void i596_reset(struct net_device *dev, struct i596_private *lp) /* FIXME: this command might cause an lpmc */ lp->dma->scb.command = SWAP16(CUC_ABORT | RX_ABORT); - DMA_WBACK(dev, &(lp->dma->scb), sizeof(struct i596_scb)); + dma_sync_dev(dev, &(lp->dma->scb), sizeof(struct i596_scb)); ca(dev); /* wait for shutdown */ @@ -878,20 +906,20 @@ static void i596_add_cmd(struct net_device *dev, struct i596_cmd *cmd) cmd->command |= SWAP16(CMD_EOL | CMD_INTR); cmd->v_next = NULL; cmd->b_next = I596_NULL; - DMA_WBACK(dev, cmd, sizeof(struct i596_cmd)); + dma_sync_dev(dev, cmd, sizeof(struct i596_cmd)); spin_lock_irqsave (&lp->lock, flags); if (lp->cmd_head != NULL) { lp->cmd_tail->v_next = cmd; lp->cmd_tail->b_next = SWAP32(virt_to_dma(lp, &cmd->status)); - DMA_WBACK(dev, lp->cmd_tail, sizeof(struct i596_cmd)); + dma_sync_dev(dev, lp->cmd_tail, sizeof(struct i596_cmd)); } else { lp->cmd_head = cmd; wait_cmd(dev, dma, 100, "i596_add_cmd timed out"); dma->scb.cmd = SWAP32(virt_to_dma(lp, &cmd->status)); dma->scb.command = SWAP16(CUC_START); - DMA_WBACK(dev, &(dma->scb), sizeof(struct i596_scb)); + dma_sync_dev(dev, &(dma->scb), sizeof(struct i596_scb)); ca(dev); } lp->cmd_tail = cmd; @@ -956,7 +984,7 @@ static void i596_tx_timeout (struct net_device *dev, unsigned int txqueue) /* Issue a channel attention signal */ DEB(DEB_ERRORS, printk(KERN_DEBUG "Kicking board.\n")); lp->dma->scb.command = SWAP16(CUC_START | RX_START); - DMA_WBACK_INV(dev, &(lp->dma->scb), sizeof(struct i596_scb)); + dma_sync_dev(dev, &(lp->dma->scb), sizeof(struct i596_scb)); ca (dev); lp->last_restart = dev->stats.tx_packets; } @@ -1014,8 +1042,8 @@ static netdev_tx_t i596_start_xmit(struct sk_buff *skb, struct net_device *dev) tbd->data = SWAP32(tx_cmd->dma_addr); DEB(DEB_TXADDR, print_eth(skb->data, "tx-queued")); - DMA_WBACK_INV(dev, tx_cmd, sizeof(struct tx_cmd)); - DMA_WBACK_INV(dev, tbd, sizeof(struct i596_tbd)); + dma_sync_dev(dev, tx_cmd, sizeof(struct tx_cmd)); + dma_sync_dev(dev, tbd, sizeof(struct i596_tbd)); i596_add_cmd(dev, &tx_cmd->cmd); dev->stats.tx_packets++; @@ -1047,9 +1075,8 @@ static const struct net_device_ops i596_netdev_ops = { static int i82596_probe(struct net_device *dev) { - int i; struct i596_private *lp = netdev_priv(dev); - struct i596_dma *dma; + int ret; /* This lot is ensure things have been cache line aligned. */ BUILD_BUG_ON(sizeof(struct i596_rfd) != 32); @@ -1063,41 +1090,28 @@ static int i82596_probe(struct net_device *dev) if (!dev->base_addr || !dev->irq) return -ENODEV; - dma = dma_alloc_attrs(dev->dev.parent, sizeof(struct i596_dma), - &lp->dma_addr, GFP_KERNEL, - LIB82596_DMA_ATTR); - if (!dma) { - printk(KERN_ERR "%s: Couldn't get shared memory\n", __FILE__); - return -ENOMEM; - } - dev->netdev_ops = &i596_netdev_ops; dev->watchdog_timeo = TX_TIMEOUT; - memset(dma, 0, sizeof(struct i596_dma)); - lp->dma = dma; - - dma->scb.command = 0; - dma->scb.cmd = I596_NULL; - dma->scb.rfd = I596_NULL; + memset(lp->dma, 0, sizeof(struct i596_dma)); + lp->dma->scb.command = 0; + lp->dma->scb.cmd = I596_NULL; + lp->dma->scb.rfd = I596_NULL; spin_lock_init(&lp->lock); - DMA_WBACK_INV(dev, dma, sizeof(struct i596_dma)); + dma_sync_dev(dev, lp->dma, sizeof(struct i596_dma)); - i = register_netdev(dev); - if (i) { - dma_free_attrs(dev->dev.parent, sizeof(struct i596_dma), - dma, lp->dma_addr, LIB82596_DMA_ATTR); - return i; - } + ret = register_netdev(dev); + if (ret) + return ret; DEB(DEB_PROBE, printk(KERN_INFO "%s: 82596 at %#3lx, %pM IRQ %d.\n", dev->name, dev->base_addr, dev->dev_addr, dev->irq)); DEB(DEB_INIT, printk(KERN_INFO "%s: dma at 0x%p (%d bytes), lp->scb at 0x%p\n", - dev->name, dma, (int)sizeof(struct i596_dma), - &dma->scb)); + dev->name, lp->dma, (int)sizeof(struct i596_dma), + &lp->dma->scb)); return 0; } @@ -1155,7 +1169,7 @@ static irqreturn_t i596_interrupt(int irq, void *dev_id) dev->name, status & 0x0700)); while (lp->cmd_head != NULL) { - DMA_INV(dev, lp->cmd_head, sizeof(struct i596_cmd)); + dma_sync_cpu(dev, lp->cmd_head, sizeof(struct i596_cmd)); if (!(lp->cmd_head->status & SWAP16(STAT_C))) break; @@ -1237,7 +1251,7 @@ static irqreturn_t i596_interrupt(int irq, void *dev_id) } ptr->v_next = NULL; ptr->b_next = I596_NULL; - DMA_WBACK(dev, ptr, sizeof(struct i596_cmd)); + dma_sync_dev(dev, ptr, sizeof(struct i596_cmd)); lp->last_cmd = jiffies; } @@ -1251,13 +1265,13 @@ static irqreturn_t i596_interrupt(int irq, void *dev_id) ptr->command &= SWAP16(0x1fff); ptr = ptr->v_next; - DMA_WBACK_INV(dev, prev, sizeof(struct i596_cmd)); + dma_sync_dev(dev, prev, sizeof(struct i596_cmd)); } if (lp->cmd_head != NULL) ack_cmd |= CUC_START; dma->scb.cmd = SWAP32(virt_to_dma(lp, &lp->cmd_head->status)); - DMA_WBACK_INV(dev, &dma->scb, sizeof(struct i596_scb)); + dma_sync_dev(dev, &dma->scb, sizeof(struct i596_scb)); } if ((status & 0x1000) || (status & 0x4000)) { if ((status & 0x4000)) @@ -1282,7 +1296,7 @@ static irqreturn_t i596_interrupt(int irq, void *dev_id) } wait_cmd(dev, dma, 100, "i596 interrupt, timeout"); dma->scb.command = SWAP16(ack_cmd); - DMA_WBACK(dev, &dma->scb, sizeof(struct i596_scb)); + dma_sync_dev(dev, &dma->scb, sizeof(struct i596_scb)); /* DANGER: I suspect that some kind of interrupt acknowledgement aside from acking the 82596 might be needed @@ -1313,7 +1327,7 @@ static int i596_close(struct net_device *dev) wait_cmd(dev, lp->dma, 100, "close1 timed out"); lp->dma->scb.command = SWAP16(CUC_ABORT | RX_ABORT); - DMA_WBACK(dev, &lp->dma->scb, sizeof(struct i596_scb)); + dma_sync_dev(dev, &lp->dma->scb, sizeof(struct i596_scb)); ca(dev); @@ -1372,7 +1386,7 @@ static void set_multicast_list(struct net_device *dev) dev->name); else { dma->cf_cmd.cmd.command = SWAP16(CmdConfigure); - DMA_WBACK_INV(dev, &dma->cf_cmd, sizeof(struct cf_cmd)); + dma_sync_dev(dev, &dma->cf_cmd, sizeof(struct cf_cmd)); i596_add_cmd(dev, &dma->cf_cmd.cmd); } } @@ -1404,7 +1418,7 @@ static void set_multicast_list(struct net_device *dev) dev->name, cp)); cp += ETH_ALEN; } - DMA_WBACK_INV(dev, &dma->mc_cmd, sizeof(struct mc_cmd)); + dma_sync_dev(dev, &dma->mc_cmd, sizeof(struct mc_cmd)); i596_add_cmd(dev, &cmd->cmd); } } diff --git a/drivers/net/ethernet/i825xx/sni_82596.c b/drivers/net/ethernet/i825xx/sni_82596.c index 22f5887578b2bd..27937c5d795673 100644 --- a/drivers/net/ethernet/i825xx/sni_82596.c +++ b/drivers/net/ethernet/i825xx/sni_82596.c @@ -24,12 +24,6 @@ static const char sni_82596_string[] = "snirm_82596"; -#define LIB82596_DMA_ATTR 0 - -#define DMA_WBACK(priv, addr, len) do { } while (0) -#define DMA_INV(priv, addr, len) do { } while (0) -#define DMA_WBACK_INV(priv, addr, len) do { } while (0) - #define SYSBUS 0x00004400 /* big endian CPU, 82596 little endian */ @@ -134,10 +128,19 @@ static int sni_82596_probe(struct platform_device *dev) lp->ca = ca_addr; lp->mpu_port = mpu_addr; + lp->dma = dma_alloc_coherent(&dev->dev, sizeof(struct i596_dma), + &lp->dma_addr, GFP_KERNEL); + if (!lp->dma) + goto probe_failed; + retval = i82596_probe(netdevice); - if (retval == 0) - return 0; + if (retval) + goto probe_failed_free_dma; + return 0; +probe_failed_free_dma: + dma_free_coherent(&dev->dev, sizeof(struct i596_dma), lp->dma, + lp->dma_addr); probe_failed: free_netdev(netdevice); probe_failed_free_ca: @@ -153,8 +156,8 @@ static int sni_82596_driver_remove(struct platform_device *pdev) struct i596_private *lp = netdev_priv(dev); unregister_netdev(dev); - dma_free_attrs(dev->dev.parent, sizeof(struct i596_private), lp->dma, - lp->dma_addr, LIB82596_DMA_ATTR); + dma_free_coherent(&pdev->dev, sizeof(struct i596_private), lp->dma, + lp->dma_addr); iounmap(lp->ca); iounmap(lp->mpu_port); free_netdev (dev); diff --git a/drivers/net/ethernet/seeq/sgiseeq.c b/drivers/net/ethernet/seeq/sgiseeq.c index 8507ff2420143a..37ff25a84030eb 100644 --- a/drivers/net/ethernet/seeq/sgiseeq.c +++ b/drivers/net/ethernet/seeq/sgiseeq.c @@ -112,14 +112,18 @@ struct sgiseeq_private { static inline void dma_sync_desc_cpu(struct net_device *dev, void *addr) { - dma_cache_sync(dev->dev.parent, addr, sizeof(struct sgiseeq_rx_desc), - DMA_FROM_DEVICE); + struct sgiseeq_private *sp = netdev_priv(dev); + + dma_sync_single_for_cpu(dev->dev.parent, VIRT_TO_DMA(sp, addr), + sizeof(struct sgiseeq_rx_desc), DMA_BIDIRECTIONAL); } static inline void dma_sync_desc_dev(struct net_device *dev, void *addr) { - dma_cache_sync(dev->dev.parent, addr, sizeof(struct sgiseeq_rx_desc), - DMA_TO_DEVICE); + struct sgiseeq_private *sp = netdev_priv(dev); + + dma_sync_single_for_device(dev->dev.parent, VIRT_TO_DMA(sp, addr), + sizeof(struct sgiseeq_rx_desc), DMA_BIDIRECTIONAL); } static inline void hpc3_eth_reset(struct hpc3_ethregs *hregs) @@ -403,6 +407,8 @@ static inline void sgiseeq_rx(struct net_device *dev, struct sgiseeq_private *sp rd = &sp->rx_desc[sp->rx_new]; dma_sync_desc_cpu(dev, rd); } + dma_sync_desc_dev(dev, rd); + dma_sync_desc_cpu(dev, &sp->rx_desc[orig_end]); sp->rx_desc[orig_end].rdma.cntinfo &= ~(HPCDMA_EOR); dma_sync_desc_dev(dev, &sp->rx_desc[orig_end]); @@ -443,6 +449,7 @@ static inline void kick_tx(struct net_device *dev, dma_sync_desc_cpu(dev, td); } if (td->tdma.cntinfo & HPCDMA_XIU) { + dma_sync_desc_dev(dev, td); hregs->tx_ndptr = VIRT_TO_DMA(sp, td); hregs->tx_ctrl = HPC3_ETXCTRL_ACTIVE; } @@ -476,6 +483,7 @@ static inline void sgiseeq_tx(struct net_device *dev, struct sgiseeq_private *sp if (!(td->tdma.cntinfo & (HPCDMA_XIU))) break; if (!(td->tdma.cntinfo & (HPCDMA_ETXD))) { + dma_sync_desc_dev(dev, td); if (!(status & HPC3_ETXCTRL_ACTIVE)) { hregs->tx_ndptr = VIRT_TO_DMA(sp, td); hregs->tx_ctrl = HPC3_ETXCTRL_ACTIVE; @@ -740,8 +748,8 @@ static int sgiseeq_probe(struct platform_device *pdev) sp = netdev_priv(dev); /* Make private data page aligned */ - sr = dma_alloc_attrs(&pdev->dev, sizeof(*sp->srings), &sp->srings_dma, - GFP_KERNEL, DMA_ATTR_NON_CONSISTENT); + sr = dma_alloc_noncoherent(&pdev->dev, sizeof(*sp->srings), + &sp->srings_dma, DMA_BIDIRECTIONAL, GFP_KERNEL); if (!sr) { printk(KERN_ERR "Sgiseeq: Page alloc failed, aborting.\n"); err = -ENOMEM; @@ -802,8 +810,8 @@ static int sgiseeq_probe(struct platform_device *pdev) return 0; err_out_free_attrs: - dma_free_attrs(&pdev->dev, sizeof(*sp->srings), sp->srings, - sp->srings_dma, DMA_ATTR_NON_CONSISTENT); + dma_free_noncoherent(&pdev->dev, sizeof(*sp->srings), sp->srings, + sp->srings_dma, DMA_BIDIRECTIONAL); err_out_free_dev: free_netdev(dev); @@ -817,8 +825,8 @@ static int sgiseeq_remove(struct platform_device *pdev) struct sgiseeq_private *sp = netdev_priv(dev); unregister_netdev(dev); - dma_free_attrs(&pdev->dev, sizeof(*sp->srings), sp->srings, - sp->srings_dma, DMA_ATTR_NON_CONSISTENT); + dma_free_noncoherent(&pdev->dev, sizeof(*sp->srings), sp->srings, + sp->srings_dma, DMA_BIDIRECTIONAL); free_netdev(dev); return 0; diff --git a/drivers/of/address.c b/drivers/of/address.c index da4f7341323f22..eb9ab4f1e80b00 100644 --- a/drivers/of/address.c +++ b/drivers/of/address.c @@ -13,6 +13,7 @@ #include #include #include +#include /* for bus_dma_region */ #include "of_private.h" @@ -937,33 +938,33 @@ void __iomem *of_io_request_and_map(struct device_node *np, int index, } EXPORT_SYMBOL(of_io_request_and_map); +#ifdef CONFIG_HAS_DMA /** - * of_dma_get_range - Get DMA range info + * of_dma_get_range - Get DMA range info and put it into a map array * @np: device node to get DMA range info - * @dma_addr: pointer to store initial DMA address of DMA range - * @paddr: pointer to store initial CPU address of DMA range - * @size: pointer to store size of DMA range + * @map: dma range structure to return * * Look in bottom up direction for the first "dma-ranges" property - * and parse it. - * dma-ranges format: + * and parse it. Put the information into a DMA offset map array. + * + * dma-ranges format: * DMA addr (dma_addr) : naddr cells * CPU addr (phys_addr_t) : pna cells * size : nsize cells * - * It returns -ENODEV if "dma-ranges" property was not found - * for this device in DT. + * It returns -ENODEV if "dma-ranges" property was not found for this + * device in the DT. */ -int of_dma_get_range(struct device_node *np, u64 *dma_addr, u64 *paddr, u64 *size) +int of_dma_get_range(struct device_node *np, const struct bus_dma_region **map) { struct device_node *node = of_node_get(np); const __be32 *ranges = NULL; - int len; - int ret = 0; bool found_dma_ranges = false; struct of_range_parser parser; struct of_range range; - u64 dma_start = U64_MAX, dma_end = 0, dma_offset = 0; + struct bus_dma_region *r; + int len, num_ranges = 0; + int ret = 0; while (node) { ranges = of_get_property(node, "dma-ranges", &len); @@ -989,49 +990,39 @@ int of_dma_get_range(struct device_node *np, u64 *dma_addr, u64 *paddr, u64 *siz } of_dma_range_parser_init(&parser, node); + for_each_of_range(&parser, &range) + num_ranges++; + + r = kcalloc(num_ranges + 1, sizeof(*r), GFP_KERNEL); + if (!r) { + ret = -ENOMEM; + goto out; + } + /* + * Record all info in the generic DMA ranges array for struct device. + */ + *map = r; + of_dma_range_parser_init(&parser, node); for_each_of_range(&parser, &range) { pr_debug("dma_addr(%llx) cpu_addr(%llx) size(%llx)\n", range.bus_addr, range.cpu_addr, range.size); - - if (dma_offset && range.cpu_addr - range.bus_addr != dma_offset) { - pr_warn("Can't handle multiple dma-ranges with different offsets on node(%pOF)\n", node); - /* Don't error out as we'd break some existing DTs */ - continue; - } if (range.cpu_addr == OF_BAD_ADDR) { pr_err("translation of DMA address(%llx) to CPU address failed node(%pOF)\n", range.bus_addr, node); continue; } - dma_offset = range.cpu_addr - range.bus_addr; - - /* Take lower and upper limits */ - if (range.bus_addr < dma_start) - dma_start = range.bus_addr; - if (range.bus_addr + range.size > dma_end) - dma_end = range.bus_addr + range.size; - } - - if (dma_start >= dma_end) { - ret = -EINVAL; - pr_debug("Invalid DMA ranges configuration on node(%pOF)\n", - node); - goto out; + r->cpu_start = range.cpu_addr; + r->dma_start = range.bus_addr; + r->size = range.size; + r->offset = range.cpu_addr - range.bus_addr; + r++; } - - *dma_addr = dma_start; - *size = dma_end - dma_start; - *paddr = dma_start + dma_offset; - - pr_debug("final: dma_addr(%llx) cpu_addr(%llx) size(%llx)\n", - *dma_addr, *paddr, *size); - out: of_node_put(node); - return ret; } +#endif /* CONFIG_HAS_DMA */ /** * of_dma_is_coherent - Check if device is coherent diff --git a/drivers/of/device.c b/drivers/of/device.c index b439c1e054349b..655dee422563f0 100644 --- a/drivers/of/device.c +++ b/drivers/of/device.c @@ -5,7 +5,8 @@ #include #include #include -#include +#include /* for bus_dma_region */ +#include #include #include #include @@ -90,14 +91,14 @@ int of_device_add(struct platform_device *ofdev) int of_dma_configure_id(struct device *dev, struct device_node *np, bool force_dma, const u32 *id) { - u64 dma_addr, paddr, size = 0; - int ret; - bool coherent; - unsigned long offset; const struct iommu_ops *iommu; - u64 mask, end; + const struct bus_dma_region *map = NULL; + dma_addr_t dma_start = 0; + u64 mask, end, size = 0; + bool coherent; + int ret; - ret = of_dma_get_range(np, &dma_addr, &paddr, &size); + ret = of_dma_get_range(np, &map); if (ret < 0) { /* * For legacy reasons, we have to assume some devices need @@ -106,26 +107,35 @@ int of_dma_configure_id(struct device *dev, struct device_node *np, */ if (!force_dma) return ret == -ENODEV ? 0 : ret; - - dma_addr = offset = 0; } else { - offset = PFN_DOWN(paddr - dma_addr); + const struct bus_dma_region *r = map; + dma_addr_t dma_end = 0; + + /* Determine the overall bounds of all DMA regions */ + for (dma_start = ~(dma_addr_t)0; r->size; r++) { + /* Take lower and upper limits */ + if (r->dma_start < dma_start) + dma_start = r->dma_start; + if (r->dma_start + r->size > dma_end) + dma_end = r->dma_start + r->size; + } + size = dma_end - dma_start; /* * Add a work around to treat the size as mask + 1 in case * it is defined in DT as a mask. */ if (size & 1) { - dev_warn(dev, "Invalid size 0x%llx for dma-range\n", + dev_warn(dev, "Invalid size 0x%llx for dma-range(s)\n", size); size = size + 1; } if (!size) { dev_err(dev, "Adjusted size 0x%llx invalid\n", size); + kfree(map); return -EINVAL; } - dev_dbg(dev, "dma_pfn_offset(%#08lx)\n", offset); } /* @@ -144,13 +154,11 @@ int of_dma_configure_id(struct device *dev, struct device_node *np, else if (!size) size = 1ULL << 32; - dev->dma_pfn_offset = offset; - /* * Limit coherent and dma mask based on size and default mask * set by the driver. */ - end = dma_addr + size - 1; + end = dma_start + size - 1; mask = DMA_BIT_MASK(ilog2(end) + 1); dev->coherent_dma_mask &= mask; *dev->dma_mask &= mask; @@ -163,14 +171,17 @@ int of_dma_configure_id(struct device *dev, struct device_node *np, coherent ? " " : " not "); iommu = of_iommu_configure(dev, np, id); - if (PTR_ERR(iommu) == -EPROBE_DEFER) + if (PTR_ERR(iommu) == -EPROBE_DEFER) { + kfree(map); return -EPROBE_DEFER; + } dev_dbg(dev, "device is%sbehind an iommu\n", iommu ? " " : " not "); - arch_setup_dma_ops(dev, dma_addr, size, iommu, coherent); + arch_setup_dma_ops(dev, dma_start, size, iommu, coherent); + dev->dma_range_map = map; return 0; } EXPORT_SYMBOL_GPL(of_dma_configure_id); diff --git a/drivers/of/of_private.h b/drivers/of/of_private.h index edc682249c0015..d9e6a324de0a77 100644 --- a/drivers/of/of_private.h +++ b/drivers/of/of_private.h @@ -157,12 +157,13 @@ extern void __of_sysfs_remove_bin_file(struct device_node *np, extern int of_bus_n_addr_cells(struct device_node *np); extern int of_bus_n_size_cells(struct device_node *np); -#ifdef CONFIG_OF_ADDRESS -extern int of_dma_get_range(struct device_node *np, u64 *dma_addr, - u64 *paddr, u64 *size); +struct bus_dma_region; +#if defined(CONFIG_OF_ADDRESS) && defined(CONFIG_HAS_DMA) +int of_dma_get_range(struct device_node *np, + const struct bus_dma_region **map); #else -static inline int of_dma_get_range(struct device_node *np, u64 *dma_addr, - u64 *paddr, u64 *size) +static inline int of_dma_get_range(struct device_node *np, + const struct bus_dma_region **map) { return -ENODEV; } diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c index 9b7e84bdc7d446..06cc988faf78b3 100644 --- a/drivers/of/unittest.c +++ b/drivers/of/unittest.c @@ -7,6 +7,7 @@ #include #include +#include /* to test phys_to_dma/dma_to_phys */ #include #include #include @@ -869,10 +870,11 @@ static void __init of_unittest_changeset(void) } static void __init of_unittest_dma_ranges_one(const char *path, - u64 expect_dma_addr, u64 expect_paddr, u64 expect_size) + u64 expect_dma_addr, u64 expect_paddr) { +#ifdef CONFIG_HAS_DMA struct device_node *np; - u64 dma_addr, paddr, size; + const struct bus_dma_region *map = NULL; int rc; np = of_find_node_by_path(path); @@ -881,28 +883,40 @@ static void __init of_unittest_dma_ranges_one(const char *path, return; } - rc = of_dma_get_range(np, &dma_addr, &paddr, &size); + rc = of_dma_get_range(np, &map); unittest(!rc, "of_dma_get_range failed on node %pOF rc=%i\n", np, rc); + if (!rc) { - unittest(size == expect_size, - "of_dma_get_range wrong size on node %pOF size=%llx\n", np, size); + phys_addr_t paddr; + dma_addr_t dma_addr; + struct device dev_bogus; + + dev_bogus.dma_range_map = map; + paddr = dma_to_phys(&dev_bogus, expect_dma_addr); + dma_addr = phys_to_dma(&dev_bogus, expect_paddr); + unittest(paddr == expect_paddr, - "of_dma_get_range wrong phys addr (%llx) on node %pOF", paddr, np); + "of_dma_get_range: wrong phys addr %pap (expecting %llx) on node %pOF\n", + &paddr, expect_paddr, np); unittest(dma_addr == expect_dma_addr, - "of_dma_get_range wrong DMA addr (%llx) on node %pOF", dma_addr, np); + "of_dma_get_range: wrong DMA addr %pad (expecting %llx) on node %pOF\n", + &dma_addr, expect_dma_addr, np); + + kfree(map); } of_node_put(np); +#endif } static void __init of_unittest_parse_dma_ranges(void) { of_unittest_dma_ranges_one("/testcase-data/address-tests/device@70000000", - 0x0, 0x20000000, 0x40000000); + 0x0, 0x20000000); of_unittest_dma_ranges_one("/testcase-data/address-tests/bus@80000000/device@1000", - 0x100000000, 0x20000000, 0x2000000000); + 0x100000000, 0x20000000); of_unittest_dma_ranges_one("/testcase-data/address-tests/pci@90000000", - 0x80000000, 0x20000000, 0x10000000); + 0x80000000, 0x20000000); } static void __init of_unittest_pci_dma_ranges(void) diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c index a5507f75b524c4..b5f9ee81a46c1e 100644 --- a/drivers/parisc/ccio-dma.c +++ b/drivers/parisc/ccio-dma.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -356,8 +357,7 @@ ccio_alloc_range(struct ioc *ioc, struct device *dev, size_t size) ** ggg sacrifices another 710 to the computer gods. */ - boundary_size = ALIGN((unsigned long long)dma_get_seg_boundary(dev) + 1, - 1ULL << IOVP_SHIFT) >> IOVP_SHIFT; + boundary_size = dma_get_seg_boundary_nr_pages(dev, IOVP_SHIFT); if (pages_needed <= 8) { /* @@ -1025,6 +1025,8 @@ static const struct dma_map_ops ccio_ops = { .map_sg = ccio_map_sg, .unmap_sg = ccio_unmap_sg, .get_sgtable = dma_common_get_sgtable, + .alloc_pages = dma_common_alloc_pages, + .free_pages = dma_common_free_pages, }; #ifdef CONFIG_PROC_FS diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c index d4314fba026914..dce4cdf786cdb1 100644 --- a/drivers/parisc/sba_iommu.c +++ b/drivers/parisc/sba_iommu.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -342,8 +343,7 @@ sba_search_bitmap(struct ioc *ioc, struct device *dev, unsigned long shift; int ret; - boundary_size = ALIGN((unsigned long long)dma_get_seg_boundary(dev) + 1, - 1ULL << IOVP_SHIFT) >> IOVP_SHIFT; + boundary_size = dma_get_seg_boundary_nr_pages(dev, IOVP_SHIFT); #if defined(ZX1_SUPPORT) BUG_ON(ioc->ibase & ~IOVP_MASK); @@ -1077,6 +1077,8 @@ static const struct dma_map_ops sba_ops = { .map_sg = sba_map_sg, .unmap_sg = sba_unmap_sg, .get_sgtable = dma_common_get_sgtable, + .alloc_pages = dma_common_alloc_pages, + .free_pages = dma_common_free_pages, }; diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 449466f71040d2..d1b7169c068403 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "pci.h" #include "pcie/portdrv.h" diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c index c0e85be598c112..c6fe0cfec0f681 100644 --- a/drivers/pci/xen-pcifront.c +++ b/drivers/pci/xen-pcifront.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include diff --git a/drivers/pcmcia/ds.c b/drivers/pcmcia/ds.c index 09d06b082f8b8c..72114907c0e4d2 100644 --- a/drivers/pcmcia/ds.c +++ b/drivers/pcmcia/ds.c @@ -516,7 +516,7 @@ static struct pcmcia_device *pcmcia_device_add(struct pcmcia_socket *s, p_dev->dev.parent = s->dev.parent; p_dev->dev.release = pcmcia_release_dev; /* by default don't allow DMA */ - p_dev->dma_mask = DMA_MASK_NONE; + p_dev->dma_mask = 0; p_dev->dev.dma_mask = &p_dev->dma_mask; dev_set_name(&p_dev->dev, "%d.%d", p_dev->socket->sock, p_dev->device_no); if (!dev_name(&p_dev->dev)) diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c index 7f90eeea67e279..dab2c0f5caf0ec 100644 --- a/drivers/remoteproc/remoteproc_core.c +++ b/drivers/remoteproc/remoteproc_core.c @@ -22,7 +22,9 @@ #include #include #include +#include #include +#include /* XXX: pokes into bus_dma_range */ #include #include #include @@ -458,6 +460,25 @@ static void rproc_rvdev_release(struct device *dev) kfree(rvdev); } +static int copy_dma_range_map(struct device *to, struct device *from) +{ + const struct bus_dma_region *map = from->dma_range_map, *new_map, *r; + int num_ranges = 0; + + if (!map) + return 0; + + for (r = map; r->size; r++) + num_ranges++; + + new_map = kmemdup(map, array_size(num_ranges + 1, sizeof(*map)), + GFP_KERNEL); + if (!new_map) + return -ENOMEM; + to->dma_range_map = new_map; + return 0; +} + /** * rproc_handle_vdev() - handle a vdev fw resource * @rproc: the remote processor @@ -529,7 +550,9 @@ static int rproc_handle_vdev(struct rproc *rproc, struct fw_rsc_vdev *rsc, /* Initialise vdev subdevice */ snprintf(name, sizeof(name), "vdev%dbuffer", rvdev->index); rvdev->dev.parent = &rproc->dev; - rvdev->dev.dma_pfn_offset = rproc->dev.parent->dma_pfn_offset; + ret = copy_dma_range_map(&rvdev->dev, rproc->dev.parent); + if (ret) + return ret; rvdev->dev.release = rproc_rvdev_release; dev_set_name(&rvdev->dev, "%s#%s", dev_name(rvdev->dev.parent), name); dev_set_drvdata(&rvdev->dev, rvdev); diff --git a/drivers/remoteproc/remoteproc_virtio.c b/drivers/remoteproc/remoteproc_virtio.c index dfd3808c34fdbd..0cc617f760682c 100644 --- a/drivers/remoteproc/remoteproc_virtio.c +++ b/drivers/remoteproc/remoteproc_virtio.c @@ -9,7 +9,7 @@ * Brian Swetland */ -#include +#include #include #include #include diff --git a/drivers/scsi/53c700.c b/drivers/scsi/53c700.c index 84b57a8f86bfa9..5117d90ccd9edf 100644 --- a/drivers/scsi/53c700.c +++ b/drivers/scsi/53c700.c @@ -269,6 +269,27 @@ NCR_700_get_SXFER(struct scsi_device *SDp) spi_period(SDp->sdev_target)); } +static inline dma_addr_t virt_to_dma(struct NCR_700_Host_Parameters *h, void *p) +{ + return h->pScript + ((uintptr_t)p - (uintptr_t)h->script); +} + +static inline void dma_sync_to_dev(struct NCR_700_Host_Parameters *h, + void *addr, size_t size) +{ + if (h->noncoherent) + dma_sync_single_for_device(h->dev, virt_to_dma(h, addr), + size, DMA_BIDIRECTIONAL); +} + +static inline void dma_sync_from_dev(struct NCR_700_Host_Parameters *h, + void *addr, size_t size) +{ + if (h->noncoherent) + dma_sync_single_for_device(h->dev, virt_to_dma(h, addr), size, + DMA_BIDIRECTIONAL); +} + struct Scsi_Host * NCR_700_detect(struct scsi_host_template *tpnt, struct NCR_700_Host_Parameters *hostdata, struct device *dev) @@ -283,9 +304,13 @@ NCR_700_detect(struct scsi_host_template *tpnt, if(tpnt->sdev_attrs == NULL) tpnt->sdev_attrs = NCR_700_dev_attrs; - memory = dma_alloc_attrs(dev, TOTAL_MEM_SIZE, &pScript, - GFP_KERNEL, DMA_ATTR_NON_CONSISTENT); - if(memory == NULL) { + memory = dma_alloc_coherent(dev, TOTAL_MEM_SIZE, &pScript, GFP_KERNEL); + if (!memory) { + hostdata->noncoherent = 1; + memory = dma_alloc_noncoherent(dev, TOTAL_MEM_SIZE, &pScript, + DMA_BIDIRECTIONAL, GFP_KERNEL); + } + if (!memory) { printk(KERN_ERR "53c700: Failed to allocate memory for driver, detaching\n"); return NULL; } @@ -339,11 +364,11 @@ NCR_700_detect(struct scsi_host_template *tpnt, for (j = 0; j < PATCHES; j++) script[LABELPATCHES[j]] = bS_to_host(pScript + SCRIPT[LABELPATCHES[j]]); /* now patch up fixed addresses. */ - script_patch_32(hostdata->dev, script, MessageLocation, + script_patch_32(hostdata, script, MessageLocation, pScript + MSGOUT_OFFSET); - script_patch_32(hostdata->dev, script, StatusAddress, + script_patch_32(hostdata, script, StatusAddress, pScript + STATUS_OFFSET); - script_patch_32(hostdata->dev, script, ReceiveMsgAddress, + script_patch_32(hostdata, script, ReceiveMsgAddress, pScript + MSGIN_OFFSET); hostdata->script = script; @@ -395,8 +420,13 @@ NCR_700_release(struct Scsi_Host *host) struct NCR_700_Host_Parameters *hostdata = (struct NCR_700_Host_Parameters *)host->hostdata[0]; - dma_free_attrs(hostdata->dev, TOTAL_MEM_SIZE, hostdata->script, - hostdata->pScript, DMA_ATTR_NON_CONSISTENT); + if (hostdata->noncoherent) + dma_free_noncoherent(hostdata->dev, TOTAL_MEM_SIZE, + hostdata->script, hostdata->pScript, + DMA_BIDIRECTIONAL); + else + dma_free_coherent(hostdata->dev, TOTAL_MEM_SIZE, + hostdata->script, hostdata->pScript); return 1; } @@ -804,8 +834,8 @@ process_extended_message(struct Scsi_Host *host, shost_printk(KERN_WARNING, host, "Unexpected SDTR msg\n"); hostdata->msgout[0] = A_REJECT_MSG; - dma_cache_sync(hostdata->dev, hostdata->msgout, 1, DMA_TO_DEVICE); - script_patch_16(hostdata->dev, hostdata->script, + dma_sync_to_dev(hostdata, hostdata->msgout, 1); + script_patch_16(hostdata, hostdata->script, MessageCount, 1); /* SendMsgOut returns, so set up the return * address */ @@ -817,9 +847,8 @@ process_extended_message(struct Scsi_Host *host, printk(KERN_INFO "scsi%d: (%d:%d), Unsolicited WDTR after CMD, Rejecting\n", host->host_no, pun, lun); hostdata->msgout[0] = A_REJECT_MSG; - dma_cache_sync(hostdata->dev, hostdata->msgout, 1, DMA_TO_DEVICE); - script_patch_16(hostdata->dev, hostdata->script, MessageCount, - 1); + dma_sync_to_dev(hostdata, hostdata->msgout, 1); + script_patch_16(hostdata, hostdata->script, MessageCount, 1); resume_offset = hostdata->pScript + Ent_SendMessageWithATN; break; @@ -832,9 +861,8 @@ process_extended_message(struct Scsi_Host *host, printk("\n"); /* just reject it */ hostdata->msgout[0] = A_REJECT_MSG; - dma_cache_sync(hostdata->dev, hostdata->msgout, 1, DMA_TO_DEVICE); - script_patch_16(hostdata->dev, hostdata->script, MessageCount, - 1); + dma_sync_to_dev(hostdata, hostdata->msgout, 1); + script_patch_16(hostdata, hostdata->script, MessageCount, 1); /* SendMsgOut returns, so set up the return * address */ resume_offset = hostdata->pScript + Ent_SendMessageWithATN; @@ -917,9 +945,8 @@ process_message(struct Scsi_Host *host, struct NCR_700_Host_Parameters *hostdata printk("\n"); /* just reject it */ hostdata->msgout[0] = A_REJECT_MSG; - dma_cache_sync(hostdata->dev, hostdata->msgout, 1, DMA_TO_DEVICE); - script_patch_16(hostdata->dev, hostdata->script, MessageCount, - 1); + dma_sync_to_dev(hostdata, hostdata->msgout, 1); + script_patch_16(hostdata, hostdata->script, MessageCount, 1); /* SendMsgOut returns, so set up the return * address */ resume_offset = hostdata->pScript + Ent_SendMessageWithATN; @@ -928,7 +955,7 @@ process_message(struct Scsi_Host *host, struct NCR_700_Host_Parameters *hostdata } NCR_700_writel(temp, host, TEMP_REG); /* set us up to receive another message */ - dma_cache_sync(hostdata->dev, hostdata->msgin, MSG_ARRAY_SIZE, DMA_FROM_DEVICE); + dma_sync_from_dev(hostdata, hostdata->msgin, MSG_ARRAY_SIZE); return resume_offset; } @@ -1008,8 +1035,8 @@ process_script_interrupt(__u32 dsps, __u32 dsp, struct scsi_cmnd *SCp, slot->SG[1].ins = bS_to_host(SCRIPT_RETURN); slot->SG[1].pAddr = 0; slot->resume_offset = hostdata->pScript; - dma_cache_sync(hostdata->dev, slot->SG, sizeof(slot->SG[0])*2, DMA_TO_DEVICE); - dma_cache_sync(hostdata->dev, SCp->sense_buffer, SCSI_SENSE_BUFFERSIZE, DMA_FROM_DEVICE); + dma_sync_to_dev(hostdata, slot->SG, sizeof(slot->SG[0])*2); + dma_sync_from_dev(hostdata, SCp->sense_buffer, SCSI_SENSE_BUFFERSIZE); /* queue the command for reissue */ slot->state = NCR_700_SLOT_QUEUED; @@ -1129,11 +1156,11 @@ process_script_interrupt(__u32 dsps, __u32 dsp, struct scsi_cmnd *SCp, hostdata->cmd = slot->cmnd; /* re-patch for this command */ - script_patch_32_abs(hostdata->dev, hostdata->script, + script_patch_32_abs(hostdata, hostdata->script, CommandAddress, slot->pCmd); - script_patch_16(hostdata->dev, hostdata->script, + script_patch_16(hostdata, hostdata->script, CommandCount, slot->cmnd->cmd_len); - script_patch_32_abs(hostdata->dev, hostdata->script, + script_patch_32_abs(hostdata, hostdata->script, SGScriptStartAddress, to32bit(&slot->pSG[0].ins)); @@ -1144,14 +1171,14 @@ process_script_interrupt(__u32 dsps, __u32 dsp, struct scsi_cmnd *SCp, * should therefore always clear ACK */ NCR_700_writeb(NCR_700_get_SXFER(hostdata->cmd->device), host, SXFER_REG); - dma_cache_sync(hostdata->dev, hostdata->msgin, - MSG_ARRAY_SIZE, DMA_FROM_DEVICE); - dma_cache_sync(hostdata->dev, hostdata->msgout, - MSG_ARRAY_SIZE, DMA_TO_DEVICE); + dma_sync_from_dev(hostdata, hostdata->msgin, + MSG_ARRAY_SIZE); + dma_sync_to_dev(hostdata, hostdata->msgout, + MSG_ARRAY_SIZE); /* I'm just being paranoid here, the command should * already have been flushed from the cache */ - dma_cache_sync(hostdata->dev, slot->cmnd->cmnd, - slot->cmnd->cmd_len, DMA_TO_DEVICE); + dma_sync_to_dev(hostdata, slot->cmnd->cmnd, + slot->cmnd->cmd_len); @@ -1214,8 +1241,7 @@ process_script_interrupt(__u32 dsps, __u32 dsp, struct scsi_cmnd *SCp, hostdata->reselection_id = reselection_id; /* just in case we have a stale simple tag message, clear it */ hostdata->msgin[1] = 0; - dma_cache_sync(hostdata->dev, hostdata->msgin, - MSG_ARRAY_SIZE, DMA_BIDIRECTIONAL); + dma_sync_to_dev(hostdata, hostdata->msgin, MSG_ARRAY_SIZE); if(hostdata->tag_negotiated & (1<pScript + Ent_GetReselectionWithTag; } else { @@ -1329,8 +1355,7 @@ process_selection(struct Scsi_Host *host, __u32 dsp) hostdata->cmd = NULL; /* clear any stale simple tag message */ hostdata->msgin[1] = 0; - dma_cache_sync(hostdata->dev, hostdata->msgin, MSG_ARRAY_SIZE, - DMA_BIDIRECTIONAL); + dma_sync_to_dev(hostdata, hostdata->msgin, MSG_ARRAY_SIZE); if(id == 0xff) { /* Selected as target, Ignore */ @@ -1427,30 +1452,26 @@ NCR_700_start_command(struct scsi_cmnd *SCp) NCR_700_set_flag(SCp->device, NCR_700_DEV_BEGIN_SYNC_NEGOTIATION); } - script_patch_16(hostdata->dev, hostdata->script, MessageCount, count); - + script_patch_16(hostdata, hostdata->script, MessageCount, count); - script_patch_ID(hostdata->dev, hostdata->script, - Device_ID, 1<script, Device_ID, 1<dev, hostdata->script, CommandAddress, + script_patch_32_abs(hostdata, hostdata->script, CommandAddress, slot->pCmd); - script_patch_16(hostdata->dev, hostdata->script, CommandCount, - SCp->cmd_len); + script_patch_16(hostdata, hostdata->script, CommandCount, SCp->cmd_len); /* finally plumb the beginning of the SG list into the script * */ - script_patch_32_abs(hostdata->dev, hostdata->script, + script_patch_32_abs(hostdata, hostdata->script, SGScriptStartAddress, to32bit(&slot->pSG[0].ins)); NCR_700_clear_fifo(SCp->device->host); if(slot->resume_offset == 0) slot->resume_offset = hostdata->pScript; /* now perform all the writebacks and invalidates */ - dma_cache_sync(hostdata->dev, hostdata->msgout, count, DMA_TO_DEVICE); - dma_cache_sync(hostdata->dev, hostdata->msgin, MSG_ARRAY_SIZE, - DMA_FROM_DEVICE); - dma_cache_sync(hostdata->dev, SCp->cmnd, SCp->cmd_len, DMA_TO_DEVICE); - dma_cache_sync(hostdata->dev, hostdata->status, 1, DMA_FROM_DEVICE); + dma_sync_to_dev(hostdata, hostdata->msgout, count); + dma_sync_from_dev(hostdata, hostdata->msgin, MSG_ARRAY_SIZE); + dma_sync_to_dev(hostdata, SCp->cmnd, SCp->cmd_len); + dma_sync_from_dev(hostdata, hostdata->status, 1); /* set the synchronous period/offset */ NCR_700_writeb(NCR_700_get_SXFER(SCp->device), @@ -1626,7 +1647,7 @@ NCR_700_intr(int irq, void *dev_id) slot->SG[i].ins = bS_to_host(SCRIPT_NOP); slot->SG[i].pAddr = 0; } - dma_cache_sync(hostdata->dev, slot->SG, sizeof(slot->SG), DMA_TO_DEVICE); + dma_sync_to_dev(hostdata, slot->SG, sizeof(slot->SG)); /* and pretend we disconnected after * the command phase */ resume_offset = hostdata->pScript + Ent_MsgInDuringData; @@ -1878,7 +1899,7 @@ NCR_700_queuecommand_lck(struct scsi_cmnd *SCp, void (*done)(struct scsi_cmnd *) } slot->SG[i].ins = bS_to_host(SCRIPT_RETURN); slot->SG[i].pAddr = 0; - dma_cache_sync(hostdata->dev, slot->SG, sizeof(slot->SG), DMA_TO_DEVICE); + dma_sync_to_dev(hostdata, slot->SG, sizeof(slot->SG)); DEBUG((" SETTING %p to %x\n", (&slot->pSG[i].ins), slot->SG[i].ins)); diff --git a/drivers/scsi/53c700.h b/drivers/scsi/53c700.h index 05fe439b66afe5..c9f8c497babb3d 100644 --- a/drivers/scsi/53c700.h +++ b/drivers/scsi/53c700.h @@ -209,6 +209,7 @@ struct NCR_700_Host_Parameters { #endif __u32 chip710:1; /* set if really a 710 not 700 */ __u32 burst_length:4; /* set to 0 to disable 710 bursting */ + __u32 noncoherent:1; /* needs to use non-coherent DMA */ /* NOTHING BELOW HERE NEEDS ALTERING */ __u32 fast:1; /* if we can alter the SCSI bus clock @@ -422,33 +423,33 @@ struct NCR_700_Host_Parameters { #define NCR_710_MIN_XFERP 0 #define NCR_700_MIN_PERIOD 25 /* for SDTR message, 100ns */ -#define script_patch_32(dev, script, symbol, value) \ +#define script_patch_32(h, script, symbol, value) \ { \ int i; \ dma_addr_t da = value; \ for(i=0; i< (sizeof(A_##symbol##_used) / sizeof(__u32)); i++) { \ __u32 val = bS_to_cpu((script)[A_##symbol##_used[i]]) + da; \ (script)[A_##symbol##_used[i]] = bS_to_host(val); \ - dma_cache_sync((dev), &(script)[A_##symbol##_used[i]], 4, DMA_TO_DEVICE); \ + dma_sync_to_dev((h), &(script)[A_##symbol##_used[i]], 4); \ DEBUG((" script, patching %s at %d to %pad\n", \ #symbol, A_##symbol##_used[i], &da)); \ } \ } -#define script_patch_32_abs(dev, script, symbol, value) \ +#define script_patch_32_abs(h, script, symbol, value) \ { \ int i; \ dma_addr_t da = value; \ for(i=0; i< (sizeof(A_##symbol##_used) / sizeof(__u32)); i++) { \ (script)[A_##symbol##_used[i]] = bS_to_host(da); \ - dma_cache_sync((dev), &(script)[A_##symbol##_used[i]], 4, DMA_TO_DEVICE); \ + dma_sync_to_dev((h), &(script)[A_##symbol##_used[i]], 4); \ DEBUG((" script, patching %s at %d to %pad\n", \ #symbol, A_##symbol##_used[i], &da)); \ } \ } /* Used for patching the SCSI ID in the SELECT instruction */ -#define script_patch_ID(dev, script, symbol, value) \ +#define script_patch_ID(h, script, symbol, value) \ { \ int i; \ for(i=0; i< (sizeof(A_##symbol##_used) / sizeof(__u32)); i++) { \ @@ -456,13 +457,13 @@ struct NCR_700_Host_Parameters { val &= 0xff00ffff; \ val |= ((value) & 0xff) << 16; \ (script)[A_##symbol##_used[i]] = bS_to_host(val); \ - dma_cache_sync((dev), &(script)[A_##symbol##_used[i]], 4, DMA_TO_DEVICE); \ + dma_sync_to_dev((h), &(script)[A_##symbol##_used[i]], 4); \ DEBUG((" script, patching ID field %s at %d to 0x%x\n", \ #symbol, A_##symbol##_used[i], val)); \ } \ } -#define script_patch_16(dev, script, symbol, value) \ +#define script_patch_16(h, script, symbol, value) \ { \ int i; \ for(i=0; i< (sizeof(A_##symbol##_used) / sizeof(__u32)); i++) { \ @@ -470,7 +471,7 @@ struct NCR_700_Host_Parameters { val &= 0xffff0000; \ val |= ((value) & 0xffff); \ (script)[A_##symbol##_used[i]] = bS_to_host(val); \ - dma_cache_sync((dev), &(script)[A_##symbol##_used[i]], 4, DMA_TO_DEVICE); \ + dma_sync_to_dev((h), &(script)[A_##symbol##_used[i]], 4); \ DEBUG((" script, patching short field %s at %d to 0x%x\n", \ #symbol, A_##symbol##_used[i], val)); \ } \ diff --git a/drivers/scsi/sgiwd93.c b/drivers/scsi/sgiwd93.c index 3bdf0deb8f1529..cf1030c9dda17f 100644 --- a/drivers/scsi/sgiwd93.c +++ b/drivers/scsi/sgiwd93.c @@ -95,7 +95,7 @@ void fill_hpc_entries(struct ip22_hostdata *hd, struct scsi_cmnd *cmd, int din) */ hcp->desc.pbuf = 0; hcp->desc.cntinfo = HPCDMA_EOX; - dma_cache_sync(hd->dev, hd->cpu, + dma_sync_single_for_device(hd->dev, hd->dma, (unsigned long)(hcp + 1) - (unsigned long)hd->cpu, DMA_TO_DEVICE); } @@ -234,8 +234,8 @@ static int sgiwd93_probe(struct platform_device *pdev) hdata = host_to_hostdata(host); hdata->dev = &pdev->dev; - hdata->cpu = dma_alloc_attrs(&pdev->dev, HPC_DMA_SIZE, &hdata->dma, - GFP_KERNEL, DMA_ATTR_NON_CONSISTENT); + hdata->cpu = dma_alloc_noncoherent(&pdev->dev, HPC_DMA_SIZE, + &hdata->dma, DMA_TO_DEVICE, GFP_KERNEL); if (!hdata->cpu) { printk(KERN_WARNING "sgiwd93: Could not allocate memory for " "host %d buffer.\n", unit); @@ -274,8 +274,8 @@ static int sgiwd93_probe(struct platform_device *pdev) out_irq: free_irq(irq, host); out_free: - dma_free_attrs(&pdev->dev, HPC_DMA_SIZE, hdata->cpu, hdata->dma, - DMA_ATTR_NON_CONSISTENT); + dma_free_noncoherent(&pdev->dev, HPC_DMA_SIZE, hdata->cpu, hdata->dma, + DMA_TO_DEVICE); out_put: scsi_host_put(host); out: @@ -291,8 +291,8 @@ static int sgiwd93_remove(struct platform_device *pdev) scsi_remove_host(host); free_irq(pd->irq, host); - dma_free_attrs(&pdev->dev, HPC_DMA_SIZE, hdata->cpu, hdata->dma, - DMA_ATTR_NON_CONSISTENT); + dma_free_noncoherent(&pdev->dev, HPC_DMA_SIZE, hdata->cpu, hdata->dma, + DMA_TO_DEVICE); scsi_host_put(host); return 0; } diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_hw.c b/drivers/staging/media/sunxi/cedrus/cedrus_hw.c index 1744e6fcc99980..bcf050a04ffc45 100644 --- a/drivers/staging/media/sunxi/cedrus/cedrus_hw.c +++ b/drivers/staging/media/sunxi/cedrus/cedrus_hw.c @@ -227,11 +227,17 @@ int cedrus_hw_probe(struct cedrus_dev *dev) * the RAM offset to the physcal addresses. * * This information will eventually be obtained from device-tree. + * + * XXX(hch): this has no business in a driver and needs to move + * to the device tree. */ #ifdef PHYS_PFN_OFFSET - if (!(variant->quirks & CEDRUS_QUIRK_NO_DMA_OFFSET)) - dev->dev->dma_pfn_offset = PHYS_PFN_OFFSET; + if (!(variant->quirks & CEDRUS_QUIRK_NO_DMA_OFFSET)) { + ret = dma_direct_set_offset(dev->dev, PHYS_OFFSET, 0, SZ_4G); + if (ret) + return ret; + } #endif ret = of_reserved_mem_device_init(dev->dev); diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c index f4107b9e8c3862..19ebb542befcb5 100644 --- a/drivers/usb/core/message.c +++ b/drivers/usb/core/message.c @@ -2087,12 +2087,6 @@ int usb_set_configuration(struct usb_device *dev, int configuration) intf->dev.bus = &usb_bus_type; intf->dev.type = &usb_if_device_type; intf->dev.groups = usb_interface_groups; - /* - * Please refer to usb_alloc_dev() to see why we set - * dma_mask and dma_pfn_offset. - */ - intf->dev.dma_mask = dev->dev.dma_mask; - intf->dev.dma_pfn_offset = dev->dev.dma_pfn_offset; INIT_WORK(&intf->reset_ws, __usb_queue_reset_device); intf->minor = -1; device_initialize(&intf->dev); diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c index bafc113f2b3ef3..9b4ac4415f1a47 100644 --- a/drivers/usb/core/usb.c +++ b/drivers/usb/core/usb.c @@ -599,18 +599,6 @@ struct usb_device *usb_alloc_dev(struct usb_device *parent, dev->dev.bus = &usb_bus_type; dev->dev.type = &usb_device_type; dev->dev.groups = usb_device_groups; - /* - * Fake a dma_mask/offset for the USB device: - * We cannot really use the dma-mapping API (dma_alloc_* and - * dma_map_*) for USB devices but instead need to use - * usb_alloc_coherent and pass data in 'urb's, but some subsystems - * manually look into the mask/offset pair to determine whether - * they need bounce buffers. - * Note: calling dma_set_mask() on a USB device would set the - * mask for the entire HCD, so don't do that. - */ - dev->dev.dma_mask = bus->sysdev->dma_mask; - dev->dev.dma_pfn_offset = bus->sysdev->dma_pfn_offset; set_dev_node(&dev->dev, dev_to_node(bus->sysdev)); dev->state = USB_STATE_ATTACHED; dev->lpm_disable_count = 1; diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index 62d6403271450e..2629911c29bbf4 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 39a0f2e0847c95..71ce1b7a23d1cc 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -28,7 +28,7 @@ #include #include -#include +#include #include #include #include @@ -578,4 +578,6 @@ const struct dma_map_ops xen_swiotlb_dma_ops = { .dma_supported = xen_swiotlb_dma_supported, .mmap = dma_common_mmap, .get_sgtable = dma_common_get_sgtable, + .alloc_pages = dma_common_alloc_pages, + .free_pages = dma_common_free_pages, }; diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild index 74b0612601dd1b..62ebdc731ee239 100644 --- a/include/asm-generic/Kbuild +++ b/include/asm-generic/Kbuild @@ -16,7 +16,6 @@ mandatory-y += current.h mandatory-y += delay.h mandatory-y += device.h mandatory-y += div64.h -mandatory-y += dma-contiguous.h mandatory-y += dma-mapping.h mandatory-y += dma.h mandatory-y += emergency-restart.h diff --git a/include/asm-generic/dma-contiguous.h b/include/asm-generic/dma-contiguous.h deleted file mode 100644 index f24b0f9a4f05b6..00000000000000 --- a/include/asm-generic/dma-contiguous.h +++ /dev/null @@ -1,10 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_GENERIC_DMA_CONTIGUOUS_H -#define _ASM_GENERIC_DMA_CONTIGUOUS_H - -#include - -static inline void -dma_contiguous_early_fixup(phys_addr_t base, unsigned long size) { } - -#endif diff --git a/include/linux/cma.h b/include/linux/cma.h index 6ff79fefd01fd2..217999c8a76219 100644 --- a/include/linux/cma.h +++ b/include/linux/cma.h @@ -18,6 +18,8 @@ #endif +#define CMA_MAX_NAME 64 + struct cma; extern unsigned long totalcma_pages; diff --git a/include/linux/device.h b/include/linux/device.h index a8e3a86e35f61e..5ed101be7b2e7d 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -413,7 +413,7 @@ struct dev_links_info { * such descriptors. * @bus_dma_limit: Limit of an upstream bridge or bus which imposes a smaller * DMA limit than the device itself supports. - * @dma_pfn_offset: offset of DMA memory range relatively of RAM + * @dma_range_map: map for DMA memory ranges relative to that of RAM * @dma_parms: A low level driver may set these to teach IOMMU code about * segment limitations. * @dma_pools: Dma pools (if dma'ble device). @@ -508,7 +508,7 @@ struct device { 64 bit addresses for consistent allocations such descriptors. */ u64 bus_dma_limit; /* upstream dma constraint */ - unsigned long dma_pfn_offset; + const struct bus_dma_region *dma_range_map; struct device_dma_parameters *dma_parms; diff --git a/include/linux/dma-contiguous.h b/include/linux/dma-contiguous.h deleted file mode 100644 index 03f8e98e3bcc94..00000000000000 --- a/include/linux/dma-contiguous.h +++ /dev/null @@ -1,176 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef __LINUX_CMA_H -#define __LINUX_CMA_H - -/* - * Contiguous Memory Allocator for DMA mapping framework - * Copyright (c) 2010-2011 by Samsung Electronics. - * Written by: - * Marek Szyprowski - * Michal Nazarewicz - */ - -/* - * Contiguous Memory Allocator - * - * The Contiguous Memory Allocator (CMA) makes it possible to - * allocate big contiguous chunks of memory after the system has - * booted. - * - * Why is it needed? - * - * Various devices on embedded systems have no scatter-getter and/or - * IO map support and require contiguous blocks of memory to - * operate. They include devices such as cameras, hardware video - * coders, etc. - * - * Such devices often require big memory buffers (a full HD frame - * is, for instance, more then 2 mega pixels large, i.e. more than 6 - * MB of memory), which makes mechanisms such as kmalloc() or - * alloc_page() ineffective. - * - * At the same time, a solution where a big memory region is - * reserved for a device is suboptimal since often more memory is - * reserved then strictly required and, moreover, the memory is - * inaccessible to page system even if device drivers don't use it. - * - * CMA tries to solve this issue by operating on memory regions - * where only movable pages can be allocated from. This way, kernel - * can use the memory for pagecache and when device driver requests - * it, allocated pages can be migrated. - * - * Driver usage - * - * CMA should not be used by the device drivers directly. It is - * only a helper framework for dma-mapping subsystem. - * - * For more information, see kernel-docs in kernel/dma/contiguous.c - */ - -#ifdef __KERNEL__ - -#include -#include - -struct cma; -struct page; - -#ifdef CONFIG_DMA_CMA - -extern struct cma *dma_contiguous_default_area; - -static inline struct cma *dev_get_cma_area(struct device *dev) -{ - if (dev && dev->cma_area) - return dev->cma_area; - return dma_contiguous_default_area; -} - -static inline void dev_set_cma_area(struct device *dev, struct cma *cma) -{ - if (dev) - dev->cma_area = cma; -} - -static inline void dma_contiguous_set_default(struct cma *cma) -{ - dma_contiguous_default_area = cma; -} - -void dma_contiguous_reserve(phys_addr_t addr_limit); - -int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base, - phys_addr_t limit, struct cma **res_cma, - bool fixed); - -/** - * dma_declare_contiguous() - reserve area for contiguous memory handling - * for particular device - * @dev: Pointer to device structure. - * @size: Size of the reserved memory. - * @base: Start address of the reserved memory (optional, 0 for any). - * @limit: End address of the reserved memory (optional, 0 for any). - * - * This function reserves memory for specified device. It should be - * called by board specific code when early allocator (memblock or bootmem) - * is still activate. - */ - -static inline int dma_declare_contiguous(struct device *dev, phys_addr_t size, - phys_addr_t base, phys_addr_t limit) -{ - struct cma *cma; - int ret; - ret = dma_contiguous_reserve_area(size, base, limit, &cma, true); - if (ret == 0) - dev_set_cma_area(dev, cma); - - return ret; -} - -struct page *dma_alloc_from_contiguous(struct device *dev, size_t count, - unsigned int order, bool no_warn); -bool dma_release_from_contiguous(struct device *dev, struct page *pages, - int count); -struct page *dma_alloc_contiguous(struct device *dev, size_t size, gfp_t gfp); -void dma_free_contiguous(struct device *dev, struct page *page, size_t size); - -#else - -static inline struct cma *dev_get_cma_area(struct device *dev) -{ - return NULL; -} - -static inline void dev_set_cma_area(struct device *dev, struct cma *cma) { } - -static inline void dma_contiguous_set_default(struct cma *cma) { } - -static inline void dma_contiguous_reserve(phys_addr_t limit) { } - -static inline int dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base, - phys_addr_t limit, struct cma **res_cma, - bool fixed) -{ - return -ENOSYS; -} - -static inline -int dma_declare_contiguous(struct device *dev, phys_addr_t size, - phys_addr_t base, phys_addr_t limit) -{ - return -ENOSYS; -} - -static inline -struct page *dma_alloc_from_contiguous(struct device *dev, size_t count, - unsigned int order, bool no_warn) -{ - return NULL; -} - -static inline -bool dma_release_from_contiguous(struct device *dev, struct page *pages, - int count) -{ - return false; -} - -/* Use fallback alloc() and free() when CONFIG_DMA_CMA=n */ -static inline struct page *dma_alloc_contiguous(struct device *dev, size_t size, - gfp_t gfp) -{ - return NULL; -} - -static inline void dma_free_contiguous(struct device *dev, struct page *page, - size_t size) -{ - __free_pages(page, get_order(size)); -} - -#endif - -#endif - -#endif diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index 6e87225600ae35..18aade195884d7 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -7,64 +7,102 @@ #define _LINUX_DMA_DIRECT_H 1 #include -#include +#include #include /* for min_low_pfn */ #include #include extern unsigned int zone_dma_bits; -#ifdef CONFIG_ARCH_HAS_PHYS_TO_DMA -#include -#else -static inline dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) +/* + * Record the mapping of CPU physical to DMA addresses for a given region. + */ +struct bus_dma_region { + phys_addr_t cpu_start; + dma_addr_t dma_start; + u64 size; + u64 offset; +}; + +static inline dma_addr_t translate_phys_to_dma(struct device *dev, + phys_addr_t paddr) { - dma_addr_t dev_addr = (dma_addr_t)paddr; + const struct bus_dma_region *m; + + for (m = dev->dma_range_map; m->size; m++) + if (paddr >= m->cpu_start && paddr - m->cpu_start < m->size) + return (dma_addr_t)paddr - m->offset; - return dev_addr - ((dma_addr_t)dev->dma_pfn_offset << PAGE_SHIFT); + /* make sure dma_capable fails when no translation is available */ + return DMA_MAPPING_ERROR; } -static inline phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dev_addr) +static inline phys_addr_t translate_dma_to_phys(struct device *dev, + dma_addr_t dma_addr) { - phys_addr_t paddr = (phys_addr_t)dev_addr; + const struct bus_dma_region *m; - return paddr + ((phys_addr_t)dev->dma_pfn_offset << PAGE_SHIFT); + for (m = dev->dma_range_map; m->size; m++) + if (dma_addr >= m->dma_start && dma_addr - m->dma_start < m->size) + return (phys_addr_t)dma_addr + m->offset; + + return (phys_addr_t)-1; } -#endif /* !CONFIG_ARCH_HAS_PHYS_TO_DMA */ -#ifdef CONFIG_ARCH_HAS_FORCE_DMA_UNENCRYPTED -bool force_dma_unencrypted(struct device *dev); +#ifdef CONFIG_ARCH_HAS_PHYS_TO_DMA +#include +#ifndef phys_to_dma_unencrypted +#define phys_to_dma_unencrypted phys_to_dma +#endif #else -static inline bool force_dma_unencrypted(struct device *dev) +static inline dma_addr_t phys_to_dma_unencrypted(struct device *dev, + phys_addr_t paddr) { - return false; + if (dev->dma_range_map) + return translate_phys_to_dma(dev, paddr); + return paddr; } -#endif /* CONFIG_ARCH_HAS_FORCE_DMA_UNENCRYPTED */ /* * If memory encryption is supported, phys_to_dma will set the memory encryption - * bit in the DMA address, and dma_to_phys will clear it. The raw __phys_to_dma - * and __dma_to_phys versions should only be used on non-encrypted memory for - * special occasions like DMA coherent buffers. + * bit in the DMA address, and dma_to_phys will clear it. + * phys_to_dma_unencrypted is for use on special unencrypted memory like swiotlb + * buffers. */ static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) { - return __sme_set(__phys_to_dma(dev, paddr)); + return __sme_set(phys_to_dma_unencrypted(dev, paddr)); } -static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) +static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dma_addr) { - return __sme_clr(__dma_to_phys(dev, daddr)); + phys_addr_t paddr; + + if (dev->dma_range_map) + paddr = translate_dma_to_phys(dev, dma_addr); + else + paddr = dma_addr; + + return __sme_clr(paddr); +} +#endif /* !CONFIG_ARCH_HAS_PHYS_TO_DMA */ + +#ifdef CONFIG_ARCH_HAS_FORCE_DMA_UNENCRYPTED +bool force_dma_unencrypted(struct device *dev); +#else +static inline bool force_dma_unencrypted(struct device *dev) +{ + return false; } +#endif /* CONFIG_ARCH_HAS_FORCE_DMA_UNENCRYPTED */ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size, bool is_ram) { dma_addr_t end = addr + size - 1; - if (!dev->dma_mask) + if (addr == DMA_MAPPING_ERROR) return false; - if (is_ram && !IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT) && min(addr, end) < phys_to_dma(dev, PFN_PHYS(min_low_pfn))) return false; @@ -77,115 +115,13 @@ void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs); void dma_direct_free(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs); -void *dma_direct_alloc_pages(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs); -void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr, - dma_addr_t dma_addr, unsigned long attrs); -int dma_direct_get_sgtable(struct device *dev, struct sg_table *sgt, - void *cpu_addr, dma_addr_t dma_addr, size_t size, - unsigned long attrs); -bool dma_direct_can_mmap(struct device *dev); -int dma_direct_mmap(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t dma_addr, size_t size, - unsigned long attrs); +struct page *dma_direct_alloc_pages(struct device *dev, size_t size, + dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp); +void dma_direct_free_pages(struct device *dev, size_t size, + struct page *page, dma_addr_t dma_addr, + enum dma_data_direction dir); int dma_direct_supported(struct device *dev, u64 mask); -bool dma_direct_need_sync(struct device *dev, dma_addr_t dma_addr); -int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, - enum dma_data_direction dir, unsigned long attrs); dma_addr_t dma_direct_map_resource(struct device *dev, phys_addr_t paddr, size_t size, enum dma_data_direction dir, unsigned long attrs); -size_t dma_direct_max_mapping_size(struct device *dev); -#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ - defined(CONFIG_SWIOTLB) -void dma_direct_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction dir); -#else -static inline void dma_direct_sync_sg_for_device(struct device *dev, - struct scatterlist *sgl, int nents, enum dma_data_direction dir) -{ -} -#endif - -#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ - defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) || \ - defined(CONFIG_SWIOTLB) -void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction dir, unsigned long attrs); -void dma_direct_sync_sg_for_cpu(struct device *dev, - struct scatterlist *sgl, int nents, enum dma_data_direction dir); -#else -static inline void dma_direct_unmap_sg(struct device *dev, - struct scatterlist *sgl, int nents, enum dma_data_direction dir, - unsigned long attrs) -{ -} -static inline void dma_direct_sync_sg_for_cpu(struct device *dev, - struct scatterlist *sgl, int nents, enum dma_data_direction dir) -{ -} -#endif - -static inline void dma_direct_sync_single_for_device(struct device *dev, - dma_addr_t addr, size_t size, enum dma_data_direction dir) -{ - phys_addr_t paddr = dma_to_phys(dev, addr); - - if (unlikely(is_swiotlb_buffer(paddr))) - swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_DEVICE); - - if (!dev_is_dma_coherent(dev)) - arch_sync_dma_for_device(paddr, size, dir); -} - -static inline void dma_direct_sync_single_for_cpu(struct device *dev, - dma_addr_t addr, size_t size, enum dma_data_direction dir) -{ - phys_addr_t paddr = dma_to_phys(dev, addr); - - if (!dev_is_dma_coherent(dev)) { - arch_sync_dma_for_cpu(paddr, size, dir); - arch_sync_dma_for_cpu_all(); - } - - if (unlikely(is_swiotlb_buffer(paddr))) - swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_CPU); -} - -static inline dma_addr_t dma_direct_map_page(struct device *dev, - struct page *page, unsigned long offset, size_t size, - enum dma_data_direction dir, unsigned long attrs) -{ - phys_addr_t phys = page_to_phys(page) + offset; - dma_addr_t dma_addr = phys_to_dma(dev, phys); - - if (unlikely(swiotlb_force == SWIOTLB_FORCE)) - return swiotlb_map(dev, phys, size, dir, attrs); - - if (unlikely(!dma_capable(dev, dma_addr, size, true))) { - if (swiotlb_force != SWIOTLB_NO_FORCE) - return swiotlb_map(dev, phys, size, dir, attrs); - - dev_WARN_ONCE(dev, 1, - "DMA addr %pad+%zu overflow (mask %llx, bus limit %llx).\n", - &dma_addr, size, *dev->dma_mask, dev->bus_dma_limit); - return DMA_MAPPING_ERROR; - } - - if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - arch_sync_dma_for_device(phys, size, dir); - return dma_addr; -} - -static inline void dma_direct_unmap_page(struct device *dev, dma_addr_t addr, - size_t size, enum dma_data_direction dir, unsigned long attrs) -{ - phys_addr_t phys = dma_to_phys(dev, addr); - - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - dma_direct_sync_single_for_cpu(dev, addr, size, dir); - - if (unlikely(is_swiotlb_buffer(phys))) - swiotlb_tbl_unmap_single(dev, phys, size, size, dir, attrs); -} #endif /* _LINUX_DMA_DIRECT_H */ diff --git a/include/linux/dma-direction.h b/include/linux/dma-direction.h index 9c96e30e6a0bb0..a2fe4571bc9279 100644 --- a/include/linux/dma-direction.h +++ b/include/linux/dma-direction.h @@ -9,4 +9,10 @@ enum dma_data_direction { DMA_NONE = 3, }; -#endif +static inline int valid_dma_direction(enum dma_data_direction dir) +{ + return dir == DMA_BIDIRECTIONAL || dir == DMA_TO_DEVICE || + dir == DMA_FROM_DEVICE; +} + +#endif /* _LINUX_DMA_DIRECTION_H */ diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h new file mode 100644 index 00000000000000..8029f7e0414521 --- /dev/null +++ b/include/linux/dma-map-ops.h @@ -0,0 +1,326 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This header is for implementations of dma_map_ops and related code. + * It should not be included in drivers just using the DMA API. + */ +#ifndef _LINUX_DMA_MAP_OPS_H +#define _LINUX_DMA_MAP_OPS_H + +#include +#include + +struct cma; + +struct dma_map_ops { + void *(*alloc)(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t gfp, + unsigned long attrs); + void (*free)(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_handle, unsigned long attrs); + struct page *(*alloc_pages)(struct device *dev, size_t size, + dma_addr_t *dma_handle, enum dma_data_direction dir, + gfp_t gfp); + void (*free_pages)(struct device *dev, size_t size, struct page *vaddr, + dma_addr_t dma_handle, enum dma_data_direction dir); + void *(*alloc_noncoherent)(struct device *dev, size_t size, + dma_addr_t *dma_handle, enum dma_data_direction dir, + gfp_t gfp); + void (*free_noncoherent)(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_handle, enum dma_data_direction dir); + int (*mmap)(struct device *, struct vm_area_struct *, + void *, dma_addr_t, size_t, unsigned long attrs); + + int (*get_sgtable)(struct device *dev, struct sg_table *sgt, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs); + + dma_addr_t (*map_page)(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, unsigned long attrs); + void (*unmap_page)(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction dir, + unsigned long attrs); + /* + * map_sg returns 0 on error and a value > 0 on success. + * It should never return a value < 0. + */ + int (*map_sg)(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, unsigned long attrs); + void (*unmap_sg)(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, unsigned long attrs); + dma_addr_t (*map_resource)(struct device *dev, phys_addr_t phys_addr, + size_t size, enum dma_data_direction dir, + unsigned long attrs); + void (*unmap_resource)(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction dir, + unsigned long attrs); + void (*sync_single_for_cpu)(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction dir); + void (*sync_single_for_device)(struct device *dev, + dma_addr_t dma_handle, size_t size, + enum dma_data_direction dir); + void (*sync_sg_for_cpu)(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir); + void (*sync_sg_for_device)(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir); + void (*cache_sync)(struct device *dev, void *vaddr, size_t size, + enum dma_data_direction direction); + int (*dma_supported)(struct device *dev, u64 mask); + u64 (*get_required_mask)(struct device *dev); + size_t (*max_mapping_size)(struct device *dev); + unsigned long (*get_merge_boundary)(struct device *dev); +}; + +#ifdef CONFIG_DMA_OPS +#include + +static inline const struct dma_map_ops *get_dma_ops(struct device *dev) +{ + if (dev->dma_ops) + return dev->dma_ops; + return get_arch_dma_ops(dev->bus); +} + +static inline void set_dma_ops(struct device *dev, + const struct dma_map_ops *dma_ops) +{ + dev->dma_ops = dma_ops; +} +#else /* CONFIG_DMA_OPS */ +static inline const struct dma_map_ops *get_dma_ops(struct device *dev) +{ + return NULL; +} +static inline void set_dma_ops(struct device *dev, + const struct dma_map_ops *dma_ops) +{ +} +#endif /* CONFIG_DMA_OPS */ + +#ifdef CONFIG_DMA_CMA +extern struct cma *dma_contiguous_default_area; + +static inline struct cma *dev_get_cma_area(struct device *dev) +{ + if (dev && dev->cma_area) + return dev->cma_area; + return dma_contiguous_default_area; +} + +void dma_contiguous_reserve(phys_addr_t addr_limit); +int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base, + phys_addr_t limit, struct cma **res_cma, bool fixed); + +struct page *dma_alloc_from_contiguous(struct device *dev, size_t count, + unsigned int order, bool no_warn); +bool dma_release_from_contiguous(struct device *dev, struct page *pages, + int count); +struct page *dma_alloc_contiguous(struct device *dev, size_t size, gfp_t gfp); +void dma_free_contiguous(struct device *dev, struct page *page, size_t size); + +void dma_contiguous_early_fixup(phys_addr_t base, unsigned long size); +#else /* CONFIG_DMA_CMA */ +static inline struct cma *dev_get_cma_area(struct device *dev) +{ + return NULL; +} +static inline void dma_contiguous_reserve(phys_addr_t limit) +{ +} +static inline int dma_contiguous_reserve_area(phys_addr_t size, + phys_addr_t base, phys_addr_t limit, struct cma **res_cma, + bool fixed) +{ + return -ENOSYS; +} +static inline struct page *dma_alloc_from_contiguous(struct device *dev, + size_t count, unsigned int order, bool no_warn) +{ + return NULL; +} +static inline bool dma_release_from_contiguous(struct device *dev, + struct page *pages, int count) +{ + return false; +} +/* Use fallback alloc() and free() when CONFIG_DMA_CMA=n */ +static inline struct page *dma_alloc_contiguous(struct device *dev, size_t size, + gfp_t gfp) +{ + return NULL; +} +static inline void dma_free_contiguous(struct device *dev, struct page *page, + size_t size) +{ + __free_pages(page, get_order(size)); +} +#endif /* CONFIG_DMA_CMA*/ + +#ifdef CONFIG_DMA_PERNUMA_CMA +void dma_pernuma_cma_reserve(void); +#else +static inline void dma_pernuma_cma_reserve(void) { } +#endif /* CONFIG_DMA_PERNUMA_CMA */ + +#ifdef CONFIG_DMA_DECLARE_COHERENT +int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr, + dma_addr_t device_addr, size_t size); +int dma_alloc_from_dev_coherent(struct device *dev, ssize_t size, + dma_addr_t *dma_handle, void **ret); +int dma_release_from_dev_coherent(struct device *dev, int order, void *vaddr); +int dma_mmap_from_dev_coherent(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, size_t size, int *ret); + +void *dma_alloc_from_global_coherent(struct device *dev, ssize_t size, + dma_addr_t *dma_handle); +int dma_release_from_global_coherent(int order, void *vaddr); +int dma_mmap_from_global_coherent(struct vm_area_struct *vma, void *cpu_addr, + size_t size, int *ret); + +#else +static inline int dma_declare_coherent_memory(struct device *dev, + phys_addr_t phys_addr, dma_addr_t device_addr, size_t size) +{ + return -ENOSYS; +} +#define dma_alloc_from_dev_coherent(dev, size, handle, ret) (0) +#define dma_release_from_dev_coherent(dev, order, vaddr) (0) +#define dma_mmap_from_dev_coherent(dev, vma, vaddr, order, ret) (0) + +static inline void *dma_alloc_from_global_coherent(struct device *dev, + ssize_t size, dma_addr_t *dma_handle) +{ + return NULL; +} +static inline int dma_release_from_global_coherent(int order, void *vaddr) +{ + return 0; +} +static inline int dma_mmap_from_global_coherent(struct vm_area_struct *vma, + void *cpu_addr, size_t size, int *ret) +{ + return 0; +} +#endif /* CONFIG_DMA_DECLARE_COHERENT */ + +#ifdef CONFIG_ARCH_HAS_DMA_COHERENCE_H +#include +#elif defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ + defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ + defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) +static inline bool dev_is_dma_coherent(struct device *dev) +{ + return dev->dma_coherent; +} +#else +static inline bool dev_is_dma_coherent(struct device *dev) +{ + return true; +} +#endif /* CONFIG_ARCH_HAS_DMA_COHERENCE_H */ + +void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, + gfp_t gfp, unsigned long attrs); +void arch_dma_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t dma_addr, unsigned long attrs); + +#ifdef CONFIG_MMU +/* + * Page protection so that devices that can't snoop CPU caches can use the + * memory coherently. We default to pgprot_noncached which is usually used + * for ioremap as a safe bet, but architectures can override this with less + * strict semantics if possible. + */ +#ifndef pgprot_dmacoherent +#define pgprot_dmacoherent(prot) pgprot_noncached(prot) +#endif + +pgprot_t dma_pgprot(struct device *dev, pgprot_t prot, unsigned long attrs); +#else +static inline pgprot_t dma_pgprot(struct device *dev, pgprot_t prot, + unsigned long attrs) +{ + return prot; /* no protection bits supported without page tables */ +} +#endif /* CONFIG_MMU */ + +#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE +void arch_sync_dma_for_device(phys_addr_t paddr, size_t size, + enum dma_data_direction dir); +#else +static inline void arch_sync_dma_for_device(phys_addr_t paddr, size_t size, + enum dma_data_direction dir) +{ +} +#endif /* ARCH_HAS_SYNC_DMA_FOR_DEVICE */ + +#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU +void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, + enum dma_data_direction dir); +#else +static inline void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, + enum dma_data_direction dir) +{ +} +#endif /* ARCH_HAS_SYNC_DMA_FOR_CPU */ + +#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL +void arch_sync_dma_for_cpu_all(void); +#else +static inline void arch_sync_dma_for_cpu_all(void) +{ +} +#endif /* CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL */ + +#ifdef CONFIG_ARCH_HAS_DMA_PREP_COHERENT +void arch_dma_prep_coherent(struct page *page, size_t size); +#else +static inline void arch_dma_prep_coherent(struct page *page, size_t size) +{ +} +#endif /* CONFIG_ARCH_HAS_DMA_PREP_COHERENT */ + +#ifdef CONFIG_ARCH_HAS_DMA_MARK_CLEAN +void arch_dma_mark_clean(phys_addr_t paddr, size_t size); +#else +static inline void arch_dma_mark_clean(phys_addr_t paddr, size_t size) +{ +} +#endif /* ARCH_HAS_DMA_MARK_CLEAN */ + +void *arch_dma_set_uncached(void *addr, size_t size); +void arch_dma_clear_uncached(void *addr, size_t size); + +#ifdef CONFIG_ARCH_HAS_SETUP_DMA_OPS +void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, + const struct iommu_ops *iommu, bool coherent); +#else +static inline void arch_setup_dma_ops(struct device *dev, u64 dma_base, + u64 size, const struct iommu_ops *iommu, bool coherent) +{ +} +#endif /* CONFIG_ARCH_HAS_SETUP_DMA_OPS */ + +#ifdef CONFIG_ARCH_HAS_TEARDOWN_DMA_OPS +void arch_teardown_dma_ops(struct device *dev); +#else +static inline void arch_teardown_dma_ops(struct device *dev) +{ +} +#endif /* CONFIG_ARCH_HAS_TEARDOWN_DMA_OPS */ + +#ifdef CONFIG_DMA_API_DEBUG +void dma_debug_add_bus(struct bus_type *bus); +void debug_dma_dump_mappings(struct device *dev); +#else +static inline void dma_debug_add_bus(struct bus_type *bus) +{ +} +static inline void debug_dma_dump_mappings(struct device *dev) +{ +} +#endif /* CONFIG_DMA_API_DEBUG */ + +extern const struct dma_map_ops dma_dummy_ops; + +#endif /* _LINUX_DMA_MAP_OPS_H */ diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 52635e91143b25..3f029afdc9dc6a 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include @@ -27,11 +26,6 @@ * buffered to improve performance. */ #define DMA_ATTR_WRITE_COMBINE (1UL << 2) -/* - * DMA_ATTR_NON_CONSISTENT: Lets the platform to choose to return either - * consistent or non-consistent memory as it sees fit. - */ -#define DMA_ATTR_NON_CONSISTENT (1UL << 3) /* * DMA_ATTR_NO_KERNEL_MAPPING: Lets the platform to avoid creating a kernel * virtual mapping for the allocated buffer. @@ -68,153 +62,35 @@ #define DMA_ATTR_PRIVILEGED (1UL << 9) /* - * A dma_addr_t can hold any valid DMA or bus address for the platform. - * It can be given to a device to use as a DMA source or target. A CPU cannot - * reference a dma_addr_t directly because there may be translation between - * its physical address space and the bus address space. + * A dma_addr_t can hold any valid DMA or bus address for the platform. It can + * be given to a device to use as a DMA source or target. It is specific to a + * given device and there may be a translation between the CPU physical address + * space and the bus address space. + * + * DMA_MAPPING_ERROR is the magic error code if a mapping failed. It should not + * be used directly in drivers, but checked for using dma_mapping_error() + * instead. */ -struct dma_map_ops { - void* (*alloc)(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp, - unsigned long attrs); - void (*free)(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle, - unsigned long attrs); - int (*mmap)(struct device *, struct vm_area_struct *, - void *, dma_addr_t, size_t, - unsigned long attrs); - - int (*get_sgtable)(struct device *dev, struct sg_table *sgt, void *, - dma_addr_t, size_t, unsigned long attrs); - - dma_addr_t (*map_page)(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction dir, - unsigned long attrs); - void (*unmap_page)(struct device *dev, dma_addr_t dma_handle, - size_t size, enum dma_data_direction dir, - unsigned long attrs); - /* - * map_sg returns 0 on error and a value > 0 on success. - * It should never return a value < 0. - */ - int (*map_sg)(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction dir, - unsigned long attrs); - void (*unmap_sg)(struct device *dev, - struct scatterlist *sg, int nents, - enum dma_data_direction dir, - unsigned long attrs); - dma_addr_t (*map_resource)(struct device *dev, phys_addr_t phys_addr, - size_t size, enum dma_data_direction dir, - unsigned long attrs); - void (*unmap_resource)(struct device *dev, dma_addr_t dma_handle, - size_t size, enum dma_data_direction dir, - unsigned long attrs); - void (*sync_single_for_cpu)(struct device *dev, - dma_addr_t dma_handle, size_t size, - enum dma_data_direction dir); - void (*sync_single_for_device)(struct device *dev, - dma_addr_t dma_handle, size_t size, - enum dma_data_direction dir); - void (*sync_sg_for_cpu)(struct device *dev, - struct scatterlist *sg, int nents, - enum dma_data_direction dir); - void (*sync_sg_for_device)(struct device *dev, - struct scatterlist *sg, int nents, - enum dma_data_direction dir); - void (*cache_sync)(struct device *dev, void *vaddr, size_t size, - enum dma_data_direction direction); - int (*dma_supported)(struct device *dev, u64 mask); - u64 (*get_required_mask)(struct device *dev); - size_t (*max_mapping_size)(struct device *dev); - unsigned long (*get_merge_boundary)(struct device *dev); -}; - #define DMA_MAPPING_ERROR (~(dma_addr_t)0) -extern const struct dma_map_ops dma_virt_ops; -extern const struct dma_map_ops dma_dummy_ops; - #define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL<<(n))-1)) -#define DMA_MASK_NONE 0x0ULL - -static inline int valid_dma_direction(int dma_direction) -{ - return ((dma_direction == DMA_BIDIRECTIONAL) || - (dma_direction == DMA_TO_DEVICE) || - (dma_direction == DMA_FROM_DEVICE)); -} - -#ifdef CONFIG_DMA_DECLARE_COHERENT -/* - * These three functions are only for dma allocator. - * Don't use them in device drivers. - */ -int dma_alloc_from_dev_coherent(struct device *dev, ssize_t size, - dma_addr_t *dma_handle, void **ret); -int dma_release_from_dev_coherent(struct device *dev, int order, void *vaddr); - -int dma_mmap_from_dev_coherent(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, size_t size, int *ret); - -void *dma_alloc_from_global_coherent(struct device *dev, ssize_t size, dma_addr_t *dma_handle); -int dma_release_from_global_coherent(int order, void *vaddr); -int dma_mmap_from_global_coherent(struct vm_area_struct *vma, void *cpu_addr, - size_t size, int *ret); - +#ifdef CONFIG_DMA_API_DEBUG +void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr); +void debug_dma_map_single(struct device *dev, const void *addr, + unsigned long len); #else -#define dma_alloc_from_dev_coherent(dev, size, handle, ret) (0) -#define dma_release_from_dev_coherent(dev, order, vaddr) (0) -#define dma_mmap_from_dev_coherent(dev, vma, vaddr, order, ret) (0) - -static inline void *dma_alloc_from_global_coherent(struct device *dev, ssize_t size, - dma_addr_t *dma_handle) -{ - return NULL; -} - -static inline int dma_release_from_global_coherent(int order, void *vaddr) +static inline void debug_dma_mapping_error(struct device *dev, + dma_addr_t dma_addr) { - return 0; } - -static inline int dma_mmap_from_global_coherent(struct vm_area_struct *vma, - void *cpu_addr, size_t size, - int *ret) +static inline void debug_dma_map_single(struct device *dev, const void *addr, + unsigned long len) { - return 0; } -#endif /* CONFIG_DMA_DECLARE_COHERENT */ +#endif /* CONFIG_DMA_API_DEBUG */ #ifdef CONFIG_HAS_DMA -#include - -#ifdef CONFIG_DMA_OPS -static inline const struct dma_map_ops *get_dma_ops(struct device *dev) -{ - if (dev->dma_ops) - return dev->dma_ops; - return get_arch_dma_ops(dev->bus); -} - -static inline void set_dma_ops(struct device *dev, - const struct dma_map_ops *dma_ops) -{ - dev->dma_ops = dma_ops; -} -#else /* CONFIG_DMA_OPS */ -static inline const struct dma_map_ops *get_dma_ops(struct device *dev) -{ - return NULL; -} -static inline void set_dma_ops(struct device *dev, - const struct dma_map_ops *dma_ops) -{ -} -#endif /* CONFIG_DMA_OPS */ - static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { debug_dma_mapping_error(dev, dma_addr); @@ -254,8 +130,6 @@ void *dmam_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs); void dmam_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle); -void dma_cache_sync(struct device *dev, void *vaddr, size_t size, - enum dma_data_direction dir); int dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs); @@ -339,10 +213,6 @@ static inline void dmam_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle) { } -static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size, - enum dma_data_direction dir) -{ -} static inline int dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs) @@ -389,6 +259,15 @@ static inline unsigned long dma_get_merge_boundary(struct device *dev) } #endif /* CONFIG_HAS_DMA */ +struct page *dma_alloc_pages(struct device *dev, size_t size, + dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp); +void dma_free_pages(struct device *dev, size_t size, struct page *page, + dma_addr_t dma_handle, enum dma_data_direction dir); +void *dma_alloc_noncoherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp); +void dma_free_noncoherent(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_handle, enum dma_data_direction dir); + static inline dma_addr_t dma_map_single_attrs(struct device *dev, void *ptr, size_t size, enum dma_data_direction dir, unsigned long attrs) { @@ -513,7 +392,10 @@ static inline void dma_sync_sgtable_for_device(struct device *dev, extern int dma_common_mmap(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs); - +struct page *dma_common_alloc_pages(struct device *dev, size_t size, + dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp); +void dma_common_free_pages(struct device *dev, size_t size, struct page *vaddr, + dma_addr_t dma_handle, enum dma_data_direction dir); struct page **dma_common_find_pages(void *cpu_addr); void *dma_common_contiguous_remap(struct page *page, size_t size, pgprot_t prot, const void *caller); @@ -591,24 +473,6 @@ static inline bool dma_addressing_limited(struct device *dev) dma_get_required_mask(dev); } -#ifdef CONFIG_ARCH_HAS_SETUP_DMA_OPS -void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, - const struct iommu_ops *iommu, bool coherent); -#else -static inline void arch_setup_dma_ops(struct device *dev, u64 dma_base, - u64 size, const struct iommu_ops *iommu, bool coherent) -{ -} -#endif /* CONFIG_ARCH_HAS_SETUP_DMA_OPS */ - -#ifdef CONFIG_ARCH_HAS_TEARDOWN_DMA_OPS -void arch_teardown_dma_ops(struct device *dev); -#else -static inline void arch_teardown_dma_ops(struct device *dev) -{ -} -#endif /* CONFIG_ARCH_HAS_TEARDOWN_DMA_OPS */ - static inline unsigned int dma_get_max_seg_size(struct device *dev) { if (dev->dma_parms && dev->dma_parms->max_segment_size) @@ -629,7 +493,26 @@ static inline unsigned long dma_get_seg_boundary(struct device *dev) { if (dev->dma_parms && dev->dma_parms->segment_boundary_mask) return dev->dma_parms->segment_boundary_mask; - return DMA_BIT_MASK(32); + return ULONG_MAX; +} + +/** + * dma_get_seg_boundary_nr_pages - return the segment boundary in "page" units + * @dev: device to guery the boundary for + * @page_shift: ilog() of the IOMMU page size + * + * Return the segment boundary in IOMMU page units (which may be different from + * the CPU page size) for the passed in device. + * + * If @dev is NULL a boundary of U32_MAX is assumed, this case is just for + * non-DMA API callers. + */ +static inline unsigned long dma_get_seg_boundary_nr_pages(struct device *dev, + unsigned int page_shift) +{ + if (!dev) + return (U32_MAX >> page_shift) + 1; + return (dma_get_seg_boundary(dev) >> page_shift) + 1; } static inline int dma_set_seg_boundary(struct device *dev, unsigned long mask) @@ -649,18 +532,6 @@ static inline int dma_get_cache_alignment(void) return 1; } -#ifdef CONFIG_DMA_DECLARE_COHERENT -int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr, - dma_addr_t device_addr, size_t size); -#else -static inline int -dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr, - dma_addr_t device_addr, size_t size) -{ - return -ENOSYS; -} -#endif /* CONFIG_DMA_DECLARE_COHERENT */ - static inline void *dmam_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp) { @@ -711,4 +582,13 @@ static inline int dma_mmap_wc(struct device *dev, #define dma_unmap_len_set(PTR, LEN_NAME, VAL) do { } while (0) #endif -#endif +/* + * Legacy interface to set up the dma offset map. Drivers really should not + * actually use it, but we have a few legacy cases left. + */ +int dma_direct_set_offset(struct device *dev, phys_addr_t cpu_start, + dma_addr_t dma_start, u64 size); + +extern const struct dma_map_ops dma_virt_ops; + +#endif /* _LINUX_DMA_MAPPING_H */ diff --git a/include/linux/dma-noncoherent.h b/include/linux/dma-noncoherent.h deleted file mode 100644 index ca09a4e07d2d3d..00000000000000 --- a/include/linux/dma-noncoherent.h +++ /dev/null @@ -1,114 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_DMA_NONCOHERENT_H -#define _LINUX_DMA_NONCOHERENT_H 1 - -#include -#include - -#ifdef CONFIG_ARCH_HAS_DMA_COHERENCE_H -#include -#elif defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ - defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ - defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) -static inline bool dev_is_dma_coherent(struct device *dev) -{ - return dev->dma_coherent; -} -#else -static inline bool dev_is_dma_coherent(struct device *dev) -{ - return true; -} -#endif /* CONFIG_ARCH_HAS_DMA_COHERENCE_H */ - -/* - * Check if an allocation needs to be marked uncached to be coherent. - */ -static __always_inline bool dma_alloc_need_uncached(struct device *dev, - unsigned long attrs) -{ - if (dev_is_dma_coherent(dev)) - return false; - if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) - return false; - if (IS_ENABLED(CONFIG_DMA_NONCOHERENT_CACHE_SYNC) && - (attrs & DMA_ATTR_NON_CONSISTENT)) - return false; - return true; -} - -void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, - gfp_t gfp, unsigned long attrs); -void arch_dma_free(struct device *dev, size_t size, void *cpu_addr, - dma_addr_t dma_addr, unsigned long attrs); - -#ifdef CONFIG_MMU -/* - * Page protection so that devices that can't snoop CPU caches can use the - * memory coherently. We default to pgprot_noncached which is usually used - * for ioremap as a safe bet, but architectures can override this with less - * strict semantics if possible. - */ -#ifndef pgprot_dmacoherent -#define pgprot_dmacoherent(prot) pgprot_noncached(prot) -#endif - -pgprot_t dma_pgprot(struct device *dev, pgprot_t prot, unsigned long attrs); -#else -static inline pgprot_t dma_pgprot(struct device *dev, pgprot_t prot, - unsigned long attrs) -{ - return prot; /* no protection bits supported without page tables */ -} -#endif /* CONFIG_MMU */ - -#ifdef CONFIG_DMA_NONCOHERENT_CACHE_SYNC -void arch_dma_cache_sync(struct device *dev, void *vaddr, size_t size, - enum dma_data_direction direction); -#else -static inline void arch_dma_cache_sync(struct device *dev, void *vaddr, - size_t size, enum dma_data_direction direction) -{ -} -#endif /* CONFIG_DMA_NONCOHERENT_CACHE_SYNC */ - -#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE -void arch_sync_dma_for_device(phys_addr_t paddr, size_t size, - enum dma_data_direction dir); -#else -static inline void arch_sync_dma_for_device(phys_addr_t paddr, size_t size, - enum dma_data_direction dir) -{ -} -#endif /* ARCH_HAS_SYNC_DMA_FOR_DEVICE */ - -#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU -void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, - enum dma_data_direction dir); -#else -static inline void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, - enum dma_data_direction dir) -{ -} -#endif /* ARCH_HAS_SYNC_DMA_FOR_CPU */ - -#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL -void arch_sync_dma_for_cpu_all(void); -#else -static inline void arch_sync_dma_for_cpu_all(void) -{ -} -#endif /* CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL */ - -#ifdef CONFIG_ARCH_HAS_DMA_PREP_COHERENT -void arch_dma_prep_coherent(struct page *page, size_t size); -#else -static inline void arch_dma_prep_coherent(struct page *page, size_t size) -{ -} -#endif /* CONFIG_ARCH_HAS_DMA_PREP_COHERENT */ - -void *arch_dma_set_uncached(void *addr, size_t size); -void arch_dma_clear_uncached(void *addr, size_t size); - -#endif /* _LINUX_DMA_NONCOHERENT_H */ diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 07e481993ef5da..c603237e006ceb 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -552,8 +552,10 @@ extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order, #define alloc_hugepage_vma(gfp_mask, vma, addr, order) \ alloc_pages_vma(gfp_mask, order, vma, addr, numa_node_id(), true) #else -#define alloc_pages(gfp_mask, order) \ - alloc_pages_node(numa_node_id(), gfp_mask, order) +static inline struct page *alloc_pages(gfp_t gfp_mask, unsigned int order) +{ + return alloc_pages_node(numa_node_id(), gfp_mask, order); +} #define alloc_pages_vma(gfp_mask, order, vma, addr, node, false)\ alloc_pages(gfp_mask, order) #define alloc_hugepage_vma(gfp_mask, vma, addr, order) \ diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig index 847a9d1fa6343d..c99de4a2145889 100644 --- a/kernel/dma/Kconfig +++ b/kernel/dma/Kconfig @@ -9,6 +9,7 @@ config HAS_DMA default y config DMA_OPS + depends on HAS_DMA bool # @@ -43,6 +44,12 @@ config ARCH_HAS_DMA_SET_MASK config ARCH_HAS_DMA_WRITE_COMBINE bool +# +# Select if the architectures provides the arch_dma_mark_clean hook +# +config ARCH_HAS_DMA_MARK_CLEAN + bool + config DMA_DECLARE_COHERENT bool @@ -68,9 +75,6 @@ config ARCH_HAS_DMA_PREP_COHERENT config ARCH_HAS_FORCE_DMA_UNENCRYPTED bool -config DMA_NONCOHERENT_CACHE_SYNC - bool - config DMA_VIRT_OPS bool depends on HAS_DMA @@ -114,10 +118,21 @@ config DMA_CMA You can disable CMA by specifying "cma=0" on the kernel's command line. - For more information see . + For more information see . If unsure, say "n". if DMA_CMA + +config DMA_PERNUMA_CMA + bool "Enable separate DMA Contiguous Memory Area for each NUMA Node" + default NUMA && ARM64 + help + Enable this option to get pernuma CMA areas so that devices like + ARM64 SMMU can get local memory by DMA coherent APIs. + + You can set the size of pernuma CMA by specifying "cma_pernuma=size" + on the kernel's command line. + comment "Default contiguous memory area size:" config CMA_SIZE_MBYTES @@ -162,7 +177,7 @@ endchoice config CMA_ALIGNMENT int "Maximum PAGE_SIZE order of alignment for contiguous buffers" - range 4 12 + range 2 12 default 8 help DMA mapping framework by default aligns all buffers to the smallest diff --git a/kernel/dma/Makefile b/kernel/dma/Makefile index 32c7c1942bbd6c..dc755ab68aabf9 100644 --- a/kernel/dma/Makefile +++ b/kernel/dma/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_HAS_DMA) += mapping.o direct.o +obj-$(CONFIG_DMA_OPS) += ops_helpers.o obj-$(CONFIG_DMA_OPS) += dummy.o obj-$(CONFIG_DMA_CMA) += contiguous.o obj-$(CONFIG_DMA_DECLARE_COHERENT) += coherent.o diff --git a/kernel/dma/coherent.c b/kernel/dma/coherent.c index 2a0c4985f38e41..5b5b6c7ec7f28e 100644 --- a/kernel/dma/coherent.c +++ b/kernel/dma/coherent.c @@ -7,7 +7,8 @@ #include #include #include -#include +#include +#include struct dma_coherent_mem { void *virt_base; @@ -32,9 +33,8 @@ static inline dma_addr_t dma_get_device_base(struct device *dev, struct dma_coherent_mem * mem) { if (mem->use_dev_dma_pfn_offset) - return (mem->pfn_base - dev->dma_pfn_offset) << PAGE_SHIFT; - else - return mem->device_base; + return phys_to_dma(dev, PFN_PHYS(mem->pfn_base)); + return mem->device_base; } static int dma_init_coherent_memory(phys_addr_t phys_addr, @@ -107,6 +107,23 @@ static int dma_assign_coherent_memory(struct device *dev, return 0; } +/* + * Declare a region of memory to be handed out by dma_alloc_coherent() when it + * is asked for coherent memory for this device. This shall only be used + * from platform code, usually based on the device tree description. + * + * phys_addr is the CPU physical address to which the memory is currently + * assigned (this will be ioremapped so the CPU can access the region). + * + * device_addr is the DMA address the device needs to be programmed with to + * actually address this memory (this will be handed out as the dma_addr_t in + * dma_alloc_coherent()). + * + * size is the size of the area (must be a multiple of PAGE_SIZE). + * + * As a simplification for the platforms, only *one* such region of memory may + * be declared per device. + */ int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr, dma_addr_t device_addr, size_t size) { diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c index 0369fd5fda8fc7..16b95ff12e4df3 100644 --- a/kernel/dma/contiguous.c +++ b/kernel/dma/contiguous.c @@ -5,6 +5,34 @@ * Written by: * Marek Szyprowski * Michal Nazarewicz + * + * Contiguous Memory Allocator + * + * The Contiguous Memory Allocator (CMA) makes it possible to + * allocate big contiguous chunks of memory after the system has + * booted. + * + * Why is it needed? + * + * Various devices on embedded systems have no scatter-getter and/or + * IO map support and require contiguous blocks of memory to + * operate. They include devices such as cameras, hardware video + * coders, etc. + * + * Such devices often require big memory buffers (a full HD frame + * is, for instance, more then 2 mega pixels large, i.e. more than 6 + * MB of memory), which makes mechanisms such as kmalloc() or + * alloc_page() ineffective. + * + * At the same time, a solution where a big memory region is + * reserved for a device is suboptimal since often more memory is + * reserved then strictly required and, moreover, the memory is + * inaccessible to page system even if device drivers don't use it. + * + * CMA tries to solve this issue by operating on memory regions + * where only movable pages can be allocated from. This way, kernel + * can use the memory for pagecache and when device driver requests + * it, allocated pages can be migrated. */ #define pr_fmt(fmt) "cma: " fmt @@ -16,12 +44,11 @@ #endif #include -#include #include #include #include -#include +#include #include #ifdef CONFIG_CMA_SIZE_MBYTES @@ -69,6 +96,19 @@ static int __init early_cma(char *p) } early_param("cma", early_cma); +#ifdef CONFIG_DMA_PERNUMA_CMA + +static struct cma *dma_contiguous_pernuma_area[MAX_NUMNODES]; +static phys_addr_t pernuma_size_bytes __initdata; + +static int __init early_cma_pernuma(char *p) +{ + pernuma_size_bytes = memparse(p, &p); + return 0; +} +early_param("cma_pernuma", early_cma_pernuma); +#endif + #ifdef CONFIG_CMA_SIZE_PERCENTAGE static phys_addr_t __init __maybe_unused cma_early_percent_memory(void) @@ -87,6 +127,34 @@ static inline __maybe_unused phys_addr_t cma_early_percent_memory(void) #endif +#ifdef CONFIG_DMA_PERNUMA_CMA +void __init dma_pernuma_cma_reserve(void) +{ + int nid; + + if (!pernuma_size_bytes) + return; + + for_each_online_node(nid) { + int ret; + char name[CMA_MAX_NAME]; + struct cma **cma = &dma_contiguous_pernuma_area[nid]; + + snprintf(name, sizeof(name), "pernuma%d", nid); + ret = cma_declare_contiguous_nid(0, pernuma_size_bytes, 0, 0, + 0, false, name, cma, nid); + if (ret) { + pr_warn("%s: reservation failed: err %d, node %d", __func__, + ret, nid); + continue; + } + + pr_debug("%s: reserved %llu MiB on node %d\n", __func__, + (unsigned long long)pernuma_size_bytes / SZ_1M, nid); + } +} +#endif + /** * dma_contiguous_reserve() - reserve area(s) for contiguous memory handling * @limit: End address of the reserved memory (optional, 0 for any). @@ -134,6 +202,11 @@ void __init dma_contiguous_reserve(phys_addr_t limit) } } +void __weak +dma_contiguous_early_fixup(phys_addr_t base, unsigned long size) +{ +} + /** * dma_contiguous_reserve_area() - reserve custom contiguous area * @size: Size of the reserved area (in bytes), @@ -219,23 +292,44 @@ static struct page *cma_alloc_aligned(struct cma *cma, size_t size, gfp_t gfp) * @size: Requested allocation size. * @gfp: Allocation flags. * - * This function allocates contiguous memory buffer for specified device. It - * tries to use device specific contiguous memory area if available, or the - * default global one. + * tries to use device specific contiguous memory area if available, or it + * tries to use per-numa cma, if the allocation fails, it will fallback to + * try default global one. * - * Note that it byapss one-page size of allocations from the global area as - * the addresses within one page are always contiguous, so there is no need - * to waste CMA pages for that kind; it also helps reduce fragmentations. + * Note that it bypass one-page size of allocations from the per-numa and + * global area as the addresses within one page are always contiguous, so + * there is no need to waste CMA pages for that kind; it also helps reduce + * fragmentations. */ struct page *dma_alloc_contiguous(struct device *dev, size_t size, gfp_t gfp) { +#ifdef CONFIG_DMA_PERNUMA_CMA + int nid = dev_to_node(dev); +#endif + /* CMA can be used only in the context which permits sleeping */ if (!gfpflags_allow_blocking(gfp)) return NULL; if (dev->cma_area) return cma_alloc_aligned(dev->cma_area, size, gfp); - if (size <= PAGE_SIZE || !dma_contiguous_default_area) + if (size <= PAGE_SIZE) return NULL; + +#ifdef CONFIG_DMA_PERNUMA_CMA + if (nid != NUMA_NO_NODE && !(gfp & (GFP_DMA | GFP_DMA32))) { + struct cma *cma = dma_contiguous_pernuma_area[nid]; + struct page *page; + + if (cma) { + page = cma_alloc_aligned(cma, size, gfp); + if (page) + return page; + } + } +#endif + if (!dma_contiguous_default_area) + return NULL; + return cma_alloc_aligned(dma_contiguous_default_area, size, gfp); } @@ -252,9 +346,27 @@ struct page *dma_alloc_contiguous(struct device *dev, size_t size, gfp_t gfp) */ void dma_free_contiguous(struct device *dev, struct page *page, size_t size) { - if (!cma_release(dev_get_cma_area(dev), page, - PAGE_ALIGN(size) >> PAGE_SHIFT)) - __free_pages(page, get_order(size)); + unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; + + /* if dev has its own cma, free page from there */ + if (dev->cma_area) { + if (cma_release(dev->cma_area, page, count)) + return; + } else { + /* + * otherwise, page is from either per-numa cma or default cma + */ +#ifdef CONFIG_DMA_PERNUMA_CMA + if (cma_release(dma_contiguous_pernuma_area[page_to_nid(page)], + page, count)) + return; +#endif + if (cma_release(dma_contiguous_default_area, page, count)) + return; + } + + /* not in any cma, free from buddy */ + __free_pages(page, get_order(size)); } /* @@ -270,14 +382,14 @@ void dma_free_contiguous(struct device *dev, struct page *page, size_t size) static int rmem_cma_device_init(struct reserved_mem *rmem, struct device *dev) { - dev_set_cma_area(dev, rmem->priv); + dev->cma_area = rmem->priv; return 0; } static void rmem_cma_device_release(struct reserved_mem *rmem, struct device *dev) { - dev_set_cma_area(dev, NULL); + dev->cma_area = NULL; } static const struct reserved_mem_ops rmem_cma_ops = { @@ -318,7 +430,7 @@ static int __init rmem_cma_setup(struct reserved_mem *rmem) dma_contiguous_early_fixup(rmem->base, rmem->size); if (default_cma) - dma_contiguous_set_default(cma); + dma_contiguous_default_area = cma; rmem->ops = &rmem_cma_ops; rmem->priv = cma; diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c index 8e9f7b301c6d39..14de1271463fd0 100644 --- a/kernel/dma/debug.c +++ b/kernel/dma/debug.c @@ -9,10 +9,9 @@ #include #include -#include +#include #include #include -#include #include #include #include @@ -24,8 +23,8 @@ #include #include #include - #include +#include "debug.h" #define HASH_SIZE 16384ULL #define HASH_FN_SHIFT 13 @@ -1219,7 +1218,7 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, entry->dev = dev; entry->type = dma_debug_single; entry->pfn = page_to_pfn(page); - entry->offset = offset, + entry->offset = offset; entry->dev_addr = dma_addr; entry->size = size; entry->direction = direction; @@ -1235,7 +1234,6 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, add_dma_entry(entry); } -EXPORT_SYMBOL(debug_dma_map_page); void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { @@ -1290,7 +1288,6 @@ void debug_dma_unmap_page(struct device *dev, dma_addr_t addr, return; check_unmap(&ref); } -EXPORT_SYMBOL(debug_dma_unmap_page); void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, int mapped_ents, int direction) @@ -1310,7 +1307,7 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, entry->type = dma_debug_sg; entry->dev = dev; entry->pfn = page_to_pfn(sg_page(s)); - entry->offset = s->offset, + entry->offset = s->offset; entry->size = sg_dma_len(s); entry->dev_addr = sg_dma_address(s); entry->direction = direction; @@ -1328,7 +1325,6 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, add_dma_entry(entry); } } -EXPORT_SYMBOL(debug_dma_map_sg); static int get_nr_mapped_entries(struct device *dev, struct dma_debug_entry *ref) @@ -1380,7 +1376,6 @@ void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist, check_unmap(&ref); } } -EXPORT_SYMBOL(debug_dma_unmap_sg); void debug_dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t dma_addr, void *virt) @@ -1466,7 +1461,6 @@ void debug_dma_map_resource(struct device *dev, phys_addr_t addr, size_t size, add_dma_entry(entry); } -EXPORT_SYMBOL(debug_dma_map_resource); void debug_dma_unmap_resource(struct device *dev, dma_addr_t dma_addr, size_t size, int direction) @@ -1484,7 +1478,6 @@ void debug_dma_unmap_resource(struct device *dev, dma_addr_t dma_addr, check_unmap(&ref); } -EXPORT_SYMBOL(debug_dma_unmap_resource); void debug_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size, int direction) @@ -1503,7 +1496,6 @@ void debug_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, check_sync(dev, &ref, true); } -EXPORT_SYMBOL(debug_dma_sync_single_for_cpu); void debug_dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size, @@ -1523,7 +1515,6 @@ void debug_dma_sync_single_for_device(struct device *dev, check_sync(dev, &ref, false); } -EXPORT_SYMBOL(debug_dma_sync_single_for_device); void debug_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, int direction) @@ -1556,7 +1547,6 @@ void debug_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, check_sync(dev, &ref, true); } } -EXPORT_SYMBOL(debug_dma_sync_sg_for_cpu); void debug_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems, int direction) @@ -1588,7 +1578,6 @@ void debug_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, check_sync(dev, &ref, false); } } -EXPORT_SYMBOL(debug_dma_sync_sg_for_device); static int __init dma_debug_driver_setup(char *str) { diff --git a/include/linux/dma-debug.h b/kernel/dma/debug.h similarity index 81% rename from include/linux/dma-debug.h rename to kernel/dma/debug.h index 7b3b04ba60f32a..83643b3010b2fc 100644 --- a/include/linux/dma-debug.h +++ b/kernel/dma/debug.h @@ -5,28 +5,14 @@ * Author: Joerg Roedel */ -#ifndef __DMA_DEBUG_H -#define __DMA_DEBUG_H - -#include - -struct device; -struct scatterlist; -struct bus_type; +#ifndef _KERNEL_DMA_DEBUG_H +#define _KERNEL_DMA_DEBUG_H #ifdef CONFIG_DMA_API_DEBUG - -extern void dma_debug_add_bus(struct bus_type *bus); - -extern void debug_dma_map_single(struct device *dev, const void *addr, - unsigned long len); - extern void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, size_t size, int direction, dma_addr_t dma_addr); -extern void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr); - extern void debug_dma_unmap_page(struct device *dev, dma_addr_t addr, size_t size, int direction); @@ -64,31 +50,13 @@ extern void debug_dma_sync_sg_for_cpu(struct device *dev, extern void debug_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems, int direction); - -extern void debug_dma_dump_mappings(struct device *dev); - #else /* CONFIG_DMA_API_DEBUG */ - -static inline void dma_debug_add_bus(struct bus_type *bus) -{ -} - -static inline void debug_dma_map_single(struct device *dev, const void *addr, - unsigned long len) -{ -} - static inline void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, size_t size, int direction, dma_addr_t dma_addr) { } -static inline void debug_dma_mapping_error(struct device *dev, - dma_addr_t dma_addr) -{ -} - static inline void debug_dma_unmap_page(struct device *dev, dma_addr_t addr, size_t size, int direction) { @@ -150,11 +118,5 @@ static inline void debug_dma_sync_sg_for_device(struct device *dev, int nelems, int direction) { } - -static inline void debug_dma_dump_mappings(struct device *dev) -{ -} - #endif /* CONFIG_DMA_API_DEBUG */ - -#endif /* __DMA_DEBUG_H */ +#endif /* _KERNEL_DMA_DEBUG_H */ diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index db6ef07aec3b37..b92d08e65999db 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -1,18 +1,19 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright (C) 2018 Christoph Hellwig. + * Copyright (C) 2018-2020 Christoph Hellwig. * * DMA operations that map physical memory directly without using an IOMMU. */ #include /* for max_pfn */ #include #include -#include +#include #include -#include #include #include #include +#include +#include "direct.h" /* * Most architectures use ZONE_DMA for the first 16 Megabytes, but some use it @@ -25,7 +26,7 @@ static inline dma_addr_t phys_to_dma_direct(struct device *dev, phys_addr_t phys) { if (force_dma_unencrypted(dev)) - return __phys_to_dma(dev, phys); + return phys_to_dma_unencrypted(dev, phys); return phys_to_dma(dev, phys); } @@ -48,11 +49,6 @@ static gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask, { u64 dma_limit = min_not_zero(dma_mask, dev->bus_dma_limit); - if (force_dma_unencrypted(dev)) - *phys_limit = __dma_to_phys(dev, dma_limit); - else - *phys_limit = dma_to_phys(dev, dma_limit); - /* * Optimistically try the zone that the physical address mask falls * into first. If that returns memory that isn't actually addressable @@ -61,6 +57,7 @@ static gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask, * Note that GFP_DMA32 and GFP_DMA are no ops without the corresponding * zones. */ + *phys_limit = dma_to_phys(dev, dma_limit); if (*phys_limit <= DMA_BIT_MASK(zone_dma_bits)) return GFP_DMA; if (*phys_limit <= DMA_BIT_MASK(32)) @@ -70,45 +67,16 @@ static gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask, static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size) { - return phys_to_dma_direct(dev, phys) + size - 1 <= - min_not_zero(dev->coherent_dma_mask, dev->bus_dma_limit); -} - -/* - * Decrypting memory is allowed to block, so if this device requires - * unencrypted memory it must come from atomic pools. - */ -static inline bool dma_should_alloc_from_pool(struct device *dev, gfp_t gfp, - unsigned long attrs) -{ - if (!IS_ENABLED(CONFIG_DMA_COHERENT_POOL)) - return false; - if (gfpflags_allow_blocking(gfp)) - return false; - if (force_dma_unencrypted(dev)) - return true; - if (!IS_ENABLED(CONFIG_DMA_DIRECT_REMAP)) - return false; - if (dma_alloc_need_uncached(dev, attrs)) - return true; - return false; -} + dma_addr_t dma_addr = phys_to_dma_direct(dev, phys); -static inline bool dma_should_free_from_pool(struct device *dev, - unsigned long attrs) -{ - if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL)) - return true; - if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) && - !force_dma_unencrypted(dev)) + if (dma_addr == DMA_MAPPING_ERROR) return false; - if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP)) - return true; - return false; + return dma_addr + size - 1 <= + min_not_zero(dev->coherent_dma_mask, dev->bus_dma_limit); } static struct page *__dma_direct_alloc_pages(struct device *dev, size_t size, - gfp_t gfp, unsigned long attrs) + gfp_t gfp) { int node = dev_to_node(dev); struct page *page = NULL; @@ -116,11 +84,6 @@ static struct page *__dma_direct_alloc_pages(struct device *dev, size_t size, WARN_ON_ONCE(!PAGE_ALIGNED(size)); - if (attrs & DMA_ATTR_NO_WARN) - gfp |= __GFP_NOWARN; - - /* we always manually zero the memory once we are done: */ - gfp &= ~__GFP_ZERO; gfp |= dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask, &phys_limit); page = dma_alloc_contiguous(dev, size, gfp); @@ -151,7 +114,23 @@ static struct page *__dma_direct_alloc_pages(struct device *dev, size_t size, return page; } -void *dma_direct_alloc_pages(struct device *dev, size_t size, +static void *dma_direct_alloc_from_pool(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t gfp) +{ + struct page *page; + u64 phys_mask; + void *ret; + + gfp |= dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask, + &phys_mask); + page = dma_alloc_from_pool(dev, size, &ret, gfp, dma_coherent_ok); + if (!page) + return NULL; + *dma_handle = phys_to_dma_direct(dev, page_to_phys(page)); + return ret; +} + +void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) { struct page *page; @@ -159,35 +138,44 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size, int err; size = PAGE_ALIGN(size); - - if (dma_should_alloc_from_pool(dev, gfp, attrs)) { - u64 phys_mask; - - gfp |= dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask, - &phys_mask); - page = dma_alloc_from_pool(dev, size, &ret, gfp, - dma_coherent_ok); - if (!page) - return NULL; - goto done; - } - - page = __dma_direct_alloc_pages(dev, size, gfp, attrs); - if (!page) - return NULL; + if (attrs & DMA_ATTR_NO_WARN) + gfp |= __GFP_NOWARN; if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) && !force_dma_unencrypted(dev)) { + page = __dma_direct_alloc_pages(dev, size, gfp & ~__GFP_ZERO); + if (!page) + return NULL; /* remove any dirty cache lines on the kernel alias */ if (!PageHighMem(page)) arch_dma_prep_coherent(page, size); + *dma_handle = phys_to_dma_direct(dev, page_to_phys(page)); /* return the page pointer as the opaque cookie */ - ret = page; - goto done; + return page; } + if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) && + !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && + !dev_is_dma_coherent(dev)) + return arch_dma_alloc(dev, size, dma_handle, gfp, attrs); + + /* + * Remapping or decrypting memory may block. If either is required and + * we can't block, allocate the memory from the atomic pools. + */ + if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) && + !gfpflags_allow_blocking(gfp) && + (force_dma_unencrypted(dev) || + (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && !dev_is_dma_coherent(dev)))) + return dma_direct_alloc_from_pool(dev, size, dma_handle, gfp); + + /* we always manually zero the memory once we are done */ + page = __dma_direct_alloc_pages(dev, size, gfp & ~__GFP_ZERO); + if (!page) + return NULL; + if ((IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && - dma_alloc_need_uncached(dev, attrs)) || + !dev_is_dma_coherent(dev)) || (IS_ENABLED(CONFIG_DMA_REMAP) && PageHighMem(page))) { /* remove any dirty cache lines on the kernel alias */ arch_dma_prep_coherent(page, size); @@ -230,17 +218,14 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size, memset(ret, 0, size); if (IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) && - dma_alloc_need_uncached(dev, attrs)) { + !dev_is_dma_coherent(dev)) { arch_dma_prep_coherent(page, size); ret = arch_dma_set_uncached(ret, size); if (IS_ERR(ret)) goto out_encrypt_pages; } done: - if (force_dma_unencrypted(dev)) - *dma_handle = __phys_to_dma(dev, page_to_phys(page)); - else - *dma_handle = phys_to_dma(dev, page_to_phys(page)); + *dma_handle = phys_to_dma_direct(dev, page_to_phys(page)); return ret; out_encrypt_pages: @@ -256,16 +241,11 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size, return NULL; } -void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr, - dma_addr_t dma_addr, unsigned long attrs) +void dma_direct_free(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs) { unsigned int page_order = get_order(size); - /* If cpu_addr is not from an atomic pool, dma_free_from_pool() fails */ - if (dma_should_free_from_pool(dev, attrs) && - dma_free_from_pool(dev, cpu_addr, PAGE_ALIGN(size))) - return; - if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) && !force_dma_unencrypted(dev)) { /* cpu_addr is a struct page cookie, not a kernel address */ @@ -273,6 +253,18 @@ void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr, return; } + if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) && + !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && + !dev_is_dma_coherent(dev)) { + arch_dma_free(dev, size, cpu_addr, dma_addr, attrs); + return; + } + + /* If cpu_addr is not from an atomic pool, dma_free_from_pool() fails */ + if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) && + dma_free_from_pool(dev, cpu_addr, PAGE_ALIGN(size))) + return; + if (force_dma_unencrypted(dev)) set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order); @@ -284,25 +276,60 @@ void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr, dma_free_contiguous(dev, dma_direct_to_page(dev, dma_addr), size); } -void *dma_direct_alloc(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) +struct page *dma_direct_alloc_pages(struct device *dev, size_t size, + dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp) { - if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) && - !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && - dma_alloc_need_uncached(dev, attrs)) - return arch_dma_alloc(dev, size, dma_handle, gfp, attrs); - return dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs); + struct page *page; + void *ret; + + if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) && + force_dma_unencrypted(dev) && !gfpflags_allow_blocking(gfp)) + return dma_direct_alloc_from_pool(dev, size, dma_handle, gfp); + + page = __dma_direct_alloc_pages(dev, size, gfp); + if (!page) + return NULL; + if (PageHighMem(page)) { + /* + * Depending on the cma= arguments and per-arch setup + * dma_alloc_contiguous could return highmem pages. + * Without remapping there is no way to return them here, + * so log an error and fail. + */ + dev_info(dev, "Rejecting highmem page from CMA.\n"); + goto out_free_pages; + } + + ret = page_address(page); + if (force_dma_unencrypted(dev)) { + if (set_memory_decrypted((unsigned long)ret, + 1 << get_order(size))) + goto out_free_pages; + } + memset(ret, 0, size); + *dma_handle = phys_to_dma_direct(dev, page_to_phys(page)); + return page; +out_free_pages: + dma_free_contiguous(dev, page, size); + return NULL; } -void dma_direct_free(struct device *dev, size_t size, - void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs) +void dma_direct_free_pages(struct device *dev, size_t size, + struct page *page, dma_addr_t dma_addr, + enum dma_data_direction dir) { - if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) && - !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && - dma_alloc_need_uncached(dev, attrs)) - arch_dma_free(dev, size, cpu_addr, dma_addr, attrs); - else - dma_direct_free_pages(dev, size, cpu_addr, dma_addr, attrs); + unsigned int page_order = get_order(size); + void *vaddr = page_address(page); + + /* If cpu_addr is not from an atomic pool, dma_free_from_pool() fails */ + if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) && + dma_free_from_pool(dev, vaddr, size)) + return; + + if (force_dma_unencrypted(dev)) + set_memory_encrypted((unsigned long)vaddr, 1 << page_order); + + dma_free_contiguous(dev, page, size); } #if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ @@ -345,6 +372,9 @@ void dma_direct_sync_sg_for_cpu(struct device *dev, if (unlikely(is_swiotlb_buffer(paddr))) swiotlb_tbl_sync_single(dev, paddr, sg->length, dir, SYNC_FOR_CPU); + + if (dir == DMA_FROM_DEVICE) + arch_dma_mark_clean(paddr, sg->length); } if (!dev_is_dma_coherent(dev)) @@ -453,13 +483,13 @@ int dma_direct_supported(struct device *dev, u64 mask) return 1; /* - * This check needs to be against the actual bit mask value, so - * use __phys_to_dma() here so that the SME encryption mask isn't + * This check needs to be against the actual bit mask value, so use + * phys_to_dma_unencrypted() here so that the SME encryption mask isn't * part of the check. */ if (IS_ENABLED(CONFIG_ZONE_DMA)) min_mask = min_t(u64, min_mask, DMA_BIT_MASK(zone_dma_bits)); - return mask >= __phys_to_dma(dev, min_mask); + return mask >= phys_to_dma_unencrypted(dev, min_mask); } size_t dma_direct_max_mapping_size(struct device *dev) @@ -476,3 +506,45 @@ bool dma_direct_need_sync(struct device *dev, dma_addr_t dma_addr) return !dev_is_dma_coherent(dev) || is_swiotlb_buffer(dma_to_phys(dev, dma_addr)); } + +/** + * dma_direct_set_offset - Assign scalar offset for a single DMA range. + * @dev: device pointer; needed to "own" the alloced memory. + * @cpu_start: beginning of memory region covered by this offset. + * @dma_start: beginning of DMA/PCI region covered by this offset. + * @size: size of the region. + * + * This is for the simple case of a uniform offset which cannot + * be discovered by "dma-ranges". + * + * It returns -ENOMEM if out of memory, -EINVAL if a map + * already exists, 0 otherwise. + * + * Note: any call to this from a driver is a bug. The mapping needs + * to be described by the device tree or other firmware interfaces. + */ +int dma_direct_set_offset(struct device *dev, phys_addr_t cpu_start, + dma_addr_t dma_start, u64 size) +{ + struct bus_dma_region *map; + u64 offset = (u64)cpu_start - (u64)dma_start; + + if (dev->dma_range_map) { + dev_err(dev, "attempt to add DMA range to existing map\n"); + return -EINVAL; + } + + if (!offset) + return 0; + + map = kcalloc(2, sizeof(*map), GFP_KERNEL); + if (!map) + return -ENOMEM; + map[0].cpu_start = cpu_start; + map[0].dma_start = dma_start; + map[0].offset = offset; + map[0].size = size; + dev->dma_range_map = map; + return 0; +} +EXPORT_SYMBOL_GPL(dma_direct_set_offset); diff --git a/kernel/dma/direct.h b/kernel/dma/direct.h new file mode 100644 index 00000000000000..b9861557873768 --- /dev/null +++ b/kernel/dma/direct.h @@ -0,0 +1,119 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2018 Christoph Hellwig. + * + * DMA operations that map physical memory directly without using an IOMMU. + */ +#ifndef _KERNEL_DMA_DIRECT_H +#define _KERNEL_DMA_DIRECT_H + +#include + +int dma_direct_get_sgtable(struct device *dev, struct sg_table *sgt, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs); +bool dma_direct_can_mmap(struct device *dev); +int dma_direct_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs); +bool dma_direct_need_sync(struct device *dev, dma_addr_t dma_addr); +int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, + enum dma_data_direction dir, unsigned long attrs); +size_t dma_direct_max_mapping_size(struct device *dev); + +#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ + defined(CONFIG_SWIOTLB) +void dma_direct_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir); +#else +static inline void dma_direct_sync_sg_for_device(struct device *dev, + struct scatterlist *sgl, int nents, enum dma_data_direction dir) +{ +} +#endif + +#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ + defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) || \ + defined(CONFIG_SWIOTLB) +void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir, unsigned long attrs); +void dma_direct_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sgl, int nents, enum dma_data_direction dir); +#else +static inline void dma_direct_unmap_sg(struct device *dev, + struct scatterlist *sgl, int nents, enum dma_data_direction dir, + unsigned long attrs) +{ +} +static inline void dma_direct_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sgl, int nents, enum dma_data_direction dir) +{ +} +#endif + +static inline void dma_direct_sync_single_for_device(struct device *dev, + dma_addr_t addr, size_t size, enum dma_data_direction dir) +{ + phys_addr_t paddr = dma_to_phys(dev, addr); + + if (unlikely(is_swiotlb_buffer(paddr))) + swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_DEVICE); + + if (!dev_is_dma_coherent(dev)) + arch_sync_dma_for_device(paddr, size, dir); +} + +static inline void dma_direct_sync_single_for_cpu(struct device *dev, + dma_addr_t addr, size_t size, enum dma_data_direction dir) +{ + phys_addr_t paddr = dma_to_phys(dev, addr); + + if (!dev_is_dma_coherent(dev)) { + arch_sync_dma_for_cpu(paddr, size, dir); + arch_sync_dma_for_cpu_all(); + } + + if (unlikely(is_swiotlb_buffer(paddr))) + swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_CPU); + + if (dir == DMA_FROM_DEVICE) + arch_dma_mark_clean(paddr, size); +} + +static inline dma_addr_t dma_direct_map_page(struct device *dev, + struct page *page, unsigned long offset, size_t size, + enum dma_data_direction dir, unsigned long attrs) +{ + phys_addr_t phys = page_to_phys(page) + offset; + dma_addr_t dma_addr = phys_to_dma(dev, phys); + + if (unlikely(swiotlb_force == SWIOTLB_FORCE)) + return swiotlb_map(dev, phys, size, dir, attrs); + + if (unlikely(!dma_capable(dev, dma_addr, size, true))) { + if (swiotlb_force != SWIOTLB_NO_FORCE) + return swiotlb_map(dev, phys, size, dir, attrs); + + dev_WARN_ONCE(dev, 1, + "DMA addr %pad+%zu overflow (mask %llx, bus limit %llx).\n", + &dma_addr, size, *dev->dma_mask, dev->bus_dma_limit); + return DMA_MAPPING_ERROR; + } + + if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) + arch_sync_dma_for_device(phys, size, dir); + return dma_addr; +} + +static inline void dma_direct_unmap_page(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir, unsigned long attrs) +{ + phys_addr_t phys = dma_to_phys(dev, addr); + + if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) + dma_direct_sync_single_for_cpu(dev, addr, size, dir); + + if (unlikely(is_swiotlb_buffer(phys))) + swiotlb_tbl_unmap_single(dev, phys, size, size, dir, attrs); +} +#endif /* _KERNEL_DMA_DIRECT_H */ diff --git a/kernel/dma/dummy.c b/kernel/dma/dummy.c index 05607642c888d9..eacd4c5b10bf6d 100644 --- a/kernel/dma/dummy.c +++ b/kernel/dma/dummy.c @@ -2,7 +2,7 @@ /* * Dummy DMA ops that always fail. */ -#include +#include static int dma_dummy_mmap(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, @@ -36,4 +36,3 @@ const struct dma_map_ops dma_dummy_ops = { .map_sg = dma_dummy_map_sg, .dma_supported = dma_dummy_supported, }; -EXPORT_SYMBOL(dma_dummy_ops); diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index 0d129421e75fc8..51bb8fa8eb8948 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -7,13 +7,14 @@ */ #include /* for max_pfn */ #include -#include -#include +#include #include #include #include #include #include +#include "debug.h" +#include "direct.h" /* * Managed DMA API @@ -144,6 +145,10 @@ dma_addr_t dma_map_page_attrs(struct device *dev, struct page *page, dma_addr_t addr; BUG_ON(!valid_dma_direction(dir)); + + if (WARN_ON_ONCE(!dev->dma_mask)) + return DMA_MAPPING_ERROR; + if (dma_map_direct(dev, ops)) addr = dma_direct_map_page(dev, page, offset, size, dir, attrs); else @@ -179,6 +184,10 @@ int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, int nents, int ents; BUG_ON(!valid_dma_direction(dir)); + + if (WARN_ON_ONCE(!dev->dma_mask)) + return 0; + if (dma_map_direct(dev, ops)) ents = dma_direct_map_sg(dev, sg, nents, dir, attrs); else @@ -213,6 +222,9 @@ dma_addr_t dma_map_resource(struct device *dev, phys_addr_t phys_addr, BUG_ON(!valid_dma_direction(dir)); + if (WARN_ON_ONCE(!dev->dma_mask)) + return DMA_MAPPING_ERROR; + /* Don't allow RAM to be mapped */ if (WARN_ON_ONCE(pfn_valid(PHYS_PFN(phys_addr)))) return DMA_MAPPING_ERROR; @@ -295,22 +307,6 @@ void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, } EXPORT_SYMBOL(dma_sync_sg_for_device); -/* - * Create scatter-list for the already allocated DMA buffer. - */ -int dma_common_get_sgtable(struct device *dev, struct sg_table *sgt, - void *cpu_addr, dma_addr_t dma_addr, size_t size, - unsigned long attrs) -{ - struct page *page = virt_to_page(cpu_addr); - int ret; - - ret = sg_alloc_table(sgt, 1, GFP_KERNEL); - if (!ret) - sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0); - return ret; -} - /* * The whole dma_get_sgtable() idea is fundamentally unsafe - it seems * that the intention is to allow exporting memory allocated via the @@ -346,9 +342,7 @@ pgprot_t dma_pgprot(struct device *dev, pgprot_t prot, unsigned long attrs) { if (force_dma_unencrypted(dev)) prot = pgprot_decrypted(prot); - if (dev_is_dma_coherent(dev) || - (IS_ENABLED(CONFIG_DMA_NONCOHERENT_CACHE_SYNC) && - (attrs & DMA_ATTR_NON_CONSISTENT))) + if (dev_is_dma_coherent(dev)) return prot; #ifdef CONFIG_ARCH_HAS_DMA_WRITE_COMBINE if (attrs & DMA_ATTR_WRITE_COMBINE) @@ -358,35 +352,6 @@ pgprot_t dma_pgprot(struct device *dev, pgprot_t prot, unsigned long attrs) } #endif /* CONFIG_MMU */ -/* - * Create userspace mapping for the DMA-coherent memory. - */ -int dma_common_mmap(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t dma_addr, size_t size, - unsigned long attrs) -{ -#ifdef CONFIG_MMU - unsigned long user_count = vma_pages(vma); - unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; - unsigned long off = vma->vm_pgoff; - int ret = -ENXIO; - - vma->vm_page_prot = dma_pgprot(dev, vma->vm_page_prot, attrs); - - if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) - return ret; - - if (off >= count || user_count > count - off) - return -ENXIO; - - return remap_pfn_range(vma, vma->vm_start, - page_to_pfn(virt_to_page(cpu_addr)) + vma->vm_pgoff, - user_count << PAGE_SHIFT, vma->vm_page_prot); -#else - return -ENXIO; -#endif /* CONFIG_MMU */ -} - /** * dma_can_mmap - check if a given device supports dma_mmap_* * @dev: device to check @@ -506,6 +471,86 @@ void dma_free_attrs(struct device *dev, size_t size, void *cpu_addr, } EXPORT_SYMBOL(dma_free_attrs); +struct page *dma_alloc_pages(struct device *dev, size_t size, + dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + struct page *page; + + if (WARN_ON_ONCE(!dev->coherent_dma_mask)) + return NULL; + if (WARN_ON_ONCE(gfp & (__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM))) + return NULL; + + size = PAGE_ALIGN(size); + if (dma_alloc_direct(dev, ops)) + page = dma_direct_alloc_pages(dev, size, dma_handle, dir, gfp); + else if (ops->alloc_pages) + page = ops->alloc_pages(dev, size, dma_handle, dir, gfp); + else + return NULL; + + debug_dma_map_page(dev, page, 0, size, dir, *dma_handle); + + return page; +} +EXPORT_SYMBOL_GPL(dma_alloc_pages); + +void dma_free_pages(struct device *dev, size_t size, struct page *page, + dma_addr_t dma_handle, enum dma_data_direction dir) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + + size = PAGE_ALIGN(size); + debug_dma_unmap_page(dev, dma_handle, size, dir); + + if (dma_alloc_direct(dev, ops)) + dma_direct_free_pages(dev, size, page, dma_handle, dir); + else if (ops->free_pages) + ops->free_pages(dev, size, page, dma_handle, dir); +} +EXPORT_SYMBOL_GPL(dma_free_pages); + +void *dma_alloc_noncoherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + void *vaddr; + + if (!ops || !ops->alloc_noncoherent) { + struct page *page; + + page = dma_alloc_pages(dev, size, dma_handle, dir, gfp); + if (!page) + return NULL; + return page_address(page); + } + + size = PAGE_ALIGN(size); + vaddr = ops->alloc_noncoherent(dev, size, dma_handle, dir, gfp); + if (vaddr) + debug_dma_map_page(dev, virt_to_page(vaddr), 0, size, dir, + *dma_handle); + return vaddr; +} +EXPORT_SYMBOL_GPL(dma_alloc_noncoherent); + +void dma_free_noncoherent(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_handle, enum dma_data_direction dir) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + + if (!ops || !ops->free_noncoherent) { + dma_free_pages(dev, size, virt_to_page(vaddr), dma_handle, dir); + return; + } + + size = PAGE_ALIGN(size); + debug_dma_unmap_page(dev, dma_handle, size, dir); + ops->free_noncoherent(dev, size, vaddr, dma_handle, dir); +} +EXPORT_SYMBOL_GPL(dma_free_noncoherent); + int dma_supported(struct device *dev, u64 mask) { const struct dma_map_ops *ops = get_dma_ops(dev); @@ -563,20 +608,6 @@ int dma_set_coherent_mask(struct device *dev, u64 mask) EXPORT_SYMBOL(dma_set_coherent_mask); #endif -void dma_cache_sync(struct device *dev, void *vaddr, size_t size, - enum dma_data_direction dir) -{ - const struct dma_map_ops *ops = get_dma_ops(dev); - - BUG_ON(!valid_dma_direction(dir)); - - if (dma_alloc_direct(dev, ops)) - arch_dma_cache_sync(dev, vaddr, size, dir); - else if (ops->cache_sync) - ops->cache_sync(dev, vaddr, size, dir); -} -EXPORT_SYMBOL(dma_cache_sync); - size_t dma_max_mapping_size(struct device *dev) { const struct dma_map_ops *ops = get_dma_ops(dev); diff --git a/kernel/dma/ops_helpers.c b/kernel/dma/ops_helpers.c new file mode 100644 index 00000000000000..910ae69cae7774 --- /dev/null +++ b/kernel/dma/ops_helpers.c @@ -0,0 +1,85 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Helpers for DMA ops implementations. These generally rely on the fact that + * the allocated memory contains normal pages in the direct kernel mapping. + */ +#include + +/* + * Create scatter-list for the already allocated DMA buffer. + */ +int dma_common_get_sgtable(struct device *dev, struct sg_table *sgt, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs) +{ + struct page *page = virt_to_page(cpu_addr); + int ret; + + ret = sg_alloc_table(sgt, 1, GFP_KERNEL); + if (!ret) + sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0); + return ret; +} + +/* + * Create userspace mapping for the DMA-coherent memory. + */ +int dma_common_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs) +{ +#ifdef CONFIG_MMU + unsigned long user_count = vma_pages(vma); + unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; + unsigned long off = vma->vm_pgoff; + int ret = -ENXIO; + + vma->vm_page_prot = dma_pgprot(dev, vma->vm_page_prot, attrs); + + if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) + return ret; + + if (off >= count || user_count > count - off) + return -ENXIO; + + return remap_pfn_range(vma, vma->vm_start, + page_to_pfn(virt_to_page(cpu_addr)) + vma->vm_pgoff, + user_count << PAGE_SHIFT, vma->vm_page_prot); +#else + return -ENXIO; +#endif /* CONFIG_MMU */ +} + +struct page *dma_common_alloc_pages(struct device *dev, size_t size, + dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + struct page *page; + + page = dma_alloc_contiguous(dev, size, gfp); + if (!page) + page = alloc_pages_node(dev_to_node(dev), gfp, get_order(size)); + if (!page) + return NULL; + + *dma_handle = ops->map_page(dev, page, 0, size, dir, + DMA_ATTR_SKIP_CPU_SYNC); + if (*dma_handle == DMA_MAPPING_ERROR) { + dma_free_contiguous(dev, page, size); + return NULL; + } + + memset(page_address(page), 0, size); + return page; +} + +void dma_common_free_pages(struct device *dev, size_t size, struct page *page, + dma_addr_t dma_handle, enum dma_data_direction dir) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + + if (ops->unmap_page) + ops->unmap_page(dev, dma_handle, size, dir, + DMA_ATTR_SKIP_CPU_SYNC); + dma_free_contiguous(dev, page, size); +} diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c index 1281c0f0442bc5..d4637f72239b40 100644 --- a/kernel/dma/pool.c +++ b/kernel/dma/pool.c @@ -5,9 +5,8 @@ */ #include #include -#include +#include #include -#include #include #include #include @@ -115,7 +114,7 @@ static int atomic_pool_expand(struct gen_pool *pool, size_t pool_size, #endif /* * Memory in the atomic DMA pools must be unencrypted, the pools do not - * shrink so no re-encryption occurs in dma_direct_free_pages(). + * shrink so no re-encryption occurs in dma_direct_free(). */ ret = set_memory_decrypted((unsigned long)page_to_virt(page), 1 << order); diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 465a567678d966..b4eea0abc3f002 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -22,7 +22,7 @@ #include #include -#include +#include #include #include #include @@ -668,13 +668,13 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size, swiotlb_force); swiotlb_addr = swiotlb_tbl_map_single(dev, - __phys_to_dma(dev, io_tlb_start), + phys_to_dma_unencrypted(dev, io_tlb_start), paddr, size, size, dir, attrs); if (swiotlb_addr == (phys_addr_t)DMA_MAPPING_ERROR) return DMA_MAPPING_ERROR; /* Ensure that the address returned is DMA'ble */ - dma_addr = __phys_to_dma(dev, swiotlb_addr); + dma_addr = phys_to_dma_unencrypted(dev, swiotlb_addr); if (unlikely(!dma_capable(dev, dma_addr, size, true))) { swiotlb_tbl_unmap_single(dev, swiotlb_addr, size, size, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC); diff --git a/kernel/dma/virt.c b/kernel/dma/virt.c index ebe128833af7b5..59d32317dd574a 100644 --- a/kernel/dma/virt.c +++ b/kernel/dma/virt.c @@ -4,7 +4,7 @@ */ #include #include -#include +#include #include static void *dma_virt_alloc(struct device *dev, size_t size, @@ -55,5 +55,7 @@ const struct dma_map_ops dma_virt_ops = { .free = dma_virt_free, .map_page = dma_virt_map_page, .map_sg = dma_virt_map_sg, + .alloc_pages = dma_common_alloc_pages, + .free_pages = dma_common_free_pages, }; EXPORT_SYMBOL(dma_virt_ops); diff --git a/mm/Kconfig b/mm/Kconfig index 8c60c49a123bd6..e72e61c1d62e1a 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -516,13 +516,14 @@ config CMA_DEBUGFS config CMA_AREAS int "Maximum count of the CMA areas" depends on CMA + default 19 if NUMA default 7 help CMA allows to create CMA areas for particular purpose, mainly, used as device private area. This parameter sets the maximum number of CMA area in the system. - If unsure, leave the default value "7". + If unsure, leave the default value "7" in UMA and "19" in NUMA. config MEM_SOFT_DIRTY bool "Track memory changes" diff --git a/mm/cma.h b/mm/cma.h index 20f6e24bc477bd..42ae082cb067d1 100644 --- a/mm/cma.h +++ b/mm/cma.h @@ -4,8 +4,6 @@ #include -#define CMA_MAX_NAME 64 - struct cma { unsigned long base_pfn; unsigned long count; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index ddbd3b355361b8..fe76f8fd5a732c 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -5706,12 +5706,12 @@ void __init hugetlb_cma_reserve(int order) reserved = 0; for_each_node_state(nid, N_ONLINE) { int res; - char name[20]; + char name[CMA_MAX_NAME]; size = min(per_node, hugetlb_cma_size - reserved); size = round_up(size, PAGE_SIZE << order); - snprintf(name, 20, "hugetlb%d", nid); + snprintf(name, sizeof(name), "hugetlb%d", nid); res = cma_declare_contiguous_nid(0, size, 0, PAGE_SIZE << order, 0, false, name, &hugetlb_cma[nid], nid); diff --git a/mm/memory.c b/mm/memory.c index f482af8bc828dc..2afb01ea130765 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -65,7 +65,6 @@ #include #include #include -#include #include #include #include diff --git a/sound/mips/hal2.c b/sound/mips/hal2.c index ec84bc4c3a6e77..9ac9b58d7c8cdd 100644 --- a/sound/mips/hal2.c +++ b/sound/mips/hal2.c @@ -441,7 +441,8 @@ static inline void hal2_stop_adc(struct snd_hal2 *hal2) hal2->adc.pbus.pbus->pbdma_ctrl = HPC3_PDMACTRL_LD; } -static int hal2_alloc_dmabuf(struct snd_hal2 *hal2, struct hal2_codec *codec) +static int hal2_alloc_dmabuf(struct snd_hal2 *hal2, struct hal2_codec *codec, + enum dma_data_direction buffer_dir) { struct device *dev = hal2->card->dev; struct hal2_desc *desc; @@ -449,15 +450,15 @@ static int hal2_alloc_dmabuf(struct snd_hal2 *hal2, struct hal2_codec *codec) int count = H2_BUF_SIZE / H2_BLOCK_SIZE; int i; - codec->buffer = dma_alloc_attrs(dev, H2_BUF_SIZE, &buffer_dma, - GFP_KERNEL, DMA_ATTR_NON_CONSISTENT); + codec->buffer = dma_alloc_noncoherent(dev, H2_BUF_SIZE, &buffer_dma, + buffer_dir, GFP_KERNEL); if (!codec->buffer) return -ENOMEM; - desc = dma_alloc_attrs(dev, count * sizeof(struct hal2_desc), - &desc_dma, GFP_KERNEL, DMA_ATTR_NON_CONSISTENT); + desc = dma_alloc_noncoherent(dev, count * sizeof(struct hal2_desc), + &desc_dma, DMA_BIDIRECTIONAL, GFP_KERNEL); if (!desc) { - dma_free_attrs(dev, H2_BUF_SIZE, codec->buffer, buffer_dma, - DMA_ATTR_NON_CONSISTENT); + dma_free_noncoherent(dev, H2_BUF_SIZE, codec->buffer, buffer_dma, + buffer_dir); return -ENOMEM; } codec->buffer_dma = buffer_dma; @@ -470,20 +471,22 @@ static int hal2_alloc_dmabuf(struct snd_hal2 *hal2, struct hal2_codec *codec) desc_dma : desc_dma + (i + 1) * sizeof(struct hal2_desc); desc++; } - dma_cache_sync(dev, codec->desc, count * sizeof(struct hal2_desc), - DMA_TO_DEVICE); + dma_sync_single_for_device(dev, codec->desc_dma, + count * sizeof(struct hal2_desc), + DMA_BIDIRECTIONAL); codec->desc_count = count; return 0; } -static void hal2_free_dmabuf(struct snd_hal2 *hal2, struct hal2_codec *codec) +static void hal2_free_dmabuf(struct snd_hal2 *hal2, struct hal2_codec *codec, + enum dma_data_direction buffer_dir) { struct device *dev = hal2->card->dev; - dma_free_attrs(dev, codec->desc_count * sizeof(struct hal2_desc), - codec->desc, codec->desc_dma, DMA_ATTR_NON_CONSISTENT); - dma_free_attrs(dev, H2_BUF_SIZE, codec->buffer, codec->buffer_dma, - DMA_ATTR_NON_CONSISTENT); + dma_free_noncoherent(dev, codec->desc_count * sizeof(struct hal2_desc), + codec->desc, codec->desc_dma, DMA_BIDIRECTIONAL); + dma_free_noncoherent(dev, H2_BUF_SIZE, codec->buffer, codec->buffer_dma, + buffer_dir); } static const struct snd_pcm_hardware hal2_pcm_hw = { @@ -509,21 +512,16 @@ static int hal2_playback_open(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime = substream->runtime; struct snd_hal2 *hal2 = snd_pcm_substream_chip(substream); - int err; runtime->hw = hal2_pcm_hw; - - err = hal2_alloc_dmabuf(hal2, &hal2->dac); - if (err) - return err; - return 0; + return hal2_alloc_dmabuf(hal2, &hal2->dac, DMA_TO_DEVICE); } static int hal2_playback_close(struct snd_pcm_substream *substream) { struct snd_hal2 *hal2 = snd_pcm_substream_chip(substream); - hal2_free_dmabuf(hal2, &hal2->dac); + hal2_free_dmabuf(hal2, &hal2->dac, DMA_TO_DEVICE); return 0; } @@ -579,7 +577,9 @@ static void hal2_playback_transfer(struct snd_pcm_substream *substream, unsigned char *buf = hal2->dac.buffer + rec->hw_data; memcpy(buf, substream->runtime->dma_area + rec->sw_data, bytes); - dma_cache_sync(hal2->card->dev, buf, bytes, DMA_TO_DEVICE); + dma_sync_single_for_device(hal2->card->dev, + hal2->dac.buffer_dma + rec->hw_data, bytes, + DMA_TO_DEVICE); } @@ -597,22 +597,16 @@ static int hal2_capture_open(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime = substream->runtime; struct snd_hal2 *hal2 = snd_pcm_substream_chip(substream); - struct hal2_codec *adc = &hal2->adc; - int err; runtime->hw = hal2_pcm_hw; - - err = hal2_alloc_dmabuf(hal2, adc); - if (err) - return err; - return 0; + return hal2_alloc_dmabuf(hal2, &hal2->adc, DMA_FROM_DEVICE); } static int hal2_capture_close(struct snd_pcm_substream *substream) { struct snd_hal2 *hal2 = snd_pcm_substream_chip(substream); - hal2_free_dmabuf(hal2, &hal2->adc); + hal2_free_dmabuf(hal2, &hal2->adc, DMA_FROM_DEVICE); return 0; } @@ -667,7 +661,9 @@ static void hal2_capture_transfer(struct snd_pcm_substream *substream, struct snd_hal2 *hal2 = snd_pcm_substream_chip(substream); unsigned char *buf = hal2->adc.buffer + rec->hw_data; - dma_cache_sync(hal2->card->dev, buf, bytes, DMA_FROM_DEVICE); + dma_sync_single_for_cpu(hal2->card->dev, + hal2->adc.buffer_dma + rec->hw_data, bytes, + DMA_FROM_DEVICE); memcpy(substream->runtime->dma_area + rec->sw_data, buf, bytes); }