Blame - lib/swiotlb.c - SHIFTPHONES/android_kernel_shift_sdm845

blob: cae806d1ef1267448656fdeff64a7e94fb6d2c91 [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*
				2	* Dynamic DMA mapping support.
				3	*
John W. Linville	569c8bf	2005-09-29 14:45:24 -0700	[diff] [blame^]	4	* This implementation is for IA-64 and EM64T platforms that do not support
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	5	* I/O TLBs (aka DMA address translation hardware).
				6	* Copyright (C) 2000 Asit Mallick <Asit.K.Mallick@intel.com>
				7	* Copyright (C) 2000 Goutham Rao <goutham.rao@intel.com>
				8	* Copyright (C) 2000, 2003 Hewlett-Packard Co
				9	* David Mosberger-Tang <davidm@hpl.hp.com>
				10	*
				11	* 03/05/07 davidm Switch from PCI-DMA to generic device DMA API.
				12	* 00/12/13 davidm Rename to swiotlb.c and add mark_clean() to avoid
				13	* unnecessary i-cache flushing.
John W. Linville	569c8bf	2005-09-29 14:45:24 -0700	[diff] [blame^]	14	* 04/07/.. ak Better overflow handling. Assorted fixes.
				15	* 05/09/10 linville Add support for syncing ranges, support syncing for
				16	* DMA_BIDIRECTIONAL mappings, miscellaneous cleanup.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	17	*/
				18
				19	#include <linux/cache.h>
				20	#include <linux/mm.h>
				21	#include <linux/module.h>
				22	#include <linux/pci.h>
				23	#include <linux/spinlock.h>
				24	#include <linux/string.h>
				25	#include <linux/types.h>
				26	#include <linux/ctype.h>
				27
				28	#include <asm/io.h>
				29	#include <asm/pci.h>
				30	#include <asm/dma.h>
				31
				32	#include <linux/init.h>
				33	#include <linux/bootmem.h>
				34
				35	#define OFFSET(val,align) ((unsigned long) \
				36	( (val) & ( (align) - 1)))
				37
				38	#define SG_ENT_VIRT_ADDRESS(sg) (page_address((sg)->page) + (sg)->offset)
				39	#define SG_ENT_PHYS_ADDRESS(SG) virt_to_phys(SG_ENT_VIRT_ADDRESS(SG))
				40
				41	/*
				42	* Maximum allowable number of contiguous slabs to map,
				43	* must be a power of 2. What is the appropriate value ?
				44	* The complexity of {map,unmap}_single is linearly dependent on this value.
				45	*/
				46	#define IO_TLB_SEGSIZE 128
				47
				48	/*
				49	* log of the size of each IO TLB slab. The number of slabs is command line
				50	* controllable.
				51	*/
				52	#define IO_TLB_SHIFT 11
				53
Alex Williamson	0b9afed	2005-09-06 11:20:49 -0600	[diff] [blame]	54	#define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT))
				55
				56	/*
				57	* Minimum IO TLB size to bother booting with. Systems with mainly
				58	* 64bit capable cards will only lightly use the swiotlb. If we can't
				59	* allocate a contiguous 1MB, we're probably in trouble anyway.
				60	*/
				61	#define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)
				62
/*
 * Sync targets understood by sync_single(): SYNC_FOR_CPU copies the bounce
 * buffer back to the original buffer, SYNC_FOR_DEVICE copies the original
 * buffer into the bounce buffer.
 */
enum dma_sync_target {
	SYNC_FOR_CPU,		/* == 0 */
	SYNC_FOR_DEVICE,	/* == 1 */
};
				70
/*
 * Set to 1 when the "swiotlb=" boot option carries "force"; when non-zero,
 * swiotlb_map_single() and swiotlb_map_sg() bounce every mapping.
 */
int swiotlb_force;

/*
 * Bounds of the bounce-buffer pool. Used to do a quick range check in
 * swiotlb_unmap_single and swiotlb_sync_single_*, to see if the memory was
 * in fact allocated by this API.
 */
static char *io_tlb_start, *io_tlb_end;

/*
 * The number of IO TLB slabs between io_tlb_start and io_tlb_end. This is
 * command line adjustable via setup_io_tlb_npages, which rounds the value up
 * to a multiple of IO_TLB_SEGSIZE.
 */
static unsigned long io_tlb_nslabs;

/*
 * When the IOMMU overflows we return a fallback buffer. This sets the size.
 */
static unsigned long io_tlb_overflow = 32*1024;

void *io_tlb_overflow_buffer;

/*
 * This is a free list describing the number of free entries available from
 * each index
 */
static unsigned int *io_tlb_list;
static unsigned int io_tlb_index;

/*
 * We need to save away the original address corresponding to a mapped entry
 * for the sync operations.
 */
static unsigned char **io_tlb_orig_addr;

/*
 * Protect the above data structures in the map and unmap calls
 */
static DEFINE_SPINLOCK(io_tlb_lock);
				110
				111	static int __init
				112	setup_io_tlb_npages(char *str)
				113	{
				114	if (isdigit(*str)) {
Alex Williamson	e8579e7	2005-08-04 13:06:00 -0700	[diff] [blame]	115	io_tlb_nslabs = simple_strtoul(str, &str, 0);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	116	/* avoid tail segment of size < IO_TLB_SEGSIZE */
				117	io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
				118	}
				119	if (*str == ',')
				120	++str;
				121	if (!strcmp(str, "force"))
				122	swiotlb_force = 1;
				123	return 1;
				124	}
				125	__setup("swiotlb=", setup_io_tlb_npages);
				126	/* make io_tlb_overflow tunable too? */
				127
				128	/*
				129	* Statically reserve bounce buffer space and initialize bounce buffer data
				130	* structures for the software IO TLB used to implement the PCI DMA API.
				131	*/
				132	void
				133	swiotlb_init_with_default_size (size_t default_size)
				134	{
				135	unsigned long i;
				136
				137	if (!io_tlb_nslabs) {
Alex Williamson	e8579e7	2005-08-04 13:06:00 -0700	[diff] [blame]	138	io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	139	io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
				140	}
				141
				142	/*
				143	* Get IO TLB memory from the low pages
				144	*/
				145	io_tlb_start = alloc_bootmem_low_pages(io_tlb_nslabs *
				146	(1 << IO_TLB_SHIFT));
				147	if (!io_tlb_start)
				148	panic("Cannot allocate SWIOTLB buffer");
				149	io_tlb_end = io_tlb_start + io_tlb_nslabs * (1 << IO_TLB_SHIFT);
				150
				151	/*
				152	* Allocate and initialize the free list array. This array is used
				153	* to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
				154	* between io_tlb_start and io_tlb_end.
				155	*/
				156	io_tlb_list = alloc_bootmem(io_tlb_nslabs * sizeof(int));
				157	for (i = 0; i < io_tlb_nslabs; i++)
				158	io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
				159	io_tlb_index = 0;
				160	io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(char *));
				161
				162	/*
				163	* Get the overflow emergency buffer
				164	*/
				165	io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow);
				166	printk(KERN_INFO "Placing software IO TLB between 0x%lx - 0x%lx\n",
				167	virt_to_phys(io_tlb_start), virt_to_phys(io_tlb_end));
				168	}
				169
				170	void
				171	swiotlb_init (void)
				172	{
				173	swiotlb_init_with_default_size(64 * (1<<20)); /* default to 64MB */
				174	}
				175
Alex Williamson	0b9afed	2005-09-06 11:20:49 -0600	[diff] [blame]	176	/*
				177	* Systems with larger DMA zones (those that don't support ISA) can
				178	* initialize the swiotlb later using the slab allocator if needed.
				179	* This should be just like above, but with some error catching.
				180	*/
				181	int
				182	swiotlb_late_init_with_default_size (size_t default_size)
				183	{
				184	unsigned long i, req_nslabs = io_tlb_nslabs;
				185	unsigned int order;
				186
				187	if (!io_tlb_nslabs) {
				188	io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
				189	io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
				190	}
				191
				192	/*
				193	* Get IO TLB memory from the low pages
				194	*/
				195	order = get_order(io_tlb_nslabs * (1 << IO_TLB_SHIFT));
				196	io_tlb_nslabs = SLABS_PER_PAGE << order;
				197
				198	while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
				199	io_tlb_start = (char *)__get_free_pages(GFP_DMA \| __GFP_NOWARN,
				200	order);
				201	if (io_tlb_start)
				202	break;
				203	order--;
				204	}
				205
				206	if (!io_tlb_start)
				207	goto cleanup1;
				208
				209	if (order != get_order(io_tlb_nslabs * (1 << IO_TLB_SHIFT))) {
				210	printk(KERN_WARNING "Warning: only able to allocate %ld MB "
				211	"for software IO TLB\n", (PAGE_SIZE << order) >> 20);
				212	io_tlb_nslabs = SLABS_PER_PAGE << order;
				213	}
				214	io_tlb_end = io_tlb_start + io_tlb_nslabs * (1 << IO_TLB_SHIFT);
				215	memset(io_tlb_start, 0, io_tlb_nslabs * (1 << IO_TLB_SHIFT));
				216
				217	/*
				218	* Allocate and initialize the free list array. This array is used
				219	* to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
				220	* between io_tlb_start and io_tlb_end.
				221	*/
				222	io_tlb_list = (unsigned int *)__get_free_pages(GFP_KERNEL,
				223	get_order(io_tlb_nslabs * sizeof(int)));
				224	if (!io_tlb_list)
				225	goto cleanup2;
				226
				227	for (i = 0; i < io_tlb_nslabs; i++)
				228	io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
				229	io_tlb_index = 0;
				230
				231	io_tlb_orig_addr = (unsigned char **)__get_free_pages(GFP_KERNEL,
				232	get_order(io_tlb_nslabs * sizeof(char *)));
				233	if (!io_tlb_orig_addr)
				234	goto cleanup3;
				235
				236	memset(io_tlb_orig_addr, 0, io_tlb_nslabs * sizeof(char *));
				237
				238	/*
				239	* Get the overflow emergency buffer
				240	*/
				241	io_tlb_overflow_buffer = (void *)__get_free_pages(GFP_DMA,
				242	get_order(io_tlb_overflow));
				243	if (!io_tlb_overflow_buffer)
				244	goto cleanup4;
				245
				246	printk(KERN_INFO "Placing %ldMB software IO TLB between 0x%lx - "
				247	"0x%lx\n", (io_tlb_nslabs * (1 << IO_TLB_SHIFT)) >> 20,
				248	virt_to_phys(io_tlb_start), virt_to_phys(io_tlb_end));
				249
				250	return 0;
				251
				252	cleanup4:
				253	free_pages((unsigned long)io_tlb_orig_addr, get_order(io_tlb_nslabs *
				254	sizeof(char *)));
				255	io_tlb_orig_addr = NULL;
				256	cleanup3:
				257	free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
				258	sizeof(int)));
				259	io_tlb_list = NULL;
				260	io_tlb_end = NULL;
				261	cleanup2:
				262	free_pages((unsigned long)io_tlb_start, order);
				263	io_tlb_start = NULL;
				264	cleanup1:
				265	io_tlb_nslabs = req_nslabs;
				266	return -ENOMEM;
				267	}
				268
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	269	static inline int
				270	address_needs_mapping(struct device *hwdev, dma_addr_t addr)
				271	{
				272	dma_addr_t mask = 0xffffffff;
				273	/* If the device has a mask, use it, otherwise default to 32 bits */
				274	if (hwdev && hwdev->dma_mask)
				275	mask = *hwdev->dma_mask;
				276	return (addr & ~mask) != 0;
				277	}
				278
				279	/*
				280	* Allocates bounce buffer and returns its kernel virtual address.
				281	*/
				282	static void *
				283	map_single(struct device hwdev, char buffer, size_t size, int dir)
				284	{
				285	unsigned long flags;
				286	char *dma_addr;
				287	unsigned int nslots, stride, index, wrap;
				288	int i;
				289
				290	/*
				291	* For mappings greater than a page, we limit the stride (and
				292	* hence alignment) to a page size.
				293	*/
				294	nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
				295	if (size > PAGE_SIZE)
				296	stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT));
				297	else
				298	stride = 1;
				299
				300	if (!nslots)
				301	BUG();
				302
				303	/*
				304	* Find suitable number of IO TLB entries size that will fit this
				305	* request and allocate a buffer from that IO TLB pool.
				306	*/
				307	spin_lock_irqsave(&io_tlb_lock, flags);
				308	{
				309	wrap = index = ALIGN(io_tlb_index, stride);
				310
				311	if (index >= io_tlb_nslabs)
				312	wrap = index = 0;
				313
				314	do {
				315	/*
				316	* If we find a slot that indicates we have 'nslots'
				317	* number of contiguous buffers, we allocate the
				318	* buffers from that slot and mark the entries as '0'
				319	* indicating unavailable.
				320	*/
				321	if (io_tlb_list[index] >= nslots) {
				322	int count = 0;
				323
				324	for (i = index; i < (int) (index + nslots); i++)
				325	io_tlb_list[i] = 0;
				326	for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && io_tlb_list[i]; i--)
				327	io_tlb_list[i] = ++count;
				328	dma_addr = io_tlb_start + (index << IO_TLB_SHIFT);
				329
				330	/*
				331	* Update the indices to avoid searching in
				332	* the next round.
				333	*/
				334	io_tlb_index = ((index + nslots) < io_tlb_nslabs
				335	? (index + nslots) : 0);
				336
				337	goto found;
				338	}
				339	index += stride;
				340	if (index >= io_tlb_nslabs)
				341	index = 0;
				342	} while (index != wrap);
				343
				344	spin_unlock_irqrestore(&io_tlb_lock, flags);
				345	return NULL;
				346	}
				347	found:
				348	spin_unlock_irqrestore(&io_tlb_lock, flags);
				349
				350	/*
				351	* Save away the mapping from the original address to the DMA address.
				352	* This is needed when we sync the memory. Then we sync the buffer if
				353	* needed.
				354	*/
				355	io_tlb_orig_addr[index] = buffer;
				356	if (dir == DMA_TO_DEVICE \|\| dir == DMA_BIDIRECTIONAL)
				357	memcpy(dma_addr, buffer, size);
				358
				359	return dma_addr;
				360	}
				361
				362	/*
				363	* dma_addr is the kernel virtual address of the bounce buffer to unmap.
				364	*/
				365	static void
				366	unmap_single(struct device hwdev, char dma_addr, size_t size, int dir)
				367	{
				368	unsigned long flags;
				369	int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
				370	int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
				371	char *buffer = io_tlb_orig_addr[index];
				372
				373	/*
				374	* First, sync the memory before unmapping the entry
				375	*/
				376	if (buffer && ((dir == DMA_FROM_DEVICE) \|\| (dir == DMA_BIDIRECTIONAL)))
				377	/*
				378	* bounce... copy the data back into the original buffer * and
				379	* delete the bounce buffer.
				380	*/
				381	memcpy(buffer, dma_addr, size);
				382
				383	/*
				384	* Return the buffer to the free list by setting the corresponding
				385	* entries to indicate the number of contigous entries available.
				386	* While returning the entries to the free list, we merge the entries
				387	* with slots below and above the pool being returned.
				388	*/
				389	spin_lock_irqsave(&io_tlb_lock, flags);
				390	{
				391	count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ?
				392	io_tlb_list[index + nslots] : 0);
				393	/*
				394	* Step 1: return the slots to the free list, merging the
				395	* slots with superceeding slots
				396	*/
				397	for (i = index + nslots - 1; i >= index; i--)
				398	io_tlb_list[i] = ++count;
				399	/*
				400	* Step 2: merge the returned slots with the preceding slots,
				401	* if available (non zero)
				402	*/
				403	for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && io_tlb_list[i]; i--)
				404	io_tlb_list[i] = ++count;
				405	}
				406	spin_unlock_irqrestore(&io_tlb_lock, flags);
				407	}
				408
				409	static void
John W. Linville	de69e0f	2005-09-29 14:44:57 -0700	[diff] [blame]	410	sync_single(struct device hwdev, char dma_addr, size_t size,
				411	int dir, int target)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	412	{
				413	int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
				414	char *buffer = io_tlb_orig_addr[index];
				415
John W. Linville	de69e0f	2005-09-29 14:44:57 -0700	[diff] [blame]	416	switch (target) {
				417	case SYNC_FOR_CPU:
				418	if (likely(dir == DMA_FROM_DEVICE \|\| dir == DMA_BIDIRECTIONAL))
				419	memcpy(buffer, dma_addr, size);
				420	else if (dir != DMA_TO_DEVICE)
				421	BUG();
				422	break;
				423	case SYNC_FOR_DEVICE:
				424	if (likely(dir == DMA_TO_DEVICE \|\| dir == DMA_BIDIRECTIONAL))
				425	memcpy(dma_addr, buffer, size);
				426	else if (dir != DMA_FROM_DEVICE)
				427	BUG();
				428	break;
				429	default:
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	430	BUG();
John W. Linville	de69e0f	2005-09-29 14:44:57 -0700	[diff] [blame]	431	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	432	}
				433
				434	void *
				435	swiotlb_alloc_coherent(struct device *hwdev, size_t size,
				436	dma_addr_t *dma_handle, int flags)
				437	{
				438	unsigned long dev_addr;
				439	void *ret;
				440	int order = get_order(size);
				441
				442	/*
				443	* XXX fix me: the DMA API should pass us an explicit DMA mask
				444	* instead, or use ZONE_DMA32 (ia64 overloads ZONE_DMA to be a ~32
				445	* bit range instead of a 16MB one).
				446	*/
				447	flags \|= GFP_DMA;
				448
				449	ret = (void *)__get_free_pages(flags, order);
				450	if (ret && address_needs_mapping(hwdev, virt_to_phys(ret))) {
				451	/*
				452	* The allocated memory isn't reachable by the device.
				453	* Fall back on swiotlb_map_single().
				454	*/
				455	free_pages((unsigned long) ret, order);
				456	ret = NULL;
				457	}
				458	if (!ret) {
				459	/*
				460	* We are either out of memory or the device can't DMA
				461	* to GFP_DMA memory; fall back on
				462	* swiotlb_map_single(), which will grab memory from
				463	* the lowest available address range.
				464	*/
				465	dma_addr_t handle;
				466	handle = swiotlb_map_single(NULL, NULL, size, DMA_FROM_DEVICE);
				467	if (dma_mapping_error(handle))
				468	return NULL;
				469
				470	ret = phys_to_virt(handle);
				471	}
				472
				473	memset(ret, 0, size);
				474	dev_addr = virt_to_phys(ret);
				475
				476	/* Confirm address can be DMA'd by device */
				477	if (address_needs_mapping(hwdev, dev_addr)) {
				478	printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016lx\n",
				479	(unsigned long long)*hwdev->dma_mask, dev_addr);
				480	panic("swiotlb_alloc_coherent: allocated memory is out of "
				481	"range for device");
				482	}
				483	*dma_handle = dev_addr;
				484	return ret;
				485	}
				486
				487	void
				488	swiotlb_free_coherent(struct device hwdev, size_t size, void vaddr,
				489	dma_addr_t dma_handle)
				490	{
				491	if (!(vaddr >= (void *)io_tlb_start
				492	&& vaddr < (void *)io_tlb_end))
				493	free_pages((unsigned long) vaddr, get_order(size));
				494	else
				495	/* DMA_TO_DEVICE to avoid memcpy in unmap_single */
				496	swiotlb_unmap_single (hwdev, dma_handle, size, DMA_TO_DEVICE);
				497	}
				498
				499	static void
				500	swiotlb_full(struct device *dev, size_t size, int dir, int do_panic)
				501	{
				502	/*
				503	* Ran out of IOMMU space for this operation. This is very bad.
				504	* Unfortunately the drivers cannot handle this operation properly.
				505	* unless they check for pci_dma_mapping_error (most don't)
				506	* When the mapping is small enough return a static buffer to limit
				507	* the damage, or panic when the transfer is too big.
				508	*/
				509	printk(KERN_ERR "PCI-DMA: Out of SW-IOMMU space for %lu bytes at "
				510	"device %s\n", size, dev ? dev->bus_id : "?");
				511
				512	if (size > io_tlb_overflow && do_panic) {
				513	if (dir == PCI_DMA_FROMDEVICE \|\| dir == PCI_DMA_BIDIRECTIONAL)
				514	panic("PCI-DMA: Memory would be corrupted\n");
				515	if (dir == PCI_DMA_TODEVICE \|\| dir == PCI_DMA_BIDIRECTIONAL)
				516	panic("PCI-DMA: Random memory would be DMAed\n");
				517	}
				518	}
				519
				520	/*
				521	* Map a single buffer of the indicated size for DMA in streaming mode. The
				522	* PCI address to use is returned.
				523	*
				524	* Once the device is given the dma address, the device owns this memory until
				525	* either swiotlb_unmap_single or swiotlb_dma_sync_single is performed.
				526	*/
				527	dma_addr_t
				528	swiotlb_map_single(struct device hwdev, void ptr, size_t size, int dir)
				529	{
				530	unsigned long dev_addr = virt_to_phys(ptr);
				531	void *map;
				532
				533	if (dir == DMA_NONE)
				534	BUG();
				535	/*
				536	* If the pointer passed in happens to be in the device's DMA window,
				537	* we can safely return the device addr and not worry about bounce
				538	* buffering it.
				539	*/
				540	if (!address_needs_mapping(hwdev, dev_addr) && !swiotlb_force)
				541	return dev_addr;
				542
				543	/*
				544	* Oh well, have to allocate and map a bounce buffer.
				545	*/
				546	map = map_single(hwdev, ptr, size, dir);
				547	if (!map) {
				548	swiotlb_full(hwdev, size, dir, 1);
				549	map = io_tlb_overflow_buffer;
				550	}
				551
				552	dev_addr = virt_to_phys(map);
				553
				554	/*
				555	* Ensure that the address returned is DMA'ble
				556	*/
				557	if (address_needs_mapping(hwdev, dev_addr))
				558	panic("map_single: bounce buffer is not DMA'ble");
				559
				560	return dev_addr;
				561	}
				562
				563	/*
				564	* Since DMA is i-cache coherent, any (complete) pages that were written via
				565	* DMA can be marked as "clean" so that lazy_mmu_prot_update() doesn't have to
				566	* flush them when they get mapped into an executable vm-area.
				567	*/
				568	static void
				569	mark_clean(void *addr, size_t size)
				570	{
				571	unsigned long pg_addr, end;
				572
				573	pg_addr = PAGE_ALIGN((unsigned long) addr);
				574	end = (unsigned long) addr + size;
				575	while (pg_addr + PAGE_SIZE <= end) {
				576	struct page *page = virt_to_page(pg_addr);
				577	set_bit(PG_arch_1, &page->flags);
				578	pg_addr += PAGE_SIZE;
				579	}
				580	}
				581
				582	/*
				583	* Unmap a single streaming mode DMA translation. The dma_addr and size must
				584	* match what was provided for in a previous swiotlb_map_single call. All
				585	* other usages are undefined.
				586	*
				587	* After this call, reads by the cpu to the buffer are guaranteed to see
				588	* whatever the device wrote there.
				589	*/
				590	void
				591	swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr, size_t size,
				592	int dir)
				593	{
				594	char *dma_addr = phys_to_virt(dev_addr);
				595
				596	if (dir == DMA_NONE)
				597	BUG();
				598	if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end)
				599	unmap_single(hwdev, dma_addr, size, dir);
				600	else if (dir == DMA_FROM_DEVICE)
				601	mark_clean(dma_addr, size);
				602	}
				603
				604	/*
				605	* Make physical memory consistent for a single streaming mode DMA translation
				606	* after a transfer.
				607	*
				608	* If you perform a swiotlb_map_single() but wish to interrogate the buffer
				609	* using the cpu, yet do not wish to teardown the PCI dma mapping, you must
				610	* call this function before doing so. At the next point you give the PCI dma
				611	* address back to the card, you must first perform a
				612	* swiotlb_dma_sync_for_device, and then the device again owns the buffer
				613	*/
John W. Linville	8270f3f	2005-09-29 14:43:32 -0700	[diff] [blame]	614	static inline void
				615	swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
John W. Linville	de69e0f	2005-09-29 14:44:57 -0700	[diff] [blame]	616	size_t size, int dir, int target)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	617	{
				618	char *dma_addr = phys_to_virt(dev_addr);
				619
				620	if (dir == DMA_NONE)
				621	BUG();
				622	if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end)
John W. Linville	de69e0f	2005-09-29 14:44:57 -0700	[diff] [blame]	623	sync_single(hwdev, dma_addr, size, dir, target);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	624	else if (dir == DMA_FROM_DEVICE)
				625	mark_clean(dma_addr, size);
				626	}
				627
				628	void
John W. Linville	8270f3f	2005-09-29 14:43:32 -0700	[diff] [blame]	629	swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
				630	size_t size, int dir)
				631	{
John W. Linville	de69e0f	2005-09-29 14:44:57 -0700	[diff] [blame]	632	swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_CPU);
John W. Linville	8270f3f	2005-09-29 14:43:32 -0700	[diff] [blame]	633	}
				634
				635	void
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	636	swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
				637	size_t size, int dir)
				638	{
John W. Linville	de69e0f	2005-09-29 14:44:57 -0700	[diff] [blame]	639	swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_DEVICE);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	640	}
				641
				642	/*
John W. Linville	878a97c	2005-09-29 14:44:23 -0700	[diff] [blame]	643	* Same as above, but for a sub-range of the mapping.
				644	*/
				645	static inline void
				646	swiotlb_sync_single_range(struct device *hwdev, dma_addr_t dev_addr,
John W. Linville	de69e0f	2005-09-29 14:44:57 -0700	[diff] [blame]	647	unsigned long offset, size_t size,
				648	int dir, int target)
John W. Linville	878a97c	2005-09-29 14:44:23 -0700	[diff] [blame]	649	{
				650	char *dma_addr = phys_to_virt(dev_addr) + offset;
				651
				652	if (dir == DMA_NONE)
				653	BUG();
				654	if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end)
John W. Linville	de69e0f	2005-09-29 14:44:57 -0700	[diff] [blame]	655	sync_single(hwdev, dma_addr, size, dir, target);
John W. Linville	878a97c	2005-09-29 14:44:23 -0700	[diff] [blame]	656	else if (dir == DMA_FROM_DEVICE)
				657	mark_clean(dma_addr, size);
				658	}
				659
				660	void
				661	swiotlb_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
				662	unsigned long offset, size_t size, int dir)
				663	{
John W. Linville	de69e0f	2005-09-29 14:44:57 -0700	[diff] [blame]	664	swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir,
				665	SYNC_FOR_CPU);
John W. Linville	878a97c	2005-09-29 14:44:23 -0700	[diff] [blame]	666	}
				667
				668	void
				669	swiotlb_sync_single_range_for_device(struct device *hwdev, dma_addr_t dev_addr,
				670	unsigned long offset, size_t size, int dir)
				671	{
John W. Linville	de69e0f	2005-09-29 14:44:57 -0700	[diff] [blame]	672	swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir,
				673	SYNC_FOR_DEVICE);
John W. Linville	878a97c	2005-09-29 14:44:23 -0700	[diff] [blame]	674	}
				675
				676	/*
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	677	* Map a set of buffers described by scatterlist in streaming mode for DMA.
				678	* This is the scatter-gather version of the above swiotlb_map_single
				679	* interface. Here the scatter gather list elements are each tagged with the
				680	* appropriate dma address and length. They are obtained via
				681	* sg_dma_{address,length}(SG).
				682	*
				683	* NOTE: An implementation may be able to use a smaller number of
				684	* DMA address/length pairs than there are SG table elements.
				685	* (for example via virtual mapping capabilities)
				686	* The routine returns the number of addr/length pairs actually
				687	* used, at most nents.
				688	*
				689	* Device ownership issues as mentioned above for swiotlb_map_single are the
				690	* same here.
				691	*/
				692	int
				693	swiotlb_map_sg(struct device hwdev, struct scatterlist sg, int nelems,
				694	int dir)
				695	{
				696	void *addr;
				697	unsigned long dev_addr;
				698	int i;
				699
				700	if (dir == DMA_NONE)
				701	BUG();
				702
				703	for (i = 0; i < nelems; i++, sg++) {
				704	addr = SG_ENT_VIRT_ADDRESS(sg);
				705	dev_addr = virt_to_phys(addr);
				706	if (swiotlb_force \|\| address_needs_mapping(hwdev, dev_addr)) {
				707	sg->dma_address = (dma_addr_t) virt_to_phys(map_single(hwdev, addr, sg->length, dir));
				708	if (!sg->dma_address) {
				709	/* Don't panic here, we expect map_sg users
				710	to do proper error handling. */
				711	swiotlb_full(hwdev, sg->length, dir, 0);
				712	swiotlb_unmap_sg(hwdev, sg - i, i, dir);
				713	sg[0].dma_length = 0;
				714	return 0;
				715	}
				716	} else
				717	sg->dma_address = dev_addr;
				718	sg->dma_length = sg->length;
				719	}
				720	return nelems;
				721	}
				722
				723	/*
				724	* Unmap a set of streaming mode DMA translations. Again, cpu read rules
				725	* concerning calls here are the same as for swiotlb_unmap_single() above.
				726	*/
				727	void
				728	swiotlb_unmap_sg(struct device hwdev, struct scatterlist sg, int nelems,
				729	int dir)
				730	{
				731	int i;
				732
				733	if (dir == DMA_NONE)
				734	BUG();
				735
				736	for (i = 0; i < nelems; i++, sg++)
				737	if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
				738	unmap_single(hwdev, (void *) phys_to_virt(sg->dma_address), sg->dma_length, dir);
				739	else if (dir == DMA_FROM_DEVICE)
				740	mark_clean(SG_ENT_VIRT_ADDRESS(sg), sg->dma_length);
				741	}
				742
				743	/*
				744	* Make physical memory consistent for a set of streaming mode DMA translations
				745	* after a transfer.
				746	*
				747	* The same as swiotlb_sync_single_* but for a scatter-gather list, same rules
				748	* and usage.
				749	*/
John W. Linville	8270f3f	2005-09-29 14:43:32 -0700	[diff] [blame]	750	static inline void
				751	swiotlb_sync_sg(struct device hwdev, struct scatterlist sg,
John W. Linville	de69e0f	2005-09-29 14:44:57 -0700	[diff] [blame]	752	int nelems, int dir, int target)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	753	{
				754	int i;
				755
				756	if (dir == DMA_NONE)
				757	BUG();
				758
				759	for (i = 0; i < nelems; i++, sg++)
				760	if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
				761	sync_single(hwdev, (void *) sg->dma_address,
John W. Linville	de69e0f	2005-09-29 14:44:57 -0700	[diff] [blame]	762	sg->dma_length, dir, target);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	763	}
				764
				765	void
John W. Linville	8270f3f	2005-09-29 14:43:32 -0700	[diff] [blame]	766	swiotlb_sync_sg_for_cpu(struct device hwdev, struct scatterlist sg,
				767	int nelems, int dir)
				768	{
John W. Linville	de69e0f	2005-09-29 14:44:57 -0700	[diff] [blame]	769	swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_CPU);
John W. Linville	8270f3f	2005-09-29 14:43:32 -0700	[diff] [blame]	770	}
				771
				772	void
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	773	swiotlb_sync_sg_for_device(struct device hwdev, struct scatterlist sg,
				774	int nelems, int dir)
				775	{
John W. Linville	de69e0f	2005-09-29 14:44:57 -0700	[diff] [blame]	776	swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_DEVICE);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	777	}
				778
				779	int
				780	swiotlb_dma_mapping_error(dma_addr_t dma_addr)
				781	{
				782	return (dma_addr == virt_to_phys(io_tlb_overflow_buffer));
				783	}
				784
				785	/*
				786	* Return whether the given PCI device DMA address mask can be supported
				787	* properly. For example, if your device can only drive the low 24-bits
				788	* during PCI bus mastering, then you would pass 0x00ffffff as the mask to
				789	* this function.
				790	*/
				791	int
				792	swiotlb_dma_supported (struct device *hwdev, u64 mask)
				793	{
				794	return (virt_to_phys (io_tlb_end) - 1) <= mask;
				795	}
				796
				797	EXPORT_SYMBOL(swiotlb_init);
				798	EXPORT_SYMBOL(swiotlb_map_single);
				799	EXPORT_SYMBOL(swiotlb_unmap_single);
				800	EXPORT_SYMBOL(swiotlb_map_sg);
				801	EXPORT_SYMBOL(swiotlb_unmap_sg);
				802	EXPORT_SYMBOL(swiotlb_sync_single_for_cpu);
				803	EXPORT_SYMBOL(swiotlb_sync_single_for_device);
John W. Linville	878a97c	2005-09-29 14:44:23 -0700	[diff] [blame]	804	EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_cpu);
				805	EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_device);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	806	EXPORT_SYMBOL(swiotlb_sync_sg_for_cpu);
				807	EXPORT_SYMBOL(swiotlb_sync_sg_for_device);
				808	EXPORT_SYMBOL(swiotlb_dma_mapping_error);
				809	EXPORT_SYMBOL(swiotlb_alloc_coherent);
				810	EXPORT_SYMBOL(swiotlb_free_coherent);
				811	EXPORT_SYMBOL(swiotlb_dma_supported);