 1/*
2 * Back-end of the driver for virtual network devices. This portion of the
3 * driver exports a 'unified' network-device interface that can be accessed
4 * by any operating system that implements a compatible front end. A
5 * reference front-end implementation can be found in:
6 * drivers/net/xen-netfront.c
7 *
8 * Copyright (c) 2002-2005, K A Fraser
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License version 2
12 * as published by the Free Software Foundation; or, when distributed
13 * separately from the Linux kernel or incorporated into other
14 * software packages, subject to the following license:
15 *
16 * Permission is hereby granted, free of charge, to any person obtaining a copy
17 * of this source file (the "Software"), to deal in the Software without
18 * restriction, including without limitation the rights to use, copy, modify,
19 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
20 * and to permit persons to whom the Software is furnished to do so, subject to
21 * the following conditions:
22 *
23 * The above copyright notice and this permission notice shall be included in
24 * all copies or substantial portions of the Software.
25 *
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
27 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
28 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
29 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
30 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
31 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
32 * IN THE SOFTWARE.
33 */
34
35#include "common.h"
36
37#include <linux/kthread.h>
38#include <linux/if_vlan.h>
39#include <linux/udp.h>
40
41#include <net/tcp.h>
42
43#include <xen/events.h>
44#include <xen/interface/memory.h>
45
46#include <asm/xen/hypercall.h>
47#include <asm/xen/page.h>
48
49struct pending_tx_info {
50 struct xen_netif_tx_request req;
51 struct xenvif *vif;
52};
53typedef unsigned int pending_ring_idx_t;
54
55struct netbk_rx_meta {
56 int id;
57 int size;
58 int gso_size;
59};
60
61#define MAX_PENDING_REQS 256
62
 63/* Discriminate from any valid pending_idx value. */
64#define INVALID_PENDING_IDX 0xFFFF
65
 66#define MAX_BUFFER_OFFSET PAGE_SIZE
67
68/* extra field used in struct page */
69union page_ext {
70 struct {
71#if BITS_PER_LONG < 64
72#define IDX_WIDTH 8
73#define GROUP_WIDTH (BITS_PER_LONG - IDX_WIDTH)
74 unsigned int group:GROUP_WIDTH;
75 unsigned int idx:IDX_WIDTH;
76#else
77 unsigned int group, idx;
78#endif
79 } e;
80 void *mapping;
81};
82
83struct xen_netbk {
84 wait_queue_head_t wq;
85 struct task_struct *task;
86
87 struct sk_buff_head rx_queue;
88 struct sk_buff_head tx_queue;
89
90 struct timer_list net_timer;
91
92 struct page *mmap_pages[MAX_PENDING_REQS];
93
94 pending_ring_idx_t pending_prod;
95 pending_ring_idx_t pending_cons;
96 struct list_head net_schedule_list;
97
98 /* Protect the net_schedule_list in netif. */
99 spinlock_t net_schedule_list_lock;
100
101 atomic_t netfront_count;
102
103 struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
104 struct gnttab_copy tx_copy_ops[MAX_PENDING_REQS];
105
106 u16 pending_ring[MAX_PENDING_REQS];
107
108 /*
109 * Given MAX_BUFFER_OFFSET of 4096 the worst case is that each
110 * head/fragment page uses 2 copy operations because it
111 * straddles two buffers in the frontend.
112 */
113 struct gnttab_copy grant_copy_op[2*XEN_NETIF_RX_RING_SIZE];
114 struct netbk_rx_meta meta[2*XEN_NETIF_RX_RING_SIZE];
115};
116
117static struct xen_netbk *xen_netbk;
118static int xen_netbk_group_nr;
119
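/*
 * Attach a new vif to the netback group currently serving the fewest
 * netfronts, spreading the load across the per-CPU netback threads.
 */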
120void xen_netbk_add_xenvif(struct xenvif *vif)
121{
122 int i;
123 int min_netfront_count;
124 int min_group = 0;
125 struct xen_netbk *netbk;
126
127 min_netfront_count = atomic_read(&xen_netbk[0].netfront_count);
128 for (i = 0; i < xen_netbk_group_nr; i++) {
129 int netfront_count = atomic_read(&xen_netbk[i].netfront_count);
130 if (netfront_count < min_netfront_count) {
131 min_group = i;
132 min_netfront_count = netfront_count;
133 }
134 }
135
136 netbk = &xen_netbk[min_group];
137
138 vif->netbk = netbk;
139 atomic_inc(&netbk->netfront_count);
140}
141
142void xen_netbk_remove_xenvif(struct xenvif *vif)
143{
144 struct xen_netbk *netbk = vif->netbk;
145 vif->netbk = NULL;
146 atomic_dec(&netbk->netfront_count);
147}
148
149static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx);
150static void make_tx_response(struct xenvif *vif,
151 struct xen_netif_tx_request *txp,
152 s8 st);
153static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif,
154 u16 id,
155 s8 st,
156 u16 offset,
157 u16 size,
158 u16 flags);
159
160static inline unsigned long idx_to_pfn(struct xen_netbk *netbk,
 161 u16 idx)
 162{
163 return page_to_pfn(netbk->mmap_pages[idx]);
164}
165
166static inline unsigned long idx_to_kaddr(struct xen_netbk *netbk,
 167 u16 idx)
 168{
169 return (unsigned long)pfn_to_kaddr(idx_to_pfn(netbk, idx));
170}
171
172/* extra field used in struct page */
173static inline void set_page_ext(struct page *pg, struct xen_netbk *netbk,
174 unsigned int idx)
175{
176 unsigned int group = netbk - xen_netbk;
177 union page_ext ext = { .e = { .group = group + 1, .idx = idx } };
178
179 BUILD_BUG_ON(sizeof(ext) > sizeof(ext.mapping));
180 pg->mapping = ext.mapping;
181}
182
183static int get_page_ext(struct page *pg,
184 unsigned int *pgroup, unsigned int *pidx)
185{
186 union page_ext ext = { .mapping = pg->mapping };
187 struct xen_netbk *netbk;
188 unsigned int group, idx;
189
190 group = ext.e.group - 1;
191
192 if (group < 0 || group >= xen_netbk_group_nr)
193 return 0;
194
195 netbk = &xen_netbk[group];
196
197 idx = ext.e.idx;
198
199 if ((idx < 0) || (idx >= MAX_PENDING_REQS))
200 return 0;
201
202 if (netbk->mmap_pages[idx] != pg)
203 return 0;
204
205 *pgroup = group;
206 *pidx = idx;
207
208 return 1;
209}
210
211/*
212 * This is the amount of packet we copy rather than map, so that the
213 * guest can't fiddle with the contents of the headers while we do
214 * packet processing on them (netfilter, routing, etc).
215 */
216#define PKT_PROT_LEN (ETH_HLEN + \
217 VLAN_HLEN + \
218 sizeof(struct iphdr) + MAX_IPOPTLEN + \
219 sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE)
220
 221static u16 frag_get_pending_idx(skb_frag_t *frag)
222{
223 return (u16)frag->page_offset;
224}
225
226static void frag_set_pending_idx(skb_frag_t *frag, u16 pending_idx)
227{
228 frag->page_offset = pending_idx;
229}
230
 231static inline pending_ring_idx_t pending_index(unsigned i)
232{
233 return i & (MAX_PENDING_REQS-1);
234}
235
236static inline pending_ring_idx_t nr_pending_reqs(struct xen_netbk *netbk)
237{
238 return MAX_PENDING_REQS -
239 netbk->pending_prod + netbk->pending_cons;
240}
241
242static void xen_netbk_kick_thread(struct xen_netbk *netbk)
243{
244 wake_up(&netbk->wq);
245}
246
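/*
 * Worst-case number of RX ring slots needed to receive a single packet
 * from this vif.
 */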
247static int max_required_rx_slots(struct xenvif *vif)
248{
249 int max = DIV_ROUND_UP(vif->dev->mtu, PAGE_SIZE);
250
251 if (vif->can_sg || vif->gso || vif->gso_prefix)
252 max += MAX_SKB_FRAGS + 1; /* extra_info + frags */
253
254 return max;
255}
256
257int xen_netbk_rx_ring_full(struct xenvif *vif)
258{
259 RING_IDX peek = vif->rx_req_cons_peek;
260 RING_IDX needed = max_required_rx_slots(vif);
261
262 return ((vif->rx.sring->req_prod - peek) < needed) ||
263 ((vif->rx.rsp_prod_pvt + XEN_NETIF_RX_RING_SIZE - peek) < needed);
264}
265
266int xen_netbk_must_stop_queue(struct xenvif *vif)
267{
268 if (!xen_netbk_rx_ring_full(vif))
269 return 0;
270
271 vif->rx.sring->req_event = vif->rx_req_cons_peek +
272 max_required_rx_slots(vif);
273 mb(); /* request notification /then/ check the queue */
274
275 return xen_netbk_rx_ring_full(vif);
276}
277
278/*
279 * Returns true if we should start a new receive buffer instead of
280 * adding 'size' bytes to a buffer which currently contains 'offset'
281 * bytes.
282 */
283static bool start_new_rx_buffer(int offset, unsigned long size, int head)
284{
285 /* simple case: we have completely filled the current buffer. */
286 if (offset == MAX_BUFFER_OFFSET)
287 return true;
288
289 /*
290 * complex case: start a fresh buffer if the current frag
291 * would overflow the current buffer but only if:
292 * (i) this frag would fit completely in the next buffer
293 * and (ii) there is already some data in the current buffer
294 * and (iii) this is not the head buffer.
295 *
296 * Where:
297 * - (i) stops us splitting a frag into two copies
298 * unless the frag is too large for a single buffer.
299 * - (ii) stops us from leaving a buffer pointlessly empty.
300 * - (iii) stops us leaving the first buffer
301 * empty. Strictly speaking this is already covered
302 * by (ii) but is explicitly checked because
303 * netfront relies on the first buffer being
304 * non-empty and can crash otherwise.
305 *
306 * This means we will effectively linearise small
307 * frags but do not needlessly split large buffers
 308 * into multiple copies, tending to give large frags
 309 * their own buffers as before.
310 */
311 if ((offset + size > MAX_BUFFER_OFFSET) &&
312 (size <= MAX_BUFFER_OFFSET) && offset && !head)
313 return true;
314
315 return false;
316}
317
318/*
319 * Figure out how many ring slots we're going to need to send @skb to
320 * the guest. This function is essentially a dry run of
321 * netbk_gop_frag_copy.
322 */
323unsigned int xen_netbk_count_skb_slots(struct xenvif *vif, struct sk_buff *skb)
324{
325 unsigned int count;
326 int i, copy_off;
327
 328 count = DIV_ROUND_UP(skb_headlen(skb), PAGE_SIZE);
 329
330 copy_off = skb_headlen(skb) % PAGE_SIZE;
331
332 if (skb_shinfo(skb)->gso_size)
333 count++;
334
335 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 336 unsigned long size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
 337 unsigned long bytes;
338 while (size > 0) {
339 BUG_ON(copy_off > MAX_BUFFER_OFFSET);
340
341 if (start_new_rx_buffer(copy_off, size, 0)) {
342 count++;
343 copy_off = 0;
344 }
345
346 bytes = size;
347 if (copy_off + bytes > MAX_BUFFER_OFFSET)
348 bytes = MAX_BUFFER_OFFSET - copy_off;
349
350 copy_off += bytes;
351 size -= bytes;
352 }
353 }
354 return count;
355}
356
357struct netrx_pending_operations {
358 unsigned copy_prod, copy_cons;
359 unsigned meta_prod, meta_cons;
360 struct gnttab_copy *copy;
361 struct netbk_rx_meta *meta;
362 int copy_off;
363 grant_ref_t copy_gref;
364};
365
366static struct netbk_rx_meta *get_next_rx_buffer(struct xenvif *vif,
367 struct netrx_pending_operations *npo)
368{
369 struct netbk_rx_meta *meta;
370 struct xen_netif_rx_request *req;
371
372 req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
373
374 meta = npo->meta + npo->meta_prod++;
375 meta->gso_size = 0;
376 meta->size = 0;
377 meta->id = req->id;
378
379 npo->copy_off = 0;
380 npo->copy_gref = req->gref;
381
382 return meta;
383}
384
385/*
386 * Set up the grant operations for this fragment. If it's a flipping
387 * interface, we also set up the unmap request from here.
388 */
389static void netbk_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb,
390 struct netrx_pending_operations *npo,
391 struct page *page, unsigned long size,
392 unsigned long offset, int *head)
393{
394 struct gnttab_copy *copy_gop;
395 struct netbk_rx_meta *meta;
396 /*
 397 * These variables are used iff get_page_ext returns true,
 398 * in which case they are guaranteed to be initialized.
399 */
400 unsigned int uninitialized_var(group), uninitialized_var(idx);
401 int foreign = get_page_ext(page, &group, &idx);
402 unsigned long bytes;
403
404 /* Data must not cross a page boundary. */
405 BUG_ON(size + offset > PAGE_SIZE);
406
407 meta = npo->meta + npo->meta_prod - 1;
408
409 while (size > 0) {
410 BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET);
411
412 if (start_new_rx_buffer(npo->copy_off, size, *head)) {
413 /*
414 * Netfront requires there to be some data in the head
415 * buffer.
416 */
417 BUG_ON(*head);
418
419 meta = get_next_rx_buffer(vif, npo);
420 }
421
422 bytes = size;
423 if (npo->copy_off + bytes > MAX_BUFFER_OFFSET)
424 bytes = MAX_BUFFER_OFFSET - npo->copy_off;
425
426 copy_gop = npo->copy + npo->copy_prod++;
427 copy_gop->flags = GNTCOPY_dest_gref;
428 if (foreign) {
429 struct xen_netbk *netbk = &xen_netbk[group];
430 struct pending_tx_info *src_pend;
431
432 src_pend = &netbk->pending_tx_info[idx];
433
434 copy_gop->source.domid = src_pend->vif->domid;
435 copy_gop->source.u.ref = src_pend->req.gref;
436 copy_gop->flags |= GNTCOPY_source_gref;
437 } else {
438 void *vaddr = page_address(page);
439 copy_gop->source.domid = DOMID_SELF;
440 copy_gop->source.u.gmfn = virt_to_mfn(vaddr);
441 }
442 copy_gop->source.offset = offset;
443 copy_gop->dest.domid = vif->domid;
444
445 copy_gop->dest.offset = npo->copy_off;
446 copy_gop->dest.u.ref = npo->copy_gref;
447 copy_gop->len = bytes;
448
449 npo->copy_off += bytes;
450 meta->size += bytes;
451
452 offset += bytes;
453 size -= bytes;
454
455 /* Leave a gap for the GSO descriptor. */
456 if (*head && skb_shinfo(skb)->gso_size && !vif->gso_prefix)
457 vif->rx.req_cons++;
458
459 *head = 0; /* There must be something in this buffer now. */
460
461 }
462}
463
464/*
465 * Prepare an SKB to be transmitted to the frontend.
466 *
467 * This function is responsible for allocating grant operations, meta
468 * structures, etc.
469 *
470 * It returns the number of meta structures consumed. The number of
471 * ring slots used is always equal to the number of meta slots used
472 * plus the number of GSO descriptors used. Currently, we use either
473 * zero GSO descriptors (for non-GSO packets) or one descriptor (for
474 * frontend-side LRO).
475 */
476static int netbk_gop_skb(struct sk_buff *skb,
477 struct netrx_pending_operations *npo)
478{
479 struct xenvif *vif = netdev_priv(skb->dev);
480 int nr_frags = skb_shinfo(skb)->nr_frags;
481 int i;
482 struct xen_netif_rx_request *req;
483 struct netbk_rx_meta *meta;
484 unsigned char *data;
485 int head = 1;
486 int old_meta_prod;
487
488 old_meta_prod = npo->meta_prod;
489
490 /* Set up a GSO prefix descriptor, if necessary */
491 if (skb_shinfo(skb)->gso_size && vif->gso_prefix) {
492 req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
493 meta = npo->meta + npo->meta_prod++;
494 meta->gso_size = skb_shinfo(skb)->gso_size;
495 meta->size = 0;
496 meta->id = req->id;
497 }
498
499 req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
500 meta = npo->meta + npo->meta_prod++;
501
502 if (!vif->gso_prefix)
503 meta->gso_size = skb_shinfo(skb)->gso_size;
504 else
505 meta->gso_size = 0;
506
507 meta->size = 0;
508 meta->id = req->id;
509 npo->copy_off = 0;
510 npo->copy_gref = req->gref;
511
512 data = skb->data;
513 while (data < skb_tail_pointer(skb)) {
514 unsigned int offset = offset_in_page(data);
515 unsigned int len = PAGE_SIZE - offset;
516
517 if (data + len > skb_tail_pointer(skb))
518 len = skb_tail_pointer(skb) - data;
519
520 netbk_gop_frag_copy(vif, skb, npo,
521 virt_to_page(data), len, offset, &head);
522 data += len;
523 }
524
525 for (i = 0; i < nr_frags; i++) {
526 netbk_gop_frag_copy(vif, skb, npo,
 527 skb_frag_page(&skb_shinfo(skb)->frags[i]),
 528 skb_frag_size(&skb_shinfo(skb)->frags[i]),
 529 skb_shinfo(skb)->frags[i].page_offset,
530 &head);
531 }
532
533 return npo->meta_prod - old_meta_prod;
534}
535
536/*
537 * This is a twin to netbk_gop_skb. Assume that netbk_gop_skb was
538 * used to set up the operations on the top of
539 * netrx_pending_operations, which have since been done. Check that
540 * they didn't give any errors and advance over them.
541 */
542static int netbk_check_gop(struct xenvif *vif, int nr_meta_slots,
543 struct netrx_pending_operations *npo)
544{
545 struct gnttab_copy *copy_op;
546 int status = XEN_NETIF_RSP_OKAY;
547 int i;
548
549 for (i = 0; i < nr_meta_slots; i++) {
550 copy_op = npo->copy + npo->copy_cons++;
551 if (copy_op->status != GNTST_okay) {
552 netdev_dbg(vif->dev,
553 "Bad status %d from copy to DOM%d.\n",
554 copy_op->status, vif->domid);
555 status = XEN_NETIF_RSP_ERROR;
556 }
557 }
558
559 return status;
560}
561
562static void netbk_add_frag_responses(struct xenvif *vif, int status,
563 struct netbk_rx_meta *meta,
564 int nr_meta_slots)
565{
566 int i;
567 unsigned long offset;
568
569 /* No fragments used */
570 if (nr_meta_slots <= 1)
571 return;
572
573 nr_meta_slots--;
574
575 for (i = 0; i < nr_meta_slots; i++) {
576 int flags;
577 if (i == nr_meta_slots - 1)
578 flags = 0;
579 else
580 flags = XEN_NETRXF_more_data;
581
582 offset = 0;
583 make_rx_response(vif, meta[i].id, status, offset,
584 meta[i].size, flags);
585 }
586}
587
588struct skb_cb_overlay {
589 int meta_slots_used;
590};
591
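/*
 * Deliver queued skbs to their guests: build a batch of grant-copy
 * operations, issue it, then write the RX responses and notify any
 * frontends that requested an event.
 */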
592static void xen_netbk_rx_action(struct xen_netbk *netbk)
593{
594 struct xenvif *vif = NULL, *tmp;
595 s8 status;
596 u16 irq, flags;
597 struct xen_netif_rx_response *resp;
598 struct sk_buff_head rxq;
599 struct sk_buff *skb;
600 LIST_HEAD(notify);
601 int ret;
602 int nr_frags;
603 int count;
604 unsigned long offset;
605 struct skb_cb_overlay *sco;
606
607 struct netrx_pending_operations npo = {
608 .copy = netbk->grant_copy_op,
609 .meta = netbk->meta,
610 };
611
612 skb_queue_head_init(&rxq);
613
614 count = 0;
615
616 while ((skb = skb_dequeue(&netbk->rx_queue)) != NULL) {
617 vif = netdev_priv(skb->dev);
618 nr_frags = skb_shinfo(skb)->nr_frags;
619
620 sco = (struct skb_cb_overlay *)skb->cb;
621 sco->meta_slots_used = netbk_gop_skb(skb, &npo);
622
623 count += nr_frags + 1;
624
625 __skb_queue_tail(&rxq, skb);
626
627 /* Filled the batch queue? */
628 if (count + MAX_SKB_FRAGS >= XEN_NETIF_RX_RING_SIZE)
629 break;
630 }
631
632 BUG_ON(npo.meta_prod > ARRAY_SIZE(netbk->meta));
633
634 if (!npo.copy_prod)
635 return;
636
637 BUG_ON(npo.copy_prod > ARRAY_SIZE(netbk->grant_copy_op));
 638 gnttab_batch_copy(netbk->grant_copy_op, npo.copy_prod);
 639
640 while ((skb = __skb_dequeue(&rxq)) != NULL) {
641 sco = (struct skb_cb_overlay *)skb->cb;
642
643 vif = netdev_priv(skb->dev);
644
645 if (netbk->meta[npo.meta_cons].gso_size && vif->gso_prefix) {
646 resp = RING_GET_RESPONSE(&vif->rx,
647 vif->rx.rsp_prod_pvt++);
648
649 resp->flags = XEN_NETRXF_gso_prefix | XEN_NETRXF_more_data;
650
651 resp->offset = netbk->meta[npo.meta_cons].gso_size;
652 resp->id = netbk->meta[npo.meta_cons].id;
653 resp->status = sco->meta_slots_used;
654
655 npo.meta_cons++;
656 sco->meta_slots_used--;
657 }
658
659
660 vif->dev->stats.tx_bytes += skb->len;
661 vif->dev->stats.tx_packets++;
662
663 status = netbk_check_gop(vif, sco->meta_slots_used, &npo);
664
665 if (sco->meta_slots_used == 1)
666 flags = 0;
667 else
668 flags = XEN_NETRXF_more_data;
669
670 if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
671 flags |= XEN_NETRXF_csum_blank | XEN_NETRXF_data_validated;
672 else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
673 /* remote but checksummed. */
674 flags |= XEN_NETRXF_data_validated;
675
676 offset = 0;
677 resp = make_rx_response(vif, netbk->meta[npo.meta_cons].id,
678 status, offset,
679 netbk->meta[npo.meta_cons].size,
680 flags);
681
682 if (netbk->meta[npo.meta_cons].gso_size && !vif->gso_prefix) {
683 struct xen_netif_extra_info *gso =
684 (struct xen_netif_extra_info *)
685 RING_GET_RESPONSE(&vif->rx,
686 vif->rx.rsp_prod_pvt++);
687
688 resp->flags |= XEN_NETRXF_extra_info;
689
690 gso->u.gso.size = netbk->meta[npo.meta_cons].gso_size;
691 gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
692 gso->u.gso.pad = 0;
693 gso->u.gso.features = 0;
694
695 gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
696 gso->flags = 0;
697 }
698
699 netbk_add_frag_responses(vif, status,
700 netbk->meta + npo.meta_cons + 1,
701 sco->meta_slots_used);
702
703 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->rx, ret);
704 irq = vif->irq;
705 if (ret && list_empty(&vif->notify_list))
706 list_add_tail(&vif->notify_list, &notify);
707
708 xenvif_notify_tx_completion(vif);
709
710 xenvif_put(vif);
711 npo.meta_cons += sco->meta_slots_used;
712 dev_kfree_skb(skb);
713 }
714
715 list_for_each_entry_safe(vif, tmp, &notify, notify_list) {
716 notify_remote_via_irq(vif->irq);
717 list_del_init(&vif->notify_list);
718 }
719
720 /* More work to do? */
721 if (!skb_queue_empty(&netbk->rx_queue) &&
722 !timer_pending(&netbk->net_timer))
723 xen_netbk_kick_thread(netbk);
724}
725
726void xen_netbk_queue_tx_skb(struct xenvif *vif, struct sk_buff *skb)
727{
728 struct xen_netbk *netbk = vif->netbk;
729
730 skb_queue_tail(&netbk->rx_queue, skb);
731
732 xen_netbk_kick_thread(netbk);
733}
734
735static void xen_netbk_alarm(unsigned long data)
736{
737 struct xen_netbk *netbk = (struct xen_netbk *)data;
738 xen_netbk_kick_thread(netbk);
739}
740
741static int __on_net_schedule_list(struct xenvif *vif)
742{
743 return !list_empty(&vif->schedule_list);
744}
745
746/* Must be called with net_schedule_list_lock held */
747static void remove_from_net_schedule_list(struct xenvif *vif)
748{
749 if (likely(__on_net_schedule_list(vif))) {
750 list_del_init(&vif->schedule_list);
751 xenvif_put(vif);
752 }
753}
754
755static struct xenvif *poll_net_schedule_list(struct xen_netbk *netbk)
756{
757 struct xenvif *vif = NULL;
758
759 spin_lock_irq(&netbk->net_schedule_list_lock);
760 if (list_empty(&netbk->net_schedule_list))
761 goto out;
762
763 vif = list_first_entry(&netbk->net_schedule_list,
764 struct xenvif, schedule_list);
765 if (!vif)
766 goto out;
767
768 xenvif_get(vif);
769
770 remove_from_net_schedule_list(vif);
771out:
772 spin_unlock_irq(&netbk->net_schedule_list_lock);
773 return vif;
774}
775
776void xen_netbk_schedule_xenvif(struct xenvif *vif)
777{
778 unsigned long flags;
779 struct xen_netbk *netbk = vif->netbk;
780
781 if (__on_net_schedule_list(vif))
782 goto kick;
783
784 spin_lock_irqsave(&netbk->net_schedule_list_lock, flags);
785 if (!__on_net_schedule_list(vif) &&
786 likely(xenvif_schedulable(vif))) {
787 list_add_tail(&vif->schedule_list, &netbk->net_schedule_list);
788 xenvif_get(vif);
789 }
790 spin_unlock_irqrestore(&netbk->net_schedule_list_lock, flags);
791
792kick:
793 smp_mb();
794 if ((nr_pending_reqs(netbk) < (MAX_PENDING_REQS/2)) &&
795 !list_empty(&netbk->net_schedule_list))
796 xen_netbk_kick_thread(netbk);
797}
798
799void xen_netbk_deschedule_xenvif(struct xenvif *vif)
800{
801 struct xen_netbk *netbk = vif->netbk;
802 spin_lock_irq(&netbk->net_schedule_list_lock);
803 remove_from_net_schedule_list(vif);
804 spin_unlock_irq(&netbk->net_schedule_list_lock);
805}
806
807void xen_netbk_check_rx_xenvif(struct xenvif *vif)
808{
809 int more_to_do;
810
811 RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, more_to_do);
812
813 if (more_to_do)
814 xen_netbk_schedule_xenvif(vif);
815}
816
817static void tx_add_credit(struct xenvif *vif)
818{
819 unsigned long max_burst, max_credit;
820
821 /*
822 * Allow a burst big enough to transmit a jumbo packet of up to 128kB.
823 * Otherwise the interface can seize up due to insufficient credit.
824 */
825 max_burst = RING_GET_REQUEST(&vif->tx, vif->tx.req_cons)->size;
826 max_burst = min(max_burst, 131072UL);
827 max_burst = max(max_burst, vif->credit_bytes);
828
829 /* Take care that adding a new chunk of credit doesn't wrap to zero. */
830 max_credit = vif->remaining_credit + vif->credit_bytes;
831 if (max_credit < vif->remaining_credit)
832 max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */
833
834 vif->remaining_credit = min(max_credit, max_burst);
835}
836
837static void tx_credit_callback(unsigned long data)
838{
839 struct xenvif *vif = (struct xenvif *)data;
840 tx_add_credit(vif);
841 xen_netbk_check_rx_xenvif(vif);
842}
843
844static void netbk_tx_err(struct xenvif *vif,
845 struct xen_netif_tx_request *txp, RING_IDX end)
846{
847 RING_IDX cons = vif->tx.req_cons;
848
849 do {
850 make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR);
851 if (cons >= end)
852 break;
853 txp = RING_GET_REQUEST(&vif->tx, cons++);
854 } while (1);
855 vif->tx.req_cons = cons;
856 xen_netbk_check_rx_xenvif(vif);
857 xenvif_put(vif);
858}
859
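/*
 * Count the extra TX requests (fragments) that follow @first, copying
 * them into @txp. Returns the fragment count; on error the count is
 * returned negated.
 */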
860static int netbk_count_requests(struct xenvif *vif,
861 struct xen_netif_tx_request *first,
862 struct xen_netif_tx_request *txp,
863 int work_to_do)
864{
865 RING_IDX cons = vif->tx.req_cons;
866 int frags = 0;
867
868 if (!(first->flags & XEN_NETTXF_more_data))
869 return 0;
870
871 do {
872 if (frags >= work_to_do) {
873 netdev_dbg(vif->dev, "Need more frags\n");
874 return -frags;
875 }
876
877 if (unlikely(frags >= MAX_SKB_FRAGS)) {
878 netdev_dbg(vif->dev, "Too many frags\n");
879 return -frags;
880 }
881
882 memcpy(txp, RING_GET_REQUEST(&vif->tx, cons + frags),
883 sizeof(*txp));
884 if (txp->size > first->size) {
885 netdev_dbg(vif->dev, "Frags galore\n");
886 return -frags;
887 }
888
889 first->size -= txp->size;
890 frags++;
891
892 if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
893 netdev_dbg(vif->dev, "txp->offset: %x, size: %u\n",
894 txp->offset, txp->size);
895 return -frags;
896 }
897 } while ((txp++)->flags & XEN_NETTXF_more_data);
898 return frags;
899}
900
901static struct page *xen_netbk_alloc_page(struct xen_netbk *netbk,
902 struct sk_buff *skb,
 903 u16 pending_idx)
 904{
905 struct page *page;
906 page = alloc_page(GFP_KERNEL|__GFP_COLD);
907 if (!page)
908 return NULL;
909 set_page_ext(page, netbk, pending_idx);
910 netbk->mmap_pages[pending_idx] = page;
911 return page;
912}
913
914static struct gnttab_copy *xen_netbk_get_requests(struct xen_netbk *netbk,
915 struct xenvif *vif,
916 struct sk_buff *skb,
917 struct xen_netif_tx_request *txp,
918 struct gnttab_copy *gop)
919{
920 struct skb_shared_info *shinfo = skb_shinfo(skb);
921 skb_frag_t *frags = shinfo->frags;
 922 u16 pending_idx = *((u16 *)skb->data);
 923 int i, start;
924
925 /* Skip first skb fragment if it is on same page as header fragment. */
 926 start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);
 927
928 for (i = start; i < shinfo->nr_frags; i++, txp++) {
929 struct page *page;
930 pending_ring_idx_t index;
931 struct pending_tx_info *pending_tx_info =
932 netbk->pending_tx_info;
933
934 index = pending_index(netbk->pending_cons++);
935 pending_idx = netbk->pending_ring[index];
936 page = xen_netbk_alloc_page(netbk, skb, pending_idx);
937 if (!page)
938 return NULL;
939
 940 gop->source.u.ref = txp->gref;
941 gop->source.domid = vif->domid;
942 gop->source.offset = txp->offset;
943
944 gop->dest.u.gmfn = virt_to_mfn(page_address(page));
945 gop->dest.domid = DOMID_SELF;
946 gop->dest.offset = txp->offset;
947
948 gop->len = txp->size;
949 gop->flags = GNTCOPY_source_gref;
950
951 gop++;
952
953 memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp));
954 xenvif_get(vif);
955 pending_tx_info[pending_idx].vif = vif;
 956 frag_set_pending_idx(&frags[i], pending_idx);
 957 }
958
959 return gop;
960}
961
962static int xen_netbk_tx_check_gop(struct xen_netbk *netbk,
963 struct sk_buff *skb,
964 struct gnttab_copy **gopp)
965{
966 struct gnttab_copy *gop = *gopp;
 967 u16 pending_idx = *((u16 *)skb->data);
 968 struct pending_tx_info *pending_tx_info = netbk->pending_tx_info;
969 struct xenvif *vif = pending_tx_info[pending_idx].vif;
970 struct xen_netif_tx_request *txp;
971 struct skb_shared_info *shinfo = skb_shinfo(skb);
972 int nr_frags = shinfo->nr_frags;
973 int i, err, start;
974
975 /* Check status of header. */
976 err = gop->status;
977 if (unlikely(err)) {
978 pending_ring_idx_t index;
979 index = pending_index(netbk->pending_prod++);
980 txp = &pending_tx_info[pending_idx].req;
981 make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR);
982 netbk->pending_ring[index] = pending_idx;
983 xenvif_put(vif);
984 }
985
986 /* Skip first skb fragment if it is on same page as header fragment. */
 987 start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);
 988
989 for (i = start; i < nr_frags; i++) {
990 int j, newerr;
991 pending_ring_idx_t index;
992
 993 pending_idx = frag_get_pending_idx(&shinfo->frags[i]);
 994
995 /* Check error status: if okay then remember grant handle. */
996 newerr = (++gop)->status;
997 if (likely(!newerr)) {
998 /* Had a previous error? Invalidate this fragment. */
999 if (unlikely(err))
1000 xen_netbk_idx_release(netbk, pending_idx);
1001 continue;
1002 }
1003
1004 /* Error on this fragment: respond to client with an error. */
1005 txp = &netbk->pending_tx_info[pending_idx].req;
1006 make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR);
1007 index = pending_index(netbk->pending_prod++);
1008 netbk->pending_ring[index] = pending_idx;
1009 xenvif_put(vif);
1010
1011 /* Not the first error? Preceding frags already invalidated. */
1012 if (err)
1013 continue;
1014
1015 /* First error: invalidate header and preceding fragments. */
1016 pending_idx = *((u16 *)skb->data);
1017 xen_netbk_idx_release(netbk, pending_idx);
1018 for (j = start; j < i; j++) {
 1019 pending_idx = frag_get_pending_idx(&shinfo->frags[j]);
 1020 xen_netbk_idx_release(netbk, pending_idx);
1021 }
1022
1023 /* Remember the error: invalidate all subsequent fragments. */
1024 err = newerr;
1025 }
1026
1027 *gopp = gop + 1;
1028 return err;
1029}
1030
1031static void xen_netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb)
1032{
1033 struct skb_shared_info *shinfo = skb_shinfo(skb);
1034 int nr_frags = shinfo->nr_frags;
1035 int i;
1036
1037 for (i = 0; i < nr_frags; i++) {
1038 skb_frag_t *frag = shinfo->frags + i;
1039 struct xen_netif_tx_request *txp;
 1040 struct page *page;
1041 u16 pending_idx;
 1042
 1043 pending_idx = frag_get_pending_idx(frag);
 1044
1045 txp = &netbk->pending_tx_info[pending_idx].req;
 1046 page = virt_to_page(idx_to_kaddr(netbk, pending_idx));
1047 __skb_fill_page_desc(skb, i, page, txp->offset, txp->size);
 1048 skb->len += txp->size;
1049 skb->data_len += txp->size;
1050 skb->truesize += txp->size;
1051
1052 /* Take an extra reference to offset xen_netbk_idx_release */
1053 get_page(netbk->mmap_pages[pending_idx]);
1054 xen_netbk_idx_release(netbk, pending_idx);
1055 }
1056}
1057
1058static int xen_netbk_get_extras(struct xenvif *vif,
1059 struct xen_netif_extra_info *extras,
1060 int work_to_do)
1061{
1062 struct xen_netif_extra_info extra;
1063 RING_IDX cons = vif->tx.req_cons;
1064
1065 do {
1066 if (unlikely(work_to_do-- <= 0)) {
1067 netdev_dbg(vif->dev, "Missing extra info\n");
1068 return -EBADR;
1069 }
1070
1071 memcpy(&extra, RING_GET_REQUEST(&vif->tx, cons),
1072 sizeof(extra));
1073 if (unlikely(!extra.type ||
1074 extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
1075 vif->tx.req_cons = ++cons;
1076 netdev_dbg(vif->dev,
1077 "Invalid extra type: %d\n", extra.type);
1078 return -EINVAL;
1079 }
1080
1081 memcpy(&extras[extra.type - 1], &extra, sizeof(extra));
1082 vif->tx.req_cons = ++cons;
1083 } while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);
1084
1085 return work_to_do;
1086}
1087
1088static int netbk_set_skb_gso(struct xenvif *vif,
1089 struct sk_buff *skb,
1090 struct xen_netif_extra_info *gso)
1091{
1092 if (!gso->u.gso.size) {
1093 netdev_dbg(vif->dev, "GSO size must not be zero.\n");
1094 return -EINVAL;
1095 }
1096
1097 /* Currently only TCPv4 S.O. is supported. */
1098 if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
1099 netdev_dbg(vif->dev, "Bad GSO type %d.\n", gso->u.gso.type);
1100 return -EINVAL;
1101 }
1102
1103 skb_shinfo(skb)->gso_size = gso->u.gso.size;
1104 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
1105
1106 /* Header must be checked, and gso_segs computed. */
1107 skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
1108 skb_shinfo(skb)->gso_segs = 0;
1109
1110 return 0;
1111}
1112
1113static int checksum_setup(struct xenvif *vif, struct sk_buff *skb)
1114{
1115 struct iphdr *iph;
1116 unsigned char *th;
1117 int err = -EPROTO;
1118 int recalculate_partial_csum = 0;
1119
1120 /*
1121 * A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
1122 * peers can fail to set NETRXF_csum_blank when sending a GSO
1123 * frame. In this case force the SKB to CHECKSUM_PARTIAL and
1124 * recalculate the partial checksum.
1125 */
1126 if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
1127 vif->rx_gso_checksum_fixup++;
1128 skb->ip_summed = CHECKSUM_PARTIAL;
1129 recalculate_partial_csum = 1;
1130 }
1131
1132 /* A non-CHECKSUM_PARTIAL SKB does not require setup. */
1133 if (skb->ip_summed != CHECKSUM_PARTIAL)
1134 return 0;
1135
1136 if (skb->protocol != htons(ETH_P_IP))
1137 goto out;
1138
1139 iph = (void *)skb->data;
1140 th = skb->data + 4 * iph->ihl;
1141 if (th >= skb_tail_pointer(skb))
1142 goto out;
1143
1144 skb->csum_start = th - skb->head;
1145 switch (iph->protocol) {
1146 case IPPROTO_TCP:
1147 skb->csum_offset = offsetof(struct tcphdr, check);
1148
1149 if (recalculate_partial_csum) {
1150 struct tcphdr *tcph = (struct tcphdr *)th;
1151 tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
1152 skb->len - iph->ihl*4,
1153 IPPROTO_TCP, 0);
1154 }
1155 break;
1156 case IPPROTO_UDP:
1157 skb->csum_offset = offsetof(struct udphdr, check);
1158
1159 if (recalculate_partial_csum) {
1160 struct udphdr *udph = (struct udphdr *)th;
1161 udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
1162 skb->len - iph->ihl*4,
1163 IPPROTO_UDP, 0);
1164 }
1165 break;
1166 default:
1167 if (net_ratelimit())
1168 netdev_err(vif->dev,
1169 "Attempting to checksum a non-TCP/UDP packet, dropping a protocol %d packet\n",
1170 iph->protocol);
1171 goto out;
1172 }
1173
1174 if ((th + skb->csum_offset + 2) > skb_tail_pointer(skb))
1175 goto out;
1176
1177 err = 0;
1178
1179out:
1180 return err;
1181}
1182
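/*
 * Credit-based traffic shaping: return true if this vif does not yet
 * have enough credit to send @size bytes, arming the replenish timer
 * so transmission can resume later.
 */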
1183static bool tx_credit_exceeded(struct xenvif *vif, unsigned size)
1184{
1185 unsigned long now = jiffies;
1186 unsigned long next_credit =
1187 vif->credit_timeout.expires +
1188 msecs_to_jiffies(vif->credit_usec / 1000);
1189
1190 /* Timer could already be pending in rare cases. */
1191 if (timer_pending(&vif->credit_timeout))
1192 return true;
1193
1194 /* Passed the point where we can replenish credit? */
1195 if (time_after_eq(now, next_credit)) {
1196 vif->credit_timeout.expires = now;
1197 tx_add_credit(vif);
1198 }
1199
1200 /* Still too big to send right now? Set a callback. */
1201 if (size > vif->remaining_credit) {
1202 vif->credit_timeout.data =
1203 (unsigned long)vif;
1204 vif->credit_timeout.function =
1205 tx_credit_callback;
1206 mod_timer(&vif->credit_timeout,
1207 next_credit);
1208
1209 return true;
1210 }
1211
1212 return false;
1213}
1214
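/*
 * First half of the TX path: pull requests from the vifs on the
 * schedule list, turn them into grant-copy operations and partially
 * built skbs, and return the number of copy operations prepared.
 */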
1215static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
1216{
1217 struct gnttab_copy *gop = netbk->tx_copy_ops, *request_gop;
1218 struct sk_buff *skb;
1219 int ret;
1220
1221 while (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
1222 !list_empty(&netbk->net_schedule_list)) {
1223 struct xenvif *vif;
1224 struct xen_netif_tx_request txreq;
1225 struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS];
1226 struct page *page;
1227 struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];
1228 u16 pending_idx;
1229 RING_IDX idx;
1230 int work_to_do;
1231 unsigned int data_len;
1232 pending_ring_idx_t index;
1233
1234 /* Get a netif from the list with work to do. */
1235 vif = poll_net_schedule_list(netbk);
1236 if (!vif)
1237 continue;
1238
1239 RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, work_to_do);
1240 if (!work_to_do) {
1241 xenvif_put(vif);
1242 continue;
1243 }
1244
1245 idx = vif->tx.req_cons;
1246 rmb(); /* Ensure that we see the request before we copy it. */
1247 memcpy(&txreq, RING_GET_REQUEST(&vif->tx, idx), sizeof(txreq));
1248
1249 /* Credit-based scheduling. */
1250 if (txreq.size > vif->remaining_credit &&
1251 tx_credit_exceeded(vif, txreq.size)) {
1252 xenvif_put(vif);
1253 continue;
1254 }
1255
1256 vif->remaining_credit -= txreq.size;
1257
1258 work_to_do--;
1259 vif->tx.req_cons = ++idx;
1260
1261 memset(extras, 0, sizeof(extras));
1262 if (txreq.flags & XEN_NETTXF_extra_info) {
1263 work_to_do = xen_netbk_get_extras(vif, extras,
1264 work_to_do);
1265 idx = vif->tx.req_cons;
1266 if (unlikely(work_to_do < 0)) {
1267 netbk_tx_err(vif, &txreq, idx);
1268 continue;
1269 }
1270 }
1271
1272 ret = netbk_count_requests(vif, &txreq, txfrags, work_to_do);
1273 if (unlikely(ret < 0)) {
1274 netbk_tx_err(vif, &txreq, idx - ret);
1275 continue;
1276 }
1277 idx += ret;
1278
1279 if (unlikely(txreq.size < ETH_HLEN)) {
1280 netdev_dbg(vif->dev,
1281 "Bad packet size: %d\n", txreq.size);
1282 netbk_tx_err(vif, &txreq, idx);
1283 continue;
1284 }
1285
1286 /* No crossing a page as the payload mustn't fragment. */
1287 if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
1288 netdev_dbg(vif->dev,
1289 "txreq.offset: %x, size: %u, end: %lu\n",
1290 txreq.offset, txreq.size,
1291 (txreq.offset&~PAGE_MASK) + txreq.size);
1292 netbk_tx_err(vif, &txreq, idx);
1293 continue;
1294 }
1295
1296 index = pending_index(netbk->pending_cons);
1297 pending_idx = netbk->pending_ring[index];
1298
1299 data_len = (txreq.size > PKT_PROT_LEN &&
1300 ret < MAX_SKB_FRAGS) ?
1301 PKT_PROT_LEN : txreq.size;
1302
1303 skb = alloc_skb(data_len + NET_SKB_PAD + NET_IP_ALIGN,
1304 GFP_ATOMIC | __GFP_NOWARN);
1305 if (unlikely(skb == NULL)) {
1306 netdev_dbg(vif->dev,
1307 "Can't allocate a skb in start_xmit.\n");
1308 netbk_tx_err(vif, &txreq, idx);
1309 break;
1310 }
1311
1312 /* Packets passed to netif_rx() must have some headroom. */
1313 skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
1314
1315 if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
1316 struct xen_netif_extra_info *gso;
1317 gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
1318
1319 if (netbk_set_skb_gso(vif, skb, gso)) {
1320 kfree_skb(skb);
1321 netbk_tx_err(vif, &txreq, idx);
1322 continue;
1323 }
1324 }
1325
1326 /* XXX could copy straight to head */
1327 page = xen_netbk_alloc_page(netbk, skb, pending_idx);
1328 if (!page) {
1329 kfree_skb(skb);
1330 netbk_tx_err(vif, &txreq, idx);
1331 continue;
1332 }
1333
 1334 gop->source.u.ref = txreq.gref;
1335 gop->source.domid = vif->domid;
1336 gop->source.offset = txreq.offset;
1337
1338 gop->dest.u.gmfn = virt_to_mfn(page_address(page));
1339 gop->dest.domid = DOMID_SELF;
1340 gop->dest.offset = txreq.offset;
1341
1342 gop->len = txreq.size;
1343 gop->flags = GNTCOPY_source_gref;
1344
1345 gop++;
1346
1347 memcpy(&netbk->pending_tx_info[pending_idx].req,
1348 &txreq, sizeof(txreq));
1349 netbk->pending_tx_info[pending_idx].vif = vif;
1350 *((u16 *)skb->data) = pending_idx;
1351
1352 __skb_put(skb, data_len);
1353
1354 skb_shinfo(skb)->nr_frags = ret;
1355 if (data_len < txreq.size) {
1356 skb_shinfo(skb)->nr_frags++;
 1357 frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
1358 pending_idx);
 1359 } else {
 1360 frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
1361 INVALID_PENDING_IDX);
 1362 }
1363
 1364 netbk->pending_cons++;
1365
1366 request_gop = xen_netbk_get_requests(netbk, vif,
1367 skb, txfrags, gop);
1368 if (request_gop == NULL) {
1369 kfree_skb(skb);
1370 netbk_tx_err(vif, &txreq, idx);
1371 continue;
1372 }
1373 gop = request_gop;
1374
 1375 __skb_queue_tail(&netbk->tx_queue, skb);
1376
 1377 vif->tx.req_cons = idx;
1378 xen_netbk_check_rx_xenvif(vif);
1379
1380 if ((gop-netbk->tx_copy_ops) >= ARRAY_SIZE(netbk->tx_copy_ops))
1381 break;
1382 }
1383
1384 return gop - netbk->tx_copy_ops;
1385}
1386
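/*
 * Second half of the TX path: after the grant copies built by
 * xen_netbk_tx_build_gops have been performed, check their status,
 * finish assembling each skb and pass it up the network stack.
 */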
1387static void xen_netbk_tx_submit(struct xen_netbk *netbk)
1388{
1389 struct gnttab_copy *gop = netbk->tx_copy_ops;
1390 struct sk_buff *skb;
1391
1392 while ((skb = __skb_dequeue(&netbk->tx_queue)) != NULL) {
1393 struct xen_netif_tx_request *txp;
1394 struct xenvif *vif;
1395 u16 pending_idx;
1396 unsigned data_len;
1397
1398 pending_idx = *((u16 *)skb->data);
1399 vif = netbk->pending_tx_info[pending_idx].vif;
1400 txp = &netbk->pending_tx_info[pending_idx].req;
1401
1402 /* Check the remap error code. */
1403 if (unlikely(xen_netbk_tx_check_gop(netbk, skb, &gop))) {
1404 netdev_dbg(vif->dev, "netback grant failed.\n");
1405 skb_shinfo(skb)->nr_frags = 0;
1406 kfree_skb(skb);
1407 continue;
1408 }
1409
1410 data_len = skb->len;
1411 memcpy(skb->data,
1412 (void *)(idx_to_kaddr(netbk, pending_idx)|txp->offset),
1413 data_len);
1414 if (data_len < txp->size) {
1415 /* Append the packet payload as a fragment. */
1416 txp->offset += data_len;
1417 txp->size -= data_len;
1418 } else {
1419 /* Schedule a response immediately. */
1420 xen_netbk_idx_release(netbk, pending_idx);
1421 }
1422
1423 if (txp->flags & XEN_NETTXF_csum_blank)
1424 skb->ip_summed = CHECKSUM_PARTIAL;
1425 else if (txp->flags & XEN_NETTXF_data_validated)
1426 skb->ip_summed = CHECKSUM_UNNECESSARY;
1427
1428 xen_netbk_fill_frags(netbk, skb);
1429
1430 /*
1431 * If the initial fragment was < PKT_PROT_LEN then
1432 * pull through some bytes from the other fragments to
1433 * increase the linear region to PKT_PROT_LEN bytes.
1434 */
1435 if (skb_headlen(skb) < PKT_PROT_LEN && skb_is_nonlinear(skb)) {
1436 int target = min_t(int, skb->len, PKT_PROT_LEN);
1437 __pskb_pull_tail(skb, target - skb_headlen(skb));
1438 }
1439
1440 skb->dev = vif->dev;
1441 skb->protocol = eth_type_trans(skb, skb->dev);
1442
1443 if (checksum_setup(vif, skb)) {
1444 netdev_dbg(vif->dev,
1445 "Can't setup checksum in net_tx_action\n");
1446 kfree_skb(skb);
1447 continue;
1448 }
1449
1450 vif->dev->stats.rx_bytes += skb->len;
1451 vif->dev->stats.rx_packets++;
1452
1453 xenvif_receive_skb(vif, skb);
1454 }
1455}
1456
1457/* Called after netfront has transmitted */
1458static void xen_netbk_tx_action(struct xen_netbk *netbk)
1459{
1460 unsigned nr_gops;
 1461
1462 nr_gops = xen_netbk_tx_build_gops(netbk);
1463
1464 if (nr_gops == 0)
1465 return;
 1466
1467 gnttab_batch_copy(netbk->tx_copy_ops, nr_gops);
 1468
1469 xen_netbk_tx_submit(netbk);
 1470}
1471
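/*
 * Release a completed pending slot: complete its TX request with an
 * OKAY response, return the slot to the pending ring and release the
 * page that backed the copy.
 */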
1472static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx)
1473{
1474 struct xenvif *vif;
1475 struct pending_tx_info *pending_tx_info;
1476 pending_ring_idx_t index;
1477
1478 /* Already complete? */
1479 if (netbk->mmap_pages[pending_idx] == NULL)
1480 return;
1481
1482 pending_tx_info = &netbk->pending_tx_info[pending_idx];
1483
1484 vif = pending_tx_info->vif;
1485
1486 make_tx_response(vif, &pending_tx_info->req, XEN_NETIF_RSP_OKAY);
1487
1488 index = pending_index(netbk->pending_prod++);
1489 netbk->pending_ring[index] = pending_idx;
1490
1491 xenvif_put(vif);
1492
1493 netbk->mmap_pages[pending_idx]->mapping = 0;
1494 put_page(netbk->mmap_pages[pending_idx]);
1495 netbk->mmap_pages[pending_idx] = NULL;
1496}
1497
1498static void make_tx_response(struct xenvif *vif,
1499 struct xen_netif_tx_request *txp,
1500 s8 st)
1501{
1502 RING_IDX i = vif->tx.rsp_prod_pvt;
1503 struct xen_netif_tx_response *resp;
1504 int notify;
1505
1506 resp = RING_GET_RESPONSE(&vif->tx, i);
1507 resp->id = txp->id;
1508 resp->status = st;
1509
1510 if (txp->flags & XEN_NETTXF_extra_info)
1511 RING_GET_RESPONSE(&vif->tx, ++i)->status = XEN_NETIF_RSP_NULL;
1512
1513 vif->tx.rsp_prod_pvt = ++i;
1514 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->tx, notify);
1515 if (notify)
1516 notify_remote_via_irq(vif->irq);
1517}
1518
1519static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif,
1520 u16 id,
1521 s8 st,
1522 u16 offset,
1523 u16 size,
1524 u16 flags)
1525{
1526 RING_IDX i = vif->rx.rsp_prod_pvt;
1527 struct xen_netif_rx_response *resp;
1528
1529 resp = RING_GET_RESPONSE(&vif->rx, i);
1530 resp->offset = offset;
1531 resp->flags = flags;
1532 resp->id = id;
1533 resp->status = (s16)size;
1534 if (st < 0)
1535 resp->status = (s16)st;
1536
1537 vif->rx.rsp_prod_pvt = ++i;
1538
1539 return resp;
1540}
1541
1542static inline int rx_work_todo(struct xen_netbk *netbk)
1543{
1544 return !skb_queue_empty(&netbk->rx_queue);
1545}
1546
1547static inline int tx_work_todo(struct xen_netbk *netbk)
1548{
1549
1550 if (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
1551 !list_empty(&netbk->net_schedule_list))
1552 return 1;
1553
1554 return 0;
1555}
1556
1557static int xen_netbk_kthread(void *data)
1558{
1559 struct xen_netbk *netbk = data;
1560 while (!kthread_should_stop()) {
1561 wait_event_interruptible(netbk->wq,
1562 rx_work_todo(netbk) ||
1563 tx_work_todo(netbk) ||
1564 kthread_should_stop());
1565 cond_resched();
1566
1567 if (kthread_should_stop())
1568 break;
1569
1570 if (rx_work_todo(netbk))
1571 xen_netbk_rx_action(netbk);
1572
1573 if (tx_work_todo(netbk))
1574 xen_netbk_tx_action(netbk);
1575 }
1576
1577 return 0;
1578}
1579
1580void xen_netbk_unmap_frontend_rings(struct xenvif *vif)
1581{
 1582 if (vif->tx.sring)
1583 xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(vif),
1584 vif->tx.sring);
1585 if (vif->rx.sring)
1586 xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(vif),
1587 vif->rx.sring);
 1588}
1589
1590int xen_netbk_map_frontend_rings(struct xenvif *vif,
1591 grant_ref_t tx_ring_ref,
1592 grant_ref_t rx_ring_ref)
1593{
 1594 void *addr;
 1595 struct xen_netif_tx_sring *txs;
1596 struct xen_netif_rx_sring *rxs;
1597
1598 int err = -ENOMEM;
1599
 1600 err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
1601 tx_ring_ref, &addr);
1602 if (err)
 1603 goto err;
1604
 1605 txs = (struct xen_netif_tx_sring *)addr;
 1606 BACK_RING_INIT(&vif->tx, txs, PAGE_SIZE);
1607
 1608 err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
1609 rx_ring_ref, &addr);
1610 if (err)
 1611 goto err;
 1612
 1613 rxs = (struct xen_netif_rx_sring *)addr;
 1614 BACK_RING_INIT(&vif->rx, rxs, PAGE_SIZE);
1615
 1616 vif->rx_req_cons_peek = 0;
1617
 1618 return 0;
1619
1620err:
1621 xen_netbk_unmap_frontend_rings(vif);
1622 return err;
1623}
1624
1625static int __init netback_init(void)
1626{
1627 int i;
1628 int rc = 0;
1629 int group;
1630
 1631 if (!xen_domain())
 1632 return -ENODEV;
1633
1634 xen_netbk_group_nr = num_online_cpus();
1635 xen_netbk = vzalloc(sizeof(struct xen_netbk) * xen_netbk_group_nr);
 1636 if (!xen_netbk)
 1637 return -ENOMEM;
 1638
1639 for (group = 0; group < xen_netbk_group_nr; group++) {
1640 struct xen_netbk *netbk = &xen_netbk[group];
1641 skb_queue_head_init(&netbk->rx_queue);
1642 skb_queue_head_init(&netbk->tx_queue);
1643
1644 init_timer(&netbk->net_timer);
1645 netbk->net_timer.data = (unsigned long)netbk;
1646 netbk->net_timer.function = xen_netbk_alarm;
1647
1648 netbk->pending_cons = 0;
1649 netbk->pending_prod = MAX_PENDING_REQS;
1650 for (i = 0; i < MAX_PENDING_REQS; i++)
1651 netbk->pending_ring[i] = i;
1652
1653 init_waitqueue_head(&netbk->wq);
1654 netbk->task = kthread_create(xen_netbk_kthread,
1655 (void *)netbk,
1656 "netback/%u", group);
1657
1658 if (IS_ERR(netbk->task)) {
 1659 printk(KERN_ALERT "kthread_create() fails at netback\n");
 1660 del_timer(&netbk->net_timer);
1661 rc = PTR_ERR(netbk->task);
1662 goto failed_init;
1663 }
1664
1665 kthread_bind(netbk->task, group);
1666
1667 INIT_LIST_HEAD(&netbk->net_schedule_list);
1668
1669 spin_lock_init(&netbk->net_schedule_list_lock);
1670
1671 atomic_set(&netbk->netfront_count, 0);
1672
1673 wake_up_process(netbk->task);
1674 }
1675
1676 rc = xenvif_xenbus_init();
1677 if (rc)
1678 goto failed_init;
1679
1680 return 0;
1681
1682failed_init:
1683 while (--group >= 0) {
1684 struct xen_netbk *netbk = &xen_netbk[group];
1685 for (i = 0; i < MAX_PENDING_REQS; i++) {
1686 if (netbk->mmap_pages[i])
1687 __free_page(netbk->mmap_pages[i]);
1688 }
1689 del_timer(&netbk->net_timer);
1690 kthread_stop(netbk->task);
1691 }
1692 vfree(xen_netbk);
1693 return rc;
1694
1695}
1696
1697module_init(netback_init);
1698
1699MODULE_LICENSE("Dual BSD/GPL");
 1700MODULE_ALIAS("xen-backend:vif");