/*
 * Copyright (C) 2017 - Cambridge Greys Limited
 * Copyright (C) 2011 - 2014 Cisco Systems Inc
 * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
 * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
 * James Leu (jleu@mindspring.net).
 * Copyright (C) 2001 by various other people who didn't put their name here.
 * Licensed under the GPL.
 */

#include <linux/version.h>
#include <linux/bootmem.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/inetdevice.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/netdevice.h>
#include <linux/platform_device.h>
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <init.h>
#include <irq_kern.h>
#include <irq_user.h>
#include <net_kern.h>
#include <os.h>
#include "mconsole_kern.h"
#include "vector_user.h"
#include "vector_kern.h"

/*
 * Adapted from the UML network devices with the following major changes:
 * - All transports are static - this simplifies the code significantly.
 * - Multiple FDs/IRQs per device.
 * - Vector IO is optionally used for read/write, falling back to legacy
 *   IO based on configuration and/or availability.
 * - Configuration is no longer positional - L2TPv3 and GRE require up to
 *   10 parameters, so passing these positionally is not fit for purpose.
 * - Only socket transports are supported.
 */
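
/* Example command line (illustrative only - the option names are the
 * ones parsed in this file; the exact set accepted depends on the
 * transport):
 *
 *   vec0:transport=tap,mac=52:54:00:12:34:56,depth=128,gro=1,vec=1
 *
 * See the "vec" __uml_help text at the bottom of this file.
 */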

#define DRIVER_NAME "uml-vector"
#define DRIVER_VERSION "01"
struct vector_cmd_line_arg {
	struct list_head list;
	int unit;
	char *arguments;
};

struct vector_device {
	struct list_head list;
	struct net_device *dev;
	struct platform_device pdev;
	int unit;
	int opened;
};

static LIST_HEAD(vec_cmd_line);

static DEFINE_SPINLOCK(vector_devices_lock);
static LIST_HEAD(vector_devices);

static int driver_registered;

static void vector_eth_configure(int n, struct arglist *def);

/* Argument accessors to set variables (and/or set default values)
 * mtu, buffer sizing, default headroom, etc
 */

#define DEFAULT_HEADROOM 2
#define SAFETY_MARGIN 32
#define DEFAULT_VECTOR_SIZE 64
#define TX_SMALL_PACKET 128
#define MAX_IOV_SIZE (MAX_SKB_FRAGS + 1)
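/* MAX_IOV_SIZE: one iovec slot for the linear part of an skb plus one
 * per page fragment (nr_frags <= MAX_SKB_FRAGS), matching the vectors
 * that prep_msg() and prep_skb() build.
 */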

static const struct {
	const char string[ETH_GSTRING_LEN];
} ethtool_stats_keys[] = {
	{ "rx_queue_max" },
	{ "rx_queue_running_average" },
	{ "tx_queue_max" },
	{ "tx_queue_running_average" },
	{ "rx_encaps_errors" },
	{ "tx_timeout_count" },
	{ "tx_restart_queue" },
	{ "tx_kicks" },
	{ "tx_flow_control_xon" },
	{ "tx_flow_control_xoff" },
	{ "rx_csum_offload_good" },
	{ "rx_csum_offload_errors" },
	{ "sg_ok" },
	{ "sg_linearized" },
};
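/* The set and order of the keys above must match the field layout of
 * struct vector_estats - vector_get_ethtool_stats() copies that struct
 * into the stats buffer wholesale.
 */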

#define VECTOR_NUM_STATS ARRAY_SIZE(ethtool_stats_keys)

static void vector_reset_stats(struct vector_private *vp)
{
	vp->estats.rx_queue_max = 0;
	vp->estats.rx_queue_running_average = 0;
	vp->estats.tx_queue_max = 0;
	vp->estats.tx_queue_running_average = 0;
	vp->estats.rx_encaps_errors = 0;
	vp->estats.tx_timeout_count = 0;
	vp->estats.tx_restart_queue = 0;
	vp->estats.tx_kicks = 0;
	vp->estats.tx_flow_control_xon = 0;
	vp->estats.tx_flow_control_xoff = 0;
	/* clear the csum offload counters too so a reset wipes
	 * every counter ethtool exports
	 */
	vp->estats.rx_csum_offload_good = 0;
	vp->estats.rx_csum_offload_errors = 0;
	vp->estats.sg_ok = 0;
	vp->estats.sg_linearized = 0;
}

static int get_mtu(struct arglist *def)
{
	char *mtu = uml_vector_fetch_arg(def, "mtu");
	long result;

	if (mtu != NULL) {
		if (kstrtoul(mtu, 10, &result) == 0)
			return result;
	}
	return ETH_MAX_PACKET;
}

static int get_depth(struct arglist *def)
{
	char *depth = uml_vector_fetch_arg(def, "depth");
	long result;

	if (depth != NULL) {
		if (kstrtoul(depth, 10, &result) == 0)
			return result;
	}
	return DEFAULT_VECTOR_SIZE;
}

static int get_headroom(struct arglist *def)
{
	char *headroom = uml_vector_fetch_arg(def, "headroom");
	long result;

	if (headroom != NULL) {
		if (kstrtoul(headroom, 10, &result) == 0)
			return result;
	}
	return DEFAULT_HEADROOM;
}

static int get_req_size(struct arglist *def)
{
	char *gro = uml_vector_fetch_arg(def, "gro");
	long result;

	if (gro != NULL) {
		if (kstrtoul(gro, 10, &result) == 0) {
			if (result > 0)
				return 65536;
		}
	}
	return get_mtu(def) + ETH_HEADER_OTHER +
		get_headroom(def) + SAFETY_MARGIN;
}
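/* Transport capability flags: vec=0 on the command line disables
 * vector IO entirely; TAP gets vector RX and BPF but no vector TX,
 * RAW gets all three, anything else gets vector RX and TX without BPF.
 */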
static int get_transport_options(struct arglist *def)
{
	char *transport = uml_vector_fetch_arg(def, "transport");
	char *vector = uml_vector_fetch_arg(def, "vec");

	int vec_rx = VECTOR_RX;
	int vec_tx = VECTOR_TX;
	long parsed;

	if (vector != NULL) {
		if (kstrtoul(vector, 10, &parsed) == 0) {
			if (parsed == 0) {
				vec_rx = 0;
				vec_tx = 0;
			}
		}
	}

	/* guard against a missing transport= argument */
	if (transport == NULL)
		return (vec_rx | vec_tx);
	if (strncmp(transport, TRANS_TAP, TRANS_TAP_LEN) == 0)
		return (vec_rx | VECTOR_BPF);
	if (strncmp(transport, TRANS_RAW, TRANS_RAW_LEN) == 0)
		return (vec_rx | vec_tx | VECTOR_BPF);
	return (vec_rx | vec_tx);
}

/* A mini-buffer for packet drop reads.
 * All of our supported transports are datagram oriented and we always
 * read using recvmsg or recvmmsg. If we pass a buffer which is smaller
 * than the packet size, it still counts as a full packet read and will
 * clean the incoming stream to keep sigio/epoll happy.
 */

#define DROP_BUFFER_SIZE 32

static char *drop_buffer;

/* Array backed queues optimized for bulk enqueue/dequeue and
 * 1:N (small values of N) or 1:1 enqueuer/dequeuer ratios.
 * For more details and full design rationale see
 * http://foswiki.cambridgegreys.com/Main/EatYourTailAndEnjoyIt
 */
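/* The queue is a fixed-size ring over two parallel arrays: one of
 * struct mmsghdr and one of sk_buff pointers. head indexes the next
 * slot to dequeue, tail the next slot to enqueue, and queue_depth the
 * current occupancy. Whenever the depth drops to zero, both indices
 * are reset to 0 so subsequent sendmmsg/recvmmsg calls can use
 * maximal contiguous vectors.
 */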

/*
 * Advance the mmsg queue head by n = advance. Resets the queue to
 * maximum enqueue/dequeue-at-once capacity if possible. Called by
 * dequeuers. Caller must hold the head_lock!
 */

static int vector_advancehead(struct vector_queue *qi, int advance)
{
	int queue_depth;

	qi->head =
		(qi->head + advance)
			% qi->max_depth;

	spin_lock(&qi->tail_lock);
	qi->queue_depth -= advance;

	/* we are at 0, use this to
	 * reset head and tail so we can use max size vectors
	 */

	if (qi->queue_depth == 0) {
		qi->head = 0;
		qi->tail = 0;
	}
	queue_depth = qi->queue_depth;
	spin_unlock(&qi->tail_lock);
	return queue_depth;
}

/* Advance the queue tail by n = advance.
 * This is called by enqueuers, which should already hold the
 * tail lock (the head lock is taken internally).
 */

static int vector_advancetail(struct vector_queue *qi, int advance)
{
	int queue_depth;

	qi->tail =
		(qi->tail + advance)
			% qi->max_depth;
	spin_lock(&qi->head_lock);
	qi->queue_depth += advance;
	queue_depth = qi->queue_depth;
	spin_unlock(&qi->head_lock);
	return queue_depth;
}

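/* Build the scatter/gather iovec for one skb: the (optional) transport
 * header first, then the linear data, then one entry per page fragment.
 * Returns the number of iovec slots used, or -1 if the skb had to be
 * dropped.
 */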
static int prep_msg(struct vector_private *vp,
	struct sk_buff *skb,
	struct iovec *iov)
{
	int iov_index = 0;
	int nr_frags, frag;
	skb_frag_t *skb_frag;

	nr_frags = skb_shinfo(skb)->nr_frags;
	if (nr_frags > MAX_IOV_SIZE) {
		if (skb_linearize(skb) != 0)
			goto drop;
	}
	if (vp->header_size > 0) {
		iov[iov_index].iov_len = vp->header_size;
		vp->form_header(iov[iov_index].iov_base, skb, vp);
		iov_index++;
	}
	iov[iov_index].iov_base = skb->data;
	if (nr_frags > 0) {
		iov[iov_index].iov_len = skb->len - skb->data_len;
		vp->estats.sg_ok++;
	} else
		iov[iov_index].iov_len = skb->len;
	iov_index++;
	for (frag = 0; frag < nr_frags; frag++) {
		skb_frag = &skb_shinfo(skb)->frags[frag];
		iov[iov_index].iov_base = skb_frag_address_safe(skb_frag);
		iov[iov_index].iov_len = skb_frag_size(skb_frag);
		iov_index++;
	}
	return iov_index;
drop:
	return -1;
}

/*
 * Generic vector enqueue with support for forming headers using a
 * transport specific callback. Allows GRE, L2TPv3, RAW and other
 * transports to use a common enqueue procedure in vector mode.
 */

static int vector_enqueue(struct vector_queue *qi, struct sk_buff *skb)
{
	struct vector_private *vp = netdev_priv(qi->dev);
	int queue_depth;
	int packet_len;
	struct mmsghdr *mmsg_vector = qi->mmsg_vector;
	int iov_count;

	spin_lock(&qi->tail_lock);
	spin_lock(&qi->head_lock);
	queue_depth = qi->queue_depth;
	spin_unlock(&qi->head_lock);

	if (skb)
		packet_len = skb->len;

	if (queue_depth < qi->max_depth) {
		*(qi->skbuff_vector + qi->tail) = skb;
		mmsg_vector += qi->tail;
		iov_count = prep_msg(
			vp,
			skb,
			mmsg_vector->msg_hdr.msg_iov
		);
		if (iov_count < 1)
			goto drop;
		mmsg_vector->msg_hdr.msg_iovlen = iov_count;
		mmsg_vector->msg_hdr.msg_name = vp->fds->remote_addr;
		mmsg_vector->msg_hdr.msg_namelen = vp->fds->remote_addr_size;
		queue_depth = vector_advancetail(qi, 1);
	} else
		goto drop;
	spin_unlock(&qi->tail_lock);
	return queue_depth;
drop:
	qi->dev->stats.tx_dropped++;
	if (skb != NULL) {
		packet_len = skb->len;
		dev_consume_skb_any(skb);
		netdev_completed_queue(qi->dev, 1, packet_len);
	}
	spin_unlock(&qi->tail_lock);
	return queue_depth;
}

static int consume_vector_skbs(struct vector_queue *qi, int count)
{
	struct sk_buff *skb;
	int skb_index;
	int bytes_compl = 0;

	for (skb_index = qi->head; skb_index < qi->head + count; skb_index++) {
		skb = *(qi->skbuff_vector + skb_index);
		/* mark as empty to ensure correct destruction if
		 * needed
		 */
		bytes_compl += skb->len;
		*(qi->skbuff_vector + skb_index) = NULL;
		dev_consume_skb_any(skb);
	}
	qi->dev->stats.tx_bytes += bytes_compl;
	qi->dev->stats.tx_packets += count;
	netdev_completed_queue(qi->dev, count, bytes_compl);
	return vector_advancehead(qi, count);
}

/*
 * Generic vector dequeue via sendmmsg with support for forming headers
 * using a transport specific callback. Allows GRE, L2TPv3, RAW and
 * other transports to use a common dequeue procedure in vector mode.
 */

static int vector_send(struct vector_queue *qi)
{
	struct vector_private *vp = netdev_priv(qi->dev);
	struct mmsghdr *send_from;
	int result = 0, send_len, queue_depth = qi->max_depth;

	if (spin_trylock(&qi->head_lock)) {
		if (spin_trylock(&qi->tail_lock)) {
			/* update queue_depth to current value */
			queue_depth = qi->queue_depth;
			spin_unlock(&qi->tail_lock);
			while (queue_depth > 0) {
				/* Calculate the start of the vector */
				send_len = queue_depth;
				send_from = qi->mmsg_vector;
				send_from += qi->head;
				/* Adjust vector size if wraparound */
				if (send_len + qi->head > qi->max_depth)
					send_len = qi->max_depth - qi->head;
				/* Try to TX as many packets as possible */
				if (send_len > 0) {
					result = uml_vector_sendmmsg(
						vp->fds->tx_fd,
						send_from,
						send_len,
						0
					);
					vp->in_write_poll =
						(result != send_len);
				}
				/* For some of the sendmmsg error scenarios
				 * we may end up unsure whether all packets
				 * actually went out. It is safer to declare
				 * them all TX-ed and blame the network.
				 */
				if (result < 0) {
					if (net_ratelimit())
						netdev_err(vp->dev, "sendmmsg err=%i\n",
							result);
					result = send_len;
				}
				if (result > 0) {
					queue_depth =
						consume_vector_skbs(qi, result);
					/* This is equivalent to a TX IRQ.
					 * Restart the upper layers to feed us
					 * more packets.
					 */
					if (result > vp->estats.tx_queue_max)
						vp->estats.tx_queue_max = result;
					vp->estats.tx_queue_running_average =
						(vp->estats.tx_queue_running_average + result) >> 1;
				}
				netif_trans_update(qi->dev);
				netif_wake_queue(qi->dev);
				/* if TX is busy, break out of the send loop,
				 * poll write IRQ will reschedule xmit for us
				 */
				if (result != send_len) {
					vp->estats.tx_restart_queue++;
					break;
				}
			}
		}
		spin_unlock(&qi->head_lock);
	} else {
		tasklet_schedule(&vp->tx_poll);
	}
	return queue_depth;
}

/* Queue destructor. Deliberately stateless so we can use
 * it in queue cleanup if initialization fails.
 */

static void destroy_queue(struct vector_queue *qi)
{
	int i;
	struct iovec *iov;
	struct vector_private *vp;
	struct mmsghdr *mmsg_vector;

	if (qi == NULL)
		return;
	/* only dereference the queue after the NULL check */
	vp = netdev_priv(qi->dev);
	/* deallocate any skbuffs - we rely on any unused to be
	 * set to NULL.
	 */
	if (qi->skbuff_vector != NULL) {
		for (i = 0; i < qi->max_depth; i++) {
			if (*(qi->skbuff_vector + i) != NULL)
				dev_kfree_skb_any(*(qi->skbuff_vector + i));
		}
		kfree(qi->skbuff_vector);
	}
	/* deallocate matching IOV structures including header buffs */
	if (qi->mmsg_vector != NULL) {
		mmsg_vector = qi->mmsg_vector;
		for (i = 0; i < qi->max_depth; i++) {
			iov = mmsg_vector->msg_hdr.msg_iov;
			if (iov != NULL) {
				if ((vp->header_size > 0) &&
					(iov->iov_base != NULL))
					kfree(iov->iov_base);
				kfree(iov);
			}
			mmsg_vector++;
		}
		kfree(qi->mmsg_vector);
	}
	kfree(qi);
}

/*
 * Queue constructor. Create a queue of a given size.
 */
static struct vector_queue *create_queue(
	struct vector_private *vp,
	int max_size,
	int header_size,
	int num_extra_frags)
{
	struct vector_queue *result;
	int i;
	struct iovec *iov;
	struct mmsghdr *mmsg_vector;

	result = kmalloc(sizeof(struct vector_queue), GFP_KERNEL);
	if (result == NULL)
		goto out_fail;
	result->max_depth = max_size;
	result->dev = vp->dev;
	/* zeroed allocations so destroy_queue() sees NULL markers even
	 * if we fail part-way through initialization
	 */
	result->mmsg_vector = kcalloc(
		max_size, sizeof(struct mmsghdr), GFP_KERNEL);
	result->skbuff_vector = kcalloc(
		max_size, sizeof(void *), GFP_KERNEL);
	if (result->mmsg_vector == NULL || result->skbuff_vector == NULL)
		goto out_fail;

	mmsg_vector = result->mmsg_vector;
	for (i = 0; i < max_size; i++) {
		/* Clear all pointers - we use non-NULL as marking on
		 * what to free on destruction
		 */
		*(result->skbuff_vector + i) = NULL;
		mmsg_vector->msg_hdr.msg_iov = NULL;
		mmsg_vector++;
	}
	mmsg_vector = result->mmsg_vector;
	result->max_iov_frags = num_extra_frags;
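	/* Per-message iovec slots: an optional header slot, one for the
	 * linear data, plus num_extra_frags for page fragments (sized
	 * generously over what prep_msg()/prep_skb() can actually use).
	 */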
	for (i = 0; i < max_size; i++) {
		if (vp->header_size > 0)
			iov = kmalloc(
				sizeof(struct iovec) * (3 + num_extra_frags),
				GFP_KERNEL
			);
		else
			iov = kmalloc(
				sizeof(struct iovec) * (2 + num_extra_frags),
				GFP_KERNEL
			);
		if (iov == NULL)
			goto out_fail;
		mmsg_vector->msg_hdr.msg_iov = iov;
		mmsg_vector->msg_hdr.msg_iovlen = 1;
		mmsg_vector->msg_hdr.msg_control = NULL;
		mmsg_vector->msg_hdr.msg_controllen = 0;
		mmsg_vector->msg_hdr.msg_flags = MSG_DONTWAIT;
		mmsg_vector->msg_hdr.msg_name = NULL;
		mmsg_vector->msg_hdr.msg_namelen = 0;
		if (vp->header_size > 0) {
			iov->iov_base = kmalloc(header_size, GFP_KERNEL);
			if (iov->iov_base == NULL)
				goto out_fail;
			iov->iov_len = header_size;
			mmsg_vector->msg_hdr.msg_iovlen = 2;
			iov++;
		}
		iov->iov_base = NULL;
		iov->iov_len = 0;
		mmsg_vector++;
	}
	spin_lock_init(&result->head_lock);
	spin_lock_init(&result->tail_lock);
	result->queue_depth = 0;
	result->head = 0;
	result->tail = 0;
	return result;
out_fail:
	destroy_queue(result);
	return NULL;
}

/*
 * We do not use the RX queue as a proper wraparound queue for now.
 * This is not necessary because the consumption via netif_rx()
 * happens in-line. While we can try using the return code of
 * netif_rx() for flow control, there are no drivers doing this today.
 * For this RX specific use we ignore the tail/head locks and
 * just read into a prepared queue filled with skbuffs.
 */

static struct sk_buff *prep_skb(
	struct vector_private *vp,
	struct user_msghdr *msg)
{
	int linear = vp->max_packet + vp->headroom + SAFETY_MARGIN;
	struct sk_buff *result;
	int iov_index = 0, len;
	struct iovec *iov = msg->msg_iov;
	int err, nr_frags, frag;
	skb_frag_t *skb_frag;

	if (vp->req_size <= linear)
		len = linear;
	else
		len = vp->req_size;
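	/* Request "linear" bytes of linear skb room; the remainder up
	 * to len lands in page fragments of at most order 3. On failure
	 * err is set and NULL is returned.
	 */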
	result = alloc_skb_with_frags(
		linear,
		len - vp->max_packet,
		3,
		&err,
		GFP_ATOMIC
	);
	if (vp->header_size > 0)
		iov_index++;
	if (result == NULL) {
		iov[iov_index].iov_base = NULL;
		iov[iov_index].iov_len = 0;
		goto done;
	}
	skb_reserve(result, vp->headroom);
	result->dev = vp->dev;
	skb_put(result, vp->max_packet);
	result->data_len = len - vp->max_packet;
	result->len += len - vp->max_packet;
	skb_reset_mac_header(result);
	result->ip_summed = CHECKSUM_NONE;
	iov[iov_index].iov_base = result->data;
	iov[iov_index].iov_len = vp->max_packet;
	iov_index++;

	nr_frags = skb_shinfo(result)->nr_frags;
	for (frag = 0; frag < nr_frags; frag++) {
		skb_frag = &skb_shinfo(result)->frags[frag];
		iov[iov_index].iov_base = skb_frag_address_safe(skb_frag);
		if (iov[iov_index].iov_base != NULL)
			iov[iov_index].iov_len = skb_frag_size(skb_frag);
		else
			iov[iov_index].iov_len = 0;
		iov_index++;
	}
done:
	msg->msg_iovlen = iov_index;
	return result;
}

/* Prepare queue for recvmmsg one-shot rx - fill with fresh sk_buffs */

static void prep_queue_for_rx(struct vector_queue *qi)
{
	struct vector_private *vp = netdev_priv(qi->dev);
	struct mmsghdr *mmsg_vector = qi->mmsg_vector;
	void **skbuff_vector = qi->skbuff_vector;
	int i;

	if (qi->queue_depth == 0)
		return;
	for (i = 0; i < qi->queue_depth; i++) {
		/* it is OK if allocation fails - recvmmsg with NULL data
		 * in the iov argument still performs an RX, it just drops
		 * the packet. This allows us to stop faffing around with
		 * a "drop buffer" here.
		 */
		*skbuff_vector = prep_skb(vp, &mmsg_vector->msg_hdr);
		skbuff_vector++;
		mmsg_vector++;
	}
	qi->queue_depth = 0;
}

static struct vector_device *find_device(int n)
{
	struct vector_device *device;
	struct list_head *ele;

	spin_lock(&vector_devices_lock);
	list_for_each(ele, &vector_devices) {
		device = list_entry(ele, struct vector_device, list);
		if (device->unit == n)
			goto out;
	}
	device = NULL;
out:
	spin_unlock(&vector_devices_lock);
	return device;
}

static int vector_parse(char *str, int *index_out, char **str_out,
			char **error_out)
{
	int n, err;
	char *start = str;

	while ((*str != ':') && (strlen(str) > 1))
		str++;
	if (*str != ':') {
		*error_out = "Expected ':' after device number";
		return -EINVAL;
	}
	*str = '\0';

	err = kstrtouint(start, 0, &n);
	if (err < 0) {
		*error_out = "Bad device number";
		return err;
	}

	str++;
	if (find_device(n)) {
		*error_out = "Device already configured";
		return -EINVAL;
	}

	*index_out = n;
	*str_out = str;
	return 0;
}

static int vector_config(char *str, char **error_out)
{
	int err, n;
	char *params;
	struct arglist *parsed;

	err = vector_parse(str, &n, &params, error_out);
	if (err != 0)
		return err;

	/* This string is broken up and the pieces used by the underlying
	 * driver. We should copy it to make sure things do not go wrong
	 * later.
	 */

	params = kstrdup(params, GFP_KERNEL);
	if (params == NULL) {
		*error_out = "vector_config failed to strdup string";
		return -ENOMEM;
	}

	parsed = uml_parse_vector_ifspec(params);

	if (parsed == NULL) {
		*error_out = "vector_config failed to parse parameters";
		kfree(params);
		return -EINVAL;
	}

	vector_eth_configure(n, parsed);
	return 0;
}

static int vector_id(char **str, int *start_out, int *end_out)
{
	char *end;
	int n;

	n = simple_strtoul(*str, &end, 0);
	if ((*end != '\0') || (end == *str))
		return -1;

	*start_out = n;
	*end_out = n;
	*str = end;
	return n;
}

static int vector_remove(int n, char **error_out)
{
	struct vector_device *vec_d;
	struct net_device *dev;
	struct vector_private *vp;

	vec_d = find_device(n);
	if (vec_d == NULL)
		return -ENODEV;
	dev = vec_d->dev;
	vp = netdev_priv(dev);
	if (vp->fds != NULL)
		return -EBUSY;
	unregister_netdev(dev);
	platform_device_unregister(&vec_d->pdev);
	return 0;
}

/*
 * There is no shared per-transport initialization code, so
 * we will just initialize each interface one by one and
 * add them to a list.
 */

static struct platform_driver uml_net_driver = {
	.driver = {
		.name = DRIVER_NAME,
	},
};

static void vector_device_release(struct device *dev)
{
	struct vector_device *device = dev_get_drvdata(dev);
	struct net_device *netdev = device->dev;

	list_del(&device->list);
	kfree(device);
	free_netdev(netdev);
}

/* Bog standard recv using recvmsg - not used normally unless the user
 * explicitly specifies not to use recvmmsg vector RX.
 */

static int vector_legacy_rx(struct vector_private *vp)
{
	int pkt_len;
	struct user_msghdr hdr;
	struct iovec iov[2 + MAX_IOV_SIZE]; /* header + data use case only */
	int iovpos = 0;
	struct sk_buff *skb;
	int header_check;

	hdr.msg_name = NULL;
	hdr.msg_namelen = 0;
	hdr.msg_iov = (struct iovec *) &iov;
	hdr.msg_control = NULL;
	hdr.msg_controllen = 0;
	hdr.msg_flags = 0;

	if (vp->header_size > 0) {
		iov[0].iov_base = vp->header_rxbuffer;
		iov[0].iov_len = vp->header_size;
	}

	skb = prep_skb(vp, &hdr);

	if (skb == NULL) {
		/* Read a packet into drop_buffer and don't do
		 * anything with it.
		 */
		iov[iovpos].iov_base = drop_buffer;
		iov[iovpos].iov_len = DROP_BUFFER_SIZE;
		hdr.msg_iovlen = 1;
		vp->dev->stats.rx_dropped++;
	}

	pkt_len = uml_vector_recvmsg(vp->fds->rx_fd, &hdr, 0);

	if (skb != NULL) {
		if (pkt_len > vp->header_size) {
			if (vp->header_size > 0) {
				header_check = vp->verify_header(
					vp->header_rxbuffer, skb, vp);
				if (header_check < 0) {
					dev_kfree_skb_irq(skb);
					vp->dev->stats.rx_dropped++;
					vp->estats.rx_encaps_errors++;
					return 0;
				}
				if (header_check > 0) {
					vp->estats.rx_csum_offload_good++;
					skb->ip_summed = CHECKSUM_UNNECESSARY;
				}
			}
			pskb_trim(skb, pkt_len - vp->rx_header_size);
			skb->protocol = eth_type_trans(skb, skb->dev);
			vp->dev->stats.rx_bytes += skb->len;
			vp->dev->stats.rx_packets++;
			netif_rx(skb);
		} else {
			dev_kfree_skb_irq(skb);
		}
	}
	return pkt_len;
}

/*
 * Packet at a time TX which falls back to vector TX if the
 * underlying transport is busy.
 */

static int writev_tx(struct vector_private *vp, struct sk_buff *skb)
{
	struct iovec iov[3 + MAX_IOV_SIZE];
	int iov_count, pkt_len = 0;

	iov[0].iov_base = vp->header_txbuffer;
	iov_count = prep_msg(vp, skb, (struct iovec *) &iov);

	if (iov_count < 1)
		goto drop;
	pkt_len = uml_vector_writev(
		vp->fds->tx_fd,
		(struct iovec *) &iov,
		iov_count
	);

	netif_trans_update(vp->dev);
	netif_wake_queue(vp->dev);

	if (pkt_len > 0) {
		vp->dev->stats.tx_bytes += skb->len;
		vp->dev->stats.tx_packets++;
	} else {
		vp->dev->stats.tx_dropped++;
	}
	consume_skb(skb);
	return pkt_len;
drop:
	vp->dev->stats.tx_dropped++;
	consume_skb(skb);
	return pkt_len;
}

/*
 * Receive as many messages as we can in one call using the special
 * mmsg vector matched to an skb vector which we prepared earlier.
 */

static int vector_mmsg_rx(struct vector_private *vp)
{
	int packet_count, i;
	struct vector_queue *qi = vp->rx_queue;
	struct sk_buff *skb;
	struct mmsghdr *mmsg_vector = qi->mmsg_vector;
	void **skbuff_vector = qi->skbuff_vector;
	int header_check;

	/* Refresh the vector: make sure it is filled with fresh skbs and
	 * that the iovs are updated to point to them.
	 */

	prep_queue_for_rx(qi);

	/* Fire the Lazy Gun - get as many packets as we can in one go. */

	packet_count = uml_vector_recvmmsg(
		vp->fds->rx_fd, qi->mmsg_vector, qi->max_depth, 0);

	if (packet_count <= 0)
		return packet_count;

	/* We treat packet processing as enqueue, buffer refresh as dequeue.
	 * The queue_depth tells us how many buffers have been used and how
	 * many we need to prep the next time prep_queue_for_rx() is called.
	 */

	qi->queue_depth = packet_count;

	for (i = 0; i < packet_count; i++) {
		skb = (*skbuff_vector);
		if (mmsg_vector->msg_len > vp->header_size) {
			if (vp->header_size > 0) {
				header_check = vp->verify_header(
					mmsg_vector->msg_hdr.msg_iov->iov_base,
					skb,
					vp
				);
				if (header_check < 0) {
					/* Overlay header failed to verify - discard.
					 * We could actually keep this skb and reuse it,
					 * but that would make the prep logic too
					 * complex.
					 */
					dev_kfree_skb_irq(skb);
					vp->estats.rx_encaps_errors++;
					continue;
				}
				if (header_check > 0) {
					vp->estats.rx_csum_offload_good++;
					skb->ip_summed = CHECKSUM_UNNECESSARY;
				}
			}
			pskb_trim(skb,
				mmsg_vector->msg_len - vp->rx_header_size);
			skb->protocol = eth_type_trans(skb, skb->dev);
			/*
			 * We do not need to lock on updating stats here -
			 * the interrupt loop is non-reentrant.
			 */
			vp->dev->stats.rx_bytes += skb->len;
			vp->dev->stats.rx_packets++;
			netif_rx(skb);
		} else {
			/* Overlay header too short to do anything - discard.
			 * We could actually keep this skb and reuse it,
			 * but that would make the prep logic too complex.
			 */
			if (skb != NULL)
				dev_kfree_skb_irq(skb);
		}
		(*skbuff_vector) = NULL;
		/* Move to the next buffer element */
		mmsg_vector++;
		skbuff_vector++;
	}
	if (packet_count > 0) {
		if (vp->estats.rx_queue_max < packet_count)
			vp->estats.rx_queue_max = packet_count;
		vp->estats.rx_queue_running_average =
			(vp->estats.rx_queue_running_average + packet_count) >> 1;
	}
	return packet_count;
}

static void vector_rx(struct vector_private *vp)
{
	int err;

	if ((vp->options & VECTOR_RX) > 0)
		while ((err = vector_mmsg_rx(vp)) > 0)
			;
	else
		while ((err = vector_legacy_rx(vp)) > 0)
			;
	if ((err != 0) && net_ratelimit())
		netdev_err(vp->dev, "vector_rx: error(%d)\n", err);
}

static int vector_net_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct vector_private *vp = netdev_priv(dev);
	int queue_depth = 0;

	if ((vp->options & VECTOR_TX) == 0) {
		writev_tx(vp, skb);
		return NETDEV_TX_OK;
	}

	/* We do BQL only in the vector path; there is no point doing it
	 * in packet at a time mode as there is no device queue.
	 */

	netdev_sent_queue(vp->dev, skb->len);
	queue_depth = vector_enqueue(vp->tx_queue, skb);

	/* if the device queue is full, stop the upper layers and
	 * flush it.
	 */

	if (queue_depth >= vp->tx_queue->max_depth - 1) {
		vp->estats.tx_kicks++;
		netif_stop_queue(dev);
		vector_send(vp->tx_queue);
		return NETDEV_TX_OK;
	}
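	/* Coalescing: when the stack indicates more frames are on the
	 * way (xmit_more), arm the short timer instead of kicking the
	 * socket per packet; lone small packets are flushed at once,
	 * anything larger is left to the TX tasklet.
	 */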
	if (skb->xmit_more) {
		mod_timer(&vp->tl, vp->coalesce);
		return NETDEV_TX_OK;
	}
	if (skb->len < TX_SMALL_PACKET) {
		vp->estats.tx_kicks++;
		vector_send(vp->tx_queue);
	} else
		tasklet_schedule(&vp->tx_poll);
	return NETDEV_TX_OK;
}

static irqreturn_t vector_rx_interrupt(int irq, void *dev_id)
{
	struct net_device *dev = dev_id;
	struct vector_private *vp = netdev_priv(dev);

	if (!netif_running(dev))
		return IRQ_NONE;
	vector_rx(vp);
	return IRQ_HANDLED;
}

static irqreturn_t vector_tx_interrupt(int irq, void *dev_id)
{
	struct net_device *dev = dev_id;
	struct vector_private *vp = netdev_priv(dev);

	if (!netif_running(dev))
		return IRQ_NONE;
	/* We need to pay attention to it only if we got
	 * -EAGAIN or -ENOBUFS from sendmmsg. Otherwise
	 * we ignore it. In the future, it may be worth
	 * it to improve the IRQ controller a bit to make
	 * tweaking the IRQ mask less costly.
	 */
	if (vp->in_write_poll)
		tasklet_schedule(&vp->tx_poll);
	return IRQ_HANDLED;
}

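/* irq_rr rotates IRQ slot allocation round-robin through
 * [VECTOR_BASE_IRQ, VECTOR_BASE_IRQ + VECTOR_IRQ_SPACE) as devices are
 * opened: each device takes one IRQ for RX and, in vector TX mode, one
 * more for TX.
 */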
static int irq_rr;

static int vector_net_close(struct net_device *dev)
{
	struct vector_private *vp = netdev_priv(dev);
	unsigned long flags;

	netif_stop_queue(dev);
	del_timer(&vp->tl);

	if (vp->fds == NULL)
		return 0;

	/* Disable and free all IRQS */
	if (vp->rx_irq > 0) {
		um_free_irq(vp->rx_irq, dev);
		vp->rx_irq = 0;
	}
	if (vp->tx_irq > 0) {
		um_free_irq(vp->tx_irq, dev);
		vp->tx_irq = 0;
	}
	tasklet_kill(&vp->tx_poll);
	if (vp->fds->rx_fd > 0) {
		os_close_file(vp->fds->rx_fd);
		vp->fds->rx_fd = -1;
	}
	if (vp->fds->tx_fd > 0) {
		os_close_file(vp->fds->tx_fd);
		vp->fds->tx_fd = -1;
	}
	/* kfree()/destroy_queue() handle NULL; clear the pointers so a
	 * later close cannot free them twice
	 */
	kfree(vp->bpf);
	vp->bpf = NULL;
	kfree(vp->fds->remote_addr);
	kfree(vp->transport_data);
	vp->transport_data = NULL;
	kfree(vp->header_rxbuffer);
	vp->header_rxbuffer = NULL;
	kfree(vp->header_txbuffer);
	vp->header_txbuffer = NULL;
	destroy_queue(vp->rx_queue);
	vp->rx_queue = NULL;
	destroy_queue(vp->tx_queue);
	vp->tx_queue = NULL;
	kfree(vp->fds);
	vp->fds = NULL;
	spin_lock_irqsave(&vp->lock, flags);
	vp->opened = false;
	spin_unlock_irqrestore(&vp->lock, flags);
	return 0;
}

/* TX tasklet */

static void vector_tx_poll(unsigned long data)
{
	struct vector_private *vp = (struct vector_private *)data;

	vp->estats.tx_kicks++;
	vector_send(vp->tx_queue);
}

static void vector_reset_tx(struct work_struct *work)
{
	struct vector_private *vp =
		container_of(work, struct vector_private, reset_tx);
	netdev_reset_queue(vp->dev);
	netif_start_queue(vp->dev);
	netif_wake_queue(vp->dev);
}

static int vector_net_open(struct net_device *dev)
{
	struct vector_private *vp = netdev_priv(dev);
	unsigned long flags;
	int err = -EINVAL;
	struct vector_device *vdevice;

	spin_lock_irqsave(&vp->lock, flags);
	if (vp->opened) {
		spin_unlock_irqrestore(&vp->lock, flags);
		return -ENXIO;
	}
	vp->opened = true;
	spin_unlock_irqrestore(&vp->lock, flags);

	vp->fds = uml_vector_user_open(vp->unit, vp->parsed);

	if (vp->fds == NULL)
		goto out_close;

	if (build_transport_data(vp) < 0)
		goto out_close;

	if ((vp->options & VECTOR_RX) > 0) {
		vp->rx_queue = create_queue(
			vp,
			get_depth(vp->parsed),
			vp->rx_header_size,
			MAX_IOV_SIZE
		);
		if (vp->rx_queue == NULL)
			goto out_close;
		vp->rx_queue->queue_depth = get_depth(vp->parsed);
	} else {
		vp->header_rxbuffer = kmalloc(
			vp->rx_header_size,
			GFP_KERNEL
		);
		if (vp->header_rxbuffer == NULL)
			goto out_close;
	}
	if ((vp->options & VECTOR_TX) > 0) {
		vp->tx_queue = create_queue(
			vp,
			get_depth(vp->parsed),
			vp->header_size,
			MAX_IOV_SIZE
		);
		if (vp->tx_queue == NULL)
			goto out_close;
	} else {
		vp->header_txbuffer = kmalloc(vp->header_size, GFP_KERNEL);
		if (vp->header_txbuffer == NULL)
			goto out_close;
	}

	/* Allocate the shared drop buffer used by legacy RX when skb
	 * allocation fails; it is deliberately never freed once allocated.
	 */
	if (drop_buffer == NULL) {
		drop_buffer = kmalloc(DROP_BUFFER_SIZE, GFP_KERNEL);
		if (drop_buffer == NULL)
			goto out_close;
	}

	/* READ IRQ */
	err = um_request_irq(
		irq_rr + VECTOR_BASE_IRQ, vp->fds->rx_fd,
		IRQ_READ, vector_rx_interrupt,
		IRQF_SHARED, dev->name, dev);
	if (err != 0) {
		netdev_err(dev, "vector_open: failed to get rx irq(%d)\n", err);
		err = -ENETUNREACH;
		goto out_close;
	}
	vp->rx_irq = irq_rr + VECTOR_BASE_IRQ;
	dev->irq = irq_rr + VECTOR_BASE_IRQ;
	irq_rr = (irq_rr + 1) % VECTOR_IRQ_SPACE;

	/* WRITE IRQ - we need it only if we have vector TX */
	if ((vp->options & VECTOR_TX) > 0) {
		err = um_request_irq(
			irq_rr + VECTOR_BASE_IRQ, vp->fds->tx_fd,
			IRQ_WRITE, vector_tx_interrupt,
			IRQF_SHARED, dev->name, dev);
		if (err != 0) {
			netdev_err(dev,
				"vector_open: failed to get tx irq(%d)\n", err);
			err = -ENETUNREACH;
			goto out_close;
		}
		vp->tx_irq = irq_rr + VECTOR_BASE_IRQ;
		irq_rr = (irq_rr + 1) % VECTOR_IRQ_SPACE;
	}

	if ((vp->options & VECTOR_BPF) != 0)
		vp->bpf = uml_vector_default_bpf(vp->fds->rx_fd, dev->dev_addr);

	/* Write Timeout Timer */

	vp->tl.data = (unsigned long) vp;
	netif_start_queue(dev);

	/* clear buffer - it can happen that the host side of the interface
	 * is full when we get here. In this case, new data is never queued,
	 * SIGIOs never arrive, and the net never works.
	 */

	vector_rx(vp);

	vector_reset_stats(vp);
	vdevice = find_device(vp->unit);
	vdevice->opened = 1;

	if ((vp->options & VECTOR_TX) != 0)
		add_timer(&vp->tl);
	return 0;
out_close:
	vector_net_close(dev);
	return err;
}

static void vector_net_set_multicast_list(struct net_device *dev)
{
	/* TODO: - we can do some BPF games here */
	return;
}

static void vector_net_tx_timeout(struct net_device *dev)
{
	struct vector_private *vp = netdev_priv(dev);

	vp->estats.tx_timeout_count++;
	netif_trans_update(dev);
	schedule_work(&vp->reset_tx);
}

static netdev_features_t vector_fix_features(struct net_device *dev,
	netdev_features_t features)
{
	features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
	return features;
}

static int vector_set_features(struct net_device *dev,
	netdev_features_t features)
{
	struct vector_private *vp = netdev_priv(dev);

	/* Adjust buffer sizes for GSO/GRO. Unfortunately, there is
	 * no way to negotiate it on raw sockets, so we can change
	 * only our side.
	 */
	if (features & NETIF_F_GRO)
		/* All new frame buffers will be GRO-sized */
		vp->req_size = 65536;
	else
		/* All new frame buffers will be normal sized */
		vp->req_size = vp->max_packet + vp->headroom + SAFETY_MARGIN;
	return 0;
}

#ifdef CONFIG_NET_POLL_CONTROLLER
static void vector_net_poll_controller(struct net_device *dev)
{
	disable_irq(dev->irq);
	vector_rx_interrupt(dev->irq, dev);
	enable_irq(dev->irq);
}
#endif

static void vector_net_get_drvinfo(struct net_device *dev,
				struct ethtool_drvinfo *info)
{
	strlcpy(info->driver, DRIVER_NAME, sizeof(info->driver));
	strlcpy(info->version, DRIVER_VERSION, sizeof(info->version));
}

static void vector_get_ringparam(struct net_device *netdev,
				struct ethtool_ringparam *ring)
{
	struct vector_private *vp = netdev_priv(netdev);

	ring->rx_max_pending = vp->rx_queue->max_depth;
	ring->tx_max_pending = vp->tx_queue->max_depth;
	ring->rx_pending = vp->rx_queue->max_depth;
	ring->tx_pending = vp->tx_queue->max_depth;
}

static void vector_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
{
	switch (stringset) {
	case ETH_SS_TEST:
		*buf = '\0';
		break;
	case ETH_SS_STATS:
		memcpy(buf, &ethtool_stats_keys, sizeof(ethtool_stats_keys));
		break;
	default:
		WARN_ON(1);
		break;
	}
}

static int vector_get_sset_count(struct net_device *dev, int sset)
{
	switch (sset) {
	case ETH_SS_TEST:
		return 0;
	case ETH_SS_STATS:
		return VECTOR_NUM_STATS;
	default:
		return -EOPNOTSUPP;
	}
}

static void vector_get_ethtool_stats(struct net_device *dev,
	struct ethtool_stats *estats,
	u64 *tmp_stats)
{
	struct vector_private *vp = netdev_priv(dev);

	memcpy(tmp_stats, &vp->estats, sizeof(struct vector_estats));
}

static int vector_get_coalesce(struct net_device *netdev,
				struct ethtool_coalesce *ec)
{
	struct vector_private *vp = netdev_priv(netdev);

	ec->tx_coalesce_usecs = (vp->coalesce * 1000000) / HZ;
	return 0;
}

static int vector_set_coalesce(struct net_device *netdev,
				struct ethtool_coalesce *ec)
{
	struct vector_private *vp = netdev_priv(netdev);

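	/* vp->coalesce is kept in jiffies: convert from microseconds,
	 * rounding down but never below one jiffy.
	 */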
	vp->coalesce = (ec->tx_coalesce_usecs * HZ) / 1000000;
	if (vp->coalesce == 0)
		vp->coalesce = 1;
	return 0;
}

static const struct ethtool_ops vector_net_ethtool_ops = {
	.get_drvinfo = vector_net_get_drvinfo,
	.get_link = ethtool_op_get_link,
	.get_ts_info = ethtool_op_get_ts_info,
	.get_ringparam = vector_get_ringparam,
	.get_strings = vector_get_strings,
	.get_sset_count = vector_get_sset_count,
	.get_ethtool_stats = vector_get_ethtool_stats,
	.get_coalesce = vector_get_coalesce,
	.set_coalesce = vector_set_coalesce,
};

static const struct net_device_ops vector_netdev_ops = {
	.ndo_open = vector_net_open,
	.ndo_stop = vector_net_close,
	.ndo_start_xmit = vector_net_start_xmit,
	.ndo_set_rx_mode = vector_net_set_multicast_list,
	.ndo_tx_timeout = vector_net_tx_timeout,
	.ndo_set_mac_address = eth_mac_addr,
	.ndo_validate_addr = eth_validate_addr,
	.ndo_fix_features = vector_fix_features,
	.ndo_set_features = vector_set_features,
#ifdef CONFIG_NET_POLL_CONTROLLER
	.ndo_poll_controller = vector_net_poll_controller,
#endif
};

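/* Coalescing timer callback: fires vp->coalesce jiffies after a
 * batched xmit (see vector_net_start_xmit) and flushes whatever is
 * still sitting in the TX queue.
 */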
static void vector_timer_expire(unsigned long _conn)
{
	struct vector_private *vp = (struct vector_private *)_conn;

	vp->estats.tx_kicks++;
	vector_send(vp->tx_queue);
}

static void vector_eth_configure(
		int n,
		struct arglist *def
	)
{
	struct vector_device *device;
	struct net_device *dev;
	struct vector_private *vp;
	int err;

	device = kzalloc(sizeof(*device), GFP_KERNEL);
	if (device == NULL) {
		printk(KERN_ERR "eth_configure failed to allocate struct "
				 "vector_device\n");
		return;
	}
	dev = alloc_etherdev(sizeof(struct vector_private));
	if (dev == NULL) {
		printk(KERN_ERR "eth_configure: failed to allocate struct "
				 "net_device for vec%d\n", n);
		goto out_free_device;
	}

	dev->mtu = get_mtu(def);

	INIT_LIST_HEAD(&device->list);
	device->unit = n;

	/* If this name ends up conflicting with an existing registered
	 * netdevice, that is OK, register_netdev{,ice}() will notice this
	 * and fail.
	 */
	snprintf(dev->name, sizeof(dev->name), "vec%d", n);
	uml_net_setup_etheraddr(dev, uml_vector_fetch_arg(def, "mac"));
	vp = netdev_priv(dev);

	/* sysfs register */
	if (!driver_registered) {
		platform_driver_register(&uml_net_driver);
		driver_registered = 1;
	}
	device->pdev.id = n;
	device->pdev.name = DRIVER_NAME;
	device->pdev.dev.release = vector_device_release;
	dev_set_drvdata(&device->pdev.dev, device);
	if (platform_device_register(&device->pdev))
		goto out_free_netdev;
	SET_NETDEV_DEV(dev, &device->pdev.dev);

	device->dev = dev;

	*vp = ((struct vector_private)
		{
		.list			= LIST_HEAD_INIT(vp->list),
		.dev			= dev,
		.unit			= n,
		.options		= get_transport_options(def),
		.rx_irq			= 0,
		.tx_irq			= 0,
		.parsed			= def,
		.max_packet		= get_mtu(def) + ETH_HEADER_OTHER,
		/* TODO - we need to calculate headroom so that ip header
		 * is 16 byte aligned all the time
		 */
		.headroom		= get_headroom(def),
		.form_header		= NULL,
		.verify_header		= NULL,
		.header_rxbuffer	= NULL,
		.header_txbuffer	= NULL,
		.header_size		= 0,
		.rx_header_size		= 0,
		.rexmit_scheduled	= false,
		.opened			= false,
		.transport_data		= NULL,
		.in_write_poll		= false,
		.coalesce		= 2,
		.req_size		= get_req_size(def)
		});

	dev->features = dev->hw_features = (NETIF_F_SG | NETIF_F_FRAGLIST);
	tasklet_init(&vp->tx_poll, vector_tx_poll, (unsigned long)vp);
	INIT_WORK(&vp->reset_tx, vector_reset_tx);

	init_timer(&vp->tl);
	spin_lock_init(&vp->lock);
	vp->tl.function = vector_timer_expire;

	/* FIXME */
	dev->netdev_ops = &vector_netdev_ops;
	dev->ethtool_ops = &vector_net_ethtool_ops;
	dev->watchdog_timeo = (HZ >> 1);
	/* primary IRQ - fixme */
	dev->irq = 0; /* we will adjust this once opened */

	rtnl_lock();
	err = register_netdevice(dev);
	rtnl_unlock();
	if (err)
		goto out_undo_user_init;

	spin_lock(&vector_devices_lock);
	list_add(&device->list, &vector_devices);
	spin_unlock(&vector_devices_lock);

	return;

out_undo_user_init:
	/* the platform device release callback frees both the netdev
	 * and the vector_device for us
	 */
	platform_device_unregister(&device->pdev);
	return;
out_free_netdev:
	free_netdev(dev);
out_free_device:
	kfree(device);
}

/*
 * Invoked late in the init
 */

static int __init vector_init(void)
{
	struct list_head *ele;
	struct vector_cmd_line_arg *def;
	struct arglist *parsed;

	list_for_each(ele, &vec_cmd_line) {
		def = list_entry(ele, struct vector_cmd_line_arg, list);
		parsed = uml_parse_vector_ifspec(def->arguments);
		if (parsed != NULL)
			vector_eth_configure(def->unit, parsed);
	}
	return 0;
}

/* Invoked at initial argument parsing, only stores
 * arguments until a proper vector_init is called
 * later.
 */

static int __init vector_setup(char *str)
{
	char *error;
	int n, err;
	struct vector_cmd_line_arg *new;

	err = vector_parse(str, &n, &str, &error);
	if (err) {
		printk(KERN_ERR "vector_setup - Couldn't parse '%s' : %s\n",
				 str, error);
		return 1;
	}
	new = alloc_bootmem(sizeof(*new));
	INIT_LIST_HEAD(&new->list);
	new->unit = n;
	new->arguments = str;
	list_add_tail(&new->list, &vec_cmd_line);
	return 1;
}

__setup("vec", vector_setup);
__uml_help(vector_setup,
"vec[0-9]+:<option>=<value>,<option>=<value>\n"
" Configure a vector io network device.\n\n"
);

late_initcall(vector_init);

static struct mc_device vector_mc = {
	.list		= LIST_HEAD_INIT(vector_mc.list),
	.name		= "vec",
	.config		= vector_config,
	.get_config	= NULL,
	.id		= vector_id,
	.remove		= vector_remove,
};

#ifdef CONFIG_INET
static int vector_inetaddr_event(
	struct notifier_block *this,
	unsigned long event,
	void *ptr)
{
	return NOTIFY_DONE;
}

static struct notifier_block vector_inetaddr_notifier = {
	.notifier_call = vector_inetaddr_event,
};

static void inet_register(void)
{
	register_inetaddr_notifier(&vector_inetaddr_notifier);
}
#else
static inline void inet_register(void)
{
}
#endif

static int vector_net_init(void)
{
	mconsole_register_dev(&vector_mc);
	inet_register();
	return 0;
}

__initcall(vector_net_init);