Divy Le Ray4d22de32007-01-18 22:04:14 -05001/*
Divy Le Ray1d68e932007-01-30 19:44:35 -08002 * Copyright (c) 2005-2007 Chelsio, Inc. All rights reserved.
Divy Le Ray4d22de32007-01-18 22:04:14 -05003 *
Divy Le Ray1d68e932007-01-30 19:44:35 -08004 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
Divy Le Ray4d22de32007-01-18 22:04:14 -05009 *
Divy Le Ray1d68e932007-01-30 19:44:35 -080010 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
Divy Le Ray4d22de32007-01-18 22:04:14 -050031 */
Divy Le Ray4d22de32007-01-18 22:04:14 -050032#include <linux/skbuff.h>
33#include <linux/netdevice.h>
34#include <linux/etherdevice.h>
35#include <linux/if_vlan.h>
36#include <linux/ip.h>
37#include <linux/tcp.h>
38#include <linux/dma-mapping.h>
39#include "common.h"
40#include "regs.h"
41#include "sge_defs.h"
42#include "t3_cpl.h"
43#include "firmware_exports.h"
44
45#define USE_GTS 0
46
47#define SGE_RX_SM_BUF_SIZE 1536
Divy Le Raye0994eb2007-02-24 16:44:17 -080048
Divy Le Ray4d22de32007-01-18 22:04:14 -050049#define SGE_RX_COPY_THRES 256
Divy Le Raycf992af2007-05-30 21:10:47 -070050#define SGE_RX_PULL_LEN 128
Divy Le Ray4d22de32007-01-18 22:04:14 -050051
Divy Le Raye0994eb2007-02-24 16:44:17 -080052/*
Divy Le Raycf992af2007-05-30 21:10:47 -070053 * Page chunk size for FL0 buffers if FL0 is to be populated with page chunks.
54 * It must be a divisor of PAGE_SIZE. If set to 0 FL0 will use sk_buffs
55 * directly.
Divy Le Raye0994eb2007-02-24 16:44:17 -080056 */
Divy Le Raycf992af2007-05-30 21:10:47 -070057#define FL0_PG_CHUNK_SIZE 2048
58
Divy Le Raye0994eb2007-02-24 16:44:17 -080059#define SGE_RX_DROP_THRES 16
Divy Le Ray4d22de32007-01-18 22:04:14 -050060
61/*
62 * Period of the Tx buffer reclaim timer. This timer does not need to run
63 * frequently as Tx buffers are usually reclaimed by new Tx packets.
64 */
65#define TX_RECLAIM_PERIOD (HZ / 4)
66
67/* WR size in bytes */
68#define WR_LEN (WR_FLITS * 8)
69
70/*
71 * Types of Tx queues in each queue set. Order here matters, do not change.
72 */
73enum { TXQ_ETH, TXQ_OFLD, TXQ_CTRL };
74
75/* Values for sge_txq.flags */
76enum {
77 TXQ_RUNNING = 1 << 0, /* fetch engine is running */
78 TXQ_LAST_PKT_DB = 1 << 1, /* last packet rang the doorbell */
79};
80
81struct tx_desc {
Al Virofb8e4442007-08-23 03:04:12 -040082 __be64 flit[TX_DESC_FLITS];
Divy Le Ray4d22de32007-01-18 22:04:14 -050083};
84
85struct rx_desc {
86 __be32 addr_lo;
87 __be32 len_gen;
88 __be32 gen2;
89 __be32 addr_hi;
90};
91
92struct tx_sw_desc { /* SW state per Tx descriptor */
93 struct sk_buff *skb;
Divy Le Ray23561c92007-11-16 11:22:05 -080094 u8 eop; /* set if last descriptor for packet */
95 u8 addr_idx; /* buffer index of first SGL entry in descriptor */
96 u8 fragidx; /* first page fragment associated with descriptor */
97 s8 sflit; /* start flit of first SGL entry in descriptor */
Divy Le Ray4d22de32007-01-18 22:04:14 -050098};
99
Divy Le Raycf992af2007-05-30 21:10:47 -0700100struct rx_sw_desc { /* SW state per Rx descriptor */
Divy Le Raye0994eb2007-02-24 16:44:17 -0800101 union {
102 struct sk_buff *skb;
Divy Le Raycf992af2007-05-30 21:10:47 -0700103 struct fl_pg_chunk pg_chunk;
104 };
105 DECLARE_PCI_UNMAP_ADDR(dma_addr);
Divy Le Ray4d22de32007-01-18 22:04:14 -0500106};
107
108struct rsp_desc { /* response queue descriptor */
109 struct rss_header rss_hdr;
110 __be32 flags;
111 __be32 len_cq;
112 u8 imm_data[47];
113 u8 intr_gen;
114};
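
/*
 * Note (assuming struct rss_header is 8 bytes): the fields above add up to
 * 8 + 4 + 4 + 47 + 1 = 64 bytes, i.e. one 64-byte response descriptor.
 */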
115
Divy Le Ray4d22de32007-01-18 22:04:14 -0500116/*
Divy Le Ray99d7cf32007-02-24 16:44:06 -0800117 * Holds unmapping information for Tx packets that need deferred unmapping.
118 * This structure lives at skb->head and must be allocated by callers.
119 */
120struct deferred_unmap_info {
121 struct pci_dev *pdev;
122 dma_addr_t addr[MAX_SKB_FRAGS + 1];
123};
124
125/*
Divy Le Ray4d22de32007-01-18 22:04:14 -0500126 * Maps a number of flits to the number of Tx descriptors that can hold them.
127 * The formula is
128 *
129 * desc = 1 + (flits - 2) / (WR_FLITS - 1).
130 *
131 * HW allows up to 4 descriptors to be combined into a WR.
132 */
133static u8 flit_desc_map[] = {
134 0,
135#if SGE_NUM_GENBITS == 1
136 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
137 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
138 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
139 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
140#elif SGE_NUM_GENBITS == 2
141 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
142 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
143 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
144 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
145#else
146# error "SGE_NUM_GENBITS must be 1 or 2"
147#endif
148};
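
/*
 * Worked example (illustrative): with SGE_NUM_GENBITS == 2 the table above
 * is consistent with WR_FLITS == 15, so 15 flits still fit in a single
 * descriptor (1 + (15 - 2) / 14 == 1) while 16 flits need two
 * (1 + (16 - 2) / 14 == 2), matching flit_desc_map[15] == 1 and
 * flit_desc_map[16] == 2.
 */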
149
150static inline struct sge_qset *fl_to_qset(const struct sge_fl *q, int qidx)
151{
152 return container_of(q, struct sge_qset, fl[qidx]);
153}
154
155static inline struct sge_qset *rspq_to_qset(const struct sge_rspq *q)
156{
157 return container_of(q, struct sge_qset, rspq);
158}
159
160static inline struct sge_qset *txq_to_qset(const struct sge_txq *q, int qidx)
161{
162 return container_of(q, struct sge_qset, txq[qidx]);
163}
164
165/**
166 * refill_rspq - replenish an SGE response queue
167 * @adapter: the adapter
168 * @q: the response queue to replenish
169 * @credits: how many new responses to make available
170 *
171 * Replenishes a response queue by making the supplied number of responses
172 * available to HW.
173 */
174static inline void refill_rspq(struct adapter *adapter,
175 const struct sge_rspq *q, unsigned int credits)
176{
Divy Le Rayafefce62007-11-16 11:22:21 -0800177 rmb();
Divy Le Ray4d22de32007-01-18 22:04:14 -0500178 t3_write_reg(adapter, A_SG_RSPQ_CREDIT_RETURN,
179 V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
180}
181
182/**
183 * need_skb_unmap - does the platform need unmapping of sk_buffs?
184 *
185 * Returns true if the platform needs sk_buff unmapping. The compiler
186 * optimizes away unnecessary code if this returns true.
187 */
188static inline int need_skb_unmap(void)
189{
190 /*
191 * This structure is used to tell if the platform needs buffer
192 * unmapping by checking if DECLARE_PCI_UNMAP_ADDR defines anything.
193 */
194 struct dummy {
195 DECLARE_PCI_UNMAP_ADDR(addr);
196 };
197
198 return sizeof(struct dummy) != 0;
199}
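
/*
 * Sketch of the idea above: on platforms where DECLARE_PCI_UNMAP_ADDR(x)
 * expands to nothing, struct dummy is empty (size 0 as a GCC extension) and
 * need_skb_unmap() returns 0, so the unmap paths it guards are compiled out;
 * where the macro declares a dma_addr_t field it returns 1.
 */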
200
201/**
202 * unmap_skb - unmap a packet main body and its page fragments
203 * @skb: the packet
204 * @q: the Tx queue containing Tx descriptors for the packet
205 * @cidx: index of Tx descriptor
206 * @pdev: the PCI device
207 *
208 * Unmap the main body of an sk_buff and its page fragments, if any.
209 * Because of the fairly complicated structure of our SGLs and the desire
Divy Le Ray23561c92007-11-16 11:22:05 -0800210 * to conserve space for metadata, the information necessary to unmap an
211 * sk_buff is spread across the sk_buff itself (buffer lengths), the HW Tx
212 * descriptors (the physical addresses of the various data buffers), and
213 * the SW descriptor state (assorted indices). The send functions
214 * initialize the indices for the first packet descriptor so we can unmap
215 * the buffers held in the first Tx descriptor here, and we have enough
216 * information at this point to set the state for the next Tx descriptor.
217 *
218 * Note that it is possible to clean up the first descriptor of a packet
219 * before the send routines have written the next descriptors, but this
220 * race does not cause any problem. We just end up writing the unmapping
221 * info for the descriptor first.
Divy Le Ray4d22de32007-01-18 22:04:14 -0500222 */
223static inline void unmap_skb(struct sk_buff *skb, struct sge_txq *q,
224 unsigned int cidx, struct pci_dev *pdev)
225{
226 const struct sg_ent *sgp;
Divy Le Ray23561c92007-11-16 11:22:05 -0800227 struct tx_sw_desc *d = &q->sdesc[cidx];
228 int nfrags, frag_idx, curflit, j = d->addr_idx;
Divy Le Ray4d22de32007-01-18 22:04:14 -0500229
Divy Le Ray23561c92007-11-16 11:22:05 -0800230 sgp = (struct sg_ent *)&q->desc[cidx].flit[d->sflit];
231 frag_idx = d->fragidx;
Divy Le Ray4d22de32007-01-18 22:04:14 -0500232
Divy Le Ray23561c92007-11-16 11:22:05 -0800233 if (frag_idx == 0 && skb_headlen(skb)) {
234 pci_unmap_single(pdev, be64_to_cpu(sgp->addr[0]),
235 skb_headlen(skb), PCI_DMA_TODEVICE);
Divy Le Ray4d22de32007-01-18 22:04:14 -0500236 j = 1;
237 }
238
Divy Le Ray23561c92007-11-16 11:22:05 -0800239 curflit = d->sflit + 1 + j;
Divy Le Ray4d22de32007-01-18 22:04:14 -0500240 nfrags = skb_shinfo(skb)->nr_frags;
241
242 while (frag_idx < nfrags && curflit < WR_FLITS) {
243 pci_unmap_page(pdev, be64_to_cpu(sgp->addr[j]),
244 skb_shinfo(skb)->frags[frag_idx].size,
245 PCI_DMA_TODEVICE);
246 j ^= 1;
247 if (j == 0) {
248 sgp++;
249 curflit++;
250 }
251 curflit++;
252 frag_idx++;
253 }
254
Divy Le Ray23561c92007-11-16 11:22:05 -0800255 if (frag_idx < nfrags) { /* SGL continues into next Tx descriptor */
256 d = cidx + 1 == q->size ? q->sdesc : d + 1;
257 d->fragidx = frag_idx;
258 d->addr_idx = j;
259 d->sflit = curflit - WR_FLITS - j; /* sflit can be -1 */
Divy Le Ray4d22de32007-01-18 22:04:14 -0500260 }
261}
262
263/**
264 * free_tx_desc - reclaims Tx descriptors and their buffers
265 * @adapter: the adapter
266 * @q: the Tx queue to reclaim descriptors from
267 * @n: the number of descriptors to reclaim
268 *
269 * Reclaims Tx descriptors from an SGE Tx queue and frees the associated
270 * Tx buffers. Called with the Tx queue lock held.
271 */
272static void free_tx_desc(struct adapter *adapter, struct sge_txq *q,
273 unsigned int n)
274{
275 struct tx_sw_desc *d;
276 struct pci_dev *pdev = adapter->pdev;
277 unsigned int cidx = q->cidx;
278
Divy Le Ray99d7cf32007-02-24 16:44:06 -0800279 const int need_unmap = need_skb_unmap() &&
280 q->cntxt_id >= FW_TUNNEL_SGEEC_START;
281
Divy Le Ray4d22de32007-01-18 22:04:14 -0500282 d = &q->sdesc[cidx];
283 while (n--) {
284 if (d->skb) { /* an SGL is present */
Divy Le Ray99d7cf32007-02-24 16:44:06 -0800285 if (need_unmap)
Divy Le Ray4d22de32007-01-18 22:04:14 -0500286 unmap_skb(d->skb, q, cidx, pdev);
Divy Le Ray23561c92007-11-16 11:22:05 -0800287 if (d->eop)
Divy Le Ray4d22de32007-01-18 22:04:14 -0500288 kfree_skb(d->skb);
289 }
290 ++d;
291 if (++cidx == q->size) {
292 cidx = 0;
293 d = q->sdesc;
294 }
295 }
296 q->cidx = cidx;
297}
298
299/**
300 * reclaim_completed_tx - reclaims completed Tx descriptors
301 * @adapter: the adapter
302 * @q: the Tx queue to reclaim completed descriptors from
303 *
304 * Reclaims Tx descriptors that the SGE has indicated it has processed,
305 * and frees the associated buffers if possible. Called with the Tx
306 * queue's lock held.
307 */
308static inline void reclaim_completed_tx(struct adapter *adapter,
309 struct sge_txq *q)
310{
311 unsigned int reclaim = q->processed - q->cleaned;
312
313 if (reclaim) {
314 free_tx_desc(adapter, q, reclaim);
315 q->cleaned += reclaim;
316 q->in_use -= reclaim;
317 }
318}
319
320/**
321 * should_restart_tx - are there enough resources to restart a Tx queue?
322 * @q: the Tx queue
323 *
324 * Checks if there are enough descriptors to restart a suspended Tx queue.
325 */
326static inline int should_restart_tx(const struct sge_txq *q)
327{
328 unsigned int r = q->processed - q->cleaned;
329
330 return q->in_use - r < (q->size >> 1);
331}
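
/*
 * Example (illustrative): for a 1024-entry Tx queue, a suspended queue is
 * considered restartable once fewer than 512 descriptors would remain in
 * use after reclaiming the descriptors the SGE has already processed.
 */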
332
333/**
334 * free_rx_bufs - free the Rx buffers on an SGE free list
335 * @pdev: the PCI device associated with the adapter
336 * @rxq: the SGE free list to clean up
337 *
338 * Release the buffers on an SGE free-buffer Rx queue. HW fetching from
339 * this queue should be stopped before calling this function.
340 */
341static void free_rx_bufs(struct pci_dev *pdev, struct sge_fl *q)
342{
343 unsigned int cidx = q->cidx;
344
345 while (q->credits--) {
346 struct rx_sw_desc *d = &q->sdesc[cidx];
347
348 pci_unmap_single(pdev, pci_unmap_addr(d, dma_addr),
349 q->buf_size, PCI_DMA_FROMDEVICE);
Divy Le Raycf992af2007-05-30 21:10:47 -0700350 if (q->use_pages) {
351 put_page(d->pg_chunk.page);
352 d->pg_chunk.page = NULL;
Divy Le Raye0994eb2007-02-24 16:44:17 -0800353 } else {
Divy Le Raycf992af2007-05-30 21:10:47 -0700354 kfree_skb(d->skb);
355 d->skb = NULL;
Divy Le Raye0994eb2007-02-24 16:44:17 -0800356 }
Divy Le Ray4d22de32007-01-18 22:04:14 -0500357 if (++cidx == q->size)
358 cidx = 0;
359 }
Divy Le Raye0994eb2007-02-24 16:44:17 -0800360
Divy Le Raycf992af2007-05-30 21:10:47 -0700361 if (q->pg_chunk.page) {
362 __free_page(q->pg_chunk.page);
363 q->pg_chunk.page = NULL;
364 }
Divy Le Ray4d22de32007-01-18 22:04:14 -0500365}
366
367/**
368 * add_one_rx_buf - add a packet buffer to a free-buffer list
Divy Le Raycf992af2007-05-30 21:10:47 -0700369 * @va: buffer start VA
Divy Le Ray4d22de32007-01-18 22:04:14 -0500370 * @len: the buffer length
371 * @d: the HW Rx descriptor to write
372 * @sd: the SW Rx descriptor to write
373 * @gen: the generation bit value
374 * @pdev: the PCI device associated with the adapter
375 *
376 * Add a buffer of the given length to the supplied HW and SW Rx
377 * descriptors.
378 */
Divy Le Rayb1fb1f22008-05-21 18:56:16 -0700379static inline int add_one_rx_buf(void *va, unsigned int len,
380 struct rx_desc *d, struct rx_sw_desc *sd,
381 unsigned int gen, struct pci_dev *pdev)
Divy Le Ray4d22de32007-01-18 22:04:14 -0500382{
383 dma_addr_t mapping;
384
Divy Le Raye0994eb2007-02-24 16:44:17 -0800385 mapping = pci_map_single(pdev, va, len, PCI_DMA_FROMDEVICE);
Divy Le Rayb1fb1f22008-05-21 18:56:16 -0700386 if (unlikely(pci_dma_mapping_error(mapping)))
387 return -ENOMEM;
388
Divy Le Ray4d22de32007-01-18 22:04:14 -0500389 pci_unmap_addr_set(sd, dma_addr, mapping);
390
391 d->addr_lo = cpu_to_be32(mapping);
392 d->addr_hi = cpu_to_be32((u64) mapping >> 32);
393 wmb();
394 d->len_gen = cpu_to_be32(V_FLD_GEN1(gen));
395 d->gen2 = cpu_to_be32(V_FLD_GEN2(gen));
Divy Le Rayb1fb1f22008-05-21 18:56:16 -0700396 return 0;
Divy Le Ray4d22de32007-01-18 22:04:14 -0500397}
398
Divy Le Raycf992af2007-05-30 21:10:47 -0700399static int alloc_pg_chunk(struct sge_fl *q, struct rx_sw_desc *sd, gfp_t gfp)
400{
401 if (!q->pg_chunk.page) {
402 q->pg_chunk.page = alloc_page(gfp);
403 if (unlikely(!q->pg_chunk.page))
404 return -ENOMEM;
405 q->pg_chunk.va = page_address(q->pg_chunk.page);
406 q->pg_chunk.offset = 0;
407 }
408 sd->pg_chunk = q->pg_chunk;
409
410 q->pg_chunk.offset += q->buf_size;
411 if (q->pg_chunk.offset == PAGE_SIZE)
412 q->pg_chunk.page = NULL;
413 else {
414 q->pg_chunk.va += q->buf_size;
415 get_page(q->pg_chunk.page);
416 }
417 return 0;
418}
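
/*
 * Illustration (assuming a 4KB PAGE_SIZE and FL0_PG_CHUNK_SIZE buffers):
 * each page is carved into two 2KB chunks. While chunks of the current page
 * remain, get_page() keeps an extra reference for the free list; the last
 * chunk hands the list's reference over to the descriptor and the list
 * starts a fresh page on the next allocation.
 */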
419
Divy Le Ray4d22de32007-01-18 22:04:14 -0500420/**
421 * refill_fl - refill an SGE free-buffer list
422 * @adapter: the adapter
423 * @q: the free-list to refill
424 * @n: the number of new buffers to allocate
425 * @gfp: the gfp flags for allocating new buffers
426 *
427 * (Re)populate an SGE free-buffer list with up to @n new packet buffers,
428 * allocated with the supplied gfp flags. The caller must ensure that
429 * @n does not exceed the queue's capacity.
430 */
Divy Le Rayb1fb1f22008-05-21 18:56:16 -0700431static int refill_fl(struct adapter *adap, struct sge_fl *q, int n, gfp_t gfp)
Divy Le Ray4d22de32007-01-18 22:04:14 -0500432{
Divy Le Raycf992af2007-05-30 21:10:47 -0700433 void *buf_start;
Divy Le Ray4d22de32007-01-18 22:04:14 -0500434 struct rx_sw_desc *sd = &q->sdesc[q->pidx];
435 struct rx_desc *d = &q->desc[q->pidx];
Divy Le Rayb1fb1f22008-05-21 18:56:16 -0700436 unsigned int count = 0;
Divy Le Ray4d22de32007-01-18 22:04:14 -0500437
438 while (n--) {
Divy Le Rayb1fb1f22008-05-21 18:56:16 -0700439 int err;
440
Divy Le Raycf992af2007-05-30 21:10:47 -0700441 if (q->use_pages) {
442 if (unlikely(alloc_pg_chunk(q, sd, gfp))) {
443nomem: q->alloc_failed++;
Divy Le Raye0994eb2007-02-24 16:44:17 -0800444 break;
445 }
Divy Le Raycf992af2007-05-30 21:10:47 -0700446 buf_start = sd->pg_chunk.va;
Divy Le Raye0994eb2007-02-24 16:44:17 -0800447 } else {
Divy Le Raycf992af2007-05-30 21:10:47 -0700448 struct sk_buff *skb = alloc_skb(q->buf_size, gfp);
Divy Le Raye0994eb2007-02-24 16:44:17 -0800449
Divy Le Raycf992af2007-05-30 21:10:47 -0700450 if (!skb)
451 goto nomem;
Divy Le Raye0994eb2007-02-24 16:44:17 -0800452
Divy Le Raycf992af2007-05-30 21:10:47 -0700453 sd->skb = skb;
454 buf_start = skb->data;
Divy Le Raye0994eb2007-02-24 16:44:17 -0800455 }
456
Divy Le Rayb1fb1f22008-05-21 18:56:16 -0700457 err = add_one_rx_buf(buf_start, q->buf_size, d, sd, q->gen,
458 adap->pdev);
459 if (unlikely(err)) {
460 if (!q->use_pages) {
461 kfree_skb(sd->skb);
462 sd->skb = NULL;
463 }
464 break;
465 }
466
Divy Le Ray4d22de32007-01-18 22:04:14 -0500467 d++;
468 sd++;
469 if (++q->pidx == q->size) {
470 q->pidx = 0;
471 q->gen ^= 1;
472 sd = q->sdesc;
473 d = q->desc;
474 }
475 q->credits++;
Divy Le Rayb1fb1f22008-05-21 18:56:16 -0700476 count++;
Divy Le Ray4d22de32007-01-18 22:04:14 -0500477 }
Divy Le Rayafefce62007-11-16 11:22:21 -0800478 wmb();
Divy Le Rayb1fb1f22008-05-21 18:56:16 -0700479 if (likely(count))
480 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
481
482 return count;
Divy Le Ray4d22de32007-01-18 22:04:14 -0500483}
484
485static inline void __refill_fl(struct adapter *adap, struct sge_fl *fl)
486{
487 refill_fl(adap, fl, min(16U, fl->size - fl->credits), GFP_ATOMIC);
488}
489
490/**
491 * recycle_rx_buf - recycle a receive buffer
492 * @adapter: the adapter
493 * @q: the SGE free list
494 * @idx: index of buffer to recycle
495 *
496 * Recycles the specified buffer on the given free list by adding it at
497 * the next available slot on the list.
498 */
499static void recycle_rx_buf(struct adapter *adap, struct sge_fl *q,
500 unsigned int idx)
501{
502 struct rx_desc *from = &q->desc[idx];
503 struct rx_desc *to = &q->desc[q->pidx];
504
Divy Le Raycf992af2007-05-30 21:10:47 -0700505 q->sdesc[q->pidx] = q->sdesc[idx];
Divy Le Ray4d22de32007-01-18 22:04:14 -0500506 to->addr_lo = from->addr_lo; /* already big endian */
507 to->addr_hi = from->addr_hi; /* likewise */
508 wmb();
509 to->len_gen = cpu_to_be32(V_FLD_GEN1(q->gen));
510 to->gen2 = cpu_to_be32(V_FLD_GEN2(q->gen));
511 q->credits++;
512
513 if (++q->pidx == q->size) {
514 q->pidx = 0;
515 q->gen ^= 1;
516 }
517 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
518}
519
520/**
521 * alloc_ring - allocate resources for an SGE descriptor ring
522 * @pdev: the PCI device
523 * @nelem: the number of descriptors
524 * @elem_size: the size of each descriptor
525 * @sw_size: the size of the SW state associated with each ring element
526 * @phys: the physical address of the allocated ring
527 * @metadata: address of the array holding the SW state for the ring
528 *
529 * Allocates resources for an SGE descriptor ring, such as Tx queues,
530 * free buffer lists, or response queues. Each SGE ring requires
531 * space for its HW descriptors plus, optionally, space for the SW state
532 * associated with each HW entry (the metadata). The function returns
533 * three values: the virtual address for the HW ring (the return value
534 * of the function), the physical address of the HW ring, and the address
535 * of the SW ring.
536 */
537static void *alloc_ring(struct pci_dev *pdev, size_t nelem, size_t elem_size,
Divy Le Raye0994eb2007-02-24 16:44:17 -0800538 size_t sw_size, dma_addr_t * phys, void *metadata)
Divy Le Ray4d22de32007-01-18 22:04:14 -0500539{
540 size_t len = nelem * elem_size;
541 void *s = NULL;
542 void *p = dma_alloc_coherent(&pdev->dev, len, phys, GFP_KERNEL);
543
544 if (!p)
545 return NULL;
546 if (sw_size) {
547 s = kcalloc(nelem, sw_size, GFP_KERNEL);
548
549 if (!s) {
550 dma_free_coherent(&pdev->dev, len, p, *phys);
551 return NULL;
552 }
553 }
554 if (metadata)
555 *(void **)metadata = s;
556 memset(p, 0, len);
557 return p;
558}
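
/*
 * Usage sketch (hypothetical caller): allocating a free-list ring might look
 * like
 *
 *	fl->desc = alloc_ring(adap->pdev, fl->size, sizeof(struct rx_desc),
 *			      sizeof(struct rx_sw_desc), &fl->phys_addr,
 *			      &fl->sdesc);
 *
 * where the HW ring lands in fl->desc/fl->phys_addr and the per-entry SW
 * state array in fl->sdesc.
 */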
559
560/**
Divy Le Ray204e2f92008-05-06 19:26:01 -0700561 * t3_reset_qset - reset a sge qset
562 * @q: the queue set
563 *
564 * Reset the qset structure.
565 * The NAPI structure is preserved in the event of
566 * the qset's reincarnation, for example during EEH recovery.
567 */
568static void t3_reset_qset(struct sge_qset *q)
569{
570 if (q->adap &&
571 !(q->adap->flags & NAPI_INIT)) {
572 memset(q, 0, sizeof(*q));
573 return;
574 }
575
576 q->adap = NULL;
577 memset(&q->rspq, 0, sizeof(q->rspq));
578 memset(q->fl, 0, sizeof(struct sge_fl) * SGE_RXQ_PER_SET);
579 memset(q->txq, 0, sizeof(struct sge_txq) * SGE_TXQ_PER_SET);
580 q->txq_stopped = 0;
581 memset(&q->tx_reclaim_timer, 0, sizeof(q->tx_reclaim_timer));
582}
583
584
585/**
Divy Le Ray4d22de32007-01-18 22:04:14 -0500586 * free_qset - free the resources of an SGE queue set
587 * @adapter: the adapter owning the queue set
588 * @q: the queue set
589 *
590 * Release the HW and SW resources associated with an SGE queue set, such
591 * as HW contexts, packet buffers, and descriptor rings. Traffic to the
592 * queue set must be quiesced prior to calling this.
593 */
Stephen Hemminger9265fab2007-10-08 16:22:29 -0700594static void t3_free_qset(struct adapter *adapter, struct sge_qset *q)
Divy Le Ray4d22de32007-01-18 22:04:14 -0500595{
596 int i;
597 struct pci_dev *pdev = adapter->pdev;
598
599 if (q->tx_reclaim_timer.function)
600 del_timer_sync(&q->tx_reclaim_timer);
601
602 for (i = 0; i < SGE_RXQ_PER_SET; ++i)
603 if (q->fl[i].desc) {
Roland Dreierb1186de2008-03-20 13:30:48 -0700604 spin_lock_irq(&adapter->sge.reg_lock);
Divy Le Ray4d22de32007-01-18 22:04:14 -0500605 t3_sge_disable_fl(adapter, q->fl[i].cntxt_id);
Roland Dreierb1186de2008-03-20 13:30:48 -0700606 spin_unlock_irq(&adapter->sge.reg_lock);
Divy Le Ray4d22de32007-01-18 22:04:14 -0500607 free_rx_bufs(pdev, &q->fl[i]);
608 kfree(q->fl[i].sdesc);
609 dma_free_coherent(&pdev->dev,
610 q->fl[i].size *
611 sizeof(struct rx_desc), q->fl[i].desc,
612 q->fl[i].phys_addr);
613 }
614
615 for (i = 0; i < SGE_TXQ_PER_SET; ++i)
616 if (q->txq[i].desc) {
Roland Dreierb1186de2008-03-20 13:30:48 -0700617 spin_lock_irq(&adapter->sge.reg_lock);
Divy Le Ray4d22de32007-01-18 22:04:14 -0500618 t3_sge_enable_ecntxt(adapter, q->txq[i].cntxt_id, 0);
Roland Dreierb1186de2008-03-20 13:30:48 -0700619 spin_unlock_irq(&adapter->sge.reg_lock);
Divy Le Ray4d22de32007-01-18 22:04:14 -0500620 if (q->txq[i].sdesc) {
621 free_tx_desc(adapter, &q->txq[i],
622 q->txq[i].in_use);
623 kfree(q->txq[i].sdesc);
624 }
625 dma_free_coherent(&pdev->dev,
626 q->txq[i].size *
627 sizeof(struct tx_desc),
628 q->txq[i].desc, q->txq[i].phys_addr);
629 __skb_queue_purge(&q->txq[i].sendq);
630 }
631
632 if (q->rspq.desc) {
Roland Dreierb1186de2008-03-20 13:30:48 -0700633 spin_lock_irq(&adapter->sge.reg_lock);
Divy Le Ray4d22de32007-01-18 22:04:14 -0500634 t3_sge_disable_rspcntxt(adapter, q->rspq.cntxt_id);
Roland Dreierb1186de2008-03-20 13:30:48 -0700635 spin_unlock_irq(&adapter->sge.reg_lock);
Divy Le Ray4d22de32007-01-18 22:04:14 -0500636 dma_free_coherent(&pdev->dev,
637 q->rspq.size * sizeof(struct rsp_desc),
638 q->rspq.desc, q->rspq.phys_addr);
639 }
640
Divy Le Ray204e2f92008-05-06 19:26:01 -0700641 t3_reset_qset(q);
Divy Le Ray4d22de32007-01-18 22:04:14 -0500642}
643
644/**
645 * init_qset_cntxt - initialize an SGE queue set context info
646 * @qs: the queue set
647 * @id: the queue set id
648 *
649 * Initializes the TIDs and context ids for the queues of a queue set.
650 */
651static void init_qset_cntxt(struct sge_qset *qs, unsigned int id)
652{
653 qs->rspq.cntxt_id = id;
654 qs->fl[0].cntxt_id = 2 * id;
655 qs->fl[1].cntxt_id = 2 * id + 1;
656 qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
657 qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
658 qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
659 qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
660 qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
661}
662
663/**
664 * sgl_len - calculates the size of an SGL of the given capacity
665 * @n: the number of SGL entries
666 *
667 * Calculates the number of flits needed for a scatter/gather list that
668 * can hold the given number of entries.
669 */
670static inline unsigned int sgl_len(unsigned int n)
671{
672 /* alternatively: 3 * (n / 2) + 2 * (n & 1) */
673 return (3 * n) / 2 + (n & 1);
674}
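
/*
 * Example (illustrative): pairs of SGL entries share 3 flits and a trailing
 * odd entry needs 2 more, so sgl_len(2) == 3 and sgl_len(3) == 5.
 */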
675
676/**
677 * flits_to_desc - returns the num of Tx descriptors for the given flits
678 * @n: the number of flits
679 *
680 * Calculates the number of Tx descriptors needed for the supplied number
681 * of flits.
682 */
683static inline unsigned int flits_to_desc(unsigned int n)
684{
685 BUG_ON(n >= ARRAY_SIZE(flit_desc_map));
686 return flit_desc_map[n];
687}
688
689/**
Divy Le Raycf992af2007-05-30 21:10:47 -0700690 * get_packet - return the next ingress packet buffer from a free list
691 * @adap: the adapter that received the packet
692 * @fl: the SGE free list holding the packet
693 * @len: the packet length including any SGE padding
694 * @drop_thres: # of remaining buffers before we start dropping packets
695 *
696 * Get the next packet from a free list and complete setup of the
697 * sk_buff. If the packet is small we make a copy and recycle the
698 * original buffer, otherwise we use the original buffer itself. If a
699 * positive drop threshold is supplied packets are dropped and their
700 * buffers recycled if (a) the number of remaining buffers is under the
701 * threshold and the packet is too big to copy, or (b) the packet should
702 * be copied but there is no memory for the copy.
703 */
704static struct sk_buff *get_packet(struct adapter *adap, struct sge_fl *fl,
705 unsigned int len, unsigned int drop_thres)
706{
707 struct sk_buff *skb = NULL;
708 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
709
710 prefetch(sd->skb->data);
711 fl->credits--;
712
713 if (len <= SGE_RX_COPY_THRES) {
714 skb = alloc_skb(len, GFP_ATOMIC);
715 if (likely(skb != NULL)) {
716 __skb_put(skb, len);
717 pci_dma_sync_single_for_cpu(adap->pdev,
718 pci_unmap_addr(sd, dma_addr), len,
719 PCI_DMA_FROMDEVICE);
720 memcpy(skb->data, sd->skb->data, len);
721 pci_dma_sync_single_for_device(adap->pdev,
722 pci_unmap_addr(sd, dma_addr), len,
723 PCI_DMA_FROMDEVICE);
724 } else if (!drop_thres)
725 goto use_orig_buf;
726recycle:
727 recycle_rx_buf(adap, fl, fl->cidx);
728 return skb;
729 }
730
731 if (unlikely(fl->credits < drop_thres))
732 goto recycle;
733
734use_orig_buf:
735 pci_unmap_single(adap->pdev, pci_unmap_addr(sd, dma_addr),
736 fl->buf_size, PCI_DMA_FROMDEVICE);
737 skb = sd->skb;
738 skb_put(skb, len);
739 __refill_fl(adap, fl);
740 return skb;
741}
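
/*
 * Behavior summary (illustrative): with SGE_RX_COPY_THRES at 256, a 128-byte
 * ingress packet is copied into a freshly allocated skb and its free-list
 * buffer recycled, while a larger packet consumes the original buffer
 * itself; if, however, the free list has fewer than drop_thres credits the
 * packet is dropped and its buffer recycled instead.
 */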
742
743/**
744 * get_packet_pg - return the next ingress packet buffer from a free list
745 * @adap: the adapter that received the packet
746 * @fl: the SGE free list holding the packet
747 * @len: the packet length including any SGE padding
748 * @drop_thres: # of remaining buffers before we start dropping packets
749 *
750 * Get the next packet from a free list populated with page chunks.
751 * If the packet is small we make a copy and recycle the original buffer,
752 * otherwise we attach the original buffer as a page fragment to a fresh
753 * sk_buff. If a positive drop threshold is supplied packets are dropped
754 * and their buffers recycled if (a) the number of remaining buffers is
755 * under the threshold and the packet is too big to copy, or (b) there's
756 * no system memory.
757 *
758 * Note: this function is similar to get_packet() but deals with Rx buffers
759 * that are page chunks rather than sk_buffs.
760 */
761static struct sk_buff *get_packet_pg(struct adapter *adap, struct sge_fl *fl,
762 unsigned int len, unsigned int drop_thres)
763{
764 struct sk_buff *skb = NULL;
765 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
766
767 if (len <= SGE_RX_COPY_THRES) {
768 skb = alloc_skb(len, GFP_ATOMIC);
769 if (likely(skb != NULL)) {
770 __skb_put(skb, len);
771 pci_dma_sync_single_for_cpu(adap->pdev,
772 pci_unmap_addr(sd, dma_addr), len,
773 PCI_DMA_FROMDEVICE);
774 memcpy(skb->data, sd->pg_chunk.va, len);
775 pci_dma_sync_single_for_device(adap->pdev,
776 pci_unmap_addr(sd, dma_addr), len,
777 PCI_DMA_FROMDEVICE);
778 } else if (!drop_thres)
779 return NULL;
780recycle:
781 fl->credits--;
782 recycle_rx_buf(adap, fl, fl->cidx);
783 return skb;
784 }
785
786 if (unlikely(fl->credits <= drop_thres))
787 goto recycle;
788
789 skb = alloc_skb(SGE_RX_PULL_LEN, GFP_ATOMIC);
790 if (unlikely(!skb)) {
791 if (!drop_thres)
792 return NULL;
793 goto recycle;
794 }
795
796 pci_unmap_single(adap->pdev, pci_unmap_addr(sd, dma_addr),
797 fl->buf_size, PCI_DMA_FROMDEVICE);
798 __skb_put(skb, SGE_RX_PULL_LEN);
799 memcpy(skb->data, sd->pg_chunk.va, SGE_RX_PULL_LEN);
800 skb_fill_page_desc(skb, 0, sd->pg_chunk.page,
801 sd->pg_chunk.offset + SGE_RX_PULL_LEN,
802 len - SGE_RX_PULL_LEN);
803 skb->len = len;
804 skb->data_len = len - SGE_RX_PULL_LEN;
805 skb->truesize += skb->data_len;
806
807 fl->credits--;
808 /*
809 * We do not refill FLs here, we let the caller do it to overlap a
810 * prefetch.
811 */
812 return skb;
813}
814
815/**
Divy Le Ray4d22de32007-01-18 22:04:14 -0500816 * get_imm_packet - return the next ingress packet buffer from a response
817 * @resp: the response descriptor containing the packet data
818 *
819 * Return a packet containing the immediate data of the given response.
820 */
821static inline struct sk_buff *get_imm_packet(const struct rsp_desc *resp)
822{
823 struct sk_buff *skb = alloc_skb(IMMED_PKT_SIZE, GFP_ATOMIC);
824
825 if (skb) {
826 __skb_put(skb, IMMED_PKT_SIZE);
Arnaldo Carvalho de Melo27d7ff42007-03-31 11:55:19 -0300827 skb_copy_to_linear_data(skb, resp->imm_data, IMMED_PKT_SIZE);
Divy Le Ray4d22de32007-01-18 22:04:14 -0500828 }
829 return skb;
830}
831
832/**
833 * calc_tx_descs - calculate the number of Tx descriptors for a packet
834 * @skb: the packet
835 *
836 * Returns the number of Tx descriptors needed for the given Ethernet
837 * packet. Ethernet packets require addition of WR and CPL headers.
838 */
839static inline unsigned int calc_tx_descs(const struct sk_buff *skb)
840{
841 unsigned int flits;
842
843 if (skb->len <= WR_LEN - sizeof(struct cpl_tx_pkt))
844 return 1;
845
846 flits = sgl_len(skb_shinfo(skb)->nr_frags + 1) + 2;
847 if (skb_shinfo(skb)->gso_size)
848 flits++;
849 return flits_to_desc(flits);
850}
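
/*
 * Example (illustrative): a non-TSO packet with no page fragments that is
 * too long for immediate data needs sgl_len(0 + 1) + 2 == 4 flits, which
 * flits_to_desc() maps to a single Tx descriptor.
 */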
851
852/**
853 * make_sgl - populate a scatter/gather list for a packet
854 * @skb: the packet
855 * @sgp: the SGL to populate
856 * @start: start address of skb main body data to include in the SGL
857 * @len: length of skb main body data to include in the SGL
858 * @pdev: the PCI device
859 *
860 * Generates a scatter/gather list for the buffers that make up a packet
861 * and returns the SGL size in 8-byte words. The caller must size the SGL
862 * appropriately.
863 */
864static inline unsigned int make_sgl(const struct sk_buff *skb,
865 struct sg_ent *sgp, unsigned char *start,
866 unsigned int len, struct pci_dev *pdev)
867{
868 dma_addr_t mapping;
869 unsigned int i, j = 0, nfrags;
870
871 if (len) {
872 mapping = pci_map_single(pdev, start, len, PCI_DMA_TODEVICE);
873 sgp->len[0] = cpu_to_be32(len);
874 sgp->addr[0] = cpu_to_be64(mapping);
875 j = 1;
876 }
877
878 nfrags = skb_shinfo(skb)->nr_frags;
879 for (i = 0; i < nfrags; i++) {
880 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
881
882 mapping = pci_map_page(pdev, frag->page, frag->page_offset,
883 frag->size, PCI_DMA_TODEVICE);
884 sgp->len[j] = cpu_to_be32(frag->size);
885 sgp->addr[j] = cpu_to_be64(mapping);
886 j ^= 1;
887 if (j == 0)
888 ++sgp;
889 }
890 if (j)
891 sgp->len[j] = 0;
892 return ((nfrags + (len != 0)) * 3) / 2 + j;
893}
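
/*
 * Example (illustrative): a packet with a non-empty main body and two page
 * fragments yields three SGL entries, and the function returns 5 flits,
 * matching sgl_len(3).
 */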
894
895/**
896 * check_ring_tx_db - check and potentially ring a Tx queue's doorbell
897 * @adap: the adapter
898 * @q: the Tx queue
899 *
900 * Ring the doorbell if a Tx queue is asleep. There is a natural race
901 * where the HW goes to sleep just after we check; in that case the
902 * interrupt handler will detect the outstanding TX packet and ring the
903 * doorbell for us.
904 *
905 * When GTS is disabled we unconditionally ring the doorbell.
906 */
907static inline void check_ring_tx_db(struct adapter *adap, struct sge_txq *q)
908{
909#if USE_GTS
910 clear_bit(TXQ_LAST_PKT_DB, &q->flags);
911 if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
912 set_bit(TXQ_LAST_PKT_DB, &q->flags);
913 t3_write_reg(adap, A_SG_KDOORBELL,
914 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
915 }
916#else
917 wmb(); /* write descriptors before telling HW */
918 t3_write_reg(adap, A_SG_KDOORBELL,
919 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
920#endif
921}
922
923static inline void wr_gen2(struct tx_desc *d, unsigned int gen)
924{
925#if SGE_NUM_GENBITS == 2
926 d->flit[TX_DESC_FLITS - 1] = cpu_to_be64(gen);
927#endif
928}
929
930/**
931 * write_wr_hdr_sgl - write a WR header and, optionally, SGL
932 * @ndesc: number of Tx descriptors spanned by the SGL
933 * @skb: the packet corresponding to the WR
934 * @d: first Tx descriptor to be written
935 * @pidx: index of above descriptors
936 * @q: the SGE Tx queue
937 * @sgl: the SGL
938 * @flits: number of flits to the start of the SGL in the first descriptor
939 * @sgl_flits: the SGL size in flits
940 * @gen: the Tx descriptor generation
941 * @wr_hi: top 32 bits of WR header based on WR type (big endian)
942 * @wr_lo: low 32 bits of WR header based on WR type (big endian)
943 *
944 * Write a work request header and an associated SGL. If the SGL is
945 * small enough to fit into one Tx descriptor it has already been written
946 * and we just need to write the WR header. Otherwise we distribute the
947 * SGL across the number of descriptors it spans.
948 */
949static void write_wr_hdr_sgl(unsigned int ndesc, struct sk_buff *skb,
950 struct tx_desc *d, unsigned int pidx,
951 const struct sge_txq *q,
952 const struct sg_ent *sgl,
953 unsigned int flits, unsigned int sgl_flits,
Al Virofb8e4442007-08-23 03:04:12 -0400954 unsigned int gen, __be32 wr_hi,
955 __be32 wr_lo)
Divy Le Ray4d22de32007-01-18 22:04:14 -0500956{
957 struct work_request_hdr *wrp = (struct work_request_hdr *)d;
958 struct tx_sw_desc *sd = &q->sdesc[pidx];
959
960 sd->skb = skb;
961 if (need_skb_unmap()) {
Divy Le Ray23561c92007-11-16 11:22:05 -0800962 sd->fragidx = 0;
963 sd->addr_idx = 0;
964 sd->sflit = flits;
Divy Le Ray4d22de32007-01-18 22:04:14 -0500965 }
966
967 if (likely(ndesc == 1)) {
Divy Le Ray23561c92007-11-16 11:22:05 -0800968 sd->eop = 1;
Divy Le Ray4d22de32007-01-18 22:04:14 -0500969 wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
970 V_WR_SGLSFLT(flits)) | wr_hi;
971 wmb();
972 wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) |
973 V_WR_GEN(gen)) | wr_lo;
974 wr_gen2(d, gen);
975 } else {
976 unsigned int ogen = gen;
977 const u64 *fp = (const u64 *)sgl;
978 struct work_request_hdr *wp = wrp;
979
980 wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
981 V_WR_SGLSFLT(flits)) | wr_hi;
982
983 while (sgl_flits) {
984 unsigned int avail = WR_FLITS - flits;
985
986 if (avail > sgl_flits)
987 avail = sgl_flits;
988 memcpy(&d->flit[flits], fp, avail * sizeof(*fp));
989 sgl_flits -= avail;
990 ndesc--;
991 if (!sgl_flits)
992 break;
993
994 fp += avail;
995 d++;
Divy Le Ray23561c92007-11-16 11:22:05 -0800996 sd->eop = 0;
Divy Le Ray4d22de32007-01-18 22:04:14 -0500997 sd++;
998 if (++pidx == q->size) {
999 pidx = 0;
1000 gen ^= 1;
1001 d = q->desc;
1002 sd = q->sdesc;
1003 }
1004
1005 sd->skb = skb;
1006 wrp = (struct work_request_hdr *)d;
1007 wrp->wr_hi = htonl(V_WR_DATATYPE(1) |
1008 V_WR_SGLSFLT(1)) | wr_hi;
1009 wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS,
1010 sgl_flits + 1)) |
1011 V_WR_GEN(gen)) | wr_lo;
1012 wr_gen2(d, gen);
1013 flits = 1;
1014 }
Divy Le Ray23561c92007-11-16 11:22:05 -08001015 sd->eop = 1;
Divy Le Ray4d22de32007-01-18 22:04:14 -05001016 wrp->wr_hi |= htonl(F_WR_EOP);
1017 wmb();
1018 wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
1019 wr_gen2((struct tx_desc *)wp, ogen);
1020 WARN_ON(ndesc != 0);
1021 }
1022}
1023
1024/**
1025 * write_tx_pkt_wr - write a TX_PKT work request
1026 * @adap: the adapter
1027 * @skb: the packet to send
1028 * @pi: the egress interface
1029 * @pidx: index of the first Tx descriptor to write
1030 * @gen: the generation value to use
1031 * @q: the Tx queue
1032 * @ndesc: number of descriptors the packet will occupy
1033 * @compl: the value of the COMPL bit to use
1034 *
1035 * Generate a TX_PKT work request to send the supplied packet.
1036 */
1037static void write_tx_pkt_wr(struct adapter *adap, struct sk_buff *skb,
1038 const struct port_info *pi,
1039 unsigned int pidx, unsigned int gen,
1040 struct sge_txq *q, unsigned int ndesc,
1041 unsigned int compl)
1042{
1043 unsigned int flits, sgl_flits, cntrl, tso_info;
1044 struct sg_ent *sgp, sgl[MAX_SKB_FRAGS / 2 + 1];
1045 struct tx_desc *d = &q->desc[pidx];
1046 struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)d;
1047
1048 cpl->len = htonl(skb->len | 0x80000000);
1049 cntrl = V_TXPKT_INTF(pi->port_id);
1050
1051 if (vlan_tx_tag_present(skb) && pi->vlan_grp)
1052 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(vlan_tx_tag_get(skb));
1053
1054 tso_info = V_LSO_MSS(skb_shinfo(skb)->gso_size);
1055 if (tso_info) {
1056 int eth_type;
1057 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)cpl;
1058
1059 d->flit[2] = 0;
1060 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
1061 hdr->cntrl = htonl(cntrl);
Arnaldo Carvalho de Melobbe735e2007-03-10 22:16:10 -03001062 eth_type = skb_network_offset(skb) == ETH_HLEN ?
Divy Le Ray4d22de32007-01-18 22:04:14 -05001063 CPL_ETH_II : CPL_ETH_II_VLAN;
1064 tso_info |= V_LSO_ETH_TYPE(eth_type) |
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001065 V_LSO_IPHDR_WORDS(ip_hdr(skb)->ihl) |
Arnaldo Carvalho de Meloaa8223c2007-04-10 21:04:22 -07001066 V_LSO_TCPHDR_WORDS(tcp_hdr(skb)->doff);
Divy Le Ray4d22de32007-01-18 22:04:14 -05001067 hdr->lso_info = htonl(tso_info);
1068 flits = 3;
1069 } else {
1070 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1071 cntrl |= F_TXPKT_IPCSUM_DIS; /* SW calculates IP csum */
1072 cntrl |= V_TXPKT_L4CSUM_DIS(skb->ip_summed != CHECKSUM_PARTIAL);
1073 cpl->cntrl = htonl(cntrl);
1074
1075 if (skb->len <= WR_LEN - sizeof(*cpl)) {
1076 q->sdesc[pidx].skb = NULL;
1077 if (!skb->data_len)
Arnaldo Carvalho de Melod626f622007-03-27 18:55:52 -03001078 skb_copy_from_linear_data(skb, &d->flit[2],
1079 skb->len);
Divy Le Ray4d22de32007-01-18 22:04:14 -05001080 else
1081 skb_copy_bits(skb, 0, &d->flit[2], skb->len);
1082
1083 flits = (skb->len + 7) / 8 + 2;
1084 cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(skb->len & 7) |
1085 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT)
1086 | F_WR_SOP | F_WR_EOP | compl);
1087 wmb();
1088 cpl->wr.wr_lo = htonl(V_WR_LEN(flits) | V_WR_GEN(gen) |
1089 V_WR_TID(q->token));
1090 wr_gen2(d, gen);
1091 kfree_skb(skb);
1092 return;
1093 }
1094
1095 flits = 2;
1096 }
1097
1098 sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
1099 sgl_flits = make_sgl(skb, sgp, skb->data, skb_headlen(skb), adap->pdev);
Divy Le Ray4d22de32007-01-18 22:04:14 -05001100
1101 write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits, gen,
1102 htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | compl),
1103 htonl(V_WR_TID(q->token)));
1104}
1105
Krishna Kumara8cc21f2008-01-30 12:30:16 +05301106static inline void t3_stop_queue(struct net_device *dev, struct sge_qset *qs,
1107 struct sge_txq *q)
1108{
1109 netif_stop_queue(dev);
1110 set_bit(TXQ_ETH, &qs->txq_stopped);
1111 q->stops++;
1112}
1113
Divy Le Ray4d22de32007-01-18 22:04:14 -05001114/**
1115 * eth_xmit - add a packet to the Ethernet Tx queue
1116 * @skb: the packet
1117 * @dev: the egress net device
1118 *
1119 * Add a packet to an SGE Tx queue. Runs with softirqs disabled.
1120 */
1121int t3_eth_xmit(struct sk_buff *skb, struct net_device *dev)
1122{
1123 unsigned int ndesc, pidx, credits, gen, compl;
1124 const struct port_info *pi = netdev_priv(dev);
Divy Le Ray5fbf8162007-08-29 19:15:47 -07001125 struct adapter *adap = pi->adapter;
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001126 struct sge_qset *qs = pi->qs;
Divy Le Ray4d22de32007-01-18 22:04:14 -05001127 struct sge_txq *q = &qs->txq[TXQ_ETH];
1128
1129 /*
1130 * The chip min packet length is 9 octets but play safe and reject
1131 * anything shorter than an Ethernet header.
1132 */
1133 if (unlikely(skb->len < ETH_HLEN)) {
1134 dev_kfree_skb(skb);
1135 return NETDEV_TX_OK;
1136 }
1137
1138 spin_lock(&q->lock);
1139 reclaim_completed_tx(adap, q);
1140
1141 credits = q->size - q->in_use;
1142 ndesc = calc_tx_descs(skb);
1143
1144 if (unlikely(credits < ndesc)) {
Krishna Kumara8cc21f2008-01-30 12:30:16 +05301145 t3_stop_queue(dev, qs, q);
1146 dev_err(&adap->pdev->dev,
1147 "%s: Tx ring %u full while queue awake!\n",
1148 dev->name, q->cntxt_id & 7);
Divy Le Ray4d22de32007-01-18 22:04:14 -05001149 spin_unlock(&q->lock);
1150 return NETDEV_TX_BUSY;
1151 }
1152
1153 q->in_use += ndesc;
Divy Le Raycd7e9032008-03-13 00:13:30 -07001154 if (unlikely(credits - ndesc < q->stop_thres)) {
1155 t3_stop_queue(dev, qs, q);
1156
1157 if (should_restart_tx(q) &&
1158 test_and_clear_bit(TXQ_ETH, &qs->txq_stopped)) {
1159 q->restarts++;
1160 netif_wake_queue(dev);
1161 }
1162 }
Divy Le Ray4d22de32007-01-18 22:04:14 -05001163
1164 gen = q->gen;
1165 q->unacked += ndesc;
1166 compl = (q->unacked & 8) << (S_WR_COMPL - 3);
1167 q->unacked &= 7;
1168 pidx = q->pidx;
1169 q->pidx += ndesc;
1170 if (q->pidx >= q->size) {
1171 q->pidx -= q->size;
1172 q->gen ^= 1;
1173 }
1174
1175 /* update port statistics */
1176 if (skb->ip_summed == CHECKSUM_COMPLETE)
1177 qs->port_stats[SGE_PSTAT_TX_CSUM]++;
1178 if (skb_shinfo(skb)->gso_size)
1179 qs->port_stats[SGE_PSTAT_TSO]++;
1180 if (vlan_tx_tag_present(skb) && pi->vlan_grp)
1181 qs->port_stats[SGE_PSTAT_VLANINS]++;
1182
1183 dev->trans_start = jiffies;
1184 spin_unlock(&q->lock);
1185
1186 /*
1187 * We do not use Tx completion interrupts to free DMAd Tx packets.
1188 * This is good for performance but means that we rely on new Tx
1189 * packets arriving to run the destructors of completed packets,
1190 * which open up space in their sockets' send queues. Sometimes
1191 * we do not get such new packets, causing Tx to stall. A single
1192 * UDP transmitter is a good example of this situation. We have
1193 * a clean up timer that periodically reclaims completed packets
1194 * but it doesn't run often enough (nor do we want it to) to prevent
1195 * lengthy stalls. A solution to this problem is to run the
1196 * destructor early, after the packet is queued but before it's DMAd.
1197 * A downside is that we lie to socket memory accounting, but the amount
1198 * of extra memory is reasonable (limited by the number of Tx
1199 * descriptors), the packets do actually get freed quickly by new
1200 * packets almost always, and for protocols like TCP that wait for
1201 * acks to really free up the data, the extra memory is even less.
1202 * On the positive side we run the destructors on the sending CPU
1203 * rather than on a potentially different completing CPU, usually a
1204 * good thing. We also run them without holding our Tx queue lock,
1205 * unlike what reclaim_completed_tx() would otherwise do.
1206 *
1207 * Run the destructor before telling the DMA engine about the packet
1208 * to make sure it doesn't complete and get freed prematurely.
1209 */
1210 if (likely(!skb_shared(skb)))
1211 skb_orphan(skb);
1212
1213 write_tx_pkt_wr(adap, skb, pi, pidx, gen, q, ndesc, compl);
1214 check_ring_tx_db(adap, q);
1215 return NETDEV_TX_OK;
1216}
1217
1218/**
1219 * write_imm - write a packet into a Tx descriptor as immediate data
1220 * @d: the Tx descriptor to write
1221 * @skb: the packet
1222 * @len: the length of packet data to write as immediate data
1223 * @gen: the generation bit value to write
1224 *
1225 * Writes a packet as immediate data into a Tx descriptor. The packet
1226 * contains a work request at its beginning. We must write the packet
Divy Le Ray27186dc2007-08-21 20:49:15 -07001227 * carefully so the SGE doesn't read it accidentally before it's written
1228 * in its entirety.
Divy Le Ray4d22de32007-01-18 22:04:14 -05001229 */
1230static inline void write_imm(struct tx_desc *d, struct sk_buff *skb,
1231 unsigned int len, unsigned int gen)
1232{
1233 struct work_request_hdr *from = (struct work_request_hdr *)skb->data;
1234 struct work_request_hdr *to = (struct work_request_hdr *)d;
1235
Divy Le Ray27186dc2007-08-21 20:49:15 -07001236 if (likely(!skb->data_len))
1237 memcpy(&to[1], &from[1], len - sizeof(*from));
1238 else
1239 skb_copy_bits(skb, sizeof(*from), &to[1], len - sizeof(*from));
1240
Divy Le Ray4d22de32007-01-18 22:04:14 -05001241 to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP |
1242 V_WR_BCNTLFLT(len & 7));
1243 wmb();
1244 to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) |
1245 V_WR_LEN((len + 7) / 8));
1246 wr_gen2(d, gen);
1247 kfree_skb(skb);
1248}
1249
1250/**
1251 * check_desc_avail - check descriptor availability on a send queue
1252 * @adap: the adapter
1253 * @q: the send queue
1254 * @skb: the packet needing the descriptors
1255 * @ndesc: the number of Tx descriptors needed
1256 * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
1257 *
1258 * Checks if the requested number of Tx descriptors is available on an
1259 * SGE send queue. If the queue is already suspended or not enough
1260 * descriptors are available the packet is queued for later transmission.
1261 * Must be called with the Tx queue locked.
1262 *
1263 * Returns 0 if enough descriptors are available, 1 if there aren't
1264 * enough descriptors and the packet has been queued, and 2 if the caller
1265 * needs to retry because there weren't enough descriptors at the
1266 * beginning of the call but some freed up in the mean time.
1267 */
1268static inline int check_desc_avail(struct adapter *adap, struct sge_txq *q,
1269 struct sk_buff *skb, unsigned int ndesc,
1270 unsigned int qid)
1271{
1272 if (unlikely(!skb_queue_empty(&q->sendq))) {
1273 addq_exit:__skb_queue_tail(&q->sendq, skb);
1274 return 1;
1275 }
1276 if (unlikely(q->size - q->in_use < ndesc)) {
1277 struct sge_qset *qs = txq_to_qset(q, qid);
1278
1279 set_bit(qid, &qs->txq_stopped);
1280 smp_mb__after_clear_bit();
1281
1282 if (should_restart_tx(q) &&
1283 test_and_clear_bit(qid, &qs->txq_stopped))
1284 return 2;
1285
1286 q->stops++;
1287 goto addq_exit;
1288 }
1289 return 0;
1290}
1291
1292/**
1293 * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
1294 * @q: the SGE control Tx queue
1295 *
1296 * This is a variant of reclaim_completed_tx() that is used for Tx queues
1297 * that send only immediate data (presently just the control queues) and
1298 * thus do not have any sk_buffs to release.
1299 */
1300static inline void reclaim_completed_tx_imm(struct sge_txq *q)
1301{
1302 unsigned int reclaim = q->processed - q->cleaned;
1303
1304 q->in_use -= reclaim;
1305 q->cleaned += reclaim;
1306}
1307
1308static inline int immediate(const struct sk_buff *skb)
1309{
Divy Le Ray27186dc2007-08-21 20:49:15 -07001310 return skb->len <= WR_LEN;
Divy Le Ray4d22de32007-01-18 22:04:14 -05001311}
1312
1313/**
1314 * ctrl_xmit - send a packet through an SGE control Tx queue
1315 * @adap: the adapter
1316 * @q: the control queue
1317 * @skb: the packet
1318 *
1319 * Send a packet through an SGE control Tx queue. Packets sent through
1320 * a control queue must fit entirely as immediate data in a single Tx
1321 * descriptor and have no page fragments.
1322 */
1323static int ctrl_xmit(struct adapter *adap, struct sge_txq *q,
1324 struct sk_buff *skb)
1325{
1326 int ret;
1327 struct work_request_hdr *wrp = (struct work_request_hdr *)skb->data;
1328
1329 if (unlikely(!immediate(skb))) {
1330 WARN_ON(1);
1331 dev_kfree_skb(skb);
1332 return NET_XMIT_SUCCESS;
1333 }
1334
1335 wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP);
1336 wrp->wr_lo = htonl(V_WR_TID(q->token));
1337
1338 spin_lock(&q->lock);
1339 again:reclaim_completed_tx_imm(q);
1340
1341 ret = check_desc_avail(adap, q, skb, 1, TXQ_CTRL);
1342 if (unlikely(ret)) {
1343 if (ret == 1) {
1344 spin_unlock(&q->lock);
1345 return NET_XMIT_CN;
1346 }
1347 goto again;
1348 }
1349
1350 write_imm(&q->desc[q->pidx], skb, skb->len, q->gen);
1351
1352 q->in_use++;
1353 if (++q->pidx >= q->size) {
1354 q->pidx = 0;
1355 q->gen ^= 1;
1356 }
1357 spin_unlock(&q->lock);
1358 wmb();
1359 t3_write_reg(adap, A_SG_KDOORBELL,
1360 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1361 return NET_XMIT_SUCCESS;
1362}
1363
1364/**
1365 * restart_ctrlq - restart a suspended control queue
1366 * @qs: the queue set containing the control queue
1367 *
1368 * Resumes transmission on a suspended Tx control queue.
1369 */
1370static void restart_ctrlq(unsigned long data)
1371{
1372 struct sk_buff *skb;
1373 struct sge_qset *qs = (struct sge_qset *)data;
1374 struct sge_txq *q = &qs->txq[TXQ_CTRL];
Divy Le Ray4d22de32007-01-18 22:04:14 -05001375
1376 spin_lock(&q->lock);
1377 again:reclaim_completed_tx_imm(q);
1378
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001379 while (q->in_use < q->size &&
1380 (skb = __skb_dequeue(&q->sendq)) != NULL) {
Divy Le Ray4d22de32007-01-18 22:04:14 -05001381
1382 write_imm(&q->desc[q->pidx], skb, skb->len, q->gen);
1383
1384 if (++q->pidx >= q->size) {
1385 q->pidx = 0;
1386 q->gen ^= 1;
1387 }
1388 q->in_use++;
1389 }
1390
1391 if (!skb_queue_empty(&q->sendq)) {
1392 set_bit(TXQ_CTRL, &qs->txq_stopped);
1393 smp_mb__after_clear_bit();
1394
1395 if (should_restart_tx(q) &&
1396 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
1397 goto again;
1398 q->stops++;
1399 }
1400
1401 spin_unlock(&q->lock);
Divy Le Rayafefce62007-11-16 11:22:21 -08001402 wmb();
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001403 t3_write_reg(qs->adap, A_SG_KDOORBELL,
Divy Le Ray4d22de32007-01-18 22:04:14 -05001404 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1405}
1406
Divy Le Ray14ab9892007-01-30 19:43:50 -08001407/*
1408 * Send a management message through control queue 0
1409 */
1410int t3_mgmt_tx(struct adapter *adap, struct sk_buff *skb)
1411{
Divy Le Ray204e2f92008-05-06 19:26:01 -07001412 int ret;
Divy Le Raybc4b6b522007-12-17 18:47:41 -08001413 local_bh_disable();
1414 ret = ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], skb);
1415 local_bh_enable();
1416
1417 return ret;
Divy Le Ray14ab9892007-01-30 19:43:50 -08001418}
1419
Divy Le Ray4d22de32007-01-18 22:04:14 -05001420/**
Divy Le Ray99d7cf32007-02-24 16:44:06 -08001421 * deferred_unmap_destructor - unmap a packet when it is freed
1422 * @skb: the packet
1423 *
1424 * This is the packet destructor used for Tx packets that need to remain
1425 * mapped until they are freed rather than until their Tx descriptors are
1426 * freed.
1427 */
1428static void deferred_unmap_destructor(struct sk_buff *skb)
1429{
1430 int i;
1431 const dma_addr_t *p;
1432 const struct skb_shared_info *si;
1433 const struct deferred_unmap_info *dui;
Divy Le Ray99d7cf32007-02-24 16:44:06 -08001434
1435 dui = (struct deferred_unmap_info *)skb->head;
1436 p = dui->addr;
1437
Divy Le Ray23561c92007-11-16 11:22:05 -08001438 if (skb->tail - skb->transport_header)
1439 pci_unmap_single(dui->pdev, *p++,
1440 skb->tail - skb->transport_header,
1441 PCI_DMA_TODEVICE);
Divy Le Ray99d7cf32007-02-24 16:44:06 -08001442
1443 si = skb_shinfo(skb);
1444 for (i = 0; i < si->nr_frags; i++)
1445 pci_unmap_page(dui->pdev, *p++, si->frags[i].size,
1446 PCI_DMA_TODEVICE);
1447}
1448
1449static void setup_deferred_unmapping(struct sk_buff *skb, struct pci_dev *pdev,
1450 const struct sg_ent *sgl, int sgl_flits)
1451{
1452 dma_addr_t *p;
1453 struct deferred_unmap_info *dui;
1454
1455 dui = (struct deferred_unmap_info *)skb->head;
1456 dui->pdev = pdev;
1457 for (p = dui->addr; sgl_flits >= 3; sgl++, sgl_flits -= 3) {
1458 *p++ = be64_to_cpu(sgl->addr[0]);
1459 *p++ = be64_to_cpu(sgl->addr[1]);
1460 }
1461 if (sgl_flits)
1462 *p = be64_to_cpu(sgl->addr[0]);
1463}
1464
1465/**
Divy Le Ray4d22de32007-01-18 22:04:14 -05001466 * write_ofld_wr - write an offload work request
1467 * @adap: the adapter
1468 * @skb: the packet to send
1469 * @q: the Tx queue
1470 * @pidx: index of the first Tx descriptor to write
1471 * @gen: the generation value to use
1472 * @ndesc: number of descriptors the packet will occupy
1473 *
1474 * Write an offload work request to send the supplied packet. The packet
1475 * data already carry the work request with most fields populated.
1476 */
1477static void write_ofld_wr(struct adapter *adap, struct sk_buff *skb,
1478 struct sge_txq *q, unsigned int pidx,
1479 unsigned int gen, unsigned int ndesc)
1480{
1481 unsigned int sgl_flits, flits;
1482 struct work_request_hdr *from;
1483 struct sg_ent *sgp, sgl[MAX_SKB_FRAGS / 2 + 1];
1484 struct tx_desc *d = &q->desc[pidx];
1485
1486 if (immediate(skb)) {
1487 q->sdesc[pidx].skb = NULL;
1488 write_imm(d, skb, skb->len, gen);
1489 return;
1490 }
1491
1492 /* Only TX_DATA builds SGLs */
1493
1494 from = (struct work_request_hdr *)skb->data;
Arnaldo Carvalho de Meloea2ae172007-04-25 17:55:53 -07001495 memcpy(&d->flit[1], &from[1],
1496 skb_transport_offset(skb) - sizeof(*from));
Divy Le Ray4d22de32007-01-18 22:04:14 -05001497
Arnaldo Carvalho de Meloea2ae172007-04-25 17:55:53 -07001498 flits = skb_transport_offset(skb) / 8;
Divy Le Ray4d22de32007-01-18 22:04:14 -05001499 sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
Arnaldo Carvalho de Melo9c702202007-04-25 18:04:18 -07001500 sgl_flits = make_sgl(skb, sgp, skb_transport_header(skb),
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001501 skb->tail - skb->transport_header,
Divy Le Ray4d22de32007-01-18 22:04:14 -05001502 adap->pdev);
Divy Le Ray99d7cf32007-02-24 16:44:06 -08001503 if (need_skb_unmap()) {
1504 setup_deferred_unmapping(skb, adap->pdev, sgp, sgl_flits);
1505 skb->destructor = deferred_unmap_destructor;
Divy Le Ray99d7cf32007-02-24 16:44:06 -08001506 }
Divy Le Ray4d22de32007-01-18 22:04:14 -05001507
1508 write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits,
1509 gen, from->wr_hi, from->wr_lo);
1510}
1511
1512/**
1513 * calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
1514 * @skb: the packet
1515 *
1516 * Returns the number of Tx descriptors needed for the given offload
1517 * packet. These packets are already fully constructed.
1518 */
1519static inline unsigned int calc_tx_descs_ofld(const struct sk_buff *skb)
1520{
Divy Le Ray27186dc2007-08-21 20:49:15 -07001521 unsigned int flits, cnt;
Divy Le Ray4d22de32007-01-18 22:04:14 -05001522
Divy Le Ray27186dc2007-08-21 20:49:15 -07001523 if (skb->len <= WR_LEN)
Divy Le Ray4d22de32007-01-18 22:04:14 -05001524 return 1; /* packet fits as immediate data */
1525
Arnaldo Carvalho de Meloea2ae172007-04-25 17:55:53 -07001526 flits = skb_transport_offset(skb) / 8; /* headers */
Divy Le Ray27186dc2007-08-21 20:49:15 -07001527 cnt = skb_shinfo(skb)->nr_frags;
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001528 if (skb->tail != skb->transport_header)
Divy Le Ray4d22de32007-01-18 22:04:14 -05001529 cnt++;
1530 return flits_to_desc(flits + sgl_len(cnt));
1531}
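
/*
 * Example (illustrative): an offload packet longer than WR_LEN whose
 * pre-built headers end 40 bytes in (5 flits) and that carries one page
 * fragment with no data between the transport header and skb->tail needs
 * flits_to_desc(5 + sgl_len(1)) == flits_to_desc(7) == 1 descriptor.
 */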
1532
1533/**
1534 * ofld_xmit - send a packet through an offload queue
1535 * @adap: the adapter
1536 * @q: the Tx offload queue
1537 * @skb: the packet
1538 *
1539 * Send an offload packet through an SGE offload queue.
1540 */
1541static int ofld_xmit(struct adapter *adap, struct sge_txq *q,
1542 struct sk_buff *skb)
1543{
1544 int ret;
1545 unsigned int ndesc = calc_tx_descs_ofld(skb), pidx, gen;
1546
1547 spin_lock(&q->lock);
1548 again:reclaim_completed_tx(adap, q);
1549
1550 ret = check_desc_avail(adap, q, skb, ndesc, TXQ_OFLD);
1551 if (unlikely(ret)) {
1552 if (ret == 1) {
1553 skb->priority = ndesc; /* save for restart */
1554 spin_unlock(&q->lock);
1555 return NET_XMIT_CN;
1556 }
1557 goto again;
1558 }
1559
1560 gen = q->gen;
1561 q->in_use += ndesc;
1562 pidx = q->pidx;
1563 q->pidx += ndesc;
1564 if (q->pidx >= q->size) {
1565 q->pidx -= q->size;
1566 q->gen ^= 1;
1567 }
1568 spin_unlock(&q->lock);
1569
1570 write_ofld_wr(adap, skb, q, pidx, gen, ndesc);
1571 check_ring_tx_db(adap, q);
1572 return NET_XMIT_SUCCESS;
1573}
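
/*
 * Note that when check_desc_avail() reports the queue as busy (return
 * value 1) the skb is left on the queue's sendq with its descriptor
 * count saved in skb->priority; restart_offloadq() below reads that
 * count back when it drains the sendq.
 */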
1574
1575/**
1576 * restart_offloadq - restart a suspended offload queue
1577 * @qs: the queue set containing the offload queue
1578 *
1579 * Resumes transmission on a suspended Tx offload queue.
1580 */
1581static void restart_offloadq(unsigned long data)
1582{
1583 struct sk_buff *skb;
1584 struct sge_qset *qs = (struct sge_qset *)data;
1585 struct sge_txq *q = &qs->txq[TXQ_OFLD];
Divy Le Ray5fbf8162007-08-29 19:15:47 -07001586 const struct port_info *pi = netdev_priv(qs->netdev);
1587 struct adapter *adap = pi->adapter;
Divy Le Ray4d22de32007-01-18 22:04:14 -05001588
1589 spin_lock(&q->lock);
1590 again:reclaim_completed_tx(adap, q);
1591
1592 while ((skb = skb_peek(&q->sendq)) != NULL) {
1593 unsigned int gen, pidx;
1594 unsigned int ndesc = skb->priority;
1595
1596 if (unlikely(q->size - q->in_use < ndesc)) {
1597 set_bit(TXQ_OFLD, &qs->txq_stopped);
1598 smp_mb__after_clear_bit();
1599
1600 if (should_restart_tx(q) &&
1601 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
1602 goto again;
1603 q->stops++;
1604 break;
1605 }
1606
1607 gen = q->gen;
1608 q->in_use += ndesc;
1609 pidx = q->pidx;
1610 q->pidx += ndesc;
1611 if (q->pidx >= q->size) {
1612 q->pidx -= q->size;
1613 q->gen ^= 1;
1614 }
1615 __skb_unlink(skb, &q->sendq);
1616 spin_unlock(&q->lock);
1617
1618 write_ofld_wr(adap, skb, q, pidx, gen, ndesc);
1619 spin_lock(&q->lock);
1620 }
1621 spin_unlock(&q->lock);
1622
1623#if USE_GTS
1624 set_bit(TXQ_RUNNING, &q->flags);
1625 set_bit(TXQ_LAST_PKT_DB, &q->flags);
1626#endif
Divy Le Rayafefce62007-11-16 11:22:21 -08001627 wmb();
Divy Le Ray4d22de32007-01-18 22:04:14 -05001628 t3_write_reg(adap, A_SG_KDOORBELL,
1629 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1630}
1631
1632/**
1633 * queue_set - return the queue set a packet should use
1634 * @skb: the packet
1635 *
1636 * Maps a packet to the SGE queue set it should use. The desired queue
1637 * set is carried in bits 1-3 of the packet's priority.
1638 */
1639static inline int queue_set(const struct sk_buff *skb)
1640{
1641 return skb->priority >> 1;
1642}
1643
1644/**
1645 * is_ctrl_pkt - return whether an offload packet is a control packet
1646 * @skb: the packet
1647 *
1648 * Determines whether an offload packet should use an OFLD or a CTRL
1649 * Tx queue. This is indicated by bit 0 in the packet's priority.
1650 */
1651static inline int is_ctrl_pkt(const struct sk_buff *skb)
1652{
1653 return skb->priority & 1;
1654}
1655
1656/**
1657 * t3_offload_tx - send an offload packet
1658 * @tdev: the offload device to send to
1659 * @skb: the packet
1660 *
1661 * Sends an offload packet. We use the packet priority to select the
1662 * appropriate Tx queue as follows: bit 0 indicates whether the packet
1663 * should be sent as regular or control, bits 1-3 select the queue set.
1664 */
1665int t3_offload_tx(struct t3cdev *tdev, struct sk_buff *skb)
1666{
1667 struct adapter *adap = tdev2adap(tdev);
1668 struct sge_qset *qs = &adap->sge.qs[queue_set(skb)];
1669
1670 if (unlikely(is_ctrl_pkt(skb)))
1671 return ctrl_xmit(adap, &qs->txq[TXQ_CTRL], skb);
1672
1673 return ofld_xmit(adap, &qs->txq[TXQ_OFLD], skb);
1674}
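
/*
 * Illustrative sketch (not part of the driver): an offload client that
 * wants its packet sent on queue set 2 as a regular, non-control offload
 * packet would encode that choice as
 *
 *	skb->priority = (2 << 1) | 0;
 *
 * before calling t3_offload_tx(); bit 0 selects the CTRL queue and
 * bits 1-3 select the queue set, matching is_ctrl_pkt() and queue_set()
 * above.
 */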
1675
1676/**
1677 * offload_enqueue - add an offload packet to an SGE offload receive queue
1678 * @q: the SGE response queue
1679 * @skb: the packet
1680 *
1681 * Add a new offload packet to an SGE response queue's offload packet
1682 * queue. If the packet is the first on the queue it schedules the RX
1683 * softirq to process the queue.
1684 */
1685static inline void offload_enqueue(struct sge_rspq *q, struct sk_buff *skb)
1686{
1687 skb->next = skb->prev = NULL;
1688 if (q->rx_tail)
1689 q->rx_tail->next = skb;
1690 else {
1691 struct sge_qset *qs = rspq_to_qset(q);
1692
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001693 napi_schedule(&qs->napi);
Divy Le Ray4d22de32007-01-18 22:04:14 -05001694 q->rx_head = skb;
1695 }
1696 q->rx_tail = skb;
1697}
1698
1699/**
1700 * deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts
1701 * @tdev: the offload device that will be receiving the packets
1702 * @q: the SGE response queue that assembled the bundle
1703 * @skbs: the partial bundle
1704 * @n: the number of packets in the bundle
1705 *
1706 * Delivers a (partial) bundle of Rx offload packets to an offload device.
1707 */
1708static inline void deliver_partial_bundle(struct t3cdev *tdev,
1709 struct sge_rspq *q,
1710 struct sk_buff *skbs[], int n)
1711{
1712 if (n) {
1713 q->offload_bundles++;
1714 tdev->recv(tdev, skbs, n);
1715 }
1716}
1717
1718/**
1719 * ofld_poll - NAPI handler for offload packets in interrupt mode
1720 * @napi: the napi instance
1721 * @budget: polling budget
1722 *
1723 * The NAPI handler for offload packets when a response queue is serviced
1724 * by the hard interrupt handler, i.e., when it's operating in non-polling
1725 * mode. Creates small packet batches and sends them through the offload
1726 * receive handler. Batches need to be of modest size as we do prefetches
1727 * on the packets in each.
1728 */
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001729static int ofld_poll(struct napi_struct *napi, int budget)
Divy Le Ray4d22de32007-01-18 22:04:14 -05001730{
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001731 struct sge_qset *qs = container_of(napi, struct sge_qset, napi);
Divy Le Ray4d22de32007-01-18 22:04:14 -05001732 struct sge_rspq *q = &qs->rspq;
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001733 struct adapter *adapter = qs->adap;
1734 int work_done = 0;
Divy Le Ray4d22de32007-01-18 22:04:14 -05001735
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001736 while (work_done < budget) {
Divy Le Ray4d22de32007-01-18 22:04:14 -05001737 struct sk_buff *head, *tail, *skbs[RX_BUNDLE_SIZE];
1738 int ngathered;
1739
1740 spin_lock_irq(&q->lock);
1741 head = q->rx_head;
1742 if (!head) {
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001743 napi_complete(napi);
Divy Le Ray4d22de32007-01-18 22:04:14 -05001744 spin_unlock_irq(&q->lock);
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001745 return work_done;
Divy Le Ray4d22de32007-01-18 22:04:14 -05001746 }
1747
1748 tail = q->rx_tail;
1749 q->rx_head = q->rx_tail = NULL;
1750 spin_unlock_irq(&q->lock);
1751
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001752 for (ngathered = 0; work_done < budget && head; work_done++) {
Divy Le Ray4d22de32007-01-18 22:04:14 -05001753 prefetch(head->data);
1754 skbs[ngathered] = head;
1755 head = head->next;
1756 skbs[ngathered]->next = NULL;
1757 if (++ngathered == RX_BUNDLE_SIZE) {
1758 q->offload_bundles++;
1759 adapter->tdev.recv(&adapter->tdev, skbs,
1760 ngathered);
1761 ngathered = 0;
1762 }
1763 }
1764 if (head) { /* splice remaining packets back onto Rx queue */
1765 spin_lock_irq(&q->lock);
1766 tail->next = q->rx_head;
1767 if (!q->rx_head)
1768 q->rx_tail = tail;
1769 q->rx_head = head;
1770 spin_unlock_irq(&q->lock);
1771 }
1772 deliver_partial_bundle(&adapter->tdev, q, skbs, ngathered);
1773 }
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001774
1775 return work_done;
Divy Le Ray4d22de32007-01-18 22:04:14 -05001776}
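
/*
 * Each skb dequeued here counts against the NAPI budget, so one poll
 * hands the offload layer at most @budget packets, delivered in bundles
 * of up to RX_BUNDLE_SIZE with any partial bundle flushed through
 * deliver_partial_bundle() at the end of the pass.
 */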
1777
1778/**
1779 * rx_offload - process a received offload packet
1780 * @tdev: the offload device receiving the packet
1781 * @rq: the response queue that received the packet
1782 * @skb: the packet
1783 * @rx_gather: a gather list of packets if we are building a bundle
1784 * @gather_idx: index of the next available slot in the bundle
1785 *
1786 * Process an ingress offload packet and add it to the offload ingress
1787 * queue. Returns the index of the next available slot in the bundle.
1788 */
1789static inline int rx_offload(struct t3cdev *tdev, struct sge_rspq *rq,
1790 struct sk_buff *skb, struct sk_buff *rx_gather[],
1791 unsigned int gather_idx)
1792{
Arnaldo Carvalho de Melo459a98e2007-03-19 15:30:44 -07001793 skb_reset_mac_header(skb);
Arnaldo Carvalho de Meloc1d2bbe2007-04-10 20:45:18 -07001794 skb_reset_network_header(skb);
Arnaldo Carvalho de Melobadff6d2007-03-13 13:06:52 -03001795 skb_reset_transport_header(skb);
Divy Le Ray4d22de32007-01-18 22:04:14 -05001796
1797 if (rq->polling) {
1798 rx_gather[gather_idx++] = skb;
1799 if (gather_idx == RX_BUNDLE_SIZE) {
1800 tdev->recv(tdev, rx_gather, RX_BUNDLE_SIZE);
1801 gather_idx = 0;
1802 rq->offload_bundles++;
1803 }
1804 } else
1805 offload_enqueue(rq, skb);
1806
1807 return gather_idx;
1808}
1809
1810/**
Divy Le Ray4d22de32007-01-18 22:04:14 -05001811 * restart_tx - check whether to restart suspended Tx queues
1812 * @qs: the queue set to resume
1813 *
1814 * Restarts suspended Tx queues of an SGE queue set if they have enough
1815 * free resources to resume operation.
1816 */
1817static void restart_tx(struct sge_qset *qs)
1818{
1819 if (test_bit(TXQ_ETH, &qs->txq_stopped) &&
1820 should_restart_tx(&qs->txq[TXQ_ETH]) &&
1821 test_and_clear_bit(TXQ_ETH, &qs->txq_stopped)) {
1822 qs->txq[TXQ_ETH].restarts++;
1823 if (netif_running(qs->netdev))
1824 netif_wake_queue(qs->netdev);
1825 }
1826
1827 if (test_bit(TXQ_OFLD, &qs->txq_stopped) &&
1828 should_restart_tx(&qs->txq[TXQ_OFLD]) &&
1829 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
1830 qs->txq[TXQ_OFLD].restarts++;
1831 tasklet_schedule(&qs->txq[TXQ_OFLD].qresume_tsk);
1832 }
1833 if (test_bit(TXQ_CTRL, &qs->txq_stopped) &&
1834 should_restart_tx(&qs->txq[TXQ_CTRL]) &&
1835 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
1836 qs->txq[TXQ_CTRL].restarts++;
1837 tasklet_schedule(&qs->txq[TXQ_CTRL].qresume_tsk);
1838 }
1839}
1840
1841/**
1842 * rx_eth - process an ingress ethernet packet
1843 * @adap: the adapter
1844 * @rq: the response queue that received the packet
1845 * @skb: the packet
1846 * @pad: amount of padding at the start of the buffer
1847 *
1848 * Process an ingress ethernet packet and deliver it to the stack.
1849 * The padding is 2 if the packet was delivered in an Rx buffer and 0
1850 * if it was immediate data in a response.
1851 */
1852static void rx_eth(struct adapter *adap, struct sge_rspq *rq,
1853 struct sk_buff *skb, int pad)
1854{
1855 struct cpl_rx_pkt *p = (struct cpl_rx_pkt *)(skb->data + pad);
1856 struct port_info *pi;
1857
Divy Le Ray4d22de32007-01-18 22:04:14 -05001858 skb_pull(skb, sizeof(*p) + pad);
Arnaldo Carvalho de Melo4c13eb62007-04-25 17:40:23 -07001859 skb->protocol = eth_type_trans(skb, adap->port[p->iff]);
Divy Le Raye360b562007-05-30 10:01:29 -07001860 skb->dev->last_rx = jiffies;
Divy Le Ray4d22de32007-01-18 22:04:14 -05001861 pi = netdev_priv(skb->dev);
Al Viro05e5c112007-12-22 18:56:23 +00001862 if (pi->rx_csum_offload && p->csum_valid && p->csum == htons(0xffff) &&
Divy Le Ray4d22de32007-01-18 22:04:14 -05001863 !p->fragment) {
1864 rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
1865 skb->ip_summed = CHECKSUM_UNNECESSARY;
1866 } else
1867 skb->ip_summed = CHECKSUM_NONE;
1868
1869 if (unlikely(p->vlan_valid)) {
1870 struct vlan_group *grp = pi->vlan_grp;
1871
1872 rspq_to_qset(rq)->port_stats[SGE_PSTAT_VLANEX]++;
1873 if (likely(grp))
1874 __vlan_hwaccel_rx(skb, grp, ntohs(p->vlan),
1875 rq->polling);
1876 else
1877 dev_kfree_skb_any(skb);
1878 } else if (rq->polling)
1879 netif_receive_skb(skb);
1880 else
1881 netif_rx(skb);
1882}
1883
1884/**
1885 * handle_rsp_cntrl_info - handles control information in a response
1886 * @qs: the queue set corresponding to the response
1887 * @flags: the response control flags
Divy Le Ray4d22de32007-01-18 22:04:14 -05001888 *
1889 * Handles the control information of an SGE response, such as GTS
1890 * indications and completion credits for the queue set's Tx queues.
Divy Le Ray6195c712007-01-30 19:43:56 -08001891 * HW coalesces credits, we don't do any extra SW coalescing.
Divy Le Ray4d22de32007-01-18 22:04:14 -05001892 */
Divy Le Ray6195c712007-01-30 19:43:56 -08001893static inline void handle_rsp_cntrl_info(struct sge_qset *qs, u32 flags)
Divy Le Ray4d22de32007-01-18 22:04:14 -05001894{
1895 unsigned int credits;
1896
1897#if USE_GTS
1898 if (flags & F_RSPD_TXQ0_GTS)
1899 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
1900#endif
1901
Divy Le Ray4d22de32007-01-18 22:04:14 -05001902 credits = G_RSPD_TXQ0_CR(flags);
1903 if (credits)
1904 qs->txq[TXQ_ETH].processed += credits;
1905
Divy Le Ray6195c712007-01-30 19:43:56 -08001906 credits = G_RSPD_TXQ2_CR(flags);
1907 if (credits)
1908 qs->txq[TXQ_CTRL].processed += credits;
1909
Divy Le Ray4d22de32007-01-18 22:04:14 -05001910# if USE_GTS
1911 if (flags & F_RSPD_TXQ1_GTS)
1912 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
1913# endif
Divy Le Ray6195c712007-01-30 19:43:56 -08001914 credits = G_RSPD_TXQ1_CR(flags);
1915 if (credits)
1916 qs->txq[TXQ_OFLD].processed += credits;
Divy Le Ray4d22de32007-01-18 22:04:14 -05001917}
1918
1919/**
1920 * check_ring_db - check if we need to ring any doorbells
1921 * @adapter: the adapter
1922 * @qs: the queue set whose Tx queues are to be examined
1923 * @sleeping: indicates which Tx queue sent GTS
1924 *
1925 * Checks if some of a queue set's Tx queues need to ring their doorbells
1926 * to resume transmission after idling while they still have unprocessed
1927 * descriptors.
1928 */
1929static void check_ring_db(struct adapter *adap, struct sge_qset *qs,
1930 unsigned int sleeping)
1931{
1932 if (sleeping & F_RSPD_TXQ0_GTS) {
1933 struct sge_txq *txq = &qs->txq[TXQ_ETH];
1934
1935 if (txq->cleaned + txq->in_use != txq->processed &&
1936 !test_and_set_bit(TXQ_LAST_PKT_DB, &txq->flags)) {
1937 set_bit(TXQ_RUNNING, &txq->flags);
1938 t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX |
1939 V_EGRCNTX(txq->cntxt_id));
1940 }
1941 }
1942
1943 if (sleeping & F_RSPD_TXQ1_GTS) {
1944 struct sge_txq *txq = &qs->txq[TXQ_OFLD];
1945
1946 if (txq->cleaned + txq->in_use != txq->processed &&
1947 !test_and_set_bit(TXQ_LAST_PKT_DB, &txq->flags)) {
1948 set_bit(TXQ_RUNNING, &txq->flags);
1949 t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX |
1950 V_EGRCNTX(txq->cntxt_id));
1951 }
1952 }
1953}
1954
1955/**
1956 * is_new_response - check if a response is newly written
1957 * @r: the response descriptor
1958 * @q: the response queue
1959 *
1960 * Returns true if a response descriptor contains a yet unprocessed
1961 * response.
1962 */
1963static inline int is_new_response(const struct rsp_desc *r,
1964 const struct sge_rspq *q)
1965{
1966 return (r->intr_gen & F_RSPD_GEN2) == q->gen;
1967}
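
/*
 * The generation check above guards against reading stale descriptors:
 * the queue starts with gen set to 1 and flips it every time cidx wraps
 * (see process_responses()), and the hardware writes the current
 * generation into each response it posts, so an entry left over from the
 * previous lap still carries the old generation and is not treated as
 * new.
 */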
1968
1969#define RSPD_GTS_MASK (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
1970#define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
1971 V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
1972 V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
1973 V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))
1974
1975/* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
1976#define NOMEM_INTR_DELAY 2500
1977
1978/**
1979 * process_responses - process responses from an SGE response queue
1980 * @adap: the adapter
1981 * @qs: the queue set to which the response queue belongs
1982 * @budget: how many responses can be processed in this round
1983 *
1984 * Process responses from an SGE response queue up to the supplied budget.
1985 * Responses include received packets as well as credits and other events
1986 * for the queues that belong to the response queue's queue set.
1987 * A negative budget is effectively unlimited.
1988 *
1989 * Additionally choose the interrupt holdoff time for the next interrupt
1990 * on this queue. If the system is under memory shortage use a fairly
1991 * long delay to help recovery.
1992 */
1993static int process_responses(struct adapter *adap, struct sge_qset *qs,
1994 int budget)
1995{
1996 struct sge_rspq *q = &qs->rspq;
1997 struct rsp_desc *r = &q->desc[q->cidx];
1998 int budget_left = budget;
Divy Le Ray6195c712007-01-30 19:43:56 -08001999 unsigned int sleeping = 0;
Divy Le Ray4d22de32007-01-18 22:04:14 -05002000 struct sk_buff *offload_skbs[RX_BUNDLE_SIZE];
2001 int ngathered = 0;
2002
2003 q->next_holdoff = q->holdoff_tmr;
2004
2005 while (likely(budget_left && is_new_response(r, q))) {
Divy Le Raye0994eb2007-02-24 16:44:17 -08002006 int eth, ethpad = 2;
Divy Le Ray4d22de32007-01-18 22:04:14 -05002007 struct sk_buff *skb = NULL;
2008 u32 len, flags = ntohl(r->flags);
Al Viro05e5c112007-12-22 18:56:23 +00002009 __be32 rss_hi = *(const __be32 *)r, rss_lo = r->rss_hdr.rss_hash_val;
Divy Le Ray4d22de32007-01-18 22:04:14 -05002010
2011 eth = r->rss_hdr.opcode == CPL_RX_PKT;
2012
2013 if (unlikely(flags & F_RSPD_ASYNC_NOTIF)) {
2014 skb = alloc_skb(AN_PKT_SIZE, GFP_ATOMIC);
2015 if (!skb)
2016 goto no_mem;
2017
2018 memcpy(__skb_put(skb, AN_PKT_SIZE), r, AN_PKT_SIZE);
2019 skb->data[0] = CPL_ASYNC_NOTIF;
2020 rss_hi = htonl(CPL_ASYNC_NOTIF << 24);
2021 q->async_notif++;
2022 } else if (flags & F_RSPD_IMM_DATA_VALID) {
2023 skb = get_imm_packet(r);
2024 if (unlikely(!skb)) {
Divy Le Raycf992af2007-05-30 21:10:47 -07002025no_mem:
Divy Le Ray4d22de32007-01-18 22:04:14 -05002026 q->next_holdoff = NOMEM_INTR_DELAY;
2027 q->nomem++;
2028 /* consume one credit since we tried */
2029 budget_left--;
2030 break;
2031 }
2032 q->imm_data++;
Divy Le Raye0994eb2007-02-24 16:44:17 -08002033 ethpad = 0;
Divy Le Ray4d22de32007-01-18 22:04:14 -05002034 } else if ((len = ntohl(r->len_cq)) != 0) {
Divy Le Raycf992af2007-05-30 21:10:47 -07002035 struct sge_fl *fl;
Divy Le Ray4d22de32007-01-18 22:04:14 -05002036
Divy Le Raycf992af2007-05-30 21:10:47 -07002037 fl = (len & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2038 if (fl->use_pages) {
2039 void *addr = fl->sdesc[fl->cidx].pg_chunk.va;
Divy Le Raye0994eb2007-02-24 16:44:17 -08002040
Divy Le Raycf992af2007-05-30 21:10:47 -07002041 prefetch(addr);
2042#if L1_CACHE_BYTES < 128
2043 prefetch(addr + L1_CACHE_BYTES);
2044#endif
Divy Le Raye0994eb2007-02-24 16:44:17 -08002045 __refill_fl(adap, fl);
2046
Divy Le Raycf992af2007-05-30 21:10:47 -07002047 skb = get_packet_pg(adap, fl, G_RSPD_LEN(len),
2048 eth ? SGE_RX_DROP_THRES : 0);
2049 } else
Divy Le Raye0994eb2007-02-24 16:44:17 -08002050 skb = get_packet(adap, fl, G_RSPD_LEN(len),
2051 eth ? SGE_RX_DROP_THRES : 0);
Divy Le Raycf992af2007-05-30 21:10:47 -07002052 if (unlikely(!skb)) {
2053 if (!eth)
2054 goto no_mem;
2055 q->rx_drops++;
2056 } else if (unlikely(r->rss_hdr.opcode == CPL_TRACE_PKT))
2057 __skb_pull(skb, 2);
Divy Le Raye0994eb2007-02-24 16:44:17 -08002058
Divy Le Ray4d22de32007-01-18 22:04:14 -05002059 if (++fl->cidx == fl->size)
2060 fl->cidx = 0;
2061 } else
2062 q->pure_rsps++;
2063
2064 if (flags & RSPD_CTRL_MASK) {
2065 sleeping |= flags & RSPD_GTS_MASK;
Divy Le Ray6195c712007-01-30 19:43:56 -08002066 handle_rsp_cntrl_info(qs, flags);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002067 }
2068
2069 r++;
2070 if (unlikely(++q->cidx == q->size)) {
2071 q->cidx = 0;
2072 q->gen ^= 1;
2073 r = q->desc;
2074 }
2075 prefetch(r);
2076
2077 if (++q->credits >= (q->size / 4)) {
2078 refill_rspq(adap, q, q->credits);
2079 q->credits = 0;
2080 }
2081
Divy Le Raycf992af2007-05-30 21:10:47 -07002082 if (likely(skb != NULL)) {
Divy Le Ray4d22de32007-01-18 22:04:14 -05002083 if (eth)
2084 rx_eth(adap, q, skb, ethpad);
2085 else {
Divy Le Rayafefce62007-11-16 11:22:21 -08002086 q->offload_pkts++;
Divy Le Raycf992af2007-05-30 21:10:47 -07002087 /* Preserve the RSS info in csum & priority */
2088 skb->csum = rss_hi;
2089 skb->priority = rss_lo;
2090 ngathered = rx_offload(&adap->tdev, q, skb,
2091 offload_skbs,
Divy Le Raye0994eb2007-02-24 16:44:17 -08002092 ngathered);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002093 }
2094 }
Divy Le Ray4d22de32007-01-18 22:04:14 -05002095 --budget_left;
2096 }
2097
Divy Le Ray4d22de32007-01-18 22:04:14 -05002098 deliver_partial_bundle(&adap->tdev, q, offload_skbs, ngathered);
2099 if (sleeping)
2100 check_ring_db(adap, qs, sleeping);
2101
2102 smp_mb(); /* commit Tx queue .processed updates */
2103 if (unlikely(qs->txq_stopped != 0))
2104 restart_tx(qs);
2105
2106 budget -= budget_left;
2107 return budget;
2108}
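
/*
 * For a positive budget the value returned above is the number of
 * responses actually consumed (the original budget minus whatever was
 * left over), which is what napi_rx_handler() reports back to the NAPI
 * core as its work_done.
 */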
2109
2110static inline int is_pure_response(const struct rsp_desc *r)
2111{
2112 u32 n = ntohl(r->flags) & (F_RSPD_ASYNC_NOTIF | F_RSPD_IMM_DATA_VALID);
2113
2114 return (n | r->len_cq) == 0;
2115}
2116
2117/**
2118 * napi_rx_handler - the NAPI handler for Rx processing
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002119 * @napi: the napi instance
Divy Le Ray4d22de32007-01-18 22:04:14 -05002120 * @budget: how many packets we can process in this round
2121 *
2122 * Handler for new data events when using NAPI.
2123 */
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002124static int napi_rx_handler(struct napi_struct *napi, int budget)
Divy Le Ray4d22de32007-01-18 22:04:14 -05002125{
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002126 struct sge_qset *qs = container_of(napi, struct sge_qset, napi);
2127 struct adapter *adap = qs->adap;
2128 int work_done = process_responses(adap, qs, budget);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002129
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002130 if (likely(work_done < budget)) {
2131 napi_complete(napi);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002132
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002133 /*
2134 * Because we don't atomically flush the following
2135 * write it is possible that in very rare cases it can
2136 * reach the device in a way that races with a new
2137 * response being written plus an error interrupt
2138 * causing the NAPI interrupt handler below to return
2139 * unhandled status to the OS. To protect against
2140 * this would require flushing the write and doing
2141 * both the write and the flush with interrupts off.
2142 * Way too expensive and unjustifiable given the
2143 * rarity of the race.
2144 *
2145 * The race cannot happen at all with MSI-X.
2146 */
2147 t3_write_reg(adap, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
2148 V_NEWTIMER(qs->rspq.next_holdoff) |
2149 V_NEWINDEX(qs->rspq.cidx));
2150 }
2151 return work_done;
Divy Le Ray4d22de32007-01-18 22:04:14 -05002152}
2153
2154/*
2155 * Returns true if the device is already scheduled for polling.
2156 */
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002157static inline int napi_is_scheduled(struct napi_struct *napi)
Divy Le Ray4d22de32007-01-18 22:04:14 -05002158{
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002159 return test_bit(NAPI_STATE_SCHED, &napi->state);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002160}
2161
2162/**
2163 * process_pure_responses - process pure responses from a response queue
2164 * @adap: the adapter
2165 * @qs: the queue set owning the response queue
2166 * @r: the first pure response to process
2167 *
2168 * A simpler version of process_responses() that handles only pure (i.e.,
2169 * non data-carrying) responses. Such responses are too light-weight to
2170 * justify calling a softirq under NAPI, so we handle them specially in
2171 * the interrupt handler. The function is called with a pointer to a
2172 * response, which the caller must ensure is a valid pure response.
2173 *
2174 * Returns 1 if it encounters a valid data-carrying response, 0 otherwise.
2175 */
2176static int process_pure_responses(struct adapter *adap, struct sge_qset *qs,
2177 struct rsp_desc *r)
2178{
2179 struct sge_rspq *q = &qs->rspq;
Divy Le Ray6195c712007-01-30 19:43:56 -08002180 unsigned int sleeping = 0;
Divy Le Ray4d22de32007-01-18 22:04:14 -05002181
2182 do {
2183 u32 flags = ntohl(r->flags);
2184
2185 r++;
2186 if (unlikely(++q->cidx == q->size)) {
2187 q->cidx = 0;
2188 q->gen ^= 1;
2189 r = q->desc;
2190 }
2191 prefetch(r);
2192
2193 if (flags & RSPD_CTRL_MASK) {
2194 sleeping |= flags & RSPD_GTS_MASK;
Divy Le Ray6195c712007-01-30 19:43:56 -08002195 handle_rsp_cntrl_info(qs, flags);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002196 }
2197
2198 q->pure_rsps++;
2199 if (++q->credits >= (q->size / 4)) {
2200 refill_rspq(adap, q, q->credits);
2201 q->credits = 0;
2202 }
2203 } while (is_new_response(r, q) && is_pure_response(r));
2204
Divy Le Ray4d22de32007-01-18 22:04:14 -05002205 if (sleeping)
2206 check_ring_db(adap, qs, sleeping);
2207
2208 smp_mb(); /* commit Tx queue .processed updates */
2209 if (unlikely(qs->txq_stopped != 0))
2210 restart_tx(qs);
2211
2212 return is_new_response(r, q);
2213}
2214
2215/**
2216 * handle_responses - decide what to do with new responses in NAPI mode
2217 * @adap: the adapter
2218 * @q: the response queue
2219 *
2220 * This is used by the NAPI interrupt handlers to decide what to do with
2221 * new SGE responses. If there are no new responses it returns -1. If
2222 * there are new responses and they are pure (i.e., non-data carrying)
2223 * it handles them straight in hard interrupt context as they are very
2224 * cheap and don't deliver any packets. Finally, if there are any data
2225 * signaling responses it schedules the NAPI handler. Returns 1 if it
2226 * schedules NAPI, 0 if all new responses were pure.
2227 *
2228 * The caller must ascertain NAPI is not already running.
2229 */
2230static inline int handle_responses(struct adapter *adap, struct sge_rspq *q)
2231{
2232 struct sge_qset *qs = rspq_to_qset(q);
2233 struct rsp_desc *r = &q->desc[q->cidx];
2234
2235 if (!is_new_response(r, q))
2236 return -1;
2237 if (is_pure_response(r) && process_pure_responses(adap, qs, r) == 0) {
2238 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2239 V_NEWTIMER(q->holdoff_tmr) | V_NEWINDEX(q->cidx));
2240 return 0;
2241 }
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002242 napi_schedule(&qs->napi);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002243 return 1;
2244}
2245
2246/*
2247 * The MSI-X interrupt handler for an SGE response queue for the non-NAPI case
2248 * (i.e., response queue serviced in hard interrupt).
2249 */
2250irqreturn_t t3_sge_intr_msix(int irq, void *cookie)
2251{
2252 struct sge_qset *qs = cookie;
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002253 struct adapter *adap = qs->adap;
Divy Le Ray4d22de32007-01-18 22:04:14 -05002254 struct sge_rspq *q = &qs->rspq;
2255
2256 spin_lock(&q->lock);
2257 if (process_responses(adap, qs, -1) == 0)
2258 q->unhandled_irqs++;
2259 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2260 V_NEWTIMER(q->next_holdoff) | V_NEWINDEX(q->cidx));
2261 spin_unlock(&q->lock);
2262 return IRQ_HANDLED;
2263}
2264
2265/*
2266 * The MSI-X interrupt handler for an SGE response queue for the NAPI case
2267 * (i.e., response queue serviced by NAPI polling).
2268 */
Stephen Hemminger9265fab2007-10-08 16:22:29 -07002269static irqreturn_t t3_sge_intr_msix_napi(int irq, void *cookie)
Divy Le Ray4d22de32007-01-18 22:04:14 -05002270{
2271 struct sge_qset *qs = cookie;
Divy Le Ray4d22de32007-01-18 22:04:14 -05002272 struct sge_rspq *q = &qs->rspq;
2273
2274 spin_lock(&q->lock);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002275
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002276 if (handle_responses(qs->adap, q) < 0)
Divy Le Ray4d22de32007-01-18 22:04:14 -05002277 q->unhandled_irqs++;
2278 spin_unlock(&q->lock);
2279 return IRQ_HANDLED;
2280}
2281
2282/*
2283 * The non-NAPI MSI interrupt handler. This needs to handle data events from
2284 * SGE response queues as well as error and other async events as they all use
2285 * the same MSI vector. We use one SGE response queue per port in this mode
2286 * and protect all response queues with queue 0's lock.
2287 */
2288static irqreturn_t t3_intr_msi(int irq, void *cookie)
2289{
2290 int new_packets = 0;
2291 struct adapter *adap = cookie;
2292 struct sge_rspq *q = &adap->sge.qs[0].rspq;
2293
2294 spin_lock(&q->lock);
2295
2296 if (process_responses(adap, &adap->sge.qs[0], -1)) {
2297 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2298 V_NEWTIMER(q->next_holdoff) | V_NEWINDEX(q->cidx));
2299 new_packets = 1;
2300 }
2301
2302 if (adap->params.nports == 2 &&
2303 process_responses(adap, &adap->sge.qs[1], -1)) {
2304 struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
2305
2306 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q1->cntxt_id) |
2307 V_NEWTIMER(q1->next_holdoff) |
2308 V_NEWINDEX(q1->cidx));
2309 new_packets = 1;
2310 }
2311
2312 if (!new_packets && t3_slow_intr_handler(adap) == 0)
2313 q->unhandled_irqs++;
2314
2315 spin_unlock(&q->lock);
2316 return IRQ_HANDLED;
2317}
2318
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002319static int rspq_check_napi(struct sge_qset *qs)
Divy Le Ray4d22de32007-01-18 22:04:14 -05002320{
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002321 struct sge_rspq *q = &qs->rspq;
2322
2323 if (!napi_is_scheduled(&qs->napi) &&
2324 is_new_response(&q->desc[q->cidx], q)) {
2325 napi_schedule(&qs->napi);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002326 return 1;
2327 }
2328 return 0;
2329}
2330
2331/*
2332 * The MSI interrupt handler for the NAPI case (i.e., response queues serviced
2333 * by NAPI polling). Handles data events from SGE response queues as well as
2334 * error and other async events as they all use the same MSI vector. We use
2335 * one SGE response queue per port in this mode and protect all response
2336 * queues with queue 0's lock.
2337 */
Stephen Hemminger9265fab2007-10-08 16:22:29 -07002338static irqreturn_t t3_intr_msi_napi(int irq, void *cookie)
Divy Le Ray4d22de32007-01-18 22:04:14 -05002339{
2340 int new_packets;
2341 struct adapter *adap = cookie;
2342 struct sge_rspq *q = &adap->sge.qs[0].rspq;
2343
2344 spin_lock(&q->lock);
2345
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002346 new_packets = rspq_check_napi(&adap->sge.qs[0]);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002347 if (adap->params.nports == 2)
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002348 new_packets += rspq_check_napi(&adap->sge.qs[1]);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002349 if (!new_packets && t3_slow_intr_handler(adap) == 0)
2350 q->unhandled_irqs++;
2351
2352 spin_unlock(&q->lock);
2353 return IRQ_HANDLED;
2354}
2355
2356/*
2357 * A helper function that processes responses and issues GTS.
2358 */
2359static inline int process_responses_gts(struct adapter *adap,
2360 struct sge_rspq *rq)
2361{
2362 int work;
2363
2364 work = process_responses(adap, rspq_to_qset(rq), -1);
2365 t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
2366 V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
2367 return work;
2368}
2369
2370/*
2371 * The legacy INTx interrupt handler. This needs to handle data events from
2372 * SGE response queues as well as error and other async events as they all use
2373 * the same interrupt pin. We use one SGE response queue per port in this mode
2374 * and protect all response queues with queue 0's lock.
2375 */
2376static irqreturn_t t3_intr(int irq, void *cookie)
2377{
2378 int work_done, w0, w1;
2379 struct adapter *adap = cookie;
2380 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2381 struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
2382
2383 spin_lock(&q0->lock);
2384
2385 w0 = is_new_response(&q0->desc[q0->cidx], q0);
2386 w1 = adap->params.nports == 2 &&
2387 is_new_response(&q1->desc[q1->cidx], q1);
2388
2389 if (likely(w0 | w1)) {
2390 t3_write_reg(adap, A_PL_CLI, 0);
2391 t3_read_reg(adap, A_PL_CLI); /* flush */
2392
2393 if (likely(w0))
2394 process_responses_gts(adap, q0);
2395
2396 if (w1)
2397 process_responses_gts(adap, q1);
2398
2399 work_done = w0 | w1;
2400 } else
2401 work_done = t3_slow_intr_handler(adap);
2402
2403 spin_unlock(&q0->lock);
2404 return IRQ_RETVAL(work_done != 0);
2405}
2406
2407/*
2408 * Interrupt handler for legacy INTx interrupts for T3B-based cards.
2409 * Handles data events from SGE response queues as well as error and other
2410 * async events as they all use the same interrupt pin. We use one SGE
2411 * response queue per port in this mode and protect all response queues with
2412 * queue 0's lock.
2413 */
2414static irqreturn_t t3b_intr(int irq, void *cookie)
2415{
2416 u32 map;
2417 struct adapter *adap = cookie;
2418 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2419
2420 t3_write_reg(adap, A_PL_CLI, 0);
2421 map = t3_read_reg(adap, A_SG_DATA_INTR);
2422
2423 if (unlikely(!map)) /* shared interrupt, most likely */
2424 return IRQ_NONE;
2425
2426 spin_lock(&q0->lock);
2427
2428 if (unlikely(map & F_ERRINTR))
2429 t3_slow_intr_handler(adap);
2430
2431 if (likely(map & 1))
2432 process_responses_gts(adap, q0);
2433
2434 if (map & 2)
2435 process_responses_gts(adap, &adap->sge.qs[1].rspq);
2436
2437 spin_unlock(&q0->lock);
2438 return IRQ_HANDLED;
2439}
2440
2441/*
2442 * NAPI interrupt handler for legacy INTx interrupts for T3B-based cards.
2443 * Handles data events from SGE response queues as well as error and other
2444 * async events as they all use the same interrupt pin. We use one SGE
2445 * response queue per port in this mode and protect all response queues with
2446 * queue 0's lock.
2447 */
2448static irqreturn_t t3b_intr_napi(int irq, void *cookie)
2449{
2450 u32 map;
Divy Le Ray4d22de32007-01-18 22:04:14 -05002451 struct adapter *adap = cookie;
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002452 struct sge_qset *qs0 = &adap->sge.qs[0];
2453 struct sge_rspq *q0 = &qs0->rspq;
Divy Le Ray4d22de32007-01-18 22:04:14 -05002454
2455 t3_write_reg(adap, A_PL_CLI, 0);
2456 map = t3_read_reg(adap, A_SG_DATA_INTR);
2457
2458 if (unlikely(!map)) /* shared interrupt, most likely */
2459 return IRQ_NONE;
2460
2461 spin_lock(&q0->lock);
2462
2463 if (unlikely(map & F_ERRINTR))
2464 t3_slow_intr_handler(adap);
2465
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002466 if (likely(map & 1))
2467 napi_schedule(&qs0->napi);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002468
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002469 if (map & 2)
2470 napi_schedule(&adap->sge.qs[1].napi);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002471
2472 spin_unlock(&q0->lock);
2473 return IRQ_HANDLED;
2474}
2475
2476/**
2477 * t3_intr_handler - select the top-level interrupt handler
2478 * @adap: the adapter
2479 * @polling: whether using NAPI to service response queues
2480 *
2481 * Selects the top-level interrupt handler based on the type of interrupts
2482 * (MSI-X, MSI, or legacy) and whether NAPI will be used to service the
2483 * response queues.
2484 */
Jeff Garzik7c239972007-10-19 03:12:20 -04002485irq_handler_t t3_intr_handler(struct adapter *adap, int polling)
Divy Le Ray4d22de32007-01-18 22:04:14 -05002486{
2487 if (adap->flags & USING_MSIX)
2488 return polling ? t3_sge_intr_msix_napi : t3_sge_intr_msix;
2489 if (adap->flags & USING_MSI)
2490 return polling ? t3_intr_msi_napi : t3_intr_msi;
2491 if (adap->params.rev > 0)
2492 return polling ? t3b_intr_napi : t3b_intr;
2493 return t3_intr;
2494}
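
/*
 * Illustrative sketch of a caller (assumed, not part of this file): the
 * top-level driver would register the selected handler roughly as
 *
 *	err = request_irq(adap->pdev->irq,
 *			  t3_intr_handler(adap, rspq_polling),
 *			  irq_flags, irq_name, adap);
 *
 * where rspq_polling, irq_flags and irq_name stand for the caller's own
 * choices; the point is that the matching MSI-X/MSI/INTx variant gets
 * installed depending on whether NAPI services the response queues.
 */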
2495
Divy Le Rayb8819552007-12-17 18:47:31 -08002496#define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \
2497 F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \
2498 V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \
2499 F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \
2500 F_HIRCQPARITYERROR)
2501#define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR)
2502#define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \
2503 F_RSPQDISABLED)
2504
Divy Le Ray4d22de32007-01-18 22:04:14 -05002505/**
2506 * t3_sge_err_intr_handler - SGE async event interrupt handler
2507 * @adapter: the adapter
2508 *
2509 * Interrupt handler for SGE asynchronous (non-data) events.
2510 */
2511void t3_sge_err_intr_handler(struct adapter *adapter)
2512{
2513 unsigned int v, status = t3_read_reg(adapter, A_SG_INT_CAUSE);
2514
Divy Le Rayb8819552007-12-17 18:47:31 -08002515 if (status & SGE_PARERR)
2516 CH_ALERT(adapter, "SGE parity error (0x%x)\n",
2517 status & SGE_PARERR);
2518 if (status & SGE_FRAMINGERR)
2519 CH_ALERT(adapter, "SGE framing error (0x%x)\n",
2520 status & SGE_FRAMINGERR);
2521
Divy Le Ray4d22de32007-01-18 22:04:14 -05002522 if (status & F_RSPQCREDITOVERFOW)
2523 CH_ALERT(adapter, "SGE response queue credit overflow\n");
2524
2525 if (status & F_RSPQDISABLED) {
2526 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
2527
2528 CH_ALERT(adapter,
2529 "packet delivered to disabled response queue "
2530 "(0x%x)\n", (v >> S_RSPQ0DISABLED) & 0xff);
2531 }
2532
Divy Le Ray6e3f03b2007-08-21 20:49:10 -07002533 if (status & (F_HIPIODRBDROPERR | F_LOPIODRBDROPERR))
2534 CH_ALERT(adapter, "SGE dropped %s priority doorbell\n",
2535			 status & F_HIPIODRBDROPERR ? "high" : "low");
2536
Divy Le Ray4d22de32007-01-18 22:04:14 -05002537 t3_write_reg(adapter, A_SG_INT_CAUSE, status);
Divy Le Rayb8819552007-12-17 18:47:31 -08002538 if (status & SGE_FATALERR)
Divy Le Ray4d22de32007-01-18 22:04:14 -05002539 t3_fatal_err(adapter);
2540}
2541
2542/**
2543 * sge_timer_cb - perform periodic maintenance of an SGE qset
2544 * @data: the SGE queue set to maintain
2545 *
2546 * Runs periodically from a timer to perform maintenance of an SGE queue
2547 * set. It performs two tasks:
2548 *
2549 * a) Cleans up any completed Tx descriptors that may still be pending.
2550 * Normal descriptor cleanup happens when new packets are added to a Tx
2551 * queue so this timer is relatively infrequent and does any cleanup only
2552 * if the Tx queue has not seen any new packets in a while. We make a
2553 * best effort attempt to reclaim descriptors, in that we don't wait
2554 * around if we cannot get a queue's lock (which most likely is because
2555 * someone else is queueing new packets and so will also handle the clean
2556 * up). Since control queues use immediate data exclusively we don't
2557 * bother cleaning them up here.
2558 *
2559 * b) Replenishes Rx queues that have run out due to memory shortage.
2560 * Normally new Rx buffers are added when existing ones are consumed but
2561 * when out of memory a queue can become empty. We try to add only a few
2562 * buffers here, the queue will be replenished fully as these new buffers
2563 * are used up if memory shortage has subsided.
2564 */
2565static void sge_timer_cb(unsigned long data)
2566{
2567 spinlock_t *lock;
2568 struct sge_qset *qs = (struct sge_qset *)data;
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002569 struct adapter *adap = qs->adap;
Divy Le Ray4d22de32007-01-18 22:04:14 -05002570
2571 if (spin_trylock(&qs->txq[TXQ_ETH].lock)) {
2572 reclaim_completed_tx(adap, &qs->txq[TXQ_ETH]);
2573 spin_unlock(&qs->txq[TXQ_ETH].lock);
2574 }
2575 if (spin_trylock(&qs->txq[TXQ_OFLD].lock)) {
2576 reclaim_completed_tx(adap, &qs->txq[TXQ_OFLD]);
2577 spin_unlock(&qs->txq[TXQ_OFLD].lock);
2578 }
2579 lock = (adap->flags & USING_MSIX) ? &qs->rspq.lock :
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002580 &adap->sge.qs[0].rspq.lock;
Divy Le Ray4d22de32007-01-18 22:04:14 -05002581 if (spin_trylock_irq(lock)) {
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002582 if (!napi_is_scheduled(&qs->napi)) {
Divy Le Raybae73f42007-02-24 16:44:12 -08002583 u32 status = t3_read_reg(adap, A_SG_RSPQ_FL_STATUS);
2584
Divy Le Ray4d22de32007-01-18 22:04:14 -05002585 if (qs->fl[0].credits < qs->fl[0].size)
2586 __refill_fl(adap, &qs->fl[0]);
2587 if (qs->fl[1].credits < qs->fl[1].size)
2588 __refill_fl(adap, &qs->fl[1]);
Divy Le Raybae73f42007-02-24 16:44:12 -08002589
2590 if (status & (1 << qs->rspq.cntxt_id)) {
2591 qs->rspq.starved++;
2592 if (qs->rspq.credits) {
2593 refill_rspq(adap, &qs->rspq, 1);
2594 qs->rspq.credits--;
2595 qs->rspq.restarted++;
Divy Le Raye0994eb2007-02-24 16:44:17 -08002596 t3_write_reg(adap, A_SG_RSPQ_FL_STATUS,
Divy Le Raybae73f42007-02-24 16:44:12 -08002597 1 << qs->rspq.cntxt_id);
2598 }
2599 }
Divy Le Ray4d22de32007-01-18 22:04:14 -05002600 }
2601 spin_unlock_irq(lock);
2602 }
2603 mod_timer(&qs->tx_reclaim_timer, jiffies + TX_RECLAIM_PERIOD);
2604}
2605
2606/**
2607 * t3_update_qset_coalesce - update coalescing settings for a queue set
2608 * @qs: the SGE queue set
2609 * @p: new queue set parameters
2610 *
2611 * Update the coalescing settings for an SGE queue set. Nothing is done
2612 * if the queue set is not initialized yet.
2613 */
2614void t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
2615{
Divy Le Ray4d22de32007-01-18 22:04:14 -05002616 qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);/* can't be 0 */
2617 qs->rspq.polling = p->polling;
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002618 qs->napi.poll = p->polling ? napi_rx_handler : ofld_poll;
Divy Le Ray4d22de32007-01-18 22:04:14 -05002619}
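
/*
 * Example with the defaults from t3_sge_prep() below: coalesce_usecs of 5
 * programs holdoff_tmr with 50, i.e. 5 us expressed in the hardware's
 * 0.1 us timer units (the same units NOMEM_INTR_DELAY is given in).
 */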
2620
2621/**
2622 * t3_sge_alloc_qset - initialize an SGE queue set
2623 * @adapter: the adapter
2624 * @id: the queue set id
2625 * @nports: how many Ethernet ports will be using this queue set
2626 * @irq_vec_idx: the IRQ vector index for response queue interrupts
2627 * @p: configuration parameters for this queue set
2628 * @ntxq: number of Tx queues for the queue set
2629 * @dev: net device associated with this queue set
2630 *
2631 * Allocate resources and initialize an SGE queue set. A queue set
2632 * comprises a response queue, two Rx free-buffer queues, and up to 3
2633 * Tx queues. The Tx queues are assigned roles in the order Ethernet
2634 * queue, offload queue, and control queue.
2635 */
2636int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports,
2637 int irq_vec_idx, const struct qset_params *p,
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002638 int ntxq, struct net_device *dev)
Divy Le Ray4d22de32007-01-18 22:04:14 -05002639{
Divy Le Rayb1fb1f22008-05-21 18:56:16 -07002640 int i, avail, ret = -ENOMEM;
Divy Le Ray4d22de32007-01-18 22:04:14 -05002641 struct sge_qset *q = &adapter->sge.qs[id];
2642
2643 init_qset_cntxt(q, id);
2644 init_timer(&q->tx_reclaim_timer);
2645 q->tx_reclaim_timer.data = (unsigned long)q;
2646 q->tx_reclaim_timer.function = sge_timer_cb;
2647
2648 q->fl[0].desc = alloc_ring(adapter->pdev, p->fl_size,
2649 sizeof(struct rx_desc),
2650 sizeof(struct rx_sw_desc),
2651 &q->fl[0].phys_addr, &q->fl[0].sdesc);
2652 if (!q->fl[0].desc)
2653 goto err;
2654
2655 q->fl[1].desc = alloc_ring(adapter->pdev, p->jumbo_size,
2656 sizeof(struct rx_desc),
2657 sizeof(struct rx_sw_desc),
2658 &q->fl[1].phys_addr, &q->fl[1].sdesc);
2659 if (!q->fl[1].desc)
2660 goto err;
2661
2662 q->rspq.desc = alloc_ring(adapter->pdev, p->rspq_size,
2663 sizeof(struct rsp_desc), 0,
2664 &q->rspq.phys_addr, NULL);
2665 if (!q->rspq.desc)
2666 goto err;
2667
2668 for (i = 0; i < ntxq; ++i) {
2669 /*
2670 * The control queue always uses immediate data so does not
2671 * need to keep track of any sk_buffs.
2672 */
2673 size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
2674
2675 q->txq[i].desc = alloc_ring(adapter->pdev, p->txq_size[i],
2676 sizeof(struct tx_desc), sz,
2677 &q->txq[i].phys_addr,
2678 &q->txq[i].sdesc);
2679 if (!q->txq[i].desc)
2680 goto err;
2681
2682 q->txq[i].gen = 1;
2683 q->txq[i].size = p->txq_size[i];
2684 spin_lock_init(&q->txq[i].lock);
2685 skb_queue_head_init(&q->txq[i].sendq);
2686 }
2687
2688 tasklet_init(&q->txq[TXQ_OFLD].qresume_tsk, restart_offloadq,
2689 (unsigned long)q);
2690 tasklet_init(&q->txq[TXQ_CTRL].qresume_tsk, restart_ctrlq,
2691 (unsigned long)q);
2692
2693 q->fl[0].gen = q->fl[1].gen = 1;
2694 q->fl[0].size = p->fl_size;
2695 q->fl[1].size = p->jumbo_size;
2696
2697 q->rspq.gen = 1;
2698 q->rspq.size = p->rspq_size;
2699 spin_lock_init(&q->rspq.lock);
2700
2701 q->txq[TXQ_ETH].stop_thres = nports *
2702 flits_to_desc(sgl_len(MAX_SKB_FRAGS + 1) + 3);
2703
Divy Le Raycf992af2007-05-30 21:10:47 -07002704#if FL0_PG_CHUNK_SIZE > 0
2705 q->fl[0].buf_size = FL0_PG_CHUNK_SIZE;
Divy Le Raye0994eb2007-02-24 16:44:17 -08002706#else
Divy Le Raycf992af2007-05-30 21:10:47 -07002707 q->fl[0].buf_size = SGE_RX_SM_BUF_SIZE + sizeof(struct cpl_rx_data);
Divy Le Raye0994eb2007-02-24 16:44:17 -08002708#endif
Divy Le Raycf992af2007-05-30 21:10:47 -07002709 q->fl[0].use_pages = FL0_PG_CHUNK_SIZE > 0;
2710 q->fl[1].buf_size = is_offload(adapter) ?
2711 (16 * 1024) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) :
2712 MAX_FRAME_SIZE + 2 + sizeof(struct cpl_rx_pkt);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002713
Roland Dreierb1186de2008-03-20 13:30:48 -07002714 spin_lock_irq(&adapter->sge.reg_lock);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002715
2716 /* FL threshold comparison uses < */
2717 ret = t3_sge_init_rspcntxt(adapter, q->rspq.cntxt_id, irq_vec_idx,
2718 q->rspq.phys_addr, q->rspq.size,
2719 q->fl[0].buf_size, 1, 0);
2720 if (ret)
2721 goto err_unlock;
2722
2723 for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
2724 ret = t3_sge_init_flcntxt(adapter, q->fl[i].cntxt_id, 0,
2725 q->fl[i].phys_addr, q->fl[i].size,
2726 q->fl[i].buf_size, p->cong_thres, 1,
2727 0);
2728 if (ret)
2729 goto err_unlock;
2730 }
2731
2732 ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
2733 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
2734 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
2735 1, 0);
2736 if (ret)
2737 goto err_unlock;
2738
2739 if (ntxq > 1) {
2740 ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_OFLD].cntxt_id,
2741 USE_GTS, SGE_CNTXT_OFLD, id,
2742 q->txq[TXQ_OFLD].phys_addr,
2743 q->txq[TXQ_OFLD].size, 0, 1, 0);
2744 if (ret)
2745 goto err_unlock;
2746 }
2747
2748 if (ntxq > 2) {
2749 ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_CTRL].cntxt_id, 0,
2750 SGE_CNTXT_CTRL, id,
2751 q->txq[TXQ_CTRL].phys_addr,
2752 q->txq[TXQ_CTRL].size,
2753 q->txq[TXQ_CTRL].token, 1, 0);
2754 if (ret)
2755 goto err_unlock;
2756 }
2757
Roland Dreierb1186de2008-03-20 13:30:48 -07002758 spin_unlock_irq(&adapter->sge.reg_lock);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002759
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002760 q->adap = adapter;
2761 q->netdev = dev;
2762 t3_update_qset_coalesce(q, p);
Divy Le Rayb1fb1f22008-05-21 18:56:16 -07002763 avail = refill_fl(adapter, &q->fl[0], q->fl[0].size, GFP_KERNEL);
2764 if (!avail) {
2765 CH_ALERT(adapter, "free list queue 0 initialization failed\n");
2766 goto err;
2767 }
2768 if (avail < q->fl[0].size)
2769 CH_WARN(adapter, "free list queue 0 enabled with %d credits\n",
2770 avail);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002771
Divy Le Rayb1fb1f22008-05-21 18:56:16 -07002772 avail = refill_fl(adapter, &q->fl[1], q->fl[1].size, GFP_KERNEL);
2773 if (avail < q->fl[1].size)
2774 CH_WARN(adapter, "free list queue 1 enabled with %d credits\n",
2775 avail);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002776 refill_rspq(adapter, &q->rspq, q->rspq.size - 1);
2777
2778 t3_write_reg(adapter, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
2779 V_NEWTIMER(q->rspq.holdoff_tmr));
2780
2781 mod_timer(&q->tx_reclaim_timer, jiffies + TX_RECLAIM_PERIOD);
2782 return 0;
2783
Divy Le Rayb1fb1f22008-05-21 18:56:16 -07002784err_unlock:
Roland Dreierb1186de2008-03-20 13:30:48 -07002785 spin_unlock_irq(&adapter->sge.reg_lock);
Divy Le Rayb1fb1f22008-05-21 18:56:16 -07002786err:
Divy Le Ray4d22de32007-01-18 22:04:14 -05002787 t3_free_qset(adapter, q);
2788 return ret;
2789}
2790
2791/**
2792 * t3_free_sge_resources - free SGE resources
2793 * @adap: the adapter
2794 *
2795 * Frees resources used by the SGE queue sets.
2796 */
2797void t3_free_sge_resources(struct adapter *adap)
2798{
2799 int i;
2800
2801 for (i = 0; i < SGE_QSETS; ++i)
2802 t3_free_qset(adap, &adap->sge.qs[i]);
2803}
2804
2805/**
2806 * t3_sge_start - enable SGE
2807 * @adap: the adapter
2808 *
2809 * Enables the SGE for DMAs. This is the last step in starting packet
2810 * transfers.
2811 */
2812void t3_sge_start(struct adapter *adap)
2813{
2814 t3_set_reg_field(adap, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
2815}
2816
2817/**
2818 * t3_sge_stop - disable SGE operation
2819 * @adap: the adapter
2820 *
2821 * Disables the DMA engine. This can be called in emergencies (e.g.,
2822 * from error interrupts) or from normal process context. In the latter
2823 * case it also disables any pending queue restart tasklets. Note that
2824 * if it is called in interrupt context it cannot disable the restart
2825 * tasklets as it cannot wait, however the tasklets will have no effect
2826 * since the doorbells are disabled and the driver will call this again
2827 * later from process context, at which time the tasklets will be stopped
2828 * if they are still running.
2829 */
2830void t3_sge_stop(struct adapter *adap)
2831{
2832 t3_set_reg_field(adap, A_SG_CONTROL, F_GLOBALENABLE, 0);
2833 if (!in_interrupt()) {
2834 int i;
2835
2836 for (i = 0; i < SGE_QSETS; ++i) {
2837 struct sge_qset *qs = &adap->sge.qs[i];
2838
2839 tasklet_kill(&qs->txq[TXQ_OFLD].qresume_tsk);
2840 tasklet_kill(&qs->txq[TXQ_CTRL].qresume_tsk);
2841 }
2842 }
2843}
2844
2845/**
2846 * t3_sge_init - initialize SGE
2847 * @adap: the adapter
2848 * @p: the SGE parameters
2849 *
2850 * Performs SGE initialization needed every time after a chip reset.
2851 * We do not initialize any of the queue sets here; instead the top-level
2852 * driver must request those individually. We also do not enable DMA
2853 * here; that should be done after the queues have been set up.
2854 */
2855void t3_sge_init(struct adapter *adap, struct sge_params *p)
2856{
2857 unsigned int ctrl, ups = ffs(pci_resource_len(adap->pdev, 2) >> 12);
2858
2859 ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
Divy Le Rayb8819552007-12-17 18:47:31 -08002860 F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN |
Divy Le Ray4d22de32007-01-18 22:04:14 -05002861 V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
2862 V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
2863#if SGE_NUM_GENBITS == 1
2864 ctrl |= F_EGRGENCTRL;
2865#endif
2866 if (adap->params.rev > 0) {
2867 if (!(adap->flags & (USING_MSIX | USING_MSI)))
2868 ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
Divy Le Ray4d22de32007-01-18 22:04:14 -05002869 }
2870 t3_write_reg(adap, A_SG_CONTROL, ctrl);
2871 t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
2872 V_LORCQDRBTHRSH(512));
2873 t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
2874 t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
Divy Le Ray6195c712007-01-30 19:43:56 -08002875 V_TIMEOUT(200 * core_ticks_per_usec(adap)));
Divy Le Rayb8819552007-12-17 18:47:31 -08002876 t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH,
2877 adap->params.rev < T3_REV_C ? 1000 : 500);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002878 t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
2879 t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
2880 t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
2881 t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
2882 t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
2883}
2884
2885/**
2886 * t3_sge_prep - one-time SGE initialization
2887 * @adap: the associated adapter
2888 * @p: SGE parameters
2889 *
2890 * Performs one-time initialization of SGE SW state. Includes determining
2891 * defaults for the assorted SGE parameters, which admins can change until
2892 * they are used to initialize the SGE.
2893 */
Roland Dreier7b9b0942008-01-29 14:45:11 -08002894void t3_sge_prep(struct adapter *adap, struct sge_params *p)
Divy Le Ray4d22de32007-01-18 22:04:14 -05002895{
2896 int i;
2897
2898 p->max_pkt_size = (16 * 1024) - sizeof(struct cpl_rx_data) -
2899 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
2900
2901 for (i = 0; i < SGE_QSETS; ++i) {
2902 struct qset_params *q = p->qset + i;
2903
2904 q->polling = adap->params.rev > 0;
2905 q->coalesce_usecs = 5;
2906 q->rspq_size = 1024;
Divy Le Raye0994eb2007-02-24 16:44:17 -08002907 q->fl_size = 1024;
Divy Le Ray4d22de32007-01-18 22:04:14 -05002908 q->jumbo_size = 512;
2909 q->txq_size[TXQ_ETH] = 1024;
2910 q->txq_size[TXQ_OFLD] = 1024;
2911 q->txq_size[TXQ_CTRL] = 256;
2912 q->cong_thres = 0;
2913 }
2914
2915 spin_lock_init(&adap->sge.reg_lock);
2916}
2917
2918/**
2919 * t3_get_desc - dump an SGE descriptor for debugging purposes
2920 * @qs: the queue set
2921 * @qnum: identifies the specific queue (0..2: Tx, 3: response, 4..5: Rx)
2922 * @idx: the descriptor index in the queue
2923 * @data: where to dump the descriptor contents
2924 *
2925 * Dumps the contents of a HW descriptor of an SGE queue. Returns the
2926 * size of the descriptor.
2927 */
2928int t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
2929 unsigned char *data)
2930{
2931 if (qnum >= 6)
2932 return -EINVAL;
2933
2934 if (qnum < 3) {
2935 if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
2936 return -EINVAL;
2937 memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
2938 return sizeof(struct tx_desc);
2939 }
2940
2941 if (qnum == 3) {
2942 if (!qs->rspq.desc || idx >= qs->rspq.size)
2943 return -EINVAL;
2944 memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
2945 return sizeof(struct rsp_desc);
2946 }
2947
2948 qnum -= 4;
2949 if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
2950 return -EINVAL;
2951 memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
2952 return sizeof(struct rx_desc);
2953}
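
/*
 * Illustrative use of t3_get_desc() (hypothetical debugging caller):
 *
 *	unsigned char buf[sizeof(struct tx_desc)];
 *	int len = t3_get_desc(&adap->sge.qs[0], 0, 0, buf);
 *
 * dumps the first descriptor of queue set 0's Ethernet Tx queue; a
 * return value of -EINVAL indicates an unknown queue number or an
 * out-of-range index.
 */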