[SK_BUFF]: Convert skb->tail to sk_buff_data_t

Make skb->tail an offset from skb->head as well. This reduces its size from
8 to 4 bytes on 64-bit architectures, which lets us eliminate the 4-byte
hole left by the layer headers conversion and brings struct sk_buff down to
256 bytes, i.e. four 64-byte cachelines. Since the sk_buff slab cache is
SLAB_HWCACHE_ALIGN, each sk_buff now occupies exactly four cachelines :-)

Many calculations that previously required
skb->{transport,network,mac}_header to be converted to a pointer first can
now be done directly, since they are meaningful on either offsets or
pointers.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 7563fdc..0537d60 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -448,7 +448,7 @@
 {
 	struct tcindex_data *p = PRIV(tp);
 	struct tcindex_filter_result *r = (struct tcindex_filter_result *) fh;
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct rtattr *rta;
 
 	DPRINTK("tcindex_dump(tp %p,fh 0x%lx,skb %p,t %p),p %p,r %p,b %p\n",
@@ -463,7 +463,7 @@
 		RTA_PUT(skb,TCA_TCINDEX_SHIFT,sizeof(p->shift),&p->shift);
 		RTA_PUT(skb,TCA_TCINDEX_FALL_THROUGH,sizeof(p->fall_through),
 		    &p->fall_through);
-		rta->rta_len = skb->tail-b;
+		rta->rta_len = skb_tail_pointer(skb) - b;
 	} else {
 		if (p->perfect) {
 			t->tcm_handle = r-p->perfect;
@@ -486,7 +486,7 @@
 
 		if (tcf_exts_dump(skb, &r->exts, &tcindex_ext_map) < 0)
 			goto rtattr_failure;
-		rta->rta_len = skb->tail-b;
+		rta->rta_len = skb_tail_pointer(skb) - b;
 
 		if (tcf_exts_dump_stats(skb, &r->exts, &tcindex_ext_map) < 0)
 			goto rtattr_failure;