patch-2.3.15 linux/net/ipv6/tcp_ipv6.c
Next file: linux/net/ipv6/udp.c
Previous file: linux/net/ipv6/sit.c
Back to the patch index
Back to the overall index
- Lines: 1726
- Date:
Mon Aug 23 10:01:02 1999
- Orig file:
v2.3.14/linux/net/ipv6/tcp_ipv6.c
- Orig date:
Sat Jul 3 17:57:23 1999
diff -u --recursive --new-file v2.3.14/linux/net/ipv6/tcp_ipv6.c linux/net/ipv6/tcp_ipv6.c
@@ -5,7 +5,7 @@
* Authors:
* Pedro Roque <roque@di.fc.ul.pt>
*
- * $Id: tcp_ipv6.c,v 1.109 1999/07/02 11:26:41 davem Exp $
+ * $Id: tcp_ipv6.c,v 1.111 1999/08/21 21:46:35 davem Exp $
*
* Based on:
* linux/net/ipv4/tcp.c
@@ -18,6 +18,8 @@
* 2 of the License, or (at your option) any later version.
*/
+#define __NO_VERSION__
+#include <linux/module.h>
#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
@@ -29,6 +31,7 @@
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
+#include <linux/ipsec.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
@@ -44,13 +47,17 @@
#include <asm/uaccess.h>
extern int sysctl_max_syn_backlog;
+extern int sysctl_tcp_tw_recycle;
+extern __u32 sysctl_wmem_max;
+extern __u32 sysctl_rmem_max;
static void tcp_v6_send_reset(struct sk_buff *skb);
+static void tcp_v6_or_send_ack(struct sk_buff *skb, struct open_request *req);
static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
struct sk_buff *skb);
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
-static void tcp_v6_xmit(struct sk_buff *skb);
+static int tcp_v6_xmit(struct sk_buff *skb);
static struct open_request *tcp_v6_search_req(struct tcp_opt *tp,
struct ipv6hdr *ip6h,
struct tcphdr *th,
@@ -67,7 +74,9 @@
int hashent = (lport ^ fport);
hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]);
- return (hashent & ((tcp_ehash_size >> 1) - 1));
+ hashent ^= hashent>>16;
+ hashent ^= hashent>>8;
+ return (hashent & (tcp_ehash_size - 1));
}
static __inline__ int tcp_v6_sk_hashfn(struct sock *sk)
@@ -86,28 +95,36 @@
*/
+/*
+ * Bind sk to local port snum, or search sysctl_local_port_range for a free
+ * port when snum is 0.  Returns 0 on success and 1 when the range is
+ * exhausted, the port conflicts with an existing owner, or a bind bucket
+ * cannot be created.  Bottom halves stay disabled for the whole call, and
+ * the selected bind-hash chain stays locked from the lookup to fail_unlock.
+ */
static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
{
+ struct tcp_bind_hashbucket *head;
struct tcp_bind_bucket *tb;
+ int ret;
- SOCKHASH_LOCK_WRITE();
+ local_bh_disable();
if (snum == 0) {
- int rover = tcp_port_rover;
int low = sysctl_local_port_range[0];
int high = sysctl_local_port_range[1];
int remaining = (high - low) + 1;
+ int rover;
+ spin_lock(&tcp_portalloc_lock);
+ rover = tcp_port_rover;
do { rover++;
if ((rover < low) || (rover > high))
rover = low;
- tb = tcp_bhash[tcp_bhashfn(rover)];
- for ( ; tb; tb = tb->next)
+ head = &tcp_bhash[tcp_bhashfn(rover)];
+ spin_lock(&head->lock);
+ for (tb = head->chain; tb; tb = tb->next)
if (tb->port == rover)
goto next;
+ /* No bucket uses this port: leave the loop with head->lock held. */
break;
next:
+ spin_unlock(&head->lock);
} while (--remaining > 0);
tcp_port_rover = rover;
+ spin_unlock(&tcp_portalloc_lock);
/* Exhausted local port range during search? */
+ ret = 1;
if (remaining <= 0)
goto fail;
@@ -115,9 +132,9 @@
snum = rover;
tb = NULL;
} else {
- for (tb = tcp_bhash[tcp_bhashfn(snum)];
- tb != NULL;
- tb = tb->next)
+ head = &tcp_bhash[tcp_bhashfn(snum)];
+ spin_lock(&head->lock);
+ for (tb = head->chain; tb != NULL; tb = tb->next)
if (tb->port == snum)
break;
}
@@ -135,22 +152,27 @@
if (!sk_reuse ||
!sk2->reuse ||
sk2->state == TCP_LISTEN) {
+ /* NOTE: IPv6 tw bucket have different format */
+ /* sk2 is the owner that may be a TIME_WAIT bucket, so the
+ * tw-format address must be read from sk2, not from sk.
+ */
if (!sk2->rcv_saddr ||
- !addr_type == IPV6_ADDR_ANY ||
+ addr_type == IPV6_ADDR_ANY ||
!ipv6_addr_cmp(&sk->net_pinfo.af_inet6.rcv_saddr,
- &sk2->net_pinfo.af_inet6.rcv_saddr))
+ sk2->state != TCP_TIME_WAIT ?
+ &sk2->net_pinfo.af_inet6.rcv_saddr :
+ &((struct tcp_tw_bucket*)sk2)->v6_rcv_saddr))
break;
}
}
}
/* If we found a conflict, fail. */
+ ret = 1;
if (sk2 != NULL)
- goto fail;
+ goto fail_unlock;
}
}
+ ret = 1;
if (tb == NULL &&
- (tb = tcp_bucket_create(snum)) == NULL)
- goto fail;
+ (tb = tcp_bucket_create(head, snum)) == NULL)
+ goto fail_unlock;
if (tb->owners == NULL) {
if (sk->reuse && sk->state != TCP_LISTEN)
tb->fastreuse = 1;
@@ -167,58 +189,54 @@
tb->owners = sk;
sk->bind_pprev = &tb->owners;
sk->prev = (struct sock *) tb;
+ ret = 0;
- SOCKHASH_UNLOCK_WRITE();
- return 0;
-
+/* Success falls through here too, with ret == 0. */
+fail_unlock:
+ spin_unlock(&head->lock);
fail:
- SOCKHASH_UNLOCK_WRITE();
- return 1;
+ local_bh_enable();
+ return ret;
+}
+
+/*
+ * Link sk at the head of its lookup chain and bump the protocol's in-use
+ * counters.  Listening sockets go on tcp_listening_hash; every other state
+ * goes on the tcp_ehash chain selected by tcp_v6_sk_hashfn(), whose index is
+ * also cached in sk->hashent.  tcp_v6_hash() and tcp_v6_hash_connecting()
+ * call this with bottom halves disabled.
+ */
+static __inline__ void __tcp_v6_hash(struct sock *sk)
+{
+ struct sock **skp;
+ rwlock_t *lock;
+
+ /* The socket must not already be linked on a chain. */
+ BUG_TRAP(sk->pprev==NULL);
+
+ if(sk->state == TCP_LISTEN) {
+ skp = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
+ lock = &tcp_lhash_lock;
+ /* NOTE(review): presumably takes tcp_lhash_lock for writing; it is
+ * released through `lock` below - confirm against its definition.
+ */
+ tcp_listen_wlock();
+ } else {
+ skp = &tcp_ehash[(sk->hashent = tcp_v6_sk_hashfn(sk))].chain;
+ lock = &tcp_ehash[sk->hashent].lock;
+ write_lock(lock);
+ }
+
+ /* Head insertion; pprev always points at the slot that references sk. */
+ if((sk->next = *skp) != NULL)
+ (*skp)->pprev = &sk->next;
+ *skp = sk;
+ sk->pprev = skp;
+ sk->prot->inuse++;
+ if(sk->prot->highestinuse < sk->prot->inuse)
+ sk->prot->highestinuse = sk->prot->inuse;
+ write_unlock(lock);
}
+
static void tcp_v6_hash(struct sock *sk)
{
if(sk->state != TCP_CLOSE) {
- struct sock **skp;
-
- /* Well, I know that it is ugly...
- * All this ->prot, ->af_specific etc. need LARGE cleanup --ANK
- */
if (sk->tp_pinfo.af_tcp.af_specific == &ipv6_mapped) {
tcp_prot.hash(sk);
return;
}
-
- if(sk->state == TCP_LISTEN)
- skp = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
- else
- skp = &tcp_ehash[(sk->hashent = tcp_v6_sk_hashfn(sk))];
-
- SOCKHASH_LOCK_WRITE();
- if((sk->next = *skp) != NULL)
- (*skp)->pprev = &sk->next;
- *skp = sk;
- sk->pprev = skp;
- sk->prot->inuse++;
- if(sk->prot->highestinuse < sk->prot->inuse)
- sk->prot->highestinuse = sk->prot->inuse;
- SOCKHASH_UNLOCK_WRITE();
- }
-}
-
-static void tcp_v6_unhash(struct sock *sk)
-{
- SOCKHASH_LOCK_WRITE();
- if(sk->pprev) {
- if(sk->next)
- sk->next->pprev = sk->pprev;
- *sk->pprev = sk->next;
- sk->pprev = NULL;
- sk->prot->inuse--;
- tcp_reg_zap(sk);
- __tcp_put_port(sk);
+ local_bh_disable();
+ __tcp_v6_hash(sk);
+ local_bh_enable();
}
- SOCKHASH_UNLOCK_WRITE();
}
+/*
+ * Find the best listening socket for (daddr, hnum, dif) under
+ * tcp_lhash_lock.  A score of 3 is an exact match and ends the search;
+ * otherwise the highest-scoring candidate wins.  The returned socket, if
+ * any, carries a reference taken with sock_hold().
+ */
static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned short hnum, int dif)
@@ -228,6 +246,7 @@
int score, hiscore;
hiscore=0;
+ read_lock(&tcp_lhash_lock);
sk = tcp_listening_hash[tcp_lhashfn(hnum)];
for(; sk; sk = sk->next) {
if((sk->num == hnum) && (sk->family == PF_INET6)) {
@@ -244,14 +263,19 @@
continue;
score++;
}
- if (score == 3)
- return sk;
+ if (score == 3) {
+ result = sk;
+ break;
+ }
if (score > hiscore) {
hiscore = score;
result = sk;
}
}
}
+ /* Hold the socket actually returned: sk is NULL here whenever the loop
+ * ran to the end, even though result may name a best-score match.
+ */
+ if (result)
+ sock_hold(result);
+ read_unlock(&tcp_lhash_lock);
return result;
}
@@ -261,33 +285,27 @@
* The sockhash lock must be held as a reader here.
*/
static inline struct sock *__tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
- struct in6_addr *daddr, u16 dport,
+ struct in6_addr *daddr, u16 hnum,
int dif)
{
+ struct tcp_ehash_bucket *head;
struct sock *sk;
- __u16 hnum = ntohs(dport);
__u32 ports = TCP_COMBINED_PORTS(sport, hnum);
int hash;
- /* Check TCP register quick cache first. */
- sk = TCP_RHASH(sport);
- if(sk && TCP_IPV6_MATCH(sk, saddr, daddr, ports, dif))
- goto hit;
-
/* Optimize here for direct hit, only listening connections can
* have wildcards anyways.
*/
hash = tcp_v6_hashfn(daddr, hnum, saddr, sport);
- for(sk = tcp_ehash[hash]; sk; sk = sk->next) {
+ head = &tcp_ehash[hash];
+ read_lock(&head->lock);
+ for(sk = head->chain; sk; sk = sk->next) {
/* For IPV6 do the cheaper port and family tests first. */
- if(TCP_IPV6_MATCH(sk, saddr, daddr, ports, dif)) {
- if (sk->state == TCP_ESTABLISHED)
- TCP_RHASH(sport) = sk;
+ if(TCP_IPV6_MATCH(sk, saddr, daddr, ports, dif))
goto hit; /* You sunk my battleship! */
- }
}
/* Must check for a TIME_WAIT'er before going to listener hash. */
- for(sk = tcp_ehash[hash+(tcp_ehash_size >> 1)]; sk; sk = sk->next) {
+ for(sk = (head + tcp_ehash_size)->chain; sk; sk = sk->next) {
if(*((__u32 *)&(sk->dport)) == ports &&
sk->family == PF_INET6) {
struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;
@@ -297,16 +315,21 @@
goto hit;
}
}
- sk = tcp_v6_lookup_listener(daddr, hnum, dif);
+ read_unlock(&head->lock);
+
+ return tcp_v6_lookup_listener(daddr, hnum, dif);
+
hit:
+ sock_hold(sk);
+ read_unlock(&head->lock);
return sk;
}
+/*
+ * Wrapper around __tcp_v6_lookup() that disables bottom halves for the
+ * duration of the lookup and converts the destination port dp from network
+ * to host byte order before passing it on.
+ */
#define tcp_v6_lookup(sa, sp, da, dp, dif) \
({ struct sock *___sk; \
- SOCKHASH_LOCK_READ(); \
- ___sk = __tcp_v6_lookup((sa),(sp),(da),(dp),(dif)); \
- SOCKHASH_UNLOCK_READ(); \
+ local_bh_disable(); \
+ ___sk = __tcp_v6_lookup((sa),(sp),(da),ntohs(dp),(dif)); \
+ local_bh_enable(); \
___sk; \
})
@@ -336,34 +359,99 @@
skb->h.th->source);
}
-static int tcp_v6_unique_address(struct sock *sk)
+/*
+ * Check that the connecting socket's address/port/device tuple is not
+ * already present in the TIME_WAIT or established chains of its hash bucket
+ * and, if it is unique, link sk into the established chain.  Returns 0 on
+ * success and -EADDRNOTAVAIL when a matching socket exists.  With
+ * CONFIG_TCP_TW_RECYCLE, a matching TIME_WAIT bucket that has a recorded
+ * timestamp is taken over instead: its sequence and timestamp state is
+ * inherited and the bucket is killed after the chain lock is dropped.
+ *
+ * Called with bottom halves enabled: tcp_v6_hash_connecting() releases its
+ * lock with spin_unlock_bh() first, and the TIME_WAIT teardown below does
+ * its own local_bh_disable().  The chain lock is therefore taken with the
+ * _bh variant so that it pairs with the write_unlock_bh() on every exit.
+ */
+static int tcp_v6_check_established(struct sock *sk)
{
- struct tcp_bind_bucket *tb;
- unsigned short snum = sk->num;
- int retval = 1;
+ struct in6_addr *daddr = &sk->net_pinfo.af_inet6.rcv_saddr;
+ struct in6_addr *saddr = &sk->net_pinfo.af_inet6.daddr;
+ int dif = sk->bound_dev_if;
+ u32 ports = TCP_COMBINED_PORTS(sk->dport, sk->num);
+ int hash = tcp_v6_hashfn(daddr, sk->num, saddr, sk->dport);
+ struct tcp_ehash_bucket *head = &tcp_ehash[hash];
+ struct sock *sk2, **skp;
+ struct tcp_tw_bucket *tw;
+
+ write_lock_bh(&head->lock);
+
+ /* TIME_WAIT buckets live tcp_ehash_size entries past the bucket. */
+ for(skp = &(head + tcp_ehash_size)->chain; (sk2=*skp)!=NULL; skp = &sk2->next) {
+ tw = (struct tcp_tw_bucket*)sk2;
+
+ if(*((__u32 *)&(sk2->dport)) == ports &&
+ sk2->family == PF_INET6 &&
+ !ipv6_addr_cmp(&tw->v6_daddr, saddr) &&
+ !ipv6_addr_cmp(&tw->v6_rcv_saddr, daddr) &&
+ sk2->bound_dev_if == sk->bound_dev_if) {
+#ifdef CONFIG_TCP_TW_RECYCLE
+ struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
- /* Freeze the hash while we snoop around. */
- SOCKHASH_LOCK_READ();
- tb = tcp_bhash[tcp_bhashfn(snum)];
- for(; tb; tb = tb->next) {
- if(tb->port == snum && tb->owners != NULL) {
- /* Almost certainly the re-use port case, search the real hashes
- * so it actually scales. (we hope that all ipv6 ftp servers will
- * use passive ftp, I just cover this case for completeness)
- */
- sk = __tcp_v6_lookup(&sk->net_pinfo.af_inet6.daddr,
- sk->dport,
- &sk->net_pinfo.af_inet6.rcv_saddr, snum,
- sk->bound_dev_if);
- SOCKHASH_UNLOCK_READ();
-
- if((sk != NULL) && (sk->state != TCP_LISTEN))
- retval = 0;
- return retval;
+ if (sysctl_tcp_tw_recycle && tw->ts_recent_stamp) {
+ /* See comment in tcp_ipv4.c */
+ if ((tp->write_seq = tw->snd_nxt + 2) == 0)
+ tp->write_seq = 1;
+ tp->ts_recent = tw->ts_recent;
+ tp->ts_recent_stamp = tw->ts_recent_stamp;
+ sock_hold(sk2);
+ skp = &head->chain;
+ goto unique;
+ } else
+#endif
+ goto not_unique;
}
}
- SOCKHASH_UNLOCK_READ();
- return retval;
+ tw = NULL;
+
+ /* Compare each established socket on the chain (sk2), not sk itself. */
+ for(skp = &head->chain; (sk2=*skp)!=NULL; skp = &sk2->next) {
+ if(TCP_IPV6_MATCH(sk2, saddr, daddr, ports, dif))
+ goto not_unique;
+ }
+
+#ifdef CONFIG_TCP_TW_RECYCLE
+unique:
+#endif
+ BUG_TRAP(sk->pprev==NULL);
+ if ((sk->next = *skp) != NULL)
+ (*skp)->pprev = &sk->next;
+
+ *skp = sk;
+ sk->pprev = skp;
+ sk->prot->inuse++;
+ if(sk->prot->highestinuse < sk->prot->inuse)
+ sk->prot->highestinuse = sk->prot->inuse;
+ write_unlock_bh(&head->lock);
+
+#ifdef CONFIG_TCP_TW_RECYCLE
+ if (tw) {
+ /* Silly. Should hash-dance instead... */
+ local_bh_disable();
+ tcp_tw_deschedule(tw);
+ tcp_timewait_kill(tw);
+ local_bh_enable();
+
+ tcp_tw_put(tw);
+ }
+#endif
+ return 0;
+
+not_unique:
+ write_unlock_bh(&head->lock);
+ return -EADDRNOTAVAIL;
+}
+
+/*
+ * Hash a socket that is about to connect.  If the first bind bucket on the
+ * socket's bind-hash chain is owned by sk alone, no other socket shares the
+ * local port, so sk is hashed directly.  Otherwise the full uniqueness check
+ * in tcp_v6_check_established() decides.  Returns 0 on success or the error
+ * from tcp_v6_check_established().
+ */
+static int tcp_v6_hash_connecting(struct sock *sk)
+{
+ unsigned short snum = sk->num;
+ struct tcp_bind_hashbucket *head = &tcp_bhash[tcp_bhashfn(snum)];
+ struct tcp_bind_bucket *tb;
+
+ spin_lock_bh(&head->lock);
+
+ /* Sample the chain head only while its lock is held, so the bucket
+ * cannot change underneath us before tb is dereferenced.
+ */
+ tb = head->chain;
+ if (tb != NULL && tb->owners == sk && sk->bind_next == NULL) {
+ __tcp_v6_hash(sk);
+ spin_unlock_bh(&head->lock);
+ return 0;
+ } else {
+ spin_unlock_bh(&head->lock);
+ return tcp_v6_check_established(sk);
+ }
}
static __inline__ int tcp_v6_iif(struct sk_buff *skb)
@@ -389,17 +477,10 @@
if (sk->state != TCP_CLOSE)
return(-EISCONN);
- /*
- * Don't allow a double connect.
- */
-
- if(!ipv6_addr_any(&np->daddr))
- return -EINVAL;
-
if (addr_len < sizeof(struct sockaddr_in6))
return(-EINVAL);
- if (usin->sin6_family && usin->sin6_family != AF_INET6)
+ if (usin->sin6_family != AF_INET6)
return(-EAFNOSUPPORT);
fl.fl6_flowlabel = 0;
@@ -427,15 +508,20 @@
if(addr_type & IPV6_ADDR_MULTICAST)
return -ENETUNREACH;
- /*
- * connect to self not allowed
- */
-
- if (ipv6_addr_cmp(&usin->sin6_addr, &np->saddr) == 0 &&
- usin->sin6_port == sk->sport)
- return (-EINVAL);
+ /* We may need to bind the socket. */
+ if (sk->num==0 && sk->prot->get_port(sk, 0))
+ return -EAGAIN;
+ sk->sport = htons(sk->num);
+
+#ifdef CONFIG_TCP_TW_RECYCLE
+ if (tp->ts_recent_stamp && ipv6_addr_cmp(&np->daddr, &usin->sin6_addr)) {
+ tp->ts_recent = 0;
+ tp->ts_recent_stamp = 0;
+ tp->write_seq = 0;
+ }
+#endif
- memcpy(&np->daddr, &usin->sin6_addr, sizeof(struct in6_addr));
+ ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
np->flow_label = fl.fl6_flowlabel;
/*
@@ -520,8 +606,7 @@
tp->ext_header_len = 0;
if (np->opt)
tp->ext_header_len = np->opt->opt_flen+np->opt->opt_nflen;
- /* Reset mss clamp */
- tp->mss_clamp = ~0;
+ tp->mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
err = -ENOBUFS;
buff = sock_wmalloc(sk, (MAX_HEADER + sk->prot->max_header),
@@ -532,28 +617,22 @@
sk->dport = usin->sin6_port;
- if (!tcp_v6_unique_address(sk)) {
- kfree_skb(buff);
- err = -EADDRNOTAVAIL;
- goto failure;
- }
-
/*
* Init variables
*/
- tp->write_seq = secure_tcp_sequence_number(np->saddr.s6_addr32[3],
- np->daddr.s6_addr32[3],
- sk->sport, sk->dport);
+ if (!tp->write_seq)
+ tp->write_seq = secure_tcp_sequence_number(np->saddr.s6_addr32[3],
+ np->daddr.s6_addr32[3],
+ sk->sport, sk->dport);
- tcp_connect(sk, buff, dst->pmtu);
-
- return 0;
+ err = tcp_connect(sk, buff);
+ if (err == 0)
+ return 0;
failure:
- dst_release(xchg(&sk->dst_cache, NULL));
- memset(&np->daddr, 0, sizeof(struct in6_addr));
- sk->daddr = 0;
+ __sk_dst_reset(sk);
+ sk->dport = 0;
return err;
}
@@ -562,6 +641,7 @@
struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
int retval = -EINVAL;
+ lock_sock(sk);
/*
* Do sanity checking for sendmsg/sendto/send
*/
@@ -592,6 +672,7 @@
retval = tcp_do_sendmsg(sk, msg);
out:
+ release_sock(sk);
return retval;
}
@@ -606,41 +687,46 @@
struct sock *sk;
int err;
struct tcp_opt *tp;
- __u32 seq;
+ __u32 seq;
if (header + 8 > skb->tail)
return;
sk = tcp_v6_lookup(daddr, th->dest, saddr, th->source, skb->dev->ifindex);
- if (sk == NULL || sk->state == TCP_TIME_WAIT) {
- /* XXX: Update ICMP error count */
+ if (sk == NULL) {
+ icmpv6_statistics.Icmp6InErrors++;
+ return;
+ }
+
+ if (sk->state == TCP_TIME_WAIT) {
+ tcp_tw_put((struct tcp_tw_bucket*)sk);
return;
}
+ bh_lock_sock(sk);
+ if (sk->lock.users)
+ net_statistics.LockDroppedIcmps++;
+
tp = &sk->tp_pinfo.af_tcp;
seq = ntohl(th->seq);
if (sk->state != TCP_LISTEN && !between(seq, tp->snd_una, tp->snd_nxt)) {
net_statistics.OutOfWindowIcmps++;
- return;
+ goto out;
}
np = &sk->net_pinfo.af_inet6;
+
if (type == ICMPV6_PKT_TOOBIG) {
struct dst_entry *dst = NULL;
- if (sk->state == TCP_LISTEN)
- return;
-
- bh_lock_sock(sk);
- if(sk->lock.users) {
- bh_unlock_sock(sk);
- return;
- }
+ if (sk->lock.users)
+ goto out;
+ if ((1<<sk->state)&(TCPF_LISTEN|TCPF_CLOSE))
+ goto out;
/* icmp should have updated the destination cache entry */
- if (sk->dst_cache)
- dst = dst_check(&sk->dst_cache, np->dst_cookie);
+ dst = sk_dst_check(sk, np->dst_cookie);
if (dst == NULL) {
struct flowi fl;
@@ -658,8 +744,7 @@
fl.uli_u.ports.sport = sk->sport;
dst = ip6_route_output(sk, &fl);
- } else
- dst = dst_clone(dst);
+ }
if (dst->error) {
sk->err_soft = -dst->error;
@@ -668,7 +753,7 @@
tcp_simple_retransmit(sk);
} /* else let the usual retransmit timer handle it */
dst_release(dst);
- bh_unlock_sock(sk);
+ goto out;
}
icmpv6_err_convert(type, code, &err);
@@ -678,59 +763,71 @@
struct open_request *req, *prev;
struct ipv6hdr hd;
case TCP_LISTEN:
- bh_lock_sock(sk);
- if (sk->lock.users) {
- net_statistics.LockDroppedIcmps++;
- /* If too many ICMPs get dropped on busy
- * servers this needs to be solved differently.
- */
- bh_unlock_sock(sk);
- return;
- }
+ if (sk->lock.users)
+ goto out;
/* Grrrr - fix this later. */
ipv6_addr_copy(&hd.saddr, saddr);
ipv6_addr_copy(&hd.daddr, daddr);
req = tcp_v6_search_req(tp, &hd, th, tcp_v6_iif(skb), &prev);
- if (!req || (seq != req->snt_isn)) {
- net_statistics.OutOfWindowIcmps++;
- bh_unlock_sock(sk);
- return;
- }
+ if (!req)
+ goto out;
+
if (req->sk) {
+ struct sock *nsk = req->sk;
+
+ sock_hold(nsk);
bh_unlock_sock(sk);
- sk = req->sk; /* report error in accept */
+ sock_put(sk);
+ sk = nsk;
+
+ BUG_TRAP(sk->lock.users==0);
+
+ tp = &sk->tp_pinfo.af_tcp;
+ if (!between(seq, tp->snd_una, tp->snd_nxt)) {
+ net_statistics.OutOfWindowIcmps++;
+ goto out;
+ }
} else {
+ if (seq != req->snt_isn) {
+ net_statistics.OutOfWindowIcmps++;
+ goto out;
+ }
+
tp->syn_backlog--;
tcp_synq_unlink(tp, req, prev);
+ tcp_dec_slow_timer(TCP_SLT_SYNACK);
req->class->destructor(req);
tcp_openreq_free(req);
- bh_unlock_sock(sk);
+ goto out;
}
-
- /* FALL THROUGH */
+ break;
case TCP_SYN_SENT:
- case TCP_SYN_RECV: /* Cannot happen */
- tcp_statistics.TcpAttemptFails++;
- sk->err = err;
- sk->zapped = 1;
- mb();
- sk->error_report(sk);
- return;
+ case TCP_SYN_RECV: /* Cannot happen.
+ It can, if SYNs are crossed. --ANK */
+ if (sk->lock.users == 0) {
+ tcp_statistics.TcpAttemptFails++;
+ sk->err = err;
+ sk->error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
+
+ tcp_set_state(sk, TCP_CLOSE);
+ tcp_done(sk);
+ } else {
+ sk->err_soft = err;
+ }
+ goto out;
}
- if (np->recverr) {
- /* This code isn't serialized with the socket code */
- /* ANK (980927) ... which is harmless now,
- sk->err's may be safely lost.
- */
+ if (sk->lock.users == 0 && np->recverr) {
sk->err = err;
- mb();
sk->error_report(sk);
} else {
sk->err_soft = err;
- mb();
}
+
+out:
+ bh_unlock_sock(sk);
+ sock_put(sk);
}
@@ -740,7 +837,6 @@
struct dst_entry *dst;
struct ipv6_txoptions *opt = NULL;
struct flowi fl;
- int mss;
fl.proto = IPPROTO_TCP;
fl.nl_u.ip6_u.daddr = &req->af.v6_req.rmt_addr;
@@ -769,9 +865,7 @@
if (dst->error)
goto done;
- mss = dst->pmtu - sizeof(struct ipv6hdr) - sizeof(struct tcphdr);
-
- skb = tcp_make_synack(sk, dst, req, mss);
+ skb = tcp_make_synack(sk, dst, req);
if (skb) {
struct tcphdr *th = skb->h.th;
@@ -798,7 +892,9 @@
}
+/*
+ * open_request operations for native IPv6 connection requests.  The
+ * initializer is positional; the member names belong to struct
+ * or_calltable, which is declared outside this file.
+ */
static struct or_calltable or_ipv6 = {
+ AF_INET6, /* presumably the family tested in tcp_v6_search_req() - confirm */
tcp_v6_send_synack,
+ tcp_v6_or_send_ack,
tcp_v6_or_free,
tcp_v6_send_reset
};
@@ -825,20 +921,14 @@
/* FIXME: this is substantially similar to the ipv4 code.
* Can some kind of merge be done? -- erics
*/
-static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb, __u32 isn)
+static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
struct tcp_opt tp;
struct open_request *req;
-
- /* If the socket is dead, don't accept the connection. */
- if (sk->dead) {
- SOCK_DEBUG(sk, "Reset on %p: Connect on dead socket.\n", sk);
- tcp_statistics.TcpAttemptFails++;
- return -ENOTCONN;
- }
+ __u32 isn = TCP_SKB_CB(skb)->when;
if (skb->protocol == __constant_htons(ETH_P_IP))
- return tcp_v4_conn_request(sk, skb, isn);
+ return tcp_v4_conn_request(sk, skb);
/* FIXME: do the same check for anycast */
if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr))
@@ -869,17 +959,15 @@
req->rcv_isn = TCP_SKB_CB(skb)->seq;
req->snt_isn = isn;
tp.tstamp_ok = tp.sack_ok = tp.wscale_ok = tp.snd_wscale = 0;
- tp.mss_clamp = 65535;
+
+ tp.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
+ tp.user_mss = sk->tp_pinfo.af_tcp.user_mss;
+
tcp_parse_options(NULL, skb->h.th, &tp, 0);
- if (tp.mss_clamp == 65535)
- tp.mss_clamp = 576 - sizeof(struct ipv6hdr) - sizeof(struct iphdr);
- if (sk->tp_pinfo.af_tcp.user_mss && sk->tp_pinfo.af_tcp.user_mss < tp.mss_clamp)
- tp.mss_clamp = sk->tp_pinfo.af_tcp.user_mss;
-
- req->mss = tp.mss_clamp;
- if (tp.saw_tstamp)
- req->ts_recent = tp.rcv_tsval;
- req->tstamp_ok = tp.tstamp_ok;
+
+ req->mss = tp.mss_clamp;
+ req->ts_recent = tp.saw_tstamp ? tp.rcv_tsval : 0;
+ req->tstamp_ok = tp.tstamp_ok;
req->sack_ok = tp.sack_ok;
req->snd_wscale = tp.snd_wscale;
req->wscale_ok = tp.wscale_ok;
@@ -887,7 +975,9 @@
ipv6_addr_copy(&req->af.v6_req.rmt_addr, &skb->nh.ipv6h->saddr);
ipv6_addr_copy(&req->af.v6_req.loc_addr, &skb->nh.ipv6h->daddr);
req->af.v6_req.pktopts = NULL;
- if (ipv6_opt_accepted(sk, skb)) {
+ if (ipv6_opt_accepted(sk, skb) ||
+ sk->net_pinfo.af_inet6.rxopt.bits.rxinfo ||
+ sk->net_pinfo.af_inet6.rxopt.bits.rxhlim) {
atomic_inc(&skb->users);
req->af.v6_req.pktopts = skb;
}
@@ -944,7 +1034,7 @@
if (newsk == NULL)
return NULL;
-
+
np = &newsk->net_pinfo.af_inet6;
ipv6_addr_set(&np->daddr, 0, 0, __constant_htonl(0x0000FFFF),
@@ -959,6 +1049,14 @@
newsk->backlog_rcv = tcp_v4_do_rcv;
newsk->net_pinfo.af_inet6.pktoptions = NULL;
newsk->net_pinfo.af_inet6.opt = NULL;
+ newsk->net_pinfo.af_inet6.mcast_oif = tcp_v6_iif(skb);
+ newsk->net_pinfo.af_inet6.mcast_hops = skb->nh.ipv6h->hop_limit;
+
+ /* Charge newly allocated IPv6 socket. Though it is mapped,
+ * it is IPv6 yet.
+ */
+ atomic_inc(&inet6_sock_nr);
+ MOD_INC_USE_COUNT;
/* It is tricky place. Until this moment IPv4 tcp
worked with IPv6 af_tcp.af_specific.
@@ -1007,6 +1105,10 @@
if (newsk == NULL)
goto out;
+ /* Charge newly allocated IPv6 socket */
+ atomic_inc(&inet6_sock_nr);
+ MOD_INC_USE_COUNT;
+
ip6_dst_store(newsk, dst, NULL);
newtp = &(newsk->tp_pinfo.af_tcp);
@@ -1021,16 +1123,21 @@
First: no IPv4 options.
*/
- newsk->opt = NULL;
+ newsk->protinfo.af_inet.opt = NULL;
/* Clone RX bits */
np->rxopt.all = sk->net_pinfo.af_inet6.rxopt.all;
/* Clone pktoptions received with SYN */
- np->pktoptions = req->af.v6_req.pktopts;
- if (np->pktoptions)
- atomic_inc(&np->pktoptions->users);
+ np->pktoptions = NULL;
+ if (req->af.v6_req.pktopts) {
+ np->pktoptions = skb_clone(req->af.v6_req.pktopts, GFP_ATOMIC);
+ if (np->pktoptions)
+ skb_set_owner_r(np->pktoptions, newsk);
+ }
np->opt = NULL;
+ np->mcast_oif = tcp_v6_iif(skb);
+ np->mcast_hops = skb->nh.ipv6h->hop_limit;
/* Clone native IPv6 options from listening socket (if any)
@@ -1049,15 +1156,21 @@
newtp->ext_header_len = np->opt->opt_nflen + np->opt->opt_flen;
tcp_sync_mss(newsk, dst->pmtu);
- newtp->rcv_mss = newtp->mss_clamp;
+ tcp_initialize_rcv_mss(newsk);
+
+ if (newsk->rcvbuf < (3 * (dst->advmss+60+MAX_HEADER+15)))
+ newsk->rcvbuf = min ((3 * (dst->advmss+60+MAX_HEADER+15)), sysctl_rmem_max);
+ if (newsk->sndbuf < (3 * (newtp->mss_clamp+60+MAX_HEADER+15)))
+ newsk->sndbuf = min ((3 * (newtp->mss_clamp+60+MAX_HEADER+15)), sysctl_wmem_max);
newsk->daddr = LOOPBACK4_IPV6;
newsk->saddr = LOOPBACK4_IPV6;
newsk->rcv_saddr= LOOPBACK4_IPV6;
- newsk->prot->hash(newsk);
+ bh_lock_sock(newsk);
+
+ __tcp_v6_hash(newsk);
tcp_inherit_port(sk, newsk);
- sk->data_ready(sk, 0); /* Deliver SIGIO */
return newsk;
@@ -1104,10 +1217,8 @@
t1->seq = th->ack_seq;
} else {
t1->ack = 1;
- if(!th->syn)
- t1->ack_seq = th->seq;
- else
- t1->ack_seq = htonl(ntohl(th->seq)+1);
+ t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
+ + skb->len - (th->doff<<2));
}
buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);
@@ -1139,6 +1250,85 @@
kfree_skb(buff);
}
+/*
+ * Build and transmit a bare ACK in reply to skb without using a socket.
+ * Ports and addresses are taken from skb and swapped.  seq, ack, win and ts
+ * are passed in host byte order and converted here.  When ts is non-zero a
+ * 12-byte option block (NOP, NOP, TIMESTAMP) is appended, carrying
+ * tcp_time_stamp followed by ts.  Nothing is sent if the buffer cannot be
+ * allocated or the route lookup reports an error.
+ */
+static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
+{
+ struct tcphdr *th = skb->h.th, *t1;
+ struct sk_buff *buff;
+ struct flowi fl;
+ int tot_len = sizeof(struct tcphdr);
+
+ buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr), GFP_ATOMIC);
+ if (buff == NULL)
+ return;
+
+ /* Reserve the whole buffer as headroom; the TCP header is pushed into it. */
+ skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr));
+
+ if (ts)
+ tot_len += 3*4;
+
+ t1 = (struct tcphdr *) skb_push(buff,tot_len);
+
+ /* Swap the send and the receive. */
+ memset(t1, 0, sizeof(*t1));
+ t1->dest = th->source;
+ t1->source = th->dest;
+ t1->doff = tot_len/4;
+ t1->seq = htonl(seq);
+ t1->ack_seq = htonl(ack);
+ t1->ack = 1;
+ t1->window = htons(win);
+
+ if (ts) {
+ /* Options start immediately after the fixed TCP header. */
+ u32 *ptr = (u32*)(t1 + 1);
+ *ptr++ = __constant_htonl((TCPOPT_NOP << 24) |
+ (TCPOPT_NOP << 16) |
+ (TCPOPT_TIMESTAMP << 8) |
+ TCPOLEN_TIMESTAMP);
+ *ptr++ = htonl(tcp_time_stamp);
+ *ptr = htonl(ts);
+ }
+
+ buff->csum = csum_partial((char *)t1, tot_len, 0);
+
+ /* The reply goes back to the sender of skb, from skb's destination. */
+ fl.nl_u.ip6_u.daddr = &skb->nh.ipv6h->saddr;
+ fl.nl_u.ip6_u.saddr = &skb->nh.ipv6h->daddr;
+ fl.fl6_flowlabel = 0;
+
+ t1->check = csum_ipv6_magic(fl.nl_u.ip6_u.saddr,
+ fl.nl_u.ip6_u.daddr,
+ tot_len, IPPROTO_TCP,
+ buff->csum);
+
+ fl.proto = IPPROTO_TCP;
+ fl.oif = tcp_v6_iif(skb);
+ fl.uli_u.ports.dport = t1->dest;
+ fl.uli_u.ports.sport = t1->source;
+
+ buff->dst = ip6_route_output(NULL, &fl);
+
+ if (buff->dst->error == 0) {
+ ip6_xmit(NULL, buff, &fl, NULL);
+ tcp_statistics.TcpOutSegs++;
+ return;
+ }
+
+ /* Routing failed: drop the reply. */
+ kfree_skb(buff);
+}
+
+/*
+ * Answer skb with an ACK built from the TIME_WAIT bucket's snd_nxt, rcv_nxt
+ * and ts_recent, advertising a zero window, then drop the caller's
+ * reference on the bucket.  sk must actually be a struct tcp_tw_bucket.
+ */
+static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
+{
+ struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;
+
+ tcp_v6_send_ack(skb, tw->snd_nxt, tw->rcv_nxt, 0, tw->ts_recent);
+
+ tcp_tw_put(tw);
+}
+
+/*
+ * Answer skb with an ACK on behalf of the pending connection request req:
+ * sequence snt_isn+1, acknowledgment rcv_isn+1, and the request's rcv_wnd
+ * and ts_recent.  Listed in the or_ipv6 operations table.
+ */
+static void tcp_v6_or_send_ack(struct sk_buff *skb, struct open_request *req)
+{
+ tcp_v6_send_ack(skb, req->snt_isn+1, req->rcv_isn+1, req->rcv_wnd, req->ts_recent);
+}
+
static struct open_request *tcp_v6_search_req(struct tcp_opt *tp,
struct ipv6hdr *ip6h,
struct tcphdr *th,
@@ -1154,10 +1344,20 @@
*/
prev = (struct open_request *) (&tp->syn_wait_queue);
for (req = prev->dl_next; req; req = req->dl_next) {
- if (!ipv6_addr_cmp(&req->af.v6_req.rmt_addr, &ip6h->saddr) &&
+ if (req->rmt_port == rport &&
+ req->class->family == AF_INET6 &&
+ !ipv6_addr_cmp(&req->af.v6_req.rmt_addr, &ip6h->saddr) &&
!ipv6_addr_cmp(&req->af.v6_req.loc_addr, &ip6h->daddr) &&
- req->rmt_port == rport &&
(!req->af.v6_req.iif || req->af.v6_req.iif == iif)) {
+ if (req->sk) {
+ bh_lock_sock(req->sk);
+ BUG_TRAP(req->sk->lock.users==0);
+ if (req->sk->state == TCP_CLOSE) {
+ bh_unlock_sock(req->sk);
+ prev = req;
+ continue;
+ }
+ }
*prevp = prev;
return req;
}
@@ -1166,57 +1366,44 @@
return NULL;
}
-static void tcp_v6_rst_req(struct sock *sk, struct sk_buff *skb)
+
+static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
{
- struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
struct open_request *req, *prev;
+ struct tcphdr *th = skb->h.th;
+ struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
- req = tcp_v6_search_req(tp,skb->nh.ipv6h,skb->h.th,tcp_v6_iif(skb),&prev);
- if (!req)
- return;
- /* Sequence number check required by RFC793 */
- if (before(TCP_SKB_CB(skb)->seq, req->rcv_isn) ||
- after(TCP_SKB_CB(skb)->seq, req->rcv_isn+1))
- return;
- if(req->sk)
- sk->ack_backlog--;
- else
- tp->syn_backlog--;
- tcp_synq_unlink(tp, req, prev);
- req->class->destructor(req);
- tcp_openreq_free(req);
- net_statistics.EmbryonicRsts++;
-}
-
-static inline struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb)
-{
- struct tcphdr *th = skb->h.th;
- u32 flg = ((u32 *)th)[3];
-
- /* Check for RST */
- if (flg & __constant_htonl(0x00040000)) {
- tcp_v6_rst_req(sk, skb);
- return NULL;
- }
-
- /* Check SYN|ACK */
- if (flg & __constant_htonl(0x00120000)) {
- struct open_request *req, *dummy;
- struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
-
- req = tcp_v6_search_req(tp, skb->nh.ipv6h, th, tcp_v6_iif(skb), &dummy);
- if (req) {
- sk = tcp_check_req(sk, skb, req);
- }
-#if 0 /*def CONFIG_SYN_COOKIES */
- else {
- sk = cookie_v6_check(sk, skb);
- }
+ /* Find possible connection requests. */
+ req = tcp_v6_search_req(tp, skb->nh.ipv6h, th, tcp_v6_iif(skb), &prev);
+ if (req)
+ return tcp_check_req(sk, skb, req, prev);
+
+#if 0 /*def CONFIG_SYN_COOKIES*/
+ if (!th->rst && (th->syn || th->ack))
+ sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
#endif
- }
return sk;
}
+
+/*
+ * Verify the TCP checksum of skb according to skb->ip_summed.  Returns 1
+ * (after logging a debug message) when the checksum is bad and 0 otherwise.
+ * A segment that verifies is marked CHECKSUM_UNNECESSARY, so calling this
+ * again on the same skb takes the default branch and returns 0.
+ */
+static int tcp_v6_csum_verify(struct sk_buff *skb)
+{
+ switch (skb->ip_summed) {
+ case CHECKSUM_NONE:
+ /* No checksum available yet: compute it over the TCP segment. */
+ skb->csum = csum_partial((char *)skb->h.th, skb->len, 0);
+ /* fall through - verify the sum just computed */
+ case CHECKSUM_HW:
+ if (tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
+ &skb->nh.ipv6h->daddr,skb->csum)) {
+ printk(KERN_DEBUG "tcp v6 csum failed\n");
+ return 1;
+ }
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ /* fall through */
+ default:
+ /* CHECKSUM_UNNECESSARY */
+ };
+ return 0;
+}
+
/* The socket must have it's spinlock held when we get
* here.
*
@@ -1230,7 +1417,7 @@
#ifdef CONFIG_FILTER
struct sk_filter *filter;
#endif
- int users = 0, need_unlock = 0;
+ int users = 0;
/* Imagine: socket is IPv6. IPv4 packet arrives,
goes to IPv4 receive handler and backlogged.
@@ -1282,6 +1469,9 @@
}
if (sk->state == TCP_ESTABLISHED) { /* Fast path */
+ /* Ready to move deeper ... */
+ if (tcp_v6_csum_verify(skb))
+ goto csum_err;
if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
goto reset;
if (users)
@@ -1289,6 +1479,9 @@
return 0;
}
+ if (tcp_v6_csum_verify(skb))
+ goto csum_err;
+
if (sk->state == TCP_LISTEN) {
struct sock *nsk;
@@ -1302,15 +1495,24 @@
* the new socket..
*/
if(nsk != sk) {
- bh_lock_sock(nsk);
- if (nsk->lock.users) {
- skb_orphan(skb);
- sk_add_backlog(nsk, skb);
- bh_unlock_sock(nsk);
- return 0;
- }
- need_unlock = 1;
- sk = nsk;
+ int ret;
+ int state = nsk->state;
+
+ skb_orphan(skb);
+ BUG_TRAP(nsk->lock.users == 0);
+ skb_set_owner_r(skb, nsk);
+ ret = tcp_rcv_state_process(nsk, skb, skb->h.th, skb->len);
+
+ /* Wakeup parent, send SIGIO */
+ if (state == TCP_SYN_RECV && nsk->state != state)
+ sk->data_ready(sk, 0);
+ bh_unlock_sock(nsk);
+
+ if (ret)
+ goto reset;
+ if (users)
+ kfree_skb(skb);
+ return 0;
}
}
@@ -1318,7 +1520,7 @@
goto reset;
if (users)
goto ipv6_pktoptions;
- goto out_maybe_unlock;
+ return 0;
reset:
tcp_v6_send_reset(skb);
@@ -1326,7 +1528,11 @@
if (users)
kfree_skb(skb);
kfree_skb(skb);
- goto out_maybe_unlock;
+ return 0;
+csum_err:
+ tcp_statistics.TcpInErrs++;
+ goto discard;
+
ipv6_pktoptions:
/* Do you ask, what is it?
@@ -1339,6 +1545,10 @@
if (atomic_read(&skb->users) > users &&
TCP_SKB_CB(skb)->end_seq == sk->tp_pinfo.af_tcp.rcv_nxt &&
!((1<<sk->state)&(TCPF_CLOSE|TCPF_LISTEN))) {
+ if (sk->net_pinfo.af_inet6.rxopt.bits.rxinfo)
+ sk->net_pinfo.af_inet6.mcast_oif = tcp_v6_iif(skb);
+ if (sk->net_pinfo.af_inet6.rxopt.bits.rxhlim)
+ sk->net_pinfo.af_inet6.mcast_hops = skb->nh.ipv6h->hop_limit;
if (ipv6_opt_accepted(sk, skb)) {
struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
kfree_skb(skb);
@@ -1355,9 +1565,6 @@
if (skb)
kfree_skb(skb);
-out_maybe_unlock:
- if (need_unlock)
- bh_unlock_sock(sk);
return 0;
}
@@ -1389,36 +1596,21 @@
if (len < sizeof(struct tcphdr))
goto bad_packet;
- /*
- * Try to use the device checksum if provided.
- */
-
- switch (skb->ip_summed) {
- case CHECKSUM_NONE:
- skb->csum = csum_partial((char *)th, len, 0);
- case CHECKSUM_HW:
- if (tcp_v6_check(th,len,saddr,daddr,skb->csum)) {
- printk(KERN_DEBUG "tcp csum failed\n");
- bad_packet:
- tcp_statistics.TcpInErrs++;
- goto discard_it;
- }
- default:
- /* CHECKSUM_UNNECESSARY */
- };
-
- SOCKHASH_LOCK_READ_BH();
- sk = __tcp_v6_lookup(saddr, th->source, daddr, th->dest, tcp_v6_iif(skb));
- SOCKHASH_UNLOCK_READ_BH();
-
- if (!sk)
- goto no_tcp_socket;
-
TCP_SKB_CB(skb)->seq = ntohl(th->seq);
TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
len - th->doff*4);
TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
+ TCP_SKB_CB(skb)->when = 0;
skb->used = 0;
+
+ sk = __tcp_v6_lookup(saddr, th->source, daddr, ntohs(th->dest), tcp_v6_iif(skb));
+
+ if (!sk)
+ goto no_tcp_socket;
+
+process:
+ if(!ipsec_sk_policy(sk,skb))
+ goto discard_and_relse;
if(sk->state == TCP_TIME_WAIT)
goto do_time_wait;
@@ -1430,10 +1622,16 @@
sk_add_backlog(sk, skb);
bh_unlock_sock(sk);
+ sock_put(sk);
return ret;
no_tcp_socket:
- tcp_v6_send_reset(skb);
+ if (tcp_v6_csum_verify(skb)) {
+bad_packet:
+ tcp_statistics.TcpInErrs++;
+ } else {
+ tcp_v6_send_reset(skb);
+ }
discard_it:
@@ -1444,20 +1642,50 @@
kfree_skb(skb);
return 0;
+discard_and_relse:
+ sock_put(sk);
+ goto discard_it;
+
do_time_wait:
- if(tcp_timewait_state_process((struct tcp_tw_bucket *)sk,
- skb, th, skb->len))
+ if (tcp_v6_csum_verify(skb)) {
+ tcp_statistics.TcpInErrs++;
+ sock_put(sk);
+ goto discard_it;
+ }
+
+ switch(tcp_timewait_state_process((struct tcp_tw_bucket *)sk,
+ skb, th, skb->len)) {
+ case TCP_TW_SYN:
+ {
+ struct sock *sk2;
+
+ sk2 = tcp_v6_lookup_listener(&skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
+ if (sk2 != NULL) {
+ tcp_tw_deschedule((struct tcp_tw_bucket *)sk);
+ tcp_timewait_kill((struct tcp_tw_bucket *)sk);
+ tcp_tw_put((struct tcp_tw_bucket *)sk);
+ sk = sk2;
+ goto process;
+ }
+ /* Fall through to ACK */
+ }
+ case TCP_TW_ACK:
+ tcp_v6_timewait_ack(sk, skb);
+ break;
+ case TCP_TW_RST:
goto no_tcp_socket;
+ case TCP_TW_SUCCESS:
+ }
goto discard_it;
}
static int tcp_v6_rebuild_header(struct sock *sk)
{
- struct dst_entry *dst = NULL;
+ int err;
+ struct dst_entry *dst;
struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
- if (sk->dst_cache)
- dst = dst_check(&sk->dst_cache, np->dst_cookie);
+ dst = sk_dst_check(sk, np->dst_cookie);
if (dst == NULL) {
struct flowi fl;
@@ -1475,39 +1703,29 @@
fl.nl_u.ip6_u.daddr = rt0->addr;
}
-
dst = ip6_route_output(sk, &fl);
if (dst->error) {
+ err = dst->error;
dst_release(dst);
- return dst->error;
+ return err;
}
ip6_dst_store(sk, dst, NULL);
+ return 0;
}
- return dst->error;
-}
-
-static struct sock * tcp_v6_get_sock(struct sk_buff *skb, struct tcphdr *th)
-{
- struct in6_addr *saddr;
- struct in6_addr *daddr;
-
- if (skb->protocol == __constant_htons(ETH_P_IP))
- return ipv4_specific.get_sock(skb, th);
-
- saddr = &skb->nh.ipv6h->saddr;
- daddr = &skb->nh.ipv6h->daddr;
- return tcp_v6_lookup(saddr, th->source, daddr, th->dest, tcp_v6_iif(skb));
+ err = dst->error;
+ dst_release(dst);
+ return err;
}
-static void tcp_v6_xmit(struct sk_buff *skb)
+static int tcp_v6_xmit(struct sk_buff *skb)
{
struct sock *sk = skb->sk;
struct ipv6_pinfo * np = &sk->net_pinfo.af_inet6;
struct flowi fl;
- struct dst_entry *dst = sk->dst_cache;
+ struct dst_entry *dst;
fl.proto = IPPROTO_TCP;
fl.fl6_dst = &np->daddr;
@@ -1522,8 +1740,7 @@
fl.nl_u.ip6_u.daddr = rt0->addr;
}
- if (sk->dst_cache)
- dst = dst_check(&sk->dst_cache, np->dst_cookie);
+ dst = sk_dst_check(sk, np->dst_cookie);
if (dst == NULL) {
dst = ip6_route_output(sk, &fl);
@@ -1531,18 +1748,19 @@
if (dst->error) {
sk->err_soft = -dst->error;
dst_release(dst);
- return;
+ return -sk->err_soft;
}
+ dst_clone(dst);
ip6_dst_store(sk, dst, NULL);
}
- skb->dst = dst_clone(dst);
+ skb->dst = dst;
/* Restore final destination back after routing done */
fl.nl_u.ip6_u.daddr = &np->daddr;
- ip6_xmit(sk, skb, &fl, np->opt);
+ return ip6_xmit(sk, skb, &fl, np->opt);
}
static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
@@ -1563,7 +1781,7 @@
tcp_v6_rebuild_header,
tcp_v6_conn_request,
tcp_v6_syn_recv_sock,
- tcp_v6_get_sock,
+ tcp_v6_hash_connecting,
sizeof(struct ipv6hdr),
ipv6_setsockopt,
@@ -1582,7 +1800,7 @@
tcp_v4_rebuild_header,
tcp_v6_conn_request,
tcp_v6_syn_recv_sock,
- tcp_v6_get_sock,
+ tcp_v4_hash_connecting,
sizeof(struct iphdr),
ipv6_setsockopt,
@@ -1591,6 +1809,8 @@
sizeof(struct sockaddr_in6)
};
+
+
/* NOTE: A lot of things set to zero explicitly by call to
* sk_alloc() so need not be done here.
*/
@@ -1601,9 +1821,8 @@
skb_queue_head_init(&tp->out_of_order_queue);
tcp_init_xmit_timers(sk);
- tp->rto = TCP_TIMEOUT_INIT; /*TCP_WRITE_TIME*/
+ tp->rto = TCP_TIMEOUT_INIT;
tp->mdev = TCP_TIMEOUT_INIT;
- tp->mss_clamp = ~0;
/* So many TCP implementations out there (incorrectly) count the
* initial SYN frame in their delayed-ACK and congestion control
@@ -1617,10 +1836,11 @@
*/
tp->snd_cwnd_cnt = 0;
tp->snd_ssthresh = 0x7fffffff;
+ tp->snd_cwnd_clamp = ~0;
+ tp->mss_cache = 536;
sk->state = TCP_CLOSE;
sk->max_ack_backlog = SOMAXCONN;
- tp->rcv_mss = 536;
/* Init SYN queue. */
tcp_synq_init(tp);
@@ -1639,9 +1859,6 @@
tcp_clear_xmit_timers(sk);
- if (sk->keepopen)
- tcp_dec_slow_timer(TCP_SLT_KEEPALIVE);
-
/*
* Cleanup up the write buffer.
*/
@@ -1674,7 +1891,7 @@
dest = &req->af.v6_req.rmt_addr;
sprintf(tmpbuf,
"%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
- "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d",
+ "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p",
i,
src->s6_addr32[0], src->s6_addr32[1],
src->s6_addr32[2], src->s6_addr32[3],
@@ -1689,8 +1906,8 @@
req->retrans,
sk->socket ? sk->socket->inode->i_uid : 0,
0, /* non standard timer */
- 0 /* open_requests have no inode */
- );
+ 0, /* open_requests have no inode */
+ 0, req);
}
static void get_tcp6_sock(struct sock *sp, char *tmpbuf, int i)
@@ -1722,7 +1939,7 @@
sprintf(tmpbuf,
"%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
- "%02X %08X:%08X %02X:%08lX %08X %5d %8d %ld",
+ "%02X %08X:%08X %02X:%08lX %08X %5d %8d %ld %d %p",
i,
src->s6_addr32[0], src->s6_addr32[1],
src->s6_addr32[2], src->s6_addr32[3], srcp,
@@ -1733,13 +1950,13 @@
timer_active, timer_expires-jiffies,
tp->retransmits,
sp->socket ? sp->socket->inode->i_uid : 0,
- timer_active ? sp->timeout : 0,
- sp->socket ? sp->socket->inode->i_ino : 0);
+ 0,
+ sp->socket ? sp->socket->inode->i_ino : 0,
+ atomic_read(&sp->refcnt), sp);
}
static void get_timewait6_sock(struct tcp_tw_bucket *tw, char *tmpbuf, int i)
{
- extern int tcp_tw_death_row_slot;
struct in6_addr *dest, *src;
__u16 destp, srcp;
int slot_dist;
@@ -1757,24 +1974,28 @@
sprintf(tmpbuf,
"%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
- "%02X %08X:%08X %02X:%08X %08X %5d %8d %d",
+ "%02X %08X:%08X %02X:%08X %08X %5d %8d %d %d %p",
i,
src->s6_addr32[0], src->s6_addr32[1],
src->s6_addr32[2], src->s6_addr32[3], srcp,
dest->s6_addr32[0], dest->s6_addr32[1],
dest->s6_addr32[2], dest->s6_addr32[3], destp,
TCP_TIME_WAIT, 0, 0,
- 3, slot_dist * TCP_TWKILL_PERIOD, 0, 0, 0, 0);
+ 3, slot_dist * TCP_TWKILL_PERIOD, 0, 0, 0, 0,
+ atomic_read(&tw->refcnt), tw);
}
+#define LINE_LEN 190
+#define LINE_FMT "%-190s\n"
+
int tcp6_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
{
int len = 0, num = 0, i;
off_t begin, pos = 0;
- char tmpbuf[150];
+ char tmpbuf[LINE_LEN+2];
- if(offset < 149)
- len += sprintf(buffer, "%-148s\n",
+ if(offset < LINE_LEN+1)
+ len += sprintf(buffer, LINE_FMT,
" sl " /* 6 */
"local_address " /* 38 */
"remote_address " /* 38 */
@@ -1783,10 +2004,10 @@
/*----*/
/*144 */
- pos = 149;
- SOCKHASH_LOCK_READ();
+ pos = LINE_LEN+1;
/* First, walk listening socket table. */
+ tcp_listen_lock();
for(i = 0; i < TCP_LHTABLE_SIZE; i++) {
struct sock *sk = tcp_listening_hash[i];
@@ -1796,64 +2017,81 @@
if (sk->family != PF_INET6)
continue;
- pos += 149;
+ pos += LINE_LEN+1;
if (pos >= offset) {
get_tcp6_sock(sk, tmpbuf, num);
- len += sprintf(buffer+len, "%-148s\n", tmpbuf);
- if (len >= length)
- goto out;
+ len += sprintf(buffer+len, LINE_FMT, tmpbuf);
+ if (len >= length) {
+ tcp_listen_unlock();
+ goto out_no_bh;
+ }
}
+
+ lock_sock(sk);
for (req = tp->syn_wait_queue; req; req = req->dl_next, num++) {
if (req->sk)
continue;
- pos += 149;
+ if (req->class->family != PF_INET6)
+ continue;
+ pos += LINE_LEN+1;
if (pos < offset)
continue;
get_openreq6(sk, req, tmpbuf, num);
- len += sprintf(buffer+len, "%-148s\n", tmpbuf);
- if(len >= length)
- goto out;
+ len += sprintf(buffer+len, LINE_FMT, tmpbuf);
+ if(len >= length) {
+ release_sock(sk);
+ tcp_listen_unlock();
+ goto out_no_bh;
+ }
}
+ release_sock(sk);
}
}
+ tcp_listen_unlock();
+
+ local_bh_disable();
/* Next, walk established hash chain. */
- for (i = 0; i < (tcp_ehash_size >> 1); i++) {
+ for (i = 0; i < tcp_ehash_size; i++) {
+ struct tcp_ehash_bucket *head = &tcp_ehash[i];
struct sock *sk;
+ struct tcp_tw_bucket *tw;
- for(sk = tcp_ehash[i]; sk; sk = sk->next, num++) {
+ read_lock(&head->lock);
+ for(sk = head->chain; sk; sk = sk->next, num++) {
if (sk->family != PF_INET6)
continue;
- pos += 149;
+ pos += LINE_LEN+1;
if (pos < offset)
continue;
get_tcp6_sock(sk, tmpbuf, num);
- len += sprintf(buffer+len, "%-148s\n", tmpbuf);
- if(len >= length)
+ len += sprintf(buffer+len, LINE_FMT, tmpbuf);
+ if(len >= length) {
+ read_unlock(&head->lock);
goto out;
+ }
}
- }
-
- /* Finally, walk time wait buckets. */
- for (i = (tcp_ehash_size>>1); i < tcp_ehash_size; i++) {
- struct tcp_tw_bucket *tw;
- for (tw = (struct tcp_tw_bucket *)tcp_ehash[i];
+ for (tw = (struct tcp_tw_bucket *)tcp_ehash[i+tcp_ehash_size].chain;
tw != NULL;
tw = (struct tcp_tw_bucket *)tw->next, num++) {
if (tw->family != PF_INET6)
continue;
- pos += 149;
+ pos += LINE_LEN+1;
if (pos < offset)
continue;
get_timewait6_sock(tw, tmpbuf, num);
- len += sprintf(buffer+len, "%-148s\n", tmpbuf);
- if(len >= length)
+ len += sprintf(buffer+len, LINE_FMT, tmpbuf);
+ if(len >= length) {
+ read_unlock(&head->lock);
goto out;
+ }
}
+ read_unlock(&head->lock);
}
out:
- SOCKHASH_UNLOCK_READ();
+ local_bh_enable();
+out_no_bh:
begin = len - (pos - offset);
*start = buffer + begin;
@@ -1868,6 +2106,7 @@
struct proto tcpv6_prot = {
tcp_close, /* close */
tcp_v6_connect, /* connect */
+ tcp_disconnect, /* disconnect */
tcp_accept, /* accept */
NULL, /* retransmit */
tcp_write_wakeup, /* write_wakeup */
@@ -1884,7 +2123,7 @@
NULL, /* bind */
tcp_v6_do_rcv, /* backlog_rcv */
tcp_v6_hash, /* hash */
- tcp_v6_unhash, /* unhash */
+ tcp_unhash, /* unhash */
tcp_v6_get_port, /* get_port */
128, /* max_header */
0, /* retransmits */
FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)