patch-2.0.31 linux/net/ipv4/tcp.c

Next file: linux/net/ipv4/tcp_input.c
Previous file: linux/net/ipv4/sysctl_net_ipv4.c
Back to the patch index
Back to the overall index

diff -u --recursive --new-file v2.0.30/linux/net/ipv4/tcp.c linux/net/ipv4/tcp.c
@@ -205,6 +205,7 @@
  *		Theodore Ts'o	:	Do secure TCP sequence numbers.
  *		David S. Miller	:	New socket lookup architecture for ISS.
  *					This code is dedicated to John Dyson.
+ *		Elliot Poger	:	Added support for SO_BINDTODEVICE.
  *					
  * To Fix:
  *		Fast path the code. Two things here - fix the window calculation
@@ -471,6 +472,11 @@
 			unsigned char state = sk2->state;
 			int sk2_reuse = sk2->reuse;
 
+			/* Two sockets can be bound to the same port if they're
+			 * bound to different interfaces... */
+			if (sk->bound_device != sk2->bound_device)
+				continue;
+
 			if(!sk2->rcv_saddr || !sk->rcv_saddr) {
 				if((!sk2_reuse)			||
 				   (!sk_reuse)			||
@@ -527,45 +533,56 @@
 	end = i + TCP_BHTABLE_SIZE;
 	bc = binding_contour;
 	do {
-		struct sock *sk = tcp_bound_hash[tcp_bhashfn(i)];
+		struct sock *sk = tcp_bound_hash[i&(TCP_BHTABLE_SIZE-1)];
 		if(!sk) {
-			retval = (start + i);
-			start  = (retval + 1);
+			/* find the smallest value no smaller than start
+			 * that has this hash value.
+			 */
+			retval = tcp_bhashnext(start-1,i&(TCP_BHTABLE_SIZE-1));
 
 			/* Check for decreasing load. */
-			if(bc != 0)
+			if (bc != 0)
 				binding_contour = 0;
 			goto done;
 		} else {
 			int j = 0;
-			do { sk = sk->bind_next; } while(++j < size && sk);
-			if(j < size) {
-				best = (start + i);
+			do { sk = sk->bind_next; } while (++j < size && sk);
+			if (j < size) {
+				best = i&(TCP_BHTABLE_SIZE-1);
 				size = j;
-				if(bc && size <= bc) {
-					start = best + 1;
+				if (bc && size <= bc) {
+					i = best;
 					goto verify;
 				}
 			}
 		}
 	} while(++i != end);
+	i = best;
 
 	/* Socket load is increasing, adjust our load average. */
 	binding_contour = size;
 verify:
-	if(size < binding_contour)
+	if (size < binding_contour)
 		binding_contour = size;
 
-	if(best > 32767)
-		best -= (32768 - PROT_SOCK);
+	retval = tcp_bhashnext(start-1,i);
 
-	while(tcp_lport_inuse(best))
-		best += TCP_BHTABLE_SIZE;
-	retval = best;
-done:
-	if(start > 32767)
-		start -= (32768 - PROT_SOCK);
+	best = retval;	/* mark the starting point to avoid infinite loops */
+	while(tcp_lport_inuse(retval)) {
+		retval = tcp_bhashnext(retval,i);
+		if (retval > 32767)	/* Upper bound */
+			retval = tcp_bhashnext(PROT_SOCK,i);
+		if (retval == best) {
+			/* This hash chain is full. No answer. */
+			retval = 0;
+			break;
+		}
+	}
 
+done:
+	start = (retval + 1);
+	if (start > 32767 || start < PROT_SOCK)
+		start = PROT_SOCK;
 	SOCKHASH_UNLOCK();
 
 	return retval;
@@ -731,6 +748,7 @@
 		 * here as well.
 		 */
 		sk->cong_window = 1;
+		sk->cong_count = 0;
 		sk->high_seq = sk->sent_seq;
 		return;
 	}
@@ -1254,7 +1272,10 @@
 					sk->write_seq += copy;
 					seglen -= copy;
 				}
-				if (tcp_size >= sk->mss || (flags & MSG_OOB) || !sk->packets_out)
+				/* If we have a full packet or a new OOB
+				 * message, we have to force this packet out.
+				 */
+				if (tcp_size >= sk->mss || (flags & MSG_OOB))
 					tcp_send_skb(sk, skb);
 				else
 					tcp_enqueue_partial(skb, sk);
@@ -1290,8 +1311,14 @@
 
 			delay = 0;
 			tmp = copy + sk->prot->max_header + 15;
-			if (copy < sk->mss && !(flags & MSG_OOB) && sk->packets_out)
-			{
+			/* If we won't fill the current packet, and it's not an OOB message,
+			 * then we might want to delay to allow data in the later parts
+			 * of iov to fill this packet out. Note that if we aren't
+			 * Nagling or there are no packets currently out then the top
+			 * level code in tcp_sendmsg() will force any partial packets out
+			 * after we finish building the largest packets this write allows.
+			 */
+			if (copy < sk->mss && !(flags & MSG_OOB)) {
 				tmp = tmp - copy + sk->mtu + 128;
 				delay = 1;
 			}
@@ -1838,7 +1865,10 @@
 		case TCP_CLOSE:
 		case TCP_LISTEN:
 			break;
-		case TCP_LAST_ACK:	/* Could have shutdown() then close()!*/
+		case TCP_LAST_ACK:	/* Could have shutdown() then close().
+					   Be careful not to send a double FIN. */
+			ns=TCP_LAST_ACK;
+			break;
 		case TCP_CLOSE_WAIT:	/* They have FIN'd us. We send our FIN and
 					   wait only for the ACK */
 			ns=TCP_LAST_ACK;
@@ -2014,7 +2044,7 @@
 
 	/* Now that the socket is dead, if we are in the FIN_WAIT2 state
 	 * we may need to set up a timer.
-         */
+	 */
 	if (sk->state==TCP_FIN_WAIT2)
 	{
 		int timer_active=del_timer(&sk->timer);
@@ -2024,8 +2054,8 @@
 			tcp_reset_msl_timer(sk, TIME_CLOSE, TCP_FIN_TIMEOUT);
 	}
 
-	release_sock(sk);
 	sk->dead = 1;
+	release_sock(sk);
 
 	if(sk->state == TCP_CLOSE)
 		tcp_v4_unhash(sk);
@@ -2198,6 +2228,9 @@
 	buff->free = 0;
 	buff->localroute = sk->localroute;
 
+	/* If this socket is bound to a particular device, make sure we use it. */
+	dev = sk->bound_device;
+
 	/*
 	 *	Put in the IP header and routing stuff.
 	 */
@@ -2252,10 +2285,14 @@
 	 *	but not bigger than device MTU
 	 */
 
-	if(sk->mtu <32)
-		sk->mtu = 32;	/* Sanity limit */
-
 	sk->mtu = min(sk->mtu, dev->mtu - sizeof(struct iphdr) - sizeof(struct tcphdr));
+
+	/* Must check it here, just to be absolutely safe.  If we end up
+	 * with an sk->mtu of zero, we can thus end up with an sk->mss
+	 * of zero, which causes us to bomb out in tcp_do_sendmsg. -DaveM
+	 */
+	if(sk->mtu < 32)
+		sk->mtu = 32;	/* Sanity limit */
 
 	/*
 	 *	Put in the TCP options to say MTU.

FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen, slshen@lbl.gov