patch-2.0.31 linux/net/ipv4/udp.c

Next file: linux/net/ipx/af_ipx.c
Previous file: linux/net/ipv4/timer.c
Back to the patch index
Back to the overall index

diff -u --recursive --new-file v2.0.30/linux/net/ipv4/udp.c linux/net/ipv4/udp.c
@@ -52,6 +52,9 @@
  *		David S. Miller	:	New socket lookup architecture for ISS.
  *					Last socket cache retained as it
  *					does have a high hit rate.
+ *		Elliot Poger	:	Added support for SO_BINDTODEVICE.
+ *	Willy Konynenberg	:	Transparent proxy adapted to new
+ *					socket hash code.
  *
  *
  *		This program is free software; you can redistribute it and/or
@@ -130,6 +133,11 @@
 		if((sk2->num == snum) && (sk2 != sk)) {
 			int sk2_reuse = sk2->reuse;
 
+			/* Two sockets can be bound to the same port if they're
+			 * bound to different interfaces... */
+			if (sk->bound_device != sk2->bound_device)
+				continue;
+
 			if(!sk2->rcv_saddr || !sk->rcv_saddr) {
 				if((!sk2_reuse) || (!sk_reuse)) {
 					retval = 1;
@@ -147,7 +155,7 @@
 	return retval;
 }
 
-static inline int udp_lport_inuse(int num)
+static inline int udp_lport_inuse(u16 num)
 {
 	struct sock *sk = udp_hash[num & (UDP_HTABLE_SIZE - 1)];
 
@@ -161,36 +169,42 @@
 /* Shared by v4/v6 tcp. */
 unsigned short udp_good_socknum(void)
 {
-	static int start = 0;
-	unsigned short base;
-	int i, best = 0, size = 32767; /* a big num. */
 	int result;
-
-	base = PROT_SOCK + (start & 1023) + 1;
+	static int start = 0;
+	int i, best, best_size_so_far;
 
 	SOCKHASH_LOCK();
-	for(i = 0; i < UDP_HTABLE_SIZE; i++) {
-		struct sock *sk = udp_hash[i];
-		if(!sk) {
-			start = (i + 1 + start) & 1023;
-			result = i + base + 1;
+
+	/* Select initial not-so-random "best" */
+	best = PROT_SOCK + 1 + (start & 1023);
+	best_size_so_far = 32767;	/* "big" num */
+	result = best;
+	for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) {
+		struct sock *sk;
+		int size;
+
+		sk = udp_hash[result & (UDP_HTABLE_SIZE - 1)];
+
+		/* No clashes - take it */
+		if (!sk)
 			goto out;
-		} else {
-			int j = 0;
-			do {
-				if(++j >= size)
-					goto next;
-			} while((sk = sk->next));
-			best = i;
-			size = j;
-		}
-	next:
+
+		/* Is this one better than our best so far? */
+		size = 0;
+		do {
+			if(++size >= best_size_so_far)
+				goto next;
+		} while((sk = sk->next) != NULL);
+		best_size_so_far = size;
+		best = result;
+next:
 	}
 
-	while(udp_lport_inuse(base + best + 1))
+	while (udp_lport_inuse(best))
 		best += UDP_HTABLE_SIZE;
-	result = (best + base + 1);
+	result = best;
 out:
+	start = result;
 	SOCKHASH_UNLOCK();
 	return result;
 }
@@ -255,7 +269,8 @@
 /* UDP is nearly always wildcards out the wazoo, it makes no sense to try
  * harder than this. -DaveM
  */
-__inline__ struct sock *udp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport)
+__inline__ struct sock *udp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport,
+				      struct device *dev)
 {
 	struct sock *sk, *result = NULL;
 	unsigned short hnum = ntohs(dport);
@@ -279,7 +294,15 @@
 					continue;
 				score++;
 			}
-			if(score == 3) {
+			/* If this socket is bound to a particular interface,
+			 * did the packet come in on it? */
+			if (sk->bound_device) {
+				if (dev == sk->bound_device)
+					score++;
+				else
+					continue;  /* mismatch--not this sock */
+			}
+			if(score == 4) {
 				result = sk;
 				break;
 			} else if(score > badness) {
@@ -292,78 +315,68 @@
 }
 
 #ifdef CONFIG_IP_TRANSPARENT_PROXY
-#define secondlist(hpnum, sk, fpass) \
-({ struct sock *s1; if(!(sk) && (fpass)--) \
-	s1 = udp_hash[(hpnum) & (TCP_HTABLE_SIZE - 1)]; \
-   else \
-	s1 = (sk); \
-   s1; \
-})
-
-#define udp_v4_proxy_loop_init(hnum, hpnum, sk, fpass) \
-	secondlist((hpnum), udp_hash[(hnum)&(TCP_HTABLE_SIZE-1)],(fpass))
-
-#define udp_v4_proxy_loop_next(hnum, hpnum, sk, fpass) \
-	secondlist((hpnum),(sk)->next,(fpass))
-
-struct sock *udp_v4_proxy_lookup(unsigned short num, unsigned long raddr,
-				 unsigned short rnum, unsigned long laddr,
-				 unsigned long paddr, unsigned short pnum)
+struct sock *udp_v4_proxy_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport, u32 paddr, u16 rport,
+				 struct device *dev)
 {
-	struct sock *s, *result = NULL;
+	struct sock *hh[3], *sk, *result = NULL;
+	int i;
 	int badness = -1;
-	unsigned short hnum = ntohs(num);
-	unsigned short hpnum = ntohs(pnum);
-	int firstpass = 1;
+	unsigned short hnum = ntohs(dport);
+	unsigned short hpnum = ntohs(rport);
 
 	SOCKHASH_LOCK();
-	for(s = udp_v4_proxy_loop_init(hnum, hpnum, s, firstpass);
-	    s != NULL;
-	    s = udp_v4_proxy_loop_next(hnum, hpnum, s, firstpass)) {
-		if(s->num == hnum || s->num == hpnum) {
-			int score = 0;
-			if(s->dead && (s->state == TCP_CLOSE))
-				continue;
-			if(s->rcv_saddr) {
-				if((s->num != hpnum || s->rcv_saddr != paddr) &&
-				   (s->num != hnum || s->rcv_saddr != laddr))
+	hh[0] = udp_hash[hnum & (UDP_HTABLE_SIZE - 1)];
+	hh[1] = udp_hash[hpnum & (UDP_HTABLE_SIZE - 1)];
+	for (i = 0; i < 2; i++) {
+		for(sk = hh[i]; sk != NULL; sk = sk->next) {
+			if(sk->num == hnum || sk->num == hpnum) {
+				int score = 0;
+				if(sk->dead && (sk->state == TCP_CLOSE))
 					continue;
-				score++;
-			}
-			if(s->daddr) {
-				if(s->daddr != raddr)
-					continue;
-				score++;
-			}
-			if(s->dummy_th.dest) {
-				if(s->dummy_th.dest != rnum)
-					continue;
-				score++;
-			}
-			if(score == 3 && s->num == hnum) {
-				result = s;
-				break;
-			} else if(score > badness && (s->num == hpnum || s->rcv_saddr)) {
-					result = s;
+				if(sk->rcv_saddr) {
+					if((sk->num != hpnum || sk->rcv_saddr != paddr) &&
+					   (sk->num != hnum || sk->rcv_saddr != daddr))
+						continue;
+					score++;
+				}
+				if(sk->daddr) {
+					if(sk->daddr != saddr)
+						continue;
+					score++;
+				}
+				if(sk->dummy_th.dest) {
+					if(sk->dummy_th.dest != sport)
+						continue;
+					score++;
+				}
+				/* If this socket is bound to a particular interface,
+				 * did the packet come in on it? */
+				if(sk->bound_device) {
+					if (sk->bound_device != dev)
+						continue;
+					score++;
+				}
+				if(score == 4 && sk->num == hnum) {
+					result = sk;
+					break;
+				} else if(score > badness && (sk->num == hpnum || sk->rcv_saddr)) {
+					result = sk;
 					badness = score;
+				}
 			}
 		}
 	}
 	SOCKHASH_UNLOCK();
 	return result;
 }
-
-#undef secondlist
-#undef udp_v4_proxy_loop_init
-#undef udp_v4_proxy_loop_next
-
 #endif
 
 static inline struct sock *udp_v4_mcast_next(struct sock *sk,
 					     unsigned short num,
 					     unsigned long raddr,
 					     unsigned short rnum,
-					     unsigned long laddr)
+					     unsigned long laddr,
+					     struct device *dev)
 {
 	struct sock *s = sk;
 	unsigned short hnum = ntohs(num);
@@ -372,6 +385,7 @@
 		    (s->dead && (s->state == TCP_CLOSE))		||
 		    (s->daddr && s->daddr!=raddr)			||
 		    (s->dummy_th.dest != rnum && s->dummy_th.dest != 0) ||
+		    ((s->bound_device) && (s->bound_device!=dev))       ||
 		    (s->rcv_saddr  && s->rcv_saddr != laddr))
 			continue;
 		break;
@@ -408,7 +422,7 @@
 	
 	uh = (struct udphdr *)header;  
    
-	sk = udp_v4_lookup(daddr, uh->dest, saddr, uh->source);
+	sk = udp_v4_lookup(daddr, uh->dest, saddr, uh->source, NULL);
 	if (sk == NULL) 
 	  	return;	/* No socket for error */
   	
@@ -850,7 +864,7 @@
 	if(!sk->broadcast && ip_chk_addr(usin->sin_addr.s_addr)==IS_BROADCAST)
 		return -EACCES;			/* Must turn broadcast on first */
   	
-  	rt=ip_rt_route((__u32)usin->sin_addr.s_addr, sk->localroute);
+  	rt=ip_rt_route((__u32)usin->sin_addr.s_addr, sk->localroute, sk->bound_device);
   	if (rt==NULL)
   		return -ENETUNREACH;
   	if(!sk->saddr)
@@ -869,8 +883,9 @@
 {
 	lock_sock(sk);
 	sk->state = TCP_CLOSE;
-	release_sock(sk);
 	sk->dead = 1;
+	release_sock(sk);
+	udp_v4_unhash(sk);
 	destroy_sock(sk);
 }
 
@@ -918,7 +933,8 @@
 	struct udphdr *uh = (struct udphdr *)(skb->h.raw + iph->ihl*4);
 	struct sock *sk;
 
-	sk = udp_v4_lookup(iph->saddr, uh->source, iph->daddr, uh->dest);
+	sk = udp_v4_lookup(iph->saddr, uh->source, iph->daddr, uh->dest,
+			   skb->dev);
 	if (!sk)
 		return 0;
 	/* 0 means accept all LOCAL addresses here, not all the world... */
@@ -940,7 +956,7 @@
 
 	SOCKHASH_LOCK();
 	sk = udp_hash[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)];
-	sk = udp_v4_mcast_next(sk, uh->dest, saddr, uh->source, daddr);
+	sk = udp_v4_mcast_next(sk, uh->dest, saddr, uh->source, daddr, skb->dev);
 	if(sk) {
 		struct sock *sknext = NULL;
 
@@ -948,7 +964,7 @@
 			struct sk_buff *skb1 = skb;
 
 			sknext = udp_v4_mcast_next(sk->next, uh->dest, saddr,
-						   uh->source, daddr);
+						   uh->source, daddr, skb->dev);
 			if(sknext)
 				skb1 = skb_clone(skb, GFP_ATOMIC);
 
@@ -1065,11 +1081,11 @@
 #endif
 #ifdef CONFIG_IP_TRANSPARENT_PROXY
 	if(skb->redirport)
-		sk = udp_v4_proxy_lookup(uh->dest, saddr, uh->source,
-					 daddr, dev->pa_addr, skb->redirport);
+		sk = udp_v4_proxy_lookup(saddr, uh->source, daddr, uh->dest,
+					 dev->pa_addr, skb->redirport, dev);
 	else
 #endif
-	sk = udp_v4_lookup(saddr, uh->source, daddr, uh->dest);
+	sk = udp_v4_lookup(saddr, uh->source, daddr, uh->dest, dev);
 	
 	if (sk == NULL) 
   	{

FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen, slshen@lbl.gov