patch-2.4.22 linux-2.4.22/net/sunrpc/xprt.c

Next file: linux-2.4.22/scripts/extract-ikconfig
Previous file: linux-2.4.22/net/sunrpc/xdr.c
Back to the patch index
Back to the overall index

diff -urN linux-2.4.21/net/sunrpc/xprt.c linux-2.4.22/net/sunrpc/xprt.c
@@ -83,10 +83,10 @@
  */
 static void	xprt_request_init(struct rpc_task *, struct rpc_xprt *);
 static void	do_xprt_transmit(struct rpc_task *);
-static void	xprt_reserve_status(struct rpc_task *task);
+static inline void	do_xprt_reserve(struct rpc_task *);
 static void	xprt_disconnect(struct rpc_xprt *);
-static void	xprt_reconn_status(struct rpc_task *task);
-static struct socket *xprt_create_socket(int, struct rpc_timeout *);
+static void	xprt_connect_status(struct rpc_task *task);
+static struct socket *xprt_create_socket(int, struct rpc_timeout *, int);
 static int	xprt_bind_socket(struct rpc_xprt *, struct socket *);
 static int      __xprt_get_cong(struct rpc_xprt *, struct rpc_task *);
 
@@ -133,14 +133,17 @@
 /*
  * Serialize write access to sockets, in order to prevent different
  * requests from interfering with each other.
- * Also prevents TCP socket reconnections from colliding with writes.
+ * Also prevents TCP socket connections from colliding with writes.
  */
 static int
 __xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task)
 {
 	if (!xprt->snd_task) {
-		if (xprt->nocong || __xprt_get_cong(xprt, task))
+		if (xprt->nocong || __xprt_get_cong(xprt, task)) {
 			xprt->snd_task = task;
+			if (task->tk_rqstp)
+				task->tk_rqstp->rq_bytes_sent = 0;
+		}
 	}
 	if (xprt->snd_task != task) {
 		dprintk("RPC: %4d TCP write queue full\n", task->tk_pid);
@@ -179,8 +182,11 @@
 		if (!task)
 			return;
 	}
-	if (xprt->nocong || __xprt_get_cong(xprt, task))
+	if (xprt->nocong || __xprt_get_cong(xprt, task)) {
 		xprt->snd_task = task;
+		if (task->tk_rqstp)
+			task->tk_rqstp->rq_bytes_sent = 0;
+	}
 }
 
 /*
@@ -231,6 +237,10 @@
 		unsigned int slen_part, n;
 
 		niov = xdr_kmap(niv, xdr, skip);
+		if (!niov) {
+			result = -EAGAIN;
+			break;
+		}
 
 		msg.msg_flags   = MSG_DONTWAIT|MSG_NOSIGNAL;
 		msg.msg_iov	= niv;
@@ -266,6 +276,7 @@
 		 */
 	case -EAGAIN:
 		break;
+	case -ECONNRESET:
 	case -ENOTCONN:
 	case -EPIPE:
 		/* connection broken */
@@ -383,6 +394,7 @@
 	if (!sk)
 		return;
 
+	write_lock_bh(&sk->callback_lock);
 	xprt->inet = NULL;
 	xprt->sock = NULL;
 
@@ -390,17 +402,12 @@
 	sk->data_ready   = xprt->old_data_ready;
 	sk->state_change = xprt->old_state_change;
 	sk->write_space  = xprt->old_write_space;
+	write_unlock_bh(&sk->callback_lock);
 
 	xprt_disconnect(xprt);
 	sk->no_check	 = 0;
 
 	sock_release(sock);
-	/*
-	 *	TCP doesn't require the rpciod now - other things may
-	 *	but rpciod handles that not us.
-	 */
-	if(xprt->stream)
-		rpciod_down();
 }
 
 /*
@@ -410,31 +417,29 @@
 xprt_disconnect(struct rpc_xprt *xprt)
 {
 	dprintk("RPC:      disconnected transport %p\n", xprt);
+	spin_lock_bh(&xprt->sock_lock);
 	xprt_clear_connected(xprt);
 	rpc_wake_up_status(&xprt->pending, -ENOTCONN);
+	spin_unlock_bh(&xprt->sock_lock);
 }
 
 /*
  * Reconnect a broken TCP connection.
  *
- * Note: This cannot collide with the TCP reads, as both run from rpciod
  */
 void
-xprt_reconnect(struct rpc_task *task)
+xprt_connect(struct rpc_task *task)
 {
 	struct rpc_xprt	*xprt = task->tk_xprt;
 	struct socket	*sock = xprt->sock;
 	struct sock	*inet;
 	int		status;
 
-	dprintk("RPC: %4d xprt_reconnect %p connected %d\n",
+	dprintk("RPC: %4d xprt_connect %p connected %d\n",
 				task->tk_pid, xprt, xprt_connected(xprt));
 	if (xprt->shutdown)
 		return;
 
-	if (!xprt->stream)
-		return;
-
 	if (!xprt->addr.sin_port) {
 		task->tk_status = -EIO;
 		return;
@@ -445,76 +450,112 @@
 	if (xprt_connected(xprt))
 		goto out_write;
 
-	if (sock && sock->state != SS_UNCONNECTED)
-		xprt_close(xprt);
-	status = -ENOTCONN;
-	if (!(inet = xprt->inet)) {
-		/* Create an unconnected socket */
-		if (!(sock = xprt_create_socket(xprt->prot, &xprt->timeout)))
-			goto defer;
-		xprt_bind_socket(xprt, sock);
-		inet = sock->sk;
+	if (task->tk_rqstp)
+		task->tk_rqstp->rq_bytes_sent = 0;
+
+	xprt_close(xprt);
+	/* Create an unconnected socket */
+	sock = xprt_create_socket(xprt->prot, &xprt->timeout, xprt->resvport);
+	if (!sock) {
+		/* couldn't create socket or bind to reserved port;
+		 * this is likely a permanent error, so cause an abort */
+		task->tk_status = -EIO;
+		goto out_write;
 	}
+	xprt_bind_socket(xprt, sock);
+
+	if (!xprt->stream)
+		goto out_write;
+
+	inet = sock->sk;
 
 	/* Now connect it asynchronously. */
 	dprintk("RPC: %4d connecting new socket\n", task->tk_pid);
 	status = sock->ops->connect(sock, (struct sockaddr *) &xprt->addr,
 				sizeof(xprt->addr), O_NONBLOCK);
+	dprintk("RPC: %4d connect status %d connected %d\n",
+		task->tk_pid, status, xprt_connected(xprt));
 
-	if (status < 0) {
-		switch (status) {
-		case -EALREADY:
-		case -EINPROGRESS:
-			status = 0;
-			break;
-		case -EISCONN:
-		case -EPIPE:
-			status = 0;
-			xprt_close(xprt);
-			goto defer;
-		default:
-			printk("RPC: TCP connect error %d!\n", -status);
-			xprt_close(xprt);
-			goto defer;
-		}
+	if (status >= 0)
+		return;
 
+	switch (status) {
+	case -EALREADY:
+	case -EINPROGRESS:
 		/* Protect against TCP socket state changes */
 		lock_sock(inet);
-		dprintk("RPC: %4d connect status %d connected %d\n",
-				task->tk_pid, status, xprt_connected(xprt));
-
 		if (inet->state != TCP_ESTABLISHED) {
-			task->tk_timeout = xprt->timeout.to_maxval;
-			/* if the socket is already closing, delay 5 secs */
+			dprintk("RPC: %4d  waiting for connection\n",
+					task->tk_pid);
+			task->tk_timeout = RPC_CONNECT_TIMEOUT;
+			/* if the socket is already closing, delay briefly */
 			if ((1<<inet->state) & ~(TCPF_SYN_SENT|TCPF_SYN_RECV))
-				task->tk_timeout = 5*HZ;
-			rpc_sleep_on(&xprt->pending, task, xprt_reconn_status, NULL);
-			release_sock(inet);
-			return;
+				task->tk_timeout = RPC_REESTABLISH_TIMEOUT;
+			rpc_sleep_on(&xprt->pending, task, xprt_connect_status,
+					NULL);
 		}
 		release_sock(inet);
+		break;
+	case -ECONNREFUSED:
+	case -ECONNRESET:
+	case -ENOTCONN:
+		if (!task->tk_client->cl_softrtry) {
+			rpc_delay(task, RPC_REESTABLISH_TIMEOUT);
+			task->tk_status = -ENOTCONN;
+			break;
+		}
+	default:
+		/* Report myriad other possible returns.  If this file
+		 * system is soft mounted, just error out, like Solaris.  */
+		if (task->tk_client->cl_softrtry) {
+			printk(KERN_WARNING
+					"RPC: error %d connecting to server %s, exiting\n",
+					-status, task->tk_client->cl_server);
+			task->tk_status = -EIO;
+			goto out_write;
+		}
+		printk(KERN_WARNING "RPC: error %d connecting to server %s\n",
+				-status, task->tk_client->cl_server);
+		/* This will prevent anybody else from connecting */
+		rpc_delay(task, RPC_REESTABLISH_TIMEOUT);
+		task->tk_status = status;
+		break;
 	}
-defer:
-	if (status < 0) {
-		rpc_delay(task, 5*HZ);
-		task->tk_status = -ENOTCONN;
-	}
+	return;
  out_write:
 	xprt_release_write(xprt, task);
 }
 
 /*
- * Reconnect timeout. We just mark the transport as not being in the
- * process of reconnecting, and leave the rest to the upper layers.
+ * We arrive here when awoken from waiting on connection establishment.
  */
 static void
-xprt_reconn_status(struct rpc_task *task)
+xprt_connect_status(struct rpc_task *task)
 {
 	struct rpc_xprt	*xprt = task->tk_xprt;
 
-	dprintk("RPC: %4d xprt_reconn_timeout %d\n",
-				task->tk_pid, task->tk_status);
+	if (task->tk_status >= 0) {
+		dprintk("RPC: %4d xprt_connect_status: connection established\n",
+				task->tk_pid);
+		return;
+	}
+
+	/* if soft mounted, cause this RPC to fail */
+	if (task->tk_client->cl_softrtry)
+		task->tk_status = -EIO;
 
+	switch (task->tk_status) {
+	case -ENOTCONN:
+		rpc_delay(task, RPC_REESTABLISH_TIMEOUT);
+		return;
+	case -ETIMEDOUT:
+		dprintk("RPC: %4d xprt_connect_status: timed out\n",
+				task->tk_pid);
+		break;
+	default:
+		printk(KERN_ERR "RPC: error %d connecting to server %s\n",
+				-task->tk_status, task->tk_client->cl_server);
+	}
 	xprt_release_write(xprt, task);
 }
 
@@ -657,8 +698,9 @@
 	struct sk_buff	*skb;
 	int		err, repsize, copied;
 
+	read_lock(&sk->callback_lock);
 	dprintk("RPC:      udp_data_ready...\n");
-	if (!(xprt = xprt_from_sock(sk))) {
+	if (sk->dead || !(xprt = xprt_from_sock(sk))) {
 		printk("RPC:      udp_data_ready request not found!\n");
 		goto out;
 	}
@@ -688,11 +730,11 @@
 	xprt_pktdump("packet data:",
 		     (u32 *) (skb->h.raw+sizeof(struct udphdr)), repsize);
 
-	if ((copied = rovr->rq_rlen) > repsize)
+	if ((copied = rovr->rq_private_buf.len) > repsize)
 		copied = repsize;
 
 	/* Suck it into the iovec, verify checksum if not done by hw. */
-	if (csum_partial_copy_to_xdr(&rovr->rq_rcv_buf, skb))
+	if (csum_partial_copy_to_xdr(&rovr->rq_private_buf, skb))
 		goto out_unlock;
 
 	/* Something worked... */
@@ -707,6 +749,7 @@
  out:
 	if (sk->sleep && waitqueue_active(sk->sleep))
 		wake_up_interruptible(sk->sleep);
+	read_unlock(&sk->callback_lock);
 }
 
 /*
@@ -813,7 +856,7 @@
 		return;
 	}
 
-	rcvbuf = &req->rq_rcv_buf;
+	rcvbuf = &req->rq_private_buf;
 	len = desc->count;
 	if (len > xprt->tcp_reclen - xprt->tcp_offset) {
 		skb_reader_t my_desc;
@@ -831,7 +874,7 @@
 	xprt->tcp_copied += len;
 	xprt->tcp_offset += len;
 
-	if (xprt->tcp_copied == req->rq_rlen)
+	if (xprt->tcp_copied == req->rq_private_buf.len)
 		xprt->tcp_flags &= ~XPRT_COPY_DATA;
 	else if (xprt->tcp_offset == xprt->tcp_reclen) {
 		if (xprt->tcp_flags & XPRT_LAST_FRAG)
@@ -895,7 +938,7 @@
 		}
 		/* Skip over any trailing bytes on short reads */
 		tcp_read_discard(xprt, &desc);
-	} while (desc.count && xprt_connected(xprt));
+	} while (desc.count);
 	dprintk("RPC:      tcp_data_recv done\n");
 	return len - desc.count;
 }
@@ -905,18 +948,21 @@
 	struct rpc_xprt *xprt;
 	read_descriptor_t rd_desc;
 
+	read_lock(&sk->callback_lock);
 	dprintk("RPC:      tcp_data_ready...\n");
 	if (!(xprt = xprt_from_sock(sk))) {
 		printk("RPC:      tcp_data_ready socket info not found!\n");
-		return;
+		goto out;
 	}
 	if (xprt->shutdown)
-		return;
+		goto out;
 
 	/* We use rd_desc to pass struct xprt to tcp_data_recv */
 	rd_desc.buf = (char *)xprt;
 	rd_desc.count = 65536;
 	tcp_read_sock(sk, &rd_desc, tcp_data_recv);
+out:
+	read_unlock(&sk->callback_lock);
 }
 
 static void
@@ -924,6 +970,7 @@
 {
 	struct rpc_xprt	*xprt;
 
+	read_lock(&sk->callback_lock);
 	if (!(xprt = xprt_from_sock(sk)))
 		goto out;
 	dprintk("RPC:      tcp_state_change client %p...\n", xprt);
@@ -942,10 +989,10 @@
 		xprt->tcp_copied = 0;
 		xprt->tcp_flags = XPRT_COPY_RECM | XPRT_COPY_XID;
 
-		spin_lock(&xprt->sock_lock);
+		spin_lock_bh(&xprt->sock_lock);
 		if (xprt->snd_task && xprt->snd_task->tk_rpcwait == &xprt->pending)
 			rpc_wake_up_task(xprt->snd_task);
-		spin_unlock(&xprt->sock_lock);
+		spin_unlock_bh(&xprt->sock_lock);
 		break;
 	case TCP_SYN_SENT:
 	case TCP_SYN_RECV:
@@ -957,6 +1004,7 @@
  out:
 	if (sk->sleep && waitqueue_active(sk->sleep))
 		wake_up_interruptible_all(sk->sleep);
+	read_unlock(&sk->callback_lock);
 }
 
 /*
@@ -971,24 +1019,25 @@
 	struct rpc_xprt	*xprt;
 	struct socket	*sock;
 
+	read_lock(&sk->callback_lock);
 	if (!(xprt = xprt_from_sock(sk)) || !(sock = sk->socket))
-		return;
+		goto out;
 	if (xprt->shutdown)
-		return;
+		goto out;
 
 	/* Wait until we have enough socket memory */
 	if (xprt->stream) {
 		/* from net/ipv4/tcp.c:tcp_write_space */
 		if (tcp_wspace(sk) < tcp_min_write_space(sk))
-			return;
+			goto out;
 	} else {
 		/* from net/core/sock.c:sock_def_write_space */
 		if (!sock_writeable(sk))
-			return;
+			goto out;
 	}
 
 	if (!test_and_clear_bit(SOCK_NOSPACE, &sock->flags))
-		return;
+		goto out;
 
 	spin_lock_bh(&xprt->sock_lock);
 	if (xprt->snd_task && xprt->snd_task->tk_rpcwait == &xprt->pending)
@@ -996,21 +1045,8 @@
 	spin_unlock_bh(&xprt->sock_lock);
 	if (sk->sleep && waitqueue_active(sk->sleep))
 		wake_up_interruptible(sk->sleep);
-}
-
-/*
- * Exponential backoff for UDP retries
- */
-static inline int
-xprt_expbackoff(struct rpc_task *task, struct rpc_rqst *req)
-{
-	int backoff;
-
-	req->rq_ntimeo++;
-	backoff = min(rpc_ntimeo(&task->tk_client->cl_rtt), XPRT_MAX_BACKOFF);
-	if (req->rq_ntimeo < (1 << backoff))
-		return 1;
-	return 0;
+out:
+	read_unlock(&sk->callback_lock);
 }
 
 /*
@@ -1026,14 +1062,7 @@
 	if (req->rq_received)
 		goto out;
 
-	if (!xprt->nocong) {
-		if (xprt_expbackoff(task, req)) {
-			rpc_add_timer(task, xprt_timer);
-			goto out_unlock;
-		}
-		rpc_inc_timeo(&task->tk_client->cl_rtt);
-		xprt_adjust_cwnd(req->rq_xprt, -ETIMEDOUT);
-	}
+	xprt_adjust_cwnd(req->rq_xprt, -ETIMEDOUT);
 	req->rq_nresend++;
 
 	dprintk("RPC: %4d xprt_timer (%s request)\n",
@@ -1043,7 +1072,6 @@
 out:
 	task->tk_timeout = 0;
 	rpc_wake_up_task(task);
-out_unlock:
 	spin_unlock(&xprt->sock_lock);
 }
 
@@ -1063,9 +1091,6 @@
 	if (xprt->shutdown)
 		task->tk_status = -EIO;
 
-	if (!xprt_connected(xprt))
-		task->tk_status = -ENOTCONN;
-
 	if (task->tk_status < 0)
 		return;
 
@@ -1081,17 +1106,29 @@
 	}
 
 	spin_lock_bh(&xprt->sock_lock);
-	if (!__xprt_lock_write(xprt, task)) {
-		spin_unlock_bh(&xprt->sock_lock);
-		return;
+	if (req->rq_received != 0 && !req->rq_bytes_sent)
+		goto out_notrans;
+
+	if (!__xprt_lock_write(xprt, task))
+		goto out_notrans;
+
+	if (!xprt_connected(xprt)) {
+		task->tk_status = -ENOTCONN;
+		goto out_notrans;
 	}
+
 	if (list_empty(&req->rq_list)) {
+		/* Update the softirq receive buffer */
+		memcpy(&req->rq_private_buf, &req->rq_rcv_buf,
+				sizeof(req->rq_private_buf));
 		list_add_tail(&req->rq_list, &xprt->recv);
-		req->rq_received = 0;
 	}
 	spin_unlock_bh(&xprt->sock_lock);
 
 	do_xprt_transmit(task);
+	return;
+out_notrans:
+	spin_unlock_bh(&xprt->sock_lock);
 }
 
 static void
@@ -1117,8 +1154,12 @@
 		if (xprt->stream) {
 			req->rq_bytes_sent += status;
 
-			if (req->rq_bytes_sent >= req->rq_slen)
+			/* If we've sent the entire packet, immediately
+			 * reset the count of bytes sent. */
+			if (req->rq_bytes_sent >= req->rq_slen) {
+				req->rq_bytes_sent = 0;
 				goto out_receive;
+			}
 		} else {
 			if (status >= req->rq_slen)
 				goto out_receive;
@@ -1135,13 +1176,11 @@
 			break;
 	}
 
-	/* Note: at this point, task->tk_sleeping has not yet been set,
-	 *	 hence there is no danger of the waking up task being put on
-	 *	 schedq, and being picked up by a parallel run of rpciod().
+	/* If we're doing a resend and have received a reply already,
+	 * then exit early.
+	 * Note, though, that we can't do this if we've already started
+	 * resending down a TCP stream.
 	 */
-	if (req->rq_received)
-		goto out_release;
-
 	task->tk_status = status;
 
 	switch (status) {
@@ -1149,7 +1188,10 @@
 		if (test_bit(SOCK_ASYNC_NOSPACE, &xprt->sock->flags)) {
 			/* Protect against races with xprt_write_space */
 			spin_lock_bh(&xprt->sock_lock);
-			if (test_bit(SOCK_NOSPACE, &xprt->sock->flags)) {
+			/* Don't race with disconnect */
+			if (!xprt_connected(xprt))
+				task->tk_status = -ENOTCONN;
+			else if (test_bit(SOCK_NOSPACE, &xprt->sock->flags)) {
 				task->tk_timeout = req->rq_timeout.to_current;
 				rpc_sleep_on(&xprt->pending, task, NULL, NULL);
 			}
@@ -1160,30 +1202,33 @@
 		rpc_delay(task, HZ>>4);
 		return;
 	case -ECONNREFUSED:
+		task->tk_timeout = RPC_REESTABLISH_TIMEOUT;
+		rpc_sleep_on(&xprt->sending, task, NULL, NULL);
 	case -ENOTCONN:
-		if (!xprt->stream)
-			return;
+		return;
 	default:
 		if (xprt->stream)
 			xprt_disconnect(xprt);
-		req->rq_bytes_sent = 0;
 	}
- out_release:
 	xprt_release_write(xprt, task);
 	return;
  out_receive:
 	dprintk("RPC: %4d xmit complete\n", task->tk_pid);
+	spin_lock_bh(&xprt->sock_lock);
 	/* Set the task's receive timeout value */
 	if (!xprt->nocong) {
 		task->tk_timeout = rpc_calc_rto(&clnt->cl_rtt,
 				rpcproc_timer(clnt, task->tk_msg.rpc_proc));
-		req->rq_ntimeo = 0;
+		task->tk_timeout <<= clnt->cl_timeout.to_retries
+			- req->rq_timeout.to_retries;
 		if (task->tk_timeout > req->rq_timeout.to_maxval)
 			task->tk_timeout = req->rq_timeout.to_maxval;
 	} else
 		task->tk_timeout = req->rq_timeout.to_current;
-	spin_lock_bh(&xprt->sock_lock);
-	if (!req->rq_received)
+	/* Don't race with disconnect */
+	if (!xprt_connected(xprt))
+		task->tk_status = -ENOTCONN;
+	else if (!req->rq_received)
 		rpc_sleep_on(&xprt->pending, task, NULL, xprt_timer);
 	__xprt_release_write(xprt, task);
 	spin_unlock_bh(&xprt->sock_lock);
@@ -1192,61 +1237,39 @@
 /*
  * Reserve an RPC call slot.
  */
-int
+void
 xprt_reserve(struct rpc_task *task)
 {
 	struct rpc_xprt	*xprt = task->tk_xprt;
 
-	/* We already have an initialized request. */
-	if (task->tk_rqstp)
-		return 0;
-
-	spin_lock(&xprt->xprt_lock);
-	xprt_reserve_status(task);
-	if (task->tk_rqstp) {
-		task->tk_timeout = 0;
-	} else if (!task->tk_timeout) {
-		task->tk_status = -ENOBUFS;
-	} else {
-		dprintk("RPC:      xprt_reserve waiting on backlog\n");
-		task->tk_status = -EAGAIN;
-		rpc_sleep_on(&xprt->backlog, task, NULL, NULL);
+	task->tk_status = -EIO;
+	if (!xprt->shutdown) {
+		spin_lock(&xprt->xprt_lock);
+		do_xprt_reserve(task);
+		spin_unlock(&xprt->xprt_lock);
 	}
-	spin_unlock(&xprt->xprt_lock);
-	dprintk("RPC: %4d xprt_reserve returns %d\n",
-				task->tk_pid, task->tk_status);
-	return task->tk_status;
 }
 
-/*
- * Reservation callback
- */
-static void
-xprt_reserve_status(struct rpc_task *task)
+static inline void
+do_xprt_reserve(struct rpc_task *task)
 {
 	struct rpc_xprt	*xprt = task->tk_xprt;
-	struct rpc_rqst	*req;
 
-	if (xprt->shutdown) {
-		task->tk_status = -EIO;
-	} else if (task->tk_status < 0) {
-		/* NOP */
-	} else if (task->tk_rqstp) {
-		/* We've already been given a request slot: NOP */
-	} else {
-		if (!(req = xprt->free))
-			goto out_nofree;
-		/* OK: There's room for us. Grab a free slot */
-		xprt->free     = req->rq_next;
-		req->rq_next   = NULL;
+	task->tk_status = 0;
+	if (task->tk_rqstp)
+		return;
+	if (xprt->free) {
+		struct rpc_rqst	*req = xprt->free;
+		xprt->free = req->rq_next;
+		req->rq_next = NULL;
 		task->tk_rqstp = req;
 		xprt_request_init(task, xprt);
+		return;
 	}
-
-	return;
-
-out_nofree:
+	dprintk("RPC:      waiting for request slot\n");
 	task->tk_status = -EAGAIN;
+	task->tk_timeout = 0;
+	rpc_sleep_on(&xprt->backlog, task, NULL, NULL);
 }
 
 /*
@@ -1339,7 +1362,6 @@
 	to->to_initval   = 
 	to->to_increment = incr;
 	to->to_maxval    = incr * retr;
-	to->to_resrvval  = incr * retr;
 	to->to_retries   = retr;
 	to->to_exponential = 0;
 }
@@ -1348,8 +1370,7 @@
  * Initialize an RPC client
  */
 static struct rpc_xprt *
-xprt_setup(struct socket *sock, int proto,
-			struct sockaddr_in *ap, struct rpc_timeout *to)
+xprt_setup(int proto, struct sockaddr_in *ap, struct rpc_timeout *to)
 {
 	struct rpc_xprt	*xprt;
 	struct rpc_rqst	*req;
@@ -1380,7 +1401,6 @@
 	if (to) {
 		xprt->timeout = *to;
 		xprt->timeout.to_current = to->to_initval;
-		xprt->timeout.to_resrvval = to->to_maxval << 1;
 	} else
 		xprt_default_timeout(&xprt->timeout, xprt->prot);
 
@@ -1395,9 +1415,11 @@
 	req->rq_next = NULL;
 	xprt->free = xprt->slot;
 
+	/* Check whether we want to use a reserved port */
+	xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0;
+
 	dprintk("RPC:      created transport %p\n", xprt);
 	
-	xprt_bind_socket(xprt, sock);
 	return xprt;
 }
 
@@ -1409,6 +1431,12 @@
 {
 	struct sockaddr_in myaddr;
 	int		err, port;
+	kernel_cap_t saved_cap = current->cap_effective;
+
+	/* Override capabilities.
+	 * They were checked in xprt_create_proto i.e. at mount time
+	 */
+	cap_raise (current->cap_effective, CAP_NET_BIND_SERVICE);
 
 	memset(&myaddr, 0, sizeof(myaddr));
 	myaddr.sin_family = AF_INET;
@@ -1418,6 +1446,7 @@
 		err = sock->ops->bind(sock, (struct sockaddr *) &myaddr,
 						sizeof(myaddr));
 	} while (err == -EADDRINUSE && --port > 0);
+	current->cap_effective = saved_cap;
 
 	if (err < 0)
 		printk("RPC: Can't bind to reserved port (%d).\n", -err);
@@ -1433,6 +1462,7 @@
 	if (xprt->inet)
 		return -EBUSY;
 
+	write_lock_bh(&sk->callback_lock);
 	sk->user_data = xprt;
 	xprt->old_data_ready = sk->data_ready;
 	xprt->old_state_change = sk->state_change;
@@ -1453,11 +1483,7 @@
 	/* Reset to new socket */
 	xprt->sock = sock;
 	xprt->inet = sk;
-	/*
-	 *	TCP requires the rpc I/O daemon is present
-	 */
-	if(xprt->stream)
-		rpciod_up();
+	write_unlock_bh(&sk->callback_lock);
 
 	return 0;
 }
@@ -1487,7 +1513,7 @@
  * Create a client socket given the protocol and peer address.
  */
 static struct socket *
-xprt_create_socket(int proto, struct rpc_timeout *to)
+xprt_create_socket(int proto, struct rpc_timeout *to, int resvport)
 {
 	struct socket	*sock;
 	int		type, err;
@@ -1502,8 +1528,8 @@
 		goto failed;
 	}
 
-	/* If the caller has the capability, bind to a reserved port */
-	if (capable(CAP_NET_BIND_SERVICE) && xprt_bindresvport(sock) < 0)
+	/* bind to a reserved port */
+	if (resvport && xprt_bindresvport(sock) < 0)
 		goto failed;
 
 	return sock;
@@ -1519,18 +1545,19 @@
 struct rpc_xprt *
 xprt_create_proto(int proto, struct sockaddr_in *sap, struct rpc_timeout *to)
 {
-	struct socket	*sock;
 	struct rpc_xprt	*xprt;
 
-	dprintk("RPC:      xprt_create_proto called\n");
-
-	if (!(sock = xprt_create_socket(proto, to)))
-		return NULL;
-
-	if (!(xprt = xprt_setup(sock, proto, sap, to)))
-		sock_release(sock);
+	xprt = xprt_setup(proto, sap, to);
+	if (!xprt)
+		goto out_bad;
 
+	dprintk("RPC:      xprt_create_proto created xprt %p\n", xprt);
 	return xprt;
+out_bad:
+	dprintk("RPC:      xprt_create_proto failed\n");
+	if (xprt)
+		kfree(xprt);
+	return NULL;
 }
 
 /*

FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)