SUNRPC: Move TCP receive data path into a workqueue context
Stream protocols such as TCP can often build up a backlog of data to be read due to ordering. Combined with the fact that some workloads, such as NFS read()-intensive workloads, need to receive a lot of data per RPC call, this means that receiving the data from inside a softirq context can cause starvation. The following patch moves the TCP data receive into a workqueue context. We still end up calling tcp_read_sock(), but we do so from process context, meaning that softirqs are enabled most of the time. With this patch, I see a doubling of read bandwidth when running a multi-threaded iozone workload between a virtual client and server setup. Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
This commit is contained in:
parent
66d7a56a62
commit
edc1b01cd3
2 changed files with 38 additions and 15 deletions
|
@ -44,6 +44,8 @@ struct sock_xprt {
|
||||||
*/
|
*/
|
||||||
unsigned long sock_state;
|
unsigned long sock_state;
|
||||||
struct delayed_work connect_worker;
|
struct delayed_work connect_worker;
|
||||||
|
struct work_struct recv_worker;
|
||||||
|
struct mutex recv_mutex;
|
||||||
struct sockaddr_storage srcaddr;
|
struct sockaddr_storage srcaddr;
|
||||||
unsigned short srcport;
|
unsigned short srcport;
|
||||||
|
|
||||||
|
|
|
@ -823,6 +823,7 @@ static void xs_reset_transport(struct sock_xprt *transport)
|
||||||
|
|
||||||
kernel_sock_shutdown(sock, SHUT_RDWR);
|
kernel_sock_shutdown(sock, SHUT_RDWR);
|
||||||
|
|
||||||
|
mutex_lock(&transport->recv_mutex);
|
||||||
write_lock_bh(&sk->sk_callback_lock);
|
write_lock_bh(&sk->sk_callback_lock);
|
||||||
transport->inet = NULL;
|
transport->inet = NULL;
|
||||||
transport->sock = NULL;
|
transport->sock = NULL;
|
||||||
|
@ -833,6 +834,7 @@ static void xs_reset_transport(struct sock_xprt *transport)
|
||||||
xprt_clear_connected(xprt);
|
xprt_clear_connected(xprt);
|
||||||
write_unlock_bh(&sk->sk_callback_lock);
|
write_unlock_bh(&sk->sk_callback_lock);
|
||||||
xs_sock_reset_connection_flags(xprt);
|
xs_sock_reset_connection_flags(xprt);
|
||||||
|
mutex_unlock(&transport->recv_mutex);
|
||||||
|
|
||||||
trace_rpc_socket_close(xprt, sock);
|
trace_rpc_socket_close(xprt, sock);
|
||||||
sock_release(sock);
|
sock_release(sock);
|
||||||
|
@ -886,6 +888,7 @@ static void xs_destroy(struct rpc_xprt *xprt)
|
||||||
|
|
||||||
cancel_delayed_work_sync(&transport->connect_worker);
|
cancel_delayed_work_sync(&transport->connect_worker);
|
||||||
xs_close(xprt);
|
xs_close(xprt);
|
||||||
|
cancel_work_sync(&transport->recv_worker);
|
||||||
xs_xprt_free(xprt);
|
xs_xprt_free(xprt);
|
||||||
module_put(THIS_MODULE);
|
module_put(THIS_MODULE);
|
||||||
}
|
}
|
||||||
|
@ -1243,12 +1246,12 @@ static inline int xs_tcp_read_reply(struct rpc_xprt *xprt,
|
||||||
dprintk("RPC: read reply XID %08x\n", ntohl(transport->tcp_xid));
|
dprintk("RPC: read reply XID %08x\n", ntohl(transport->tcp_xid));
|
||||||
|
|
||||||
/* Find and lock the request corresponding to this xid */
|
/* Find and lock the request corresponding to this xid */
|
||||||
spin_lock(&xprt->transport_lock);
|
spin_lock_bh(&xprt->transport_lock);
|
||||||
req = xprt_lookup_rqst(xprt, transport->tcp_xid);
|
req = xprt_lookup_rqst(xprt, transport->tcp_xid);
|
||||||
if (!req) {
|
if (!req) {
|
||||||
dprintk("RPC: XID %08x request not found!\n",
|
dprintk("RPC: XID %08x request not found!\n",
|
||||||
ntohl(transport->tcp_xid));
|
ntohl(transport->tcp_xid));
|
||||||
spin_unlock(&xprt->transport_lock);
|
spin_unlock_bh(&xprt->transport_lock);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1257,7 +1260,7 @@ static inline int xs_tcp_read_reply(struct rpc_xprt *xprt,
|
||||||
if (!(transport->tcp_flags & TCP_RCV_COPY_DATA))
|
if (!(transport->tcp_flags & TCP_RCV_COPY_DATA))
|
||||||
xprt_complete_rqst(req->rq_task, transport->tcp_copied);
|
xprt_complete_rqst(req->rq_task, transport->tcp_copied);
|
||||||
|
|
||||||
spin_unlock(&xprt->transport_lock);
|
spin_unlock_bh(&xprt->transport_lock);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1277,10 +1280,10 @@ static int xs_tcp_read_callback(struct rpc_xprt *xprt,
|
||||||
struct rpc_rqst *req;
|
struct rpc_rqst *req;
|
||||||
|
|
||||||
/* Look up and lock the request corresponding to the given XID */
|
/* Look up and lock the request corresponding to the given XID */
|
||||||
spin_lock(&xprt->transport_lock);
|
spin_lock_bh(&xprt->transport_lock);
|
||||||
req = xprt_lookup_bc_request(xprt, transport->tcp_xid);
|
req = xprt_lookup_bc_request(xprt, transport->tcp_xid);
|
||||||
if (req == NULL) {
|
if (req == NULL) {
|
||||||
spin_unlock(&xprt->transport_lock);
|
spin_unlock_bh(&xprt->transport_lock);
|
||||||
printk(KERN_WARNING "Callback slot table overflowed\n");
|
printk(KERN_WARNING "Callback slot table overflowed\n");
|
||||||
xprt_force_disconnect(xprt);
|
xprt_force_disconnect(xprt);
|
||||||
return -1;
|
return -1;
|
||||||
|
@ -1291,7 +1294,7 @@ static int xs_tcp_read_callback(struct rpc_xprt *xprt,
|
||||||
|
|
||||||
if (!(transport->tcp_flags & TCP_RCV_COPY_DATA))
|
if (!(transport->tcp_flags & TCP_RCV_COPY_DATA))
|
||||||
xprt_complete_bc_request(req, transport->tcp_copied);
|
xprt_complete_bc_request(req, transport->tcp_copied);
|
||||||
spin_unlock(&xprt->transport_lock);
|
spin_unlock_bh(&xprt->transport_lock);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -1402,19 +1405,33 @@ static void xs_tcp_data_receive(struct sock_xprt *transport)
|
||||||
unsigned long total = 0;
|
unsigned long total = 0;
|
||||||
int read = 0;
|
int read = 0;
|
||||||
|
|
||||||
|
mutex_lock(&transport->recv_mutex);
|
||||||
sk = transport->inet;
|
sk = transport->inet;
|
||||||
|
if (sk == NULL)
|
||||||
|
goto out;
|
||||||
|
|
||||||
/* We use rd_desc to pass struct xprt to xs_tcp_data_recv */
|
/* We use rd_desc to pass struct xprt to xs_tcp_data_recv */
|
||||||
for (;;) {
|
for (;;) {
|
||||||
|
lock_sock(sk);
|
||||||
read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv);
|
read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv);
|
||||||
|
release_sock(sk);
|
||||||
if (read <= 0)
|
if (read <= 0)
|
||||||
break;
|
break;
|
||||||
total += read;
|
total += read;
|
||||||
rd_desc.count = 65536;
|
rd_desc.count = 65536;
|
||||||
}
|
}
|
||||||
|
out:
|
||||||
|
mutex_unlock(&transport->recv_mutex);
|
||||||
trace_xs_tcp_data_ready(xprt, read, total);
|
trace_xs_tcp_data_ready(xprt, read, total);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void xs_tcp_data_receive_workfn(struct work_struct *work)
|
||||||
|
{
|
||||||
|
struct sock_xprt *transport =
|
||||||
|
container_of(work, struct sock_xprt, recv_worker);
|
||||||
|
xs_tcp_data_receive(transport);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* xs_tcp_data_ready - "data ready" callback for TCP sockets
|
* xs_tcp_data_ready - "data ready" callback for TCP sockets
|
||||||
* @sk: socket with data to read
|
* @sk: socket with data to read
|
||||||
|
@ -1437,8 +1454,8 @@ static void xs_tcp_data_ready(struct sock *sk)
|
||||||
*/
|
*/
|
||||||
if (xprt->reestablish_timeout)
|
if (xprt->reestablish_timeout)
|
||||||
xprt->reestablish_timeout = 0;
|
xprt->reestablish_timeout = 0;
|
||||||
|
queue_work(rpciod_workqueue, &transport->recv_worker);
|
||||||
|
|
||||||
xs_tcp_data_receive(transport);
|
|
||||||
out:
|
out:
|
||||||
read_unlock_bh(&sk->sk_callback_lock);
|
read_unlock_bh(&sk->sk_callback_lock);
|
||||||
}
|
}
|
||||||
|
@ -1840,6 +1857,10 @@ static inline void xs_reclassify_socket(int family, struct socket *sock)
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
static void xs_dummy_data_receive_workfn(struct work_struct *work)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
static void xs_dummy_setup_socket(struct work_struct *work)
|
static void xs_dummy_setup_socket(struct work_struct *work)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
@ -2664,6 +2685,7 @@ static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args,
|
||||||
}
|
}
|
||||||
|
|
||||||
new = container_of(xprt, struct sock_xprt, xprt);
|
new = container_of(xprt, struct sock_xprt, xprt);
|
||||||
|
mutex_init(&new->recv_mutex);
|
||||||
memcpy(&xprt->addr, args->dstaddr, args->addrlen);
|
memcpy(&xprt->addr, args->dstaddr, args->addrlen);
|
||||||
xprt->addrlen = args->addrlen;
|
xprt->addrlen = args->addrlen;
|
||||||
if (args->srcaddr)
|
if (args->srcaddr)
|
||||||
|
@ -2717,6 +2739,7 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args)
|
||||||
xprt->ops = &xs_local_ops;
|
xprt->ops = &xs_local_ops;
|
||||||
xprt->timeout = &xs_local_default_timeout;
|
xprt->timeout = &xs_local_default_timeout;
|
||||||
|
|
||||||
|
INIT_WORK(&transport->recv_worker, xs_dummy_data_receive_workfn);
|
||||||
INIT_DELAYED_WORK(&transport->connect_worker,
|
INIT_DELAYED_WORK(&transport->connect_worker,
|
||||||
xs_dummy_setup_socket);
|
xs_dummy_setup_socket);
|
||||||
|
|
||||||
|
@ -2788,21 +2811,20 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
|
||||||
|
|
||||||
xprt->timeout = &xs_udp_default_timeout;
|
xprt->timeout = &xs_udp_default_timeout;
|
||||||
|
|
||||||
|
INIT_WORK(&transport->recv_worker, xs_dummy_data_receive_workfn);
|
||||||
|
INIT_DELAYED_WORK(&transport->connect_worker, xs_udp_setup_socket);
|
||||||
|
|
||||||
switch (addr->sa_family) {
|
switch (addr->sa_family) {
|
||||||
case AF_INET:
|
case AF_INET:
|
||||||
if (((struct sockaddr_in *)addr)->sin_port != htons(0))
|
if (((struct sockaddr_in *)addr)->sin_port != htons(0))
|
||||||
xprt_set_bound(xprt);
|
xprt_set_bound(xprt);
|
||||||
|
|
||||||
INIT_DELAYED_WORK(&transport->connect_worker,
|
|
||||||
xs_udp_setup_socket);
|
|
||||||
xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP);
|
xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP);
|
||||||
break;
|
break;
|
||||||
case AF_INET6:
|
case AF_INET6:
|
||||||
if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0))
|
if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0))
|
||||||
xprt_set_bound(xprt);
|
xprt_set_bound(xprt);
|
||||||
|
|
||||||
INIT_DELAYED_WORK(&transport->connect_worker,
|
|
||||||
xs_udp_setup_socket);
|
|
||||||
xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP6);
|
xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP6);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
@ -2867,21 +2889,20 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
|
||||||
xprt->ops = &xs_tcp_ops;
|
xprt->ops = &xs_tcp_ops;
|
||||||
xprt->timeout = &xs_tcp_default_timeout;
|
xprt->timeout = &xs_tcp_default_timeout;
|
||||||
|
|
||||||
|
INIT_WORK(&transport->recv_worker, xs_tcp_data_receive_workfn);
|
||||||
|
INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_setup_socket);
|
||||||
|
|
||||||
switch (addr->sa_family) {
|
switch (addr->sa_family) {
|
||||||
case AF_INET:
|
case AF_INET:
|
||||||
if (((struct sockaddr_in *)addr)->sin_port != htons(0))
|
if (((struct sockaddr_in *)addr)->sin_port != htons(0))
|
||||||
xprt_set_bound(xprt);
|
xprt_set_bound(xprt);
|
||||||
|
|
||||||
INIT_DELAYED_WORK(&transport->connect_worker,
|
|
||||||
xs_tcp_setup_socket);
|
|
||||||
xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP);
|
xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP);
|
||||||
break;
|
break;
|
||||||
case AF_INET6:
|
case AF_INET6:
|
||||||
if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0))
|
if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0))
|
||||||
xprt_set_bound(xprt);
|
xprt_set_bound(xprt);
|
||||||
|
|
||||||
INIT_DELAYED_WORK(&transport->connect_worker,
|
|
||||||
xs_tcp_setup_socket);
|
|
||||||
xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP6);
|
xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP6);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
|
Loading…
Add table
Reference in a new issue