android_kernel_oneplus_msm8998/net/rds/tcp.c

451 lines
12 KiB
C
Raw Normal View History

/*
* Copyright (c) 2006 Oracle. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
#include <linux/kernel.h>
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo <tj@kernel.org> Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 17:04:11 +09:00
#include <linux/slab.h>
#include <linux/in.h>
#include <linux/module.h>
#include <net/tcp.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/tcp.h>
#include "rds.h"
#include "tcp.h"
/* only for info exporting */
static DEFINE_SPINLOCK(rds_tcp_tc_list_lock);
static LIST_HEAD(rds_tcp_tc_list);
static unsigned int rds_tcp_tc_count;
/* Track rds_tcp_connection structs so they can be cleaned up */
static DEFINE_SPINLOCK(rds_tcp_conn_lock);
static LIST_HEAD(rds_tcp_conn_list);
static struct kmem_cache *rds_tcp_conn_slab;
#define RDS_TCP_DEFAULT_BUFSIZE (128 * 1024)
/* doing it this way avoids calling tcp_sk() */
void rds_tcp_nonagle(struct socket *sock)
{
mm_segment_t oldfs = get_fs();
int val = 1;
set_fs(KERNEL_DS);
sock->ops->setsockopt(sock, SOL_TCP, TCP_NODELAY, (char __user *)&val,
sizeof(val));
set_fs(oldfs);
}
/* All module specific customizations to the RDS-TCP socket should be done in
* rds_tcp_tune() and applied after socket creation. In general these
* customizations should be tunable via module_param()
*/
void rds_tcp_tune(struct socket *sock)
{
rds_tcp_nonagle(sock);
}
u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc)
{
return tcp_sk(tc->t_sock->sk)->snd_nxt;
}
u32 rds_tcp_snd_una(struct rds_tcp_connection *tc)
{
return tcp_sk(tc->t_sock->sk)->snd_una;
}
void rds_tcp_restore_callbacks(struct socket *sock,
struct rds_tcp_connection *tc)
{
rdsdebug("restoring sock %p callbacks from tc %p\n", sock, tc);
write_lock_bh(&sock->sk->sk_callback_lock);
/* done under the callback_lock to serialize with write_space */
spin_lock(&rds_tcp_tc_list_lock);
list_del_init(&tc->t_list_item);
rds_tcp_tc_count--;
spin_unlock(&rds_tcp_tc_list_lock);
tc->t_sock = NULL;
sock->sk->sk_write_space = tc->t_orig_write_space;
sock->sk->sk_data_ready = tc->t_orig_data_ready;
sock->sk->sk_state_change = tc->t_orig_state_change;
sock->sk->sk_user_data = NULL;
write_unlock_bh(&sock->sk->sk_callback_lock);
}
/*
* This is the only path that sets tc->t_sock. Send and receive trust that
* it is set. The RDS_CONN_CONNECTED bit protects those paths from being
* called while it isn't set.
*/
void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn)
{
struct rds_tcp_connection *tc = conn->c_transport_data;
rdsdebug("setting sock %p callbacks to tc %p\n", sock, tc);
write_lock_bh(&sock->sk->sk_callback_lock);
/* done under the callback_lock to serialize with write_space */
spin_lock(&rds_tcp_tc_list_lock);
list_add_tail(&tc->t_list_item, &rds_tcp_tc_list);
rds_tcp_tc_count++;
spin_unlock(&rds_tcp_tc_list_lock);
/* accepted sockets need our listen data ready undone */
if (sock->sk->sk_data_ready == rds_tcp_listen_data_ready)
sock->sk->sk_data_ready = sock->sk->sk_user_data;
tc->t_sock = sock;
tc->conn = conn;
tc->t_orig_data_ready = sock->sk->sk_data_ready;
tc->t_orig_write_space = sock->sk->sk_write_space;
tc->t_orig_state_change = sock->sk->sk_state_change;
sock->sk->sk_user_data = conn;
sock->sk->sk_data_ready = rds_tcp_data_ready;
sock->sk->sk_write_space = rds_tcp_write_space;
sock->sk->sk_state_change = rds_tcp_state_change;
write_unlock_bh(&sock->sk->sk_callback_lock);
}
static void rds_tcp_tc_info(struct socket *sock, unsigned int len,
struct rds_info_iterator *iter,
struct rds_info_lengths *lens)
{
struct rds_info_tcp_socket tsinfo;
struct rds_tcp_connection *tc;
unsigned long flags;
struct sockaddr_in sin;
int sinlen;
spin_lock_irqsave(&rds_tcp_tc_list_lock, flags);
if (len / sizeof(tsinfo) < rds_tcp_tc_count)
goto out;
list_for_each_entry(tc, &rds_tcp_tc_list, t_list_item) {
sock->ops->getname(sock, (struct sockaddr *)&sin, &sinlen, 0);
tsinfo.local_addr = sin.sin_addr.s_addr;
tsinfo.local_port = sin.sin_port;
sock->ops->getname(sock, (struct sockaddr *)&sin, &sinlen, 1);
tsinfo.peer_addr = sin.sin_addr.s_addr;
tsinfo.peer_port = sin.sin_port;
tsinfo.hdr_rem = tc->t_tinc_hdr_rem;
tsinfo.data_rem = tc->t_tinc_data_rem;
tsinfo.last_sent_nxt = tc->t_last_sent_nxt;
tsinfo.last_expected_una = tc->t_last_expected_una;
tsinfo.last_seen_una = tc->t_last_seen_una;
rds_info_copy(iter, &tsinfo, sizeof(tsinfo));
}
out:
lens->nr = rds_tcp_tc_count;
lens->each = sizeof(tsinfo);
spin_unlock_irqrestore(&rds_tcp_tc_list_lock, flags);
}
static int rds_tcp_laddr_check(struct net *net, __be32 addr)
{
if (inet_addr_type(net, addr) == RTN_LOCAL)
return 0;
return -EADDRNOTAVAIL;
}
static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp)
{
struct rds_tcp_connection *tc;
tc = kmem_cache_alloc(rds_tcp_conn_slab, gfp);
if (!tc)
return -ENOMEM;
tc->t_sock = NULL;
tc->t_tinc = NULL;
tc->t_tinc_hdr_rem = sizeof(struct rds_header);
tc->t_tinc_data_rem = 0;
conn->c_transport_data = tc;
spin_lock_irq(&rds_tcp_conn_lock);
list_add_tail(&tc->t_tcp_node, &rds_tcp_conn_list);
spin_unlock_irq(&rds_tcp_conn_lock);
rdsdebug("alloced tc %p\n", conn->c_transport_data);
return 0;
}
static void rds_tcp_conn_free(void *arg)
{
struct rds_tcp_connection *tc = arg;
unsigned long flags;
rdsdebug("freeing tc %p\n", tc);
spin_lock_irqsave(&rds_tcp_conn_lock, flags);
list_del(&tc->t_tcp_node);
spin_unlock_irqrestore(&rds_tcp_conn_lock, flags);
kmem_cache_free(rds_tcp_conn_slab, tc);
}
static void rds_tcp_destroy_conns(void)
{
struct rds_tcp_connection *tc, *_tc;
LIST_HEAD(tmp_list);
/* avoid calling conn_destroy with irqs off */
spin_lock_irq(&rds_tcp_conn_lock);
list_splice(&rds_tcp_conn_list, &tmp_list);
INIT_LIST_HEAD(&rds_tcp_conn_list);
spin_unlock_irq(&rds_tcp_conn_lock);
list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node) {
if (tc->conn->c_passive)
rds_conn_destroy(tc->conn->c_passive);
rds_conn_destroy(tc->conn);
}
}
static void rds_tcp_exit(void);
struct rds_transport rds_tcp_transport = {
.laddr_check = rds_tcp_laddr_check,
.xmit_prepare = rds_tcp_xmit_prepare,
.xmit_complete = rds_tcp_xmit_complete,
.xmit = rds_tcp_xmit,
.recv = rds_tcp_recv,
.conn_alloc = rds_tcp_conn_alloc,
.conn_free = rds_tcp_conn_free,
.conn_connect = rds_tcp_conn_connect,
.conn_shutdown = rds_tcp_conn_shutdown,
.inc_copy_to_user = rds_tcp_inc_copy_to_user,
.inc_free = rds_tcp_inc_free,
.stats_info_copy = rds_tcp_stats_info_copy,
.exit = rds_tcp_exit,
.t_owner = THIS_MODULE,
.t_name = "tcp",
.t_type = RDS_TRANS_TCP,
.t_prefer_loopback = 1,
};
static int rds_tcp_netid;
/* per-network namespace private data for this module */
struct rds_tcp_net {
struct socket *rds_tcp_listen_sock;
struct work_struct rds_tcp_accept_w;
};
static void rds_tcp_accept_worker(struct work_struct *work)
{
struct rds_tcp_net *rtn = container_of(work,
struct rds_tcp_net,
rds_tcp_accept_w);
while (rds_tcp_accept_one(rtn->rds_tcp_listen_sock) == 0)
cond_resched();
}
void rds_tcp_accept_work(struct sock *sk)
{
struct net *net = sock_net(sk);
struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
queue_work(rds_wq, &rtn->rds_tcp_accept_w);
}
static __net_init int rds_tcp_init_net(struct net *net)
{
struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
rtn->rds_tcp_listen_sock = rds_tcp_listen_init(net);
if (!rtn->rds_tcp_listen_sock) {
pr_warn("could not set up listen sock\n");
return -EAFNOSUPPORT;
}
INIT_WORK(&rtn->rds_tcp_accept_w, rds_tcp_accept_worker);
return 0;
}
static void __net_exit rds_tcp_exit_net(struct net *net)
{
struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
/* If rds_tcp_exit_net() is called as a result of netns deletion,
* the rds_tcp_kill_sock() device notifier would already have cleaned
* up the listen socket, thus there is no work to do in this function.
*
* If rds_tcp_exit_net() is called as a result of module unload,
* i.e., due to rds_tcp_exit() -> unregister_pernet_subsys(), then
* we do need to clean up the listen socket here.
*/
if (rtn->rds_tcp_listen_sock) {
rds_tcp_listen_stop(rtn->rds_tcp_listen_sock);
rtn->rds_tcp_listen_sock = NULL;
flush_work(&rtn->rds_tcp_accept_w);
}
}
static struct pernet_operations rds_tcp_net_ops = {
.init = rds_tcp_init_net,
.exit = rds_tcp_exit_net,
.id = &rds_tcp_netid,
.size = sizeof(struct rds_tcp_net),
};
static void rds_tcp_kill_sock(struct net *net)
{
struct rds_tcp_connection *tc, *_tc;
struct sock *sk;
LIST_HEAD(tmp_list);
struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
rds_tcp_listen_stop(rtn->rds_tcp_listen_sock);
rtn->rds_tcp_listen_sock = NULL;
flush_work(&rtn->rds_tcp_accept_w);
spin_lock_irq(&rds_tcp_conn_lock);
list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
struct net *c_net = read_pnet(&tc->conn->c_net);
net: rds: force to destroy connection if t_sock is NULL in rds_tcp_kill_sock(). [ Upstream commit cb66ddd156203daefb8d71158036b27b0e2caf63 ] When it is to cleanup net namespace, rds_tcp_exit_net() will call rds_tcp_kill_sock(), if t_sock is NULL, it will not call rds_conn_destroy(), rds_conn_path_destroy() and rds_tcp_conn_free() to free connection, and the worker cp_conn_w is not stopped, afterwards the net is freed in net_drop_ns(); While cp_conn_w rds_connect_worker() will call rds_tcp_conn_path_connect() and reference 'net' which has already been freed. In rds_tcp_conn_path_connect(), rds_tcp_set_callbacks() will set t_sock = sock before sock->ops->connect, but if connect() is failed, it will call rds_tcp_restore_callbacks() and set t_sock = NULL, if connect is always failed, rds_connect_worker() will try to reconnect all the time, so rds_tcp_kill_sock() will never to cancel worker cp_conn_w and free the connections. Therefore, the condition !tc->t_sock is not needed if it is going to do cleanup_net->rds_tcp_exit_net->rds_tcp_kill_sock, because tc->t_sock is always NULL, and there is on other path to cancel cp_conn_w and free connection. So this patch is to fix this. rds_tcp_kill_sock(): ... if (net != c_net || !tc->t_sock) ... Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com> ================================================================== BUG: KASAN: use-after-free in inet_create+0xbcc/0xd28 net/ipv4/af_inet.c:340 Read of size 4 at addr ffff8003496a4684 by task kworker/u8:4/3721 CPU: 3 PID: 3721 Comm: kworker/u8:4 Not tainted 5.1.0 #11 Hardware name: linux,dummy-virt (DT) Workqueue: krdsd rds_connect_worker Call trace: dump_backtrace+0x0/0x3c0 arch/arm64/kernel/time.c:53 show_stack+0x28/0x38 arch/arm64/kernel/traps.c:152 __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x120/0x188 lib/dump_stack.c:113 print_address_description+0x68/0x278 mm/kasan/report.c:253 kasan_report_error mm/kasan/report.c:351 [inline] kasan_report+0x21c/0x348 mm/kasan/report.c:409 __asan_report_load4_noabort+0x30/0x40 mm/kasan/report.c:429 inet_create+0xbcc/0xd28 net/ipv4/af_inet.c:340 __sock_create+0x4f8/0x770 net/socket.c:1276 sock_create_kern+0x50/0x68 net/socket.c:1322 rds_tcp_conn_path_connect+0x2b4/0x690 net/rds/tcp_connect.c:114 rds_connect_worker+0x108/0x1d0 net/rds/threads.c:175 process_one_work+0x6e8/0x1700 kernel/workqueue.c:2153 worker_thread+0x3b0/0xdd0 kernel/workqueue.c:2296 kthread+0x2f0/0x378 kernel/kthread.c:255 ret_from_fork+0x10/0x18 arch/arm64/kernel/entry.S:1117 Allocated by task 687: save_stack mm/kasan/kasan.c:448 [inline] set_track mm/kasan/kasan.c:460 [inline] kasan_kmalloc+0xd4/0x180 mm/kasan/kasan.c:553 kasan_slab_alloc+0x14/0x20 mm/kasan/kasan.c:490 slab_post_alloc_hook mm/slab.h:444 [inline] slab_alloc_node mm/slub.c:2705 [inline] slab_alloc mm/slub.c:2713 [inline] kmem_cache_alloc+0x14c/0x388 mm/slub.c:2718 kmem_cache_zalloc include/linux/slab.h:697 [inline] net_alloc net/core/net_namespace.c:384 [inline] copy_net_ns+0xc4/0x2d0 net/core/net_namespace.c:424 create_new_namespaces+0x300/0x658 kernel/nsproxy.c:107 unshare_nsproxy_namespaces+0xa0/0x198 kernel/nsproxy.c:206 ksys_unshare+0x340/0x628 kernel/fork.c:2577 __do_sys_unshare kernel/fork.c:2645 [inline] __se_sys_unshare kernel/fork.c:2643 [inline] __arm64_sys_unshare+0x38/0x58 kernel/fork.c:2643 __invoke_syscall arch/arm64/kernel/syscall.c:35 [inline] invoke_syscall arch/arm64/kernel/syscall.c:47 [inline] el0_svc_common+0x168/0x390 arch/arm64/kernel/syscall.c:83 el0_svc_handler+0x60/0xd0 arch/arm64/kernel/syscall.c:129 el0_svc+0x8/0xc arch/arm64/kernel/entry.S:960 Freed by task 264: save_stack mm/kasan/kasan.c:448 [inline] set_track mm/kasan/kasan.c:460 [inline] __kasan_slab_free+0x114/0x220 mm/kasan/kasan.c:521 kasan_slab_free+0x10/0x18 mm/kasan/kasan.c:528 slab_free_hook mm/slub.c:1370 [inline] slab_free_freelist_hook mm/slub.c:1397 [inline] slab_free mm/slub.c:2952 [inline] kmem_cache_free+0xb8/0x3a8 mm/slub.c:2968 net_free net/core/net_namespace.c:400 [inline] net_drop_ns.part.6+0x78/0x90 net/core/net_namespace.c:407 net_drop_ns net/core/net_namespace.c:406 [inline] cleanup_net+0x53c/0x6d8 net/core/net_namespace.c:569 process_one_work+0x6e8/0x1700 kernel/workqueue.c:2153 worker_thread+0x3b0/0xdd0 kernel/workqueue.c:2296 kthread+0x2f0/0x378 kernel/kthread.c:255 ret_from_fork+0x10/0x18 arch/arm64/kernel/entry.S:1117 The buggy address belongs to the object at ffff8003496a3f80 which belongs to the cache net_namespace of size 7872 The buggy address is located 1796 bytes inside of 7872-byte region [ffff8003496a3f80, ffff8003496a5e40) The buggy address belongs to the page: page:ffff7e000d25a800 count:1 mapcount:0 mapping:ffff80036ce4b000 index:0x0 compound_mapcount: 0 flags: 0xffffe0000008100(slab|head) raw: 0ffffe0000008100 dead000000000100 dead000000000200 ffff80036ce4b000 raw: 0000000000000000 0000000080040004 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff8003496a4580: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8003496a4600: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff8003496a4680: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff8003496a4700: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8003496a4780: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== Fixes: 467fa15356ac("RDS-TCP: Support multiple RDS-TCP listen endpoints, one per netns.") Reported-by: Hulk Robot <hulkci@huawei.com> Signed-off-by: Mao Wenan <maowenan@huawei.com> Signed-off-by: David S. Miller <davem@davemloft.net> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2019-03-28 17:10:56 +08:00
if (net != c_net)
continue;
list_move_tail(&tc->t_tcp_node, &tmp_list);
}
spin_unlock_irq(&rds_tcp_conn_lock);
list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node) {
net: rds: Fix NULL ptr use in rds_tcp_kill_sock After the commit c4e97b06cfdc ("net: rds: force to destroy connection if t_sock is NULL in rds_tcp_kill_sock()."), it introduced null-ptr-deref in rds_tcp_kill_sock as below: BUG: KASAN: null-ptr-deref on address 0000000000000020 Read of size 8 by task kworker/u16:10/910 CPU: 3 PID: 910 Comm: kworker/u16:10 Not tainted 4.4.178+ #3 Hardware name: linux,dummy-virt (DT) Workqueue: netns cleanup_net Call trace: [<ffffff90080abb50>] dump_backtrace+0x0/0x618 [<ffffff90080ac1a0>] show_stack+0x38/0x60 [<ffffff9008c42b78>] dump_stack+0x1a8/0x230 [<ffffff90085d469c>] kasan_report_error+0xc8c/0xfc0 [<ffffff90085d54a4>] kasan_report+0x94/0xd8 [<ffffff90085d1b28>] __asan_load8+0x88/0x150 [<ffffff9009c9cc2c>] rds_tcp_dev_event+0x734/0xb48 [<ffffff90081eacb0>] raw_notifier_call_chain+0x150/0x1e8 [<ffffff900973fec0>] call_netdevice_notifiers_info+0x90/0x110 [<ffffff9009764874>] netdev_run_todo+0x2f4/0xb08 [<ffffff9009796d34>] rtnl_unlock+0x2c/0x48 [<ffffff9009756484>] default_device_exit_batch+0x444/0x528 [<ffffff9009720498>] ops_exit_list+0x1c0/0x240 [<ffffff9009724a80>] cleanup_net+0x738/0xbf8 [<ffffff90081ca6cc>] process_one_work+0x96c/0x13e0 [<ffffff90081cf370>] worker_thread+0x7e0/0x1910 [<ffffff90081e7174>] kthread+0x304/0x390 [<ffffff9008094280>] ret_from_fork+0x10/0x50 If the first loop add the tc->t_sock = NULL to the tmp_list, 1). list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) then the second loop is to find connections to destroy, tc->t_sock might equal NULL, and tc->t_sock->sk happens null-ptr-deref. 2). list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node) Fixes: c4e97b06cfdc ("net: rds: force to destroy connection if t_sock is NULL in rds_tcp_kill_sock().") Signed-off-by: Mao Wenan <maowenan@huawei.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2019-09-18 16:37:33 +08:00
if (tc->t_sock) {
sk = tc->t_sock->sk;
sk->sk_prot->disconnect(sk, 0);
tcp_done(sk);
}
if (tc->conn->c_passive)
rds_conn_destroy(tc->conn->c_passive);
rds_conn_destroy(tc->conn);
}
}
static int rds_tcp_dev_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
/* rds-tcp registers as a pernet subys, so the ->exit will only
* get invoked after network acitivity has quiesced. We need to
* clean up all sockets to quiesce network activity, and use
* the unregistration of the per-net loopback device as a trigger
* to start that cleanup.
*/
if (event == NETDEV_UNREGISTER_FINAL &&
dev->ifindex == LOOPBACK_IFINDEX)
rds_tcp_kill_sock(dev_net(dev));
return NOTIFY_DONE;
}
static struct notifier_block rds_tcp_dev_notifier = {
.notifier_call = rds_tcp_dev_event,
.priority = -10, /* must be called after other network notifiers */
};
static void rds_tcp_exit(void)
{
rds_info_deregister_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info);
unregister_pernet_subsys(&rds_tcp_net_ops);
if (unregister_netdevice_notifier(&rds_tcp_dev_notifier))
pr_warn("could not unregister rds_tcp_dev_notifier\n");
rds_tcp_destroy_conns();
rds_trans_unregister(&rds_tcp_transport);
rds_tcp_recv_exit();
kmem_cache_destroy(rds_tcp_conn_slab);
}
module_exit(rds_tcp_exit);
static int rds_tcp_init(void)
{
int ret;
rds_tcp_conn_slab = kmem_cache_create("rds_tcp_connection",
sizeof(struct rds_tcp_connection),
0, 0, NULL);
if (!rds_tcp_conn_slab) {
ret = -ENOMEM;
goto out;
}
ret = register_netdevice_notifier(&rds_tcp_dev_notifier);
if (ret) {
pr_warn("could not register rds_tcp_dev_notifier\n");
goto out;
}
ret = register_pernet_subsys(&rds_tcp_net_ops);
if (ret)
goto out_slab;
ret = rds_tcp_recv_init();
if (ret)
goto out_pernet;
ret = rds_trans_register(&rds_tcp_transport);
if (ret)
goto out_recv;
rds_info_register_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info);
goto out;
out_recv:
rds_tcp_recv_exit();
out_pernet:
unregister_pernet_subsys(&rds_tcp_net_ops);
out_slab:
kmem_cache_destroy(rds_tcp_conn_slab);
out:
return ret;
}
module_init(rds_tcp_init);
MODULE_AUTHOR("Oracle Corporation <rds-devel@oss.oracle.com>");
MODULE_DESCRIPTION("RDS: TCP transport");
MODULE_LICENSE("Dual BSD/GPL");