From eec9e29fc5e9b417649830ab76a3aa10b90d2e9f Mon Sep 17 00:00:00 2001 From: Shlomo Pongratz Date: Wed, 10 Apr 2013 14:26:46 +0000 Subject: [PATCH 01/22] IB/core: Verify that QP handler is valid before dispatching events For QPs of type IB_QPT_XRC_TGT the IB core assigns a common event handler __ib_shared_qp_event_handler(), and the optionally supplied event handler is stored. When the common handler is called it iterates on all opened QPs and calles the original handlers without checking if they are NULL. Fix that. Signed-off-by: Shlomo Pongratz Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/core/verbs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index a8fdd3381405..22192deb8828 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -348,7 +348,8 @@ static void __ib_shared_qp_event_handler(struct ib_event *event, void *context) struct ib_qp *qp = context; list_for_each_entry(event->element.qp, &qp->open_list, open_list) - event->element.qp->event_handler(event, event->element.qp->qp_context); + if (event->element.qp->event_handler) + event->element.qp->event_handler(event, event->element.qp->qp_context); } static void __ib_insert_xrcd_qp(struct ib_xrcd *xrcd, struct ib_qp *qp) From 9f550553a470d3b05fc8bdced3a738d8ed5b8d8a Mon Sep 17 00:00:00 2001 From: Shlomo Pongratz Date: Wed, 10 Apr 2013 14:26:47 +0000 Subject: [PATCH 02/22] mlx4_core: Implement SRQ object lookup from srqn Expose a new API mlx4_srq_lookup() to retrive a SRQ based on its number. This API is needed in the mlx4_ib driver CQ polling logic, when a work completion is associated with a XRC TGT QP. Since a target QP may redirect to more than one XRC SRQ, the srq field in the QP has no usage and the real XRC SRQ need to be retrived using the information from the XRCETH IB header which is placed in the HW CQE. Signed-off-by: Shlomo Pongratz Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/net/ethernet/mellanox/mlx4/srq.c | 15 +++++++++++++++ include/linux/mlx4/srq.h | 2 ++ 2 files changed, 17 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx4/srq.c b/drivers/net/ethernet/mellanox/mlx4/srq.c index e329fe1f11b7..79fd269e2c54 100644 --- a/drivers/net/ethernet/mellanox/mlx4/srq.c +++ b/drivers/net/ethernet/mellanox/mlx4/srq.c @@ -298,3 +298,18 @@ void mlx4_cleanup_srq_table(struct mlx4_dev *dev) return; mlx4_bitmap_cleanup(&mlx4_priv(dev)->srq_table.bitmap); } + +struct mlx4_srq *mlx4_srq_lookup(struct mlx4_dev *dev, u32 srqn) +{ + struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table; + struct mlx4_srq *srq; + unsigned long flags; + + spin_lock_irqsave(&srq_table->lock, flags); + srq = radix_tree_lookup(&srq_table->tree, + srqn & (dev->caps.num_srqs - 1)); + spin_unlock_irqrestore(&srq_table->lock, flags); + + return srq; +} +EXPORT_SYMBOL_GPL(mlx4_srq_lookup); diff --git a/include/linux/mlx4/srq.h b/include/linux/mlx4/srq.h index 799a0697a383..192e0f7784f2 100644 --- a/include/linux/mlx4/srq.h +++ b/include/linux/mlx4/srq.h @@ -39,4 +39,6 @@ struct mlx4_wqe_srq_next_seg { u32 reserved2[3]; }; +struct mlx4_srq *mlx4_srq_lookup(struct mlx4_dev *dev, u32 srqn); + #endif /* MLX4_SRQ_H */ From f3cca4b1f410c653241f005f4f401c7cd7232058 Mon Sep 17 00:00:00 2001 From: Shlomo Pongratz Date: Wed, 10 Apr 2013 14:26:48 +0000 Subject: [PATCH 03/22] IB/mlx4: Fetch XRC SRQ in the CQ polling code An XRC target QP may redirect to more than one XRC SRQ. This means that for work completions associated with a XRC TGT QP, the srq field in the QP has no usage and the real XRC SRQ need to be retrived using the information from the XRCETH placed into the CQE, do that. Signed-off-by: Shlomo Pongratz Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/cq.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index ae67df35dd4d..dab4b5188a27 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -33,6 +33,7 @@ #include #include +#include #include #include "mlx4_ib.h" @@ -585,6 +586,7 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq, struct mlx4_qp *mqp; struct mlx4_ib_wq *wq; struct mlx4_ib_srq *srq; + struct mlx4_srq *msrq = NULL; int is_send; int is_error; u32 g_mlpath_rqpn; @@ -653,6 +655,20 @@ repoll: wc->qp = &(*cur_qp)->ibqp; + if (wc->qp->qp_type == IB_QPT_XRC_TGT) { + u32 srq_num; + g_mlpath_rqpn = be32_to_cpu(cqe->g_mlpath_rqpn); + srq_num = g_mlpath_rqpn & 0xffffff; + /* SRQ is also in the radix tree */ + msrq = mlx4_srq_lookup(to_mdev(cq->ibcq.device)->dev, + srq_num); + if (unlikely(!msrq)) { + pr_warn("CQ %06x with entry for unknown SRQN %06x\n", + cq->mcq.cqn, srq_num); + return -EINVAL; + } + } + if (is_send) { wq = &(*cur_qp)->sq; if (!(*cur_qp)->sq_signal_bits) { @@ -666,6 +682,11 @@ repoll: wqe_ctr = be16_to_cpu(cqe->wqe_index); wc->wr_id = srq->wrid[wqe_ctr]; mlx4_ib_free_srq_wqe(srq, wqe_ctr); + } else if (msrq) { + srq = to_mibsrq(msrq); + wqe_ctr = be16_to_cpu(cqe->wqe_index); + wc->wr_id = srq->wrid[wqe_ctr]; + mlx4_ib_free_srq_wqe(srq, wqe_ctr); } else { wq = &(*cur_qp)->rq; tail = wq->tail & (wq->wqe_cnt - 1); From bc4ba94ce792f6444daf47b39fe8023428ac25b5 Mon Sep 17 00:00:00 2001 From: Cong Ding Date: Mon, 4 Feb 2013 22:18:06 +0000 Subject: [PATCH 04/22] RDMA/cxgb3: Fix uninitialized variable The variable npages might be used uninitialized. Signed-off-by: Cong Ding Acked-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb3/iwch_provider.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index 9c12da0cbd32..e87f2201b220 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -559,7 +559,7 @@ static int iwch_reregister_phys_mem(struct ib_mr *mr, __be64 *page_list = NULL; int shift = 0; u64 total_size; - int npages; + int npages = 0; int ret; PDBG("%s ib_mr %p ib_pd %p\n", __func__, mr, pd); From cc529c0d72bd2490801ce324caf2ea0a0c1d7b1e Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Mon, 4 Mar 2013 12:58:17 +0000 Subject: [PATCH 05/22] RDMA: Rename random32() to prandom_u32() Use more preferable function name which implies using a pseudo-random number generator. Signed-off-by: Akinobu Mita Reviewed-by: Steve Wise Cc: Roland Dreier Cc: Sean Hefty Cc: Hal Rosenstock Cc: Steve Wise Cc: linux-rdma@vger.kernel.org Reviewed-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb3/cxio_resource.c | 4 ++-- drivers/infiniband/hw/cxgb4/id_table.c | 4 ++-- drivers/infiniband/hw/mlx4/mad.c | 2 +- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.c b/drivers/infiniband/hw/cxgb3/cxio_resource.c index 31f9201b2980..c40088ecf9f3 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_resource.c +++ b/drivers/infiniband/hw/cxgb3/cxio_resource.c @@ -62,13 +62,13 @@ static int __cxio_init_resource_fifo(struct kfifo *fifo, kfifo_in(fifo, (unsigned char *) &entry, sizeof(u32)); if (random) { j = 0; - random_bytes = random32(); + random_bytes = prandom_u32(); for (i = 0; i < RANDOM_SIZE; i++) rarray[i] = i + skip_low; for (i = skip_low + RANDOM_SIZE; i < nr - skip_high; i++) { if (j >= RANDOM_SIZE) { j = 0; - random_bytes = random32(); + random_bytes = prandom_u32(); } idx = (random_bytes >> (j * 2)) & 0xF; kfifo_in(fifo, diff --git a/drivers/infiniband/hw/cxgb4/id_table.c b/drivers/infiniband/hw/cxgb4/id_table.c index f95e5df30db2..0161ae6ad629 100644 --- a/drivers/infiniband/hw/cxgb4/id_table.c +++ b/drivers/infiniband/hw/cxgb4/id_table.c @@ -54,7 +54,7 @@ u32 c4iw_id_alloc(struct c4iw_id_table *alloc) if (obj < alloc->max) { if (alloc->flags & C4IW_ID_TABLE_F_RANDOM) - alloc->last += random32() % RANDOM_SKIP; + alloc->last += prandom_u32() % RANDOM_SKIP; else alloc->last = obj + 1; if (alloc->last >= alloc->max) @@ -88,7 +88,7 @@ int c4iw_id_table_alloc(struct c4iw_id_table *alloc, u32 start, u32 num, alloc->start = start; alloc->flags = flags; if (flags & C4IW_ID_TABLE_F_RANDOM) - alloc->last = random32() % RANDOM_SKIP; + alloc->last = prandom_u32() % RANDOM_SKIP; else alloc->last = 0; alloc->max = num; diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 934792c477bc..4d599cedbb0b 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -93,7 +93,7 @@ static void __propagate_pkey_ev(struct mlx4_ib_dev *dev, int port_num, __be64 mlx4_ib_gen_node_guid(void) { #define NODE_GUID_HI ((u64) (((u64)IB_OPENIB_OUI) << 40)) - return cpu_to_be64(NODE_GUID_HI | random32()); + return cpu_to_be64(NODE_GUID_HI | prandom_u32()); } __be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 1ef880de3a41..3eceb61e3532 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -460,7 +460,7 @@ static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even goto err_qp; } - psn = random32() & 0xffffff; + psn = prandom_u32() & 0xffffff; ret = ipoib_cm_modify_rx_qp(dev, cm_id, p->qp, psn); if (ret) goto err_modify; From 83bdd3b96cad1b0c5b8e257ed1f4f38eb2cb844b Mon Sep 17 00:00:00 2001 From: Doug Ledford Date: Mon, 1 Apr 2013 21:25:30 +0000 Subject: [PATCH 06/22] IPoIB: Fix ipoib_hard_header() return value If you have a patched up dhcp server (and dhclient), they will use AF_PACKET/SOCK_DGRAM pair to send dhcp packets over IPoIB. However, when testing an upstream kernel, this has been broken for a very long time (I tested 2.6.34, 2.6.38, 3.0, 3.1, 3.8, HEAD). It turns out that the hard_header routine in ipoib is not following the API and is returning 0 even when it pushed data onto the skb. This then causes af_packet.c to overwrite the header just pushed with data from user space. Fixing this gets DHCP working on IPoIB. Signed-off-by: Doug Ledford Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 8534afd04e7c..31dd2a7a880f 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -828,7 +828,7 @@ static int ipoib_hard_header(struct sk_buff *skb, */ memcpy(cb->hwaddr, daddr, INFINIBAND_ALEN); - return 0; + return sizeof *header; } static void ipoib_set_mcast_list(struct net_device *dev) From 532ec6f1c0b5e936f73c607309253204866e2102 Mon Sep 17 00:00:00 2001 From: Grant Grundler Date: Tue, 26 Mar 2013 21:48:28 +0000 Subject: [PATCH 07/22] SRPT: Fix odd use of WARN_ON() While WARN_ON("const string") will work, it's intent is not obvious. The warning is more useful by showing the "state" value. Signed-off-by: Grant Grundler Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/srpt/ib_srpt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index c09d41b1a2ff..b08ca7a9f76b 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -1374,7 +1374,7 @@ static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx) target_put_sess_cmd(ioctx->ch->sess, &ioctx->cmd); break; default: - WARN_ON("ERROR: unexpected command state"); + WARN(1, "Unexpected command state (%d)", state); break; } From 137200a4bb74c6d3c13a8eea1edd617a15fdf3e2 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Thu, 28 Mar 2013 18:12:28 +0000 Subject: [PATCH 08/22] IB/ipath: Correct ipath_verbs_register_sysfs() error handling ipath_verbs_register_sysfs() never returned the correct error code from device_create_file and never cleaned up from a failure. Additionally, the caller of ipath_verbs_register_sysfs() doesn't return the correct "ret" value. This patch resolves all of these issues. Reported-by: Wei Yongjun Reviewed-by: Dean Luick Signed-off-by: Mike Marciniszyn Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_verbs.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index ea93870266eb..44ea9390417c 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -2187,7 +2187,8 @@ int ipath_register_ib_device(struct ipath_devdata *dd) if (ret) goto err_reg; - if (ipath_verbs_register_sysfs(dev)) + ret = ipath_verbs_register_sysfs(dev); + if (ret) goto err_class; enable_timer(dd); @@ -2327,15 +2328,15 @@ static int ipath_verbs_register_sysfs(struct ib_device *dev) int i; int ret; - for (i = 0; i < ARRAY_SIZE(ipath_class_attributes); ++i) - if (device_create_file(&dev->dev, - ipath_class_attributes[i])) { - ret = 1; + for (i = 0; i < ARRAY_SIZE(ipath_class_attributes); ++i) { + ret = device_create_file(&dev->dev, + ipath_class_attributes[i]); + if (ret) goto bail; - } - - ret = 0; - + } + return 0; bail: + for (i = 0; i < ARRAY_SIZE(ipath_class_attributes); ++i) + device_remove_file(&dev->dev, ipath_class_attributes[i]); return ret; } From c9bdad3c819cf63e2acea9994d2a60f23f1b7dd5 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Thu, 28 Mar 2013 18:17:20 +0000 Subject: [PATCH 09/22] IB/qib: Correct qib_verbs_register_sysfs() error handling qib_verbs_register_sysfs() never cleans up from a failure. Additionally, the caller of qib_verbs_register_sysfs() doesn't return the correct "ret" value. This patch resolves both of those issues. Reported-by: Wei Yongjun Reviewed-by: Dean Luick Signed-off-by: Mike Marciniszyn Signed-off-by: Roland Dreier --- drivers/infiniband/hw/qib/qib_sysfs.c | 6 +++++- drivers/infiniband/hw/qib/qib_verbs.c | 3 ++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c index 034cc821de5c..3c8e4e3caca6 100644 --- a/drivers/infiniband/hw/qib/qib_sysfs.c +++ b/drivers/infiniband/hw/qib/qib_sysfs.c @@ -808,10 +808,14 @@ int qib_verbs_register_sysfs(struct qib_devdata *dd) for (i = 0; i < ARRAY_SIZE(qib_attributes); ++i) { ret = device_create_file(&dev->dev, qib_attributes[i]); if (ret) - return ret; + goto bail; } return 0; +bail: + for (i = 0; i < ARRAY_SIZE(qib_attributes); ++i) + device_remove_file(&dev->dev, qib_attributes[i]); + return ret; } /* diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 7c0ab16a2fe2..904c384aa361 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -2234,7 +2234,8 @@ int qib_register_ib_device(struct qib_devdata *dd) if (ret) goto err_agents; - if (qib_verbs_register_sysfs(dd)) + ret = qib_verbs_register_sysfs(dd); + if (ret) goto err_class; goto bail; From e413a823f60b582af471f0079eb99f50d34b0da7 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Tue, 23 Apr 2013 16:17:14 +0000 Subject: [PATCH 10/22] RDMA/iwcm: Don't touch cmid after dropping reference The function cm_work_handler() cannot touch the cm_id after it derefs it, because it might be freed on another concurrent thread. If there are more work items queued for this cm_id, then we know there must be more references because they are added when the work items are queued. So in the while loop inside cm_work_handler(), after derefing, if the queue is empty, then exit the function. Otherwise we know it's safe to re-acquire the lock. Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/core/iwcm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index 0bb99bb38809..c47c2034ca71 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -878,6 +878,8 @@ static void cm_work_handler(struct work_struct *_work) } return; } + if (empty) + return; spin_lock_irqsave(&cm_id_priv->lock, flags); } spin_unlock_irqrestore(&cm_id_priv->lock, flags); From e0debf9cb50d5e99bb21564627061076869b79ee Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Sun, 21 Apr 2013 15:09:59 +0000 Subject: [PATCH 11/22] mlx4_core: Reduce warning message for SRQ_LIMIT event to debug level Commit acba2420f9d2 ("mlx4_core: Add wrapper functions and comm channel and slave event support to EQs") introduced a warning printout for SRQ LIMIT events. This warning can flood the log when (correct, normally operating) apps use SRQ LIMIT events as a trigger to post WQEs to SRQs. Reduce the warning message to be a debug printout. Reported-by: Rick Warner Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/net/ethernet/mellanox/mlx4/eq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index 8e3123a1df88..6000342f9725 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -497,8 +497,8 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) break; case MLX4_EVENT_TYPE_SRQ_LIMIT: - mlx4_warn(dev, "%s: MLX4_EVENT_TYPE_SRQ_LIMIT\n", - __func__); + mlx4_dbg(dev, "%s: MLX4_EVENT_TYPE_SRQ_LIMIT\n", + __func__); case MLX4_EVENT_TYPE_SRQ_CATAS_ERROR: if (mlx4_is_master(dev)) { /* forward only to slave owning the SRQ */ From 02d7ef6f9dfdd0756441913e34008e8883195deb Mon Sep 17 00:00:00 2001 From: Dotan Barak Date: Sun, 21 Apr 2013 15:10:00 +0000 Subject: [PATCH 12/22] IB/mlx4: Disable VLAN stripping for RAW PACKET QPs Fix the asymmetric behavior w.r.t VLAN insertion/stripping for RAW PACKET QPs -- we don't insert on send and need not strip on receive. Signed-off-by: Dotan Barak Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/qp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 35cced2a4da8..d4e87c28507e 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1292,6 +1292,8 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { context->sq_size_stride |= !!qp->sq_no_prefetch << 7; context->xrcd = cpu_to_be32((u32) qp->xrcdn); + if (ibqp->qp_type == IB_QPT_RAW_PACKET) + context->param3 |= cpu_to_be32(1 << 30); } if (qp->ibqp.uobject) From 3528f69637723183f997573c7ec50ca869f219ba Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Sun, 21 Apr 2013 15:10:01 +0000 Subject: [PATCH 13/22] IB/mlx4: Set link type for RAW PACKET QPs in the QP context When the link type is Ethernet, setting the link type in the QP context will enable TCP/IP stateless offloads (checksum, LSO, RSS) for RAW PACKET Ethernet QPs. For IB UD QPs this worked OK since the value assumed by the firmware for IB link layer is zero. Signed-off-by: Eli Cohen Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/qp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index d4e87c28507e..4f10af2905b5 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1460,6 +1460,10 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, } } + if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) + context->pri_path.ackto = (context->pri_path.ackto & 0xf8) | + MLX4_IB_LINK_TYPE_ETH; + if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD && attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify) sqd_event = 1; From 3cd0e1789ad39c3b7ed006ce53a83328bdadbee8 Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Wed, 24 Apr 2013 13:58:44 +0000 Subject: [PATCH 14/22] mlx4_core: Move DMFS HW structs to common header file Move flow steering HW structures to be on the public mlx4 include directory, as a pre-step for the mlx4 IB driver to use them too. Signed-off-by: Hadar Hen Zion Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 79 ----------------------- include/linux/mlx4/device.h | 79 +++++++++++++++++++++++ 2 files changed, 79 insertions(+), 79 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index d738454116a0..d5fdb19771e2 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -701,85 +701,6 @@ struct mlx4_steer { struct list_head steer_entries[MLX4_NUM_STEERS]; }; -struct mlx4_net_trans_rule_hw_ctrl { - __be32 ctrl; - u8 rsvd1; - u8 funcid; - u8 vep; - u8 port; - __be32 qpn; - __be32 rsvd2; -}; - -struct mlx4_net_trans_rule_hw_ib { - u8 size; - u8 rsvd1; - __be16 id; - u32 rsvd2; - __be32 qpn; - __be32 qpn_mask; - u8 dst_gid[16]; - u8 dst_gid_msk[16]; -} __packed; - -struct mlx4_net_trans_rule_hw_eth { - u8 size; - u8 rsvd; - __be16 id; - u8 rsvd1[6]; - u8 dst_mac[6]; - u16 rsvd2; - u8 dst_mac_msk[6]; - u16 rsvd3; - u8 src_mac[6]; - u16 rsvd4; - u8 src_mac_msk[6]; - u8 rsvd5; - u8 ether_type_enable; - __be16 ether_type; - __be16 vlan_id_msk; - __be16 vlan_id; -} __packed; - -struct mlx4_net_trans_rule_hw_tcp_udp { - u8 size; - u8 rsvd; - __be16 id; - __be16 rsvd1[3]; - __be16 dst_port; - __be16 rsvd2; - __be16 dst_port_msk; - __be16 rsvd3; - __be16 src_port; - __be16 rsvd4; - __be16 src_port_msk; -} __packed; - -struct mlx4_net_trans_rule_hw_ipv4 { - u8 size; - u8 rsvd; - __be16 id; - __be32 rsvd1; - __be32 dst_ip; - __be32 dst_ip_msk; - __be32 src_ip; - __be32 src_ip_msk; -} __packed; - -struct _rule_hw { - union { - struct { - u8 size; - u8 rsvd; - __be16 id; - }; - struct mlx4_net_trans_rule_hw_eth eth; - struct mlx4_net_trans_rule_hw_ib ib; - struct mlx4_net_trans_rule_hw_ipv4 ipv4; - struct mlx4_net_trans_rule_hw_tcp_udp tcp_udp; - }; -}; - enum { MLX4_PCI_DEV_IS_VF = 1 << 0, MLX4_PCI_DEV_FORCE_SENSE_PORT = 1 << 1, diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 811f91cf5e8c..9fbf416fa694 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -962,6 +962,85 @@ struct mlx4_net_trans_rule { u32 qpn; }; +struct mlx4_net_trans_rule_hw_ctrl { + __be32 ctrl; + u8 rsvd1; + u8 funcid; + u8 vep; + u8 port; + __be32 qpn; + __be32 rsvd2; +}; + +struct mlx4_net_trans_rule_hw_ib { + u8 size; + u8 rsvd1; + __be16 id; + u32 rsvd2; + __be32 qpn; + __be32 qpn_mask; + u8 dst_gid[16]; + u8 dst_gid_msk[16]; +} __packed; + +struct mlx4_net_trans_rule_hw_eth { + u8 size; + u8 rsvd; + __be16 id; + u8 rsvd1[6]; + u8 dst_mac[6]; + u16 rsvd2; + u8 dst_mac_msk[6]; + u16 rsvd3; + u8 src_mac[6]; + u16 rsvd4; + u8 src_mac_msk[6]; + u8 rsvd5; + u8 ether_type_enable; + __be16 ether_type; + __be16 vlan_id_msk; + __be16 vlan_id; +} __packed; + +struct mlx4_net_trans_rule_hw_tcp_udp { + u8 size; + u8 rsvd; + __be16 id; + __be16 rsvd1[3]; + __be16 dst_port; + __be16 rsvd2; + __be16 dst_port_msk; + __be16 rsvd3; + __be16 src_port; + __be16 rsvd4; + __be16 src_port_msk; +} __packed; + +struct mlx4_net_trans_rule_hw_ipv4 { + u8 size; + u8 rsvd; + __be16 id; + __be32 rsvd1; + __be32 dst_ip; + __be32 dst_ip_msk; + __be32 src_ip; + __be32 src_ip_msk; +} __packed; + +struct _rule_hw { + union { + struct { + u8 size; + u8 rsvd; + __be16 id; + }; + struct mlx4_net_trans_rule_hw_eth eth; + struct mlx4_net_trans_rule_hw_ib ib; + struct mlx4_net_trans_rule_hw_ipv4 ipv4; + struct mlx4_net_trans_rule_hw_tcp_udp tcp_udp; + }; +}; + int mlx4_flow_steer_promisc_add(struct mlx4_dev *dev, u8 port, u32 qpn, enum mlx4_net_trans_promisc_mode mode); int mlx4_flow_steer_promisc_remove(struct mlx4_dev *dev, u8 port, From f91625398a2e6e03f0155861b630021ceddb42e7 Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Wed, 24 Apr 2013 13:58:45 +0000 Subject: [PATCH 15/22] mlx4: Match DMFS promiscuous field names to firmware spec Align the names used by enum mlx4_net_trans_promisc_mode with the actual firmware specification. The patch doesn't introduce any functional change or API change towards the firmware. Remove MLX4_FS_PROMISC_FUNCTION_PORT which isn't of use. Add new enums MLX4_FS_{UC/MC}_SNIFFER as a preparation step for sniffer support. Signed-off-by: Hadar Hen Zion Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- .../net/ethernet/mellanox/mlx4/en_ethtool.c | 2 +- .../net/ethernet/mellanox/mlx4/en_netdev.c | 16 +++++++------- drivers/net/ethernet/mellanox/mlx4/mcg.c | 21 +++++++++---------- include/linux/mlx4/device.h | 11 +++++----- 4 files changed, 25 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index 00f25b5f297f..20476844fb20 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -889,7 +889,7 @@ static int mlx4_en_flow_replace(struct net_device *dev, .queue_mode = MLX4_NET_TRANS_Q_FIFO, .exclusive = 0, .allow_loopback = 1, - .promisc_mode = MLX4_FS_PROMISC_NONE, + .promisc_mode = MLX4_FS_REGULAR, }; rule.port = priv->port; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 30d78f806dc3..0860130f2b17 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -127,7 +127,7 @@ static void mlx4_en_filter_work(struct work_struct *work) .queue_mode = MLX4_NET_TRANS_Q_LIFO, .exclusive = 1, .allow_loopback = 1, - .promisc_mode = MLX4_FS_PROMISC_NONE, + .promisc_mode = MLX4_FS_REGULAR, .port = priv->port, .priority = MLX4_DOMAIN_RFS, }; @@ -446,7 +446,7 @@ static int mlx4_en_uc_steer_add(struct mlx4_en_priv *priv, .queue_mode = MLX4_NET_TRANS_Q_FIFO, .exclusive = 0, .allow_loopback = 1, - .promisc_mode = MLX4_FS_PROMISC_NONE, + .promisc_mode = MLX4_FS_REGULAR, .priority = MLX4_DOMAIN_NIC, }; @@ -793,7 +793,7 @@ static void mlx4_en_set_promisc_mode(struct mlx4_en_priv *priv, err = mlx4_flow_steer_promisc_add(mdev->dev, priv->port, priv->base_qpn, - MLX4_FS_PROMISC_UPLINK); + MLX4_FS_ALL_DEFAULT); if (err) en_err(priv, "Failed enabling promiscuous mode\n"); priv->flags |= MLX4_EN_FLAG_MC_PROMISC; @@ -856,7 +856,7 @@ static void mlx4_en_clear_promisc_mode(struct mlx4_en_priv *priv, case MLX4_STEERING_MODE_DEVICE_MANAGED: err = mlx4_flow_steer_promisc_remove(mdev->dev, priv->port, - MLX4_FS_PROMISC_UPLINK); + MLX4_FS_ALL_DEFAULT); if (err) en_err(priv, "Failed disabling promiscuous mode\n"); priv->flags &= ~MLX4_EN_FLAG_MC_PROMISC; @@ -917,7 +917,7 @@ static void mlx4_en_do_multicast(struct mlx4_en_priv *priv, err = mlx4_flow_steer_promisc_add(mdev->dev, priv->port, priv->base_qpn, - MLX4_FS_PROMISC_ALL_MULTI); + MLX4_FS_MC_DEFAULT); break; case MLX4_STEERING_MODE_B0: @@ -940,7 +940,7 @@ static void mlx4_en_do_multicast(struct mlx4_en_priv *priv, case MLX4_STEERING_MODE_DEVICE_MANAGED: err = mlx4_flow_steer_promisc_remove(mdev->dev, priv->port, - MLX4_FS_PROMISC_ALL_MULTI); + MLX4_FS_MC_DEFAULT); break; case MLX4_STEERING_MODE_B0: @@ -1598,10 +1598,10 @@ void mlx4_en_stop_port(struct net_device *dev, int detach) MLX4_EN_FLAG_MC_PROMISC); mlx4_flow_steer_promisc_remove(mdev->dev, priv->port, - MLX4_FS_PROMISC_UPLINK); + MLX4_FS_ALL_DEFAULT); mlx4_flow_steer_promisc_remove(mdev->dev, priv->port, - MLX4_FS_PROMISC_ALL_MULTI); + MLX4_FS_MC_DEFAULT); } else if (priv->flags & MLX4_EN_FLAG_PROMISC) { priv->flags &= ~MLX4_EN_FLAG_PROMISC; diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c index 52685524708d..d1f01dc8fbc3 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c @@ -649,10 +649,11 @@ static void trans_rule_ctrl_to_hw(struct mlx4_net_trans_rule *ctrl, struct mlx4_net_trans_rule_hw_ctrl *hw) { static const u8 __promisc_mode[] = { - [MLX4_FS_PROMISC_NONE] = 0x0, - [MLX4_FS_PROMISC_UPLINK] = 0x1, - [MLX4_FS_PROMISC_FUNCTION_PORT] = 0x2, - [MLX4_FS_PROMISC_ALL_MULTI] = 0x3, + [MLX4_FS_REGULAR] = 0x0, + [MLX4_FS_ALL_DEFAULT] = 0x1, + [MLX4_FS_MC_DEFAULT] = 0x3, + [MLX4_FS_UC_SNIFFER] = 0x4, + [MLX4_FS_MC_SNIFFER] = 0x5, }; u32 dw = 0; @@ -1153,7 +1154,7 @@ int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], struct mlx4_net_trans_rule rule = { .queue_mode = MLX4_NET_TRANS_Q_FIFO, .exclusive = 0, - .promisc_mode = MLX4_FS_PROMISC_NONE, + .promisc_mode = MLX4_FS_REGULAR, .priority = MLX4_DOMAIN_NIC, }; @@ -1222,11 +1223,10 @@ int mlx4_flow_steer_promisc_add(struct mlx4_dev *dev, u8 port, u64 *regid_p; switch (mode) { - case MLX4_FS_PROMISC_UPLINK: - case MLX4_FS_PROMISC_FUNCTION_PORT: + case MLX4_FS_ALL_DEFAULT: regid_p = &dev->regid_promisc_array[port]; break; - case MLX4_FS_PROMISC_ALL_MULTI: + case MLX4_FS_MC_DEFAULT: regid_p = &dev->regid_allmulti_array[port]; break; default: @@ -1253,11 +1253,10 @@ int mlx4_flow_steer_promisc_remove(struct mlx4_dev *dev, u8 port, u64 *regid_p; switch (mode) { - case MLX4_FS_PROMISC_UPLINK: - case MLX4_FS_PROMISC_FUNCTION_PORT: + case MLX4_FS_ALL_DEFAULT: regid_p = &dev->regid_promisc_array[port]; break; - case MLX4_FS_PROMISC_ALL_MULTI: + case MLX4_FS_MC_DEFAULT: regid_p = &dev->regid_allmulti_array[port]; break; default: diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 9fbf416fa694..b2fe59d199f8 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -896,11 +896,12 @@ static inline int map_hw_to_sw_id(u16 header_id) } enum mlx4_net_trans_promisc_mode { - MLX4_FS_PROMISC_NONE = 0, - MLX4_FS_PROMISC_UPLINK, - /* For future use. Not implemented yet */ - MLX4_FS_PROMISC_FUNCTION_PORT, - MLX4_FS_PROMISC_ALL_MULTI, + MLX4_FS_REGULAR = 1, + MLX4_FS_ALL_DEFAULT, + MLX4_FS_MC_DEFAULT, + MLX4_FS_UC_SNIFFER, + MLX4_FS_MC_SNIFFER, + }; struct mlx4_spec_eth { From ba60a3560ccf8e4253da9c0e8d1292375fd51796 Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Wed, 24 Apr 2013 13:58:46 +0000 Subject: [PATCH 16/22] mlx4_core: Change a few DMFS fields names to match firmare spec Change struct mlx4_net_trans_rule_hw_eth :: vlan_id name to vlan_tag Change struct mlx4_net_trans_rule_hw_ib :: r_u_qpn name to l3_qpn The patch doesn't introduce any functional change or API change towards the firmware. Signed-off-by: Hadar Hen Zion Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/net/ethernet/mellanox/mlx4/mcg.c | 6 +++--- include/linux/mlx4/device.h | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c index d1f01dc8fbc3..3cfd3725961d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c @@ -714,12 +714,12 @@ static int parse_trans_rule(struct mlx4_dev *dev, struct mlx4_spec_list *spec, rule_hw->eth.ether_type_enable = 1; rule_hw->eth.ether_type = spec->eth.ether_type; } - rule_hw->eth.vlan_id = spec->eth.vlan_id; - rule_hw->eth.vlan_id_msk = spec->eth.vlan_id_msk; + rule_hw->eth.vlan_tag = spec->eth.vlan_id; + rule_hw->eth.vlan_tag_msk = spec->eth.vlan_id_msk; break; case MLX4_NET_TRANS_RULE_ID_IB: - rule_hw->ib.qpn = spec->ib.r_qpn; + rule_hw->ib.l3_qpn = spec->ib.l3_qpn; rule_hw->ib.qpn_mask = spec->ib.qpn_msk; memcpy(&rule_hw->ib.dst_gid, &spec->ib.dst_gid, 16); memcpy(&rule_hw->ib.dst_gid_msk, &spec->ib.dst_gid_msk, 16); diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index b2fe59d199f8..a69bda77c1d1 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -930,7 +930,7 @@ struct mlx4_spec_ipv4 { }; struct mlx4_spec_ib { - __be32 r_qpn; + __be32 l3_qpn; __be32 qpn_msk; u8 dst_gid[16]; u8 dst_gid_msk[16]; @@ -978,7 +978,7 @@ struct mlx4_net_trans_rule_hw_ib { u8 rsvd1; __be16 id; u32 rsvd2; - __be32 qpn; + __be32 l3_qpn; __be32 qpn_mask; u8 dst_gid[16]; u8 dst_gid_msk[16]; @@ -999,8 +999,8 @@ struct mlx4_net_trans_rule_hw_eth { u8 rsvd5; u8 ether_type_enable; __be16 ether_type; - __be16 vlan_id_msk; - __be16 vlan_id; + __be16 vlan_tag_msk; + __be16 vlan_tag; } __packed; struct mlx4_net_trans_rule_hw_tcp_udp { From bcf372971d471f6cb4070adb7bfc987d8b3d21f2 Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Wed, 24 Apr 2013 13:58:47 +0000 Subject: [PATCH 17/22] mlx4_core: Directly expose fields of DMFS HW rule control segment Some of struct mlx4_net_trans_rule_hw_ctrl fields were packed into u32 and accessed through bit field operations. Expose and access them directly as u8 or u16. Signed-off-by: Hadar Hen Zion Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/net/ethernet/mellanox/mlx4/mcg.c | 14 +++++++------- include/linux/mlx4/device.h | 4 +++- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c index 3cfd3725961d..07712f976c8f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c @@ -656,15 +656,15 @@ static void trans_rule_ctrl_to_hw(struct mlx4_net_trans_rule *ctrl, [MLX4_FS_MC_SNIFFER] = 0x5, }; - u32 dw = 0; + u8 flags = 0; - dw = ctrl->queue_mode == MLX4_NET_TRANS_Q_LIFO ? 1 : 0; - dw |= ctrl->exclusive ? (1 << 2) : 0; - dw |= ctrl->allow_loopback ? (1 << 3) : 0; - dw |= __promisc_mode[ctrl->promisc_mode] << 8; - dw |= ctrl->priority << 16; + flags = ctrl->queue_mode == MLX4_NET_TRANS_Q_LIFO ? 1 : 0; + flags |= ctrl->exclusive ? (1 << 2) : 0; + flags |= ctrl->allow_loopback ? (1 << 3) : 0; - hw->ctrl = cpu_to_be32(dw); + hw->flags = flags; + hw->type = __promisc_mode[ctrl->promisc_mode]; + hw->prio = cpu_to_be16(ctrl->priority); hw->port = ctrl->port; hw->qpn = cpu_to_be32(ctrl->qpn); } diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index a69bda77c1d1..08e5bc1387e1 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -964,7 +964,9 @@ struct mlx4_net_trans_rule { }; struct mlx4_net_trans_rule_hw_ctrl { - __be32 ctrl; + __be16 prio; + u8 type; + u8 flags; u8 rsvd1; u8 funcid; u8 vep; From c2c19dc3c9a1585e58804041e5a328cde425403a Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Wed, 24 Apr 2013 13:58:48 +0000 Subject: [PATCH 18/22] mlx4_core: Expose a few helpers to fill DMFS HW strucutures Re-arrange some of code which fills DMFS HW structures so we can use it from within the core driver and from the IB driver too, e.g when verbs DMFS structures are transformed into mlx4 hardware structs. Also, add struct mlx4_flow_handle struct which will be of use by the DMFS verbs flow in the IB driver. Signed-off-by: Hadar Hen Zion Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/net/ethernet/mellanox/mlx4/mcg.c | 85 ++++++++++++++++-------- include/linux/mlx4/device.h | 12 +++- 2 files changed, 70 insertions(+), 27 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c index 07712f976c8f..00b4e7be7c7e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c @@ -645,17 +645,28 @@ static int find_entry(struct mlx4_dev *dev, u8 port, return err; } +static const u8 __promisc_mode[] = { + [MLX4_FS_REGULAR] = 0x0, + [MLX4_FS_ALL_DEFAULT] = 0x1, + [MLX4_FS_MC_DEFAULT] = 0x3, + [MLX4_FS_UC_SNIFFER] = 0x4, + [MLX4_FS_MC_SNIFFER] = 0x5, +}; + +int mlx4_map_sw_to_hw_steering_mode(struct mlx4_dev *dev, + enum mlx4_net_trans_promisc_mode flow_type) +{ + if (flow_type >= MLX4_FS_MODE_NUM || flow_type < 0) { + mlx4_err(dev, "Invalid flow type. type = %d\n", flow_type); + return -EINVAL; + } + return __promisc_mode[flow_type]; +} +EXPORT_SYMBOL_GPL(mlx4_map_sw_to_hw_steering_mode); + static void trans_rule_ctrl_to_hw(struct mlx4_net_trans_rule *ctrl, struct mlx4_net_trans_rule_hw_ctrl *hw) { - static const u8 __promisc_mode[] = { - [MLX4_FS_REGULAR] = 0x0, - [MLX4_FS_ALL_DEFAULT] = 0x1, - [MLX4_FS_MC_DEFAULT] = 0x3, - [MLX4_FS_UC_SNIFFER] = 0x4, - [MLX4_FS_MC_SNIFFER] = 0x5, - }; - u8 flags = 0; flags = ctrl->queue_mode == MLX4_NET_TRANS_Q_LIFO ? 1 : 0; @@ -678,29 +689,51 @@ const u16 __sw_id_hw[] = { [MLX4_NET_TRANS_RULE_ID_UDP] = 0xE006 }; +int mlx4_map_sw_to_hw_steering_id(struct mlx4_dev *dev, + enum mlx4_net_trans_rule_id id) +{ + if (id >= MLX4_NET_TRANS_RULE_NUM || id < 0) { + mlx4_err(dev, "Invalid network rule id. id = %d\n", id); + return -EINVAL; + } + return __sw_id_hw[id]; +} +EXPORT_SYMBOL_GPL(mlx4_map_sw_to_hw_steering_id); + +static const int __rule_hw_sz[] = { + [MLX4_NET_TRANS_RULE_ID_ETH] = + sizeof(struct mlx4_net_trans_rule_hw_eth), + [MLX4_NET_TRANS_RULE_ID_IB] = + sizeof(struct mlx4_net_trans_rule_hw_ib), + [MLX4_NET_TRANS_RULE_ID_IPV6] = 0, + [MLX4_NET_TRANS_RULE_ID_IPV4] = + sizeof(struct mlx4_net_trans_rule_hw_ipv4), + [MLX4_NET_TRANS_RULE_ID_TCP] = + sizeof(struct mlx4_net_trans_rule_hw_tcp_udp), + [MLX4_NET_TRANS_RULE_ID_UDP] = + sizeof(struct mlx4_net_trans_rule_hw_tcp_udp) +}; + +int mlx4_hw_rule_sz(struct mlx4_dev *dev, + enum mlx4_net_trans_rule_id id) +{ + if (id >= MLX4_NET_TRANS_RULE_NUM || id < 0) { + mlx4_err(dev, "Invalid network rule id. id = %d\n", id); + return -EINVAL; + } + + return __rule_hw_sz[id]; +} +EXPORT_SYMBOL_GPL(mlx4_hw_rule_sz); + static int parse_trans_rule(struct mlx4_dev *dev, struct mlx4_spec_list *spec, struct _rule_hw *rule_hw) { - static const size_t __rule_hw_sz[] = { - [MLX4_NET_TRANS_RULE_ID_ETH] = - sizeof(struct mlx4_net_trans_rule_hw_eth), - [MLX4_NET_TRANS_RULE_ID_IB] = - sizeof(struct mlx4_net_trans_rule_hw_ib), - [MLX4_NET_TRANS_RULE_ID_IPV6] = 0, - [MLX4_NET_TRANS_RULE_ID_IPV4] = - sizeof(struct mlx4_net_trans_rule_hw_ipv4), - [MLX4_NET_TRANS_RULE_ID_TCP] = - sizeof(struct mlx4_net_trans_rule_hw_tcp_udp), - [MLX4_NET_TRANS_RULE_ID_UDP] = - sizeof(struct mlx4_net_trans_rule_hw_tcp_udp) - }; - if (spec->id >= MLX4_NET_TRANS_RULE_NUM) { - mlx4_err(dev, "Invalid network rule id. id = %d\n", spec->id); + if (mlx4_hw_rule_sz(dev, spec->id) < 0) return -EINVAL; - } - memset(rule_hw, 0, __rule_hw_sz[spec->id]); + memset(rule_hw, 0, mlx4_hw_rule_sz(dev, spec->id)); rule_hw->id = cpu_to_be16(__sw_id_hw[spec->id]); - rule_hw->size = __rule_hw_sz[spec->id] >> 2; + rule_hw->size = mlx4_hw_rule_sz(dev, spec->id) >> 2; switch (spec->id) { case MLX4_NET_TRANS_RULE_ID_ETH: diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 08e5bc1387e1..ad4a53fbdddf 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -901,7 +901,7 @@ enum mlx4_net_trans_promisc_mode { MLX4_FS_MC_DEFAULT, MLX4_FS_UC_SNIFFER, MLX4_FS_MC_SNIFFER, - + MLX4_FS_MODE_NUM, /* should be last */ }; struct mlx4_spec_eth { @@ -1044,6 +1044,11 @@ struct _rule_hw { }; }; +/* translating DMFS verbs sniffer rule to the FW API would need two reg IDs */ +struct mlx4_flow_handle { + u64 reg_id[2]; +}; + int mlx4_flow_steer_promisc_add(struct mlx4_dev *dev, u8 port, u32 qpn, enum mlx4_net_trans_promisc_mode mode); int mlx4_flow_steer_promisc_remove(struct mlx4_dev *dev, u8 port, @@ -1093,6 +1098,11 @@ void mlx4_counter_free(struct mlx4_dev *dev, u32 idx); int mlx4_flow_attach(struct mlx4_dev *dev, struct mlx4_net_trans_rule *rule, u64 *reg_id); int mlx4_flow_detach(struct mlx4_dev *dev, u64 reg_id); +int mlx4_map_sw_to_hw_steering_mode(struct mlx4_dev *dev, + enum mlx4_net_trans_promisc_mode flow_type); +int mlx4_map_sw_to_hw_steering_id(struct mlx4_dev *dev, + enum mlx4_net_trans_rule_id id); +int mlx4_hw_rule_sz(struct mlx4_dev *dev, enum mlx4_net_trans_rule_id id); void mlx4_sync_pkey_table(struct mlx4_dev *dev, int slave, int port, int i, int val); From c1d786e68256001abe7b55f5f6831c8a996866b1 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Wed, 1 May 2013 13:25:24 +0000 Subject: [PATCH 19/22] IB/iser: Add module version Add displaying module version, update the version to 1.1, and remove the DRV_DATE define. Signed-off-by: Roi Dayan Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iscsi_iser.c | 6 +++--- drivers/infiniband/ulp/iser/iscsi_iser.h | 3 +-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 0ab8c9cc3a78..69e0d7c3b149 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -82,10 +82,10 @@ module_param_named(max_lun, iscsi_max_lun, uint, S_IRUGO); int iser_debug_level = 0; -MODULE_DESCRIPTION("iSER (iSCSI Extensions for RDMA) Datamover " - "v" DRV_VER " (" DRV_DATE ")"); +MODULE_DESCRIPTION("iSER (iSCSI Extensions for RDMA) Datamover"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Alex Nezhinsky, Dan Bar Dov, Or Gerlitz"); +MODULE_VERSION(DRV_VER); module_param_named(debug_level, iser_debug_level, int, 0644); MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0 (default:disabled)"); @@ -682,7 +682,7 @@ static umode_t iser_attr_is_visible(int param_type, int param) static struct scsi_host_template iscsi_iser_sht = { .module = THIS_MODULE, - .name = "iSCSI Initiator over iSER, v." DRV_VER, + .name = "iSCSI Initiator over iSER", .queuecommand = iscsi_queuecommand, .change_queue_depth = iscsi_change_queue_depth, .sg_tablesize = ISCSI_ISER_SG_TABLESIZE, diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 5babdb35bda7..0da04bf82044 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -65,8 +65,7 @@ #define DRV_NAME "iser" #define PFX DRV_NAME ": " -#define DRV_VER "0.1" -#define DRV_DATE "May 7th, 2006" +#define DRV_VER "1.1" #define iser_dbg(fmt, arg...) \ do { \ From 4f3638826124b722b2c0dde5097bca182bf74125 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Wed, 1 May 2013 13:25:25 +0000 Subject: [PATCH 20/22] IB/iser: Move informational messages from error to info level Introduce iser_info() and move informational messages that were printed as errors to use that macro. Also, cleanup printk leftovers to use the existing macros. Signed-off-by: Roi Dayan Signed-off-by: Or Gerlitz [ Use pr_warn(... instead of printk(KERN_WARNING .... - Roland ] Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iscsi_iser.c | 18 +++++++-------- drivers/infiniband/ulp/iser/iscsi_iser.h | 12 ++++++++-- drivers/infiniband/ulp/iser/iser_verbs.c | 29 ++++++++++++------------ 3 files changed, 34 insertions(+), 25 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 69e0d7c3b149..f19b0998a53c 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -370,8 +370,8 @@ iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session, /* binds the iSER connection retrieved from the previously * connected ep_handle to the iSCSI layer connection. exchanges * connection pointers */ - iser_err("binding iscsi/iser conn %p %p to ib_conn %p\n", - conn, conn->dd_data, ib_conn); + iser_info("binding iscsi/iser conn %p %p to ib_conn %p\n", + conn, conn->dd_data, ib_conn); iser_conn = conn->dd_data; ib_conn->iser_conn = iser_conn; iser_conn->ib_conn = ib_conn; @@ -475,28 +475,28 @@ iscsi_iser_set_param(struct iscsi_cls_conn *cls_conn, case ISCSI_PARAM_HDRDGST_EN: sscanf(buf, "%d", &value); if (value) { - printk(KERN_ERR "DataDigest wasn't negotiated to None"); + iser_err("DataDigest wasn't negotiated to None"); return -EPROTO; } break; case ISCSI_PARAM_DATADGST_EN: sscanf(buf, "%d", &value); if (value) { - printk(KERN_ERR "DataDigest wasn't negotiated to None"); + iser_err("DataDigest wasn't negotiated to None"); return -EPROTO; } break; case ISCSI_PARAM_IFMARKER_EN: sscanf(buf, "%d", &value); if (value) { - printk(KERN_ERR "IFMarker wasn't negotiated to No"); + iser_err("IFMarker wasn't negotiated to No"); return -EPROTO; } break; case ISCSI_PARAM_OFMARKER_EN: sscanf(buf, "%d", &value); if (value) { - printk(KERN_ERR "OFMarker wasn't negotiated to No"); + iser_err("OFMarker wasn't negotiated to No"); return -EPROTO; } break; @@ -596,7 +596,7 @@ iscsi_iser_ep_poll(struct iscsi_endpoint *ep, int timeout_ms) ib_conn->state == ISER_CONN_DOWN)) rc = -1; - iser_err("ib conn %p rc = %d\n", ib_conn, rc); + iser_info("ib conn %p rc = %d\n", ib_conn, rc); if (rc > 0) return 1; /* success, this is the equivalent of POLLOUT */ @@ -623,7 +623,7 @@ iscsi_iser_ep_disconnect(struct iscsi_endpoint *ep) iscsi_suspend_tx(ib_conn->iser_conn->iscsi_conn); - iser_err("ib conn %p state %d\n",ib_conn, ib_conn->state); + iser_info("ib conn %p state %d\n", ib_conn, ib_conn->state); iser_conn_terminate(ib_conn); } @@ -740,7 +740,7 @@ static int __init iser_init(void) iser_dbg("Starting iSER datamover...\n"); if (iscsi_max_lun < 1) { - printk(KERN_ERR "Invalid max_lun value of %u\n", iscsi_max_lun); + iser_err("Invalid max_lun value of %u\n", iscsi_max_lun); return -EINVAL; } diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 0da04bf82044..1a570886cc03 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -42,6 +42,7 @@ #include #include +#include #include #include @@ -69,15 +70,22 @@ #define iser_dbg(fmt, arg...) \ do { \ - if (iser_debug_level > 1) \ + if (iser_debug_level > 2) \ printk(KERN_DEBUG PFX "%s:" fmt,\ __func__ , ## arg); \ } while (0) #define iser_warn(fmt, arg...) \ + do { \ + if (iser_debug_level > 1) \ + pr_warn(PFX "%s:" fmt, \ + __func__ , ## arg); \ + } while (0) + +#define iser_info(fmt, arg...) \ do { \ if (iser_debug_level > 0) \ - printk(KERN_DEBUG PFX "%s:" fmt,\ + pr_info(PFX "%s:" fmt, \ __func__ , ## arg); \ } while (0) diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 4debadc53106..13838ad3ce76 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -74,8 +74,9 @@ static int iser_create_device_ib_res(struct iser_device *device) struct iser_cq_desc *cq_desc; device->cqs_used = min(ISER_MAX_CQ, device->ib_device->num_comp_vectors); - iser_err("using %d CQs, device %s supports %d vectors\n", device->cqs_used, - device->ib_device->name, device->ib_device->num_comp_vectors); + iser_info("using %d CQs, device %s supports %d vectors\n", + device->cqs_used, device->ib_device->name, + device->ib_device->num_comp_vectors); device->cq_desc = kmalloc(sizeof(struct iser_cq_desc) * device->cqs_used, GFP_KERNEL); @@ -262,7 +263,7 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn) min_index = index; device->cq_active_qps[min_index]++; mutex_unlock(&ig.connlist_mutex); - iser_err("cq index %d used for ib_conn %p\n", min_index, ib_conn); + iser_info("cq index %d used for ib_conn %p\n", min_index, ib_conn); init_attr.event_handler = iser_qp_event_callback; init_attr.qp_context = (void *)ib_conn; @@ -280,9 +281,9 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn) goto out_err; ib_conn->qp = ib_conn->cma_id->qp; - iser_err("setting conn %p cma_id %p: fmr_pool %p qp %p\n", - ib_conn, ib_conn->cma_id, - ib_conn->fmr_pool, ib_conn->cma_id->qp); + iser_info("setting conn %p cma_id %p: fmr_pool %p qp %p\n", + ib_conn, ib_conn->cma_id, + ib_conn->fmr_pool, ib_conn->cma_id->qp); return ret; out_err: @@ -299,9 +300,9 @@ static int iser_free_ib_conn_res(struct iser_conn *ib_conn, int can_destroy_id) int cq_index; BUG_ON(ib_conn == NULL); - iser_err("freeing conn %p cma_id %p fmr pool %p qp %p\n", - ib_conn, ib_conn->cma_id, - ib_conn->fmr_pool, ib_conn->qp); + iser_info("freeing conn %p cma_id %p fmr pool %p qp %p\n", + ib_conn, ib_conn->cma_id, + ib_conn->fmr_pool, ib_conn->qp); /* qp is created only once both addr & route are resolved */ if (ib_conn->fmr_pool != NULL) @@ -379,7 +380,7 @@ static void iser_device_try_release(struct iser_device *device) { mutex_lock(&ig.device_list_mutex); device->refcount--; - iser_err("device %p refcount %d\n",device,device->refcount); + iser_info("device %p refcount %d\n", device, device->refcount); if (!device->refcount) { iser_free_device_ib_res(device); list_del(&device->ig_list); @@ -558,8 +559,8 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve { int ret = 0; - iser_err("event %d status %d conn %p id %p\n", - event->event, event->status, cma_id->context, cma_id); + iser_info("event %d status %d conn %p id %p\n", + event->event, event->status, cma_id->context, cma_id); switch (event->event) { case RDMA_CM_EVENT_ADDR_RESOLVED: @@ -619,8 +620,8 @@ int iser_connect(struct iser_conn *ib_conn, /* the device is known only --after-- address resolution */ ib_conn->device = NULL; - iser_err("connecting to: %pI4, port 0x%x\n", - &dst_addr->sin_addr, dst_addr->sin_port); + iser_info("connecting to: %pI4, port 0x%x\n", + &dst_addr->sin_addr, dst_addr->sin_port); ib_conn->state = ISER_CONN_PENDING; From 450d1e40d597dd71843457c73d372c4b4f2a83eb Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Wed, 1 May 2013 13:25:26 +0000 Subject: [PATCH 21/22] IB/iser: Return error to upper layers on EAGAIN registration failures Commit 819a087316a6 ("IB/iser: Avoid error prints on EAGAIN registration failures") not only eliminated the error print on that case, but rather also modified the code such that it doesn't return any error to upper layers. As a result a wrong mapping was used. Fix this to correctly return the error in that case. Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iser_memory.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index be1edb04b085..68ebb7fe072a 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -416,8 +416,9 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *iser_task, for (i=0 ; ipage_vec->length ; i++) iser_err("page_vec[%d] = 0x%llx\n", i, (unsigned long long) ib_conn->page_vec->pages[i]); - return err; } + if (err) + return err; } return 0; } From 8d8399deb0434df6efb3a54a0efeac677e310fc1 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Wed, 1 May 2013 13:25:27 +0000 Subject: [PATCH 22/22] IB/iser: Add support for iser CM REQ additional info Annex A12 of the IBTA spec defines additional information that needs to be provided through the CM exchange relating to usage of ZBVA (Zero Based VAs) and Send With Invalidate over an iSER connection. Currently, the initiator sets both to not supported, but does provide the header so that existing iSER targets can be patched to start looking on the private data carried by the CM. This is a preparation step to enable iSER with HW drivers for which FMRs are not supported, such as mlx4 VF instances or new HW devices which might support only FRWR (Fast Registration Work-Requests) along the details of the IB_DEVICE_MEM_MGT_EXTENSIONS device capability. Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iscsi_iser.h | 9 +++++++++ drivers/infiniband/ulp/iser/iser_verbs.c | 7 +++++++ 2 files changed, 16 insertions(+) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 1a570886cc03..06f578cde75b 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -140,6 +140,15 @@ struct iser_hdr { __be64 read_va; } __attribute__((packed)); + +#define ISER_ZBVA_NOT_SUPPORTED 0x80 +#define ISER_SEND_W_INV_NOT_SUPPORTED 0x40 + +struct iser_cm_hdr { + u8 flags; + u8 rsvd[3]; +} __packed; + /* Constant PDU lengths calculations */ #define ISER_HEADERS_LEN (sizeof(struct iser_hdr) + sizeof(struct iscsi_hdr)) diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 13838ad3ce76..5278916c3103 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -499,6 +499,7 @@ static int iser_route_handler(struct rdma_cm_id *cma_id) { struct rdma_conn_param conn_param; int ret; + struct iser_cm_hdr req_hdr; ret = iser_create_ib_conn_res((struct iser_conn *)cma_id->context); if (ret) @@ -510,6 +511,12 @@ static int iser_route_handler(struct rdma_cm_id *cma_id) conn_param.retry_count = 7; conn_param.rnr_retry_count = 6; + memset(&req_hdr, 0, sizeof(req_hdr)); + req_hdr.flags = (ISER_ZBVA_NOT_SUPPORTED | + ISER_SEND_W_INV_NOT_SUPPORTED); + conn_param.private_data = (void *)&req_hdr; + conn_param.private_data_len = sizeof(struct iser_cm_hdr); + ret = rdma_connect(cma_id, &conn_param); if (ret) { iser_err("failure connecting: %d\n", ret);