From e0f31d8498676fda36289603a054d0d490aa2679 Mon Sep 17 00:00:00 2001 From: Govindarajulu Varadarajan <_govind@gmx.com> Date: Mon, 23 Jun 2014 16:07:58 +0530 Subject: [PATCH 1/8] flow_keys: Record IP layer protocol in skb_flow_dissect() skb_flow_dissect() dissects only transport header type in ip_proto. It does not give any information about IPv4 or IPv6. This patch adds a new member, n_proto, to struct flow_keys, which records the IP layer type, i.e. IPv4 or IPv6. This can be used in netdev->ndo_rx_flow_steer driver function to dissect flow. Adding new member to flow_keys increases the struct size by around 4 bytes. This causes BUILD_BUG_ON(sizeof(qcb->data) < sz); to fail in qdisc_cb_private_validate(), so increase the data size by 4. Signed-off-by: Govindarajulu Varadarajan <_govind@gmx.com> Signed-off-by: David S. Miller --- include/net/flow_keys.h | 14 ++++++++++++++ include/net/sch_generic.h | 2 +- net/core/flow_dissector.c | 1 + 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/include/net/flow_keys.h b/include/net/flow_keys.h index 7e64bd8bbda9..fbefdca5e283 100644 --- a/include/net/flow_keys.h +++ b/include/net/flow_keys.h @@ -1,6 +1,19 @@ #ifndef _NET_FLOW_KEYS_H #define _NET_FLOW_KEYS_H +/* struct flow_keys: + * @src: source ip address in case of IPv4 + * For IPv6 it contains 32bit hash of src address + * @dst: destination ip address in case of IPv4 + * For IPv6 it contains 32bit hash of dst address + * @ports: port numbers of Transport header + * port16[0]: src port number + * port16[1]: dst port number + * @thoff: Transport header offset + * @n_proto: Network header protocol (eg. IPv4/IPv6) + * @ip_proto: Transport header protocol (eg. TCP/UDP) + * All the members, except thoff, are in network byte order. 
+ */ struct flow_keys { /* (src,dst) must be grouped, in the same way than in IP header */ __be32 src; @@ -10,6 +23,7 @@ struct flow_keys { __be16 port16[2]; }; u16 thoff; + u16 n_proto; u8 ip_proto; }; diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 624f9857c83e..a3cfb8ebeb53 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -231,7 +231,7 @@ struct qdisc_skb_cb { unsigned int pkt_len; u16 slave_dev_queue_mapping; u16 _pad; - unsigned char data[20]; + unsigned char data[24]; }; static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 107ed12a5323..c2b53c1b21d2 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -175,6 +175,7 @@ ipv6: break; } + flow->n_proto = proto; flow->ip_proto = ip_proto; flow->ports = skb_flow_get_ports(skb, nhoff, ip_proto); flow->thoff = (u16) nhoff; From 10cc88446cec4eee8e2efab24ad387d52ef1f4fb Mon Sep 17 00:00:00 2001 From: Govindarajulu Varadarajan <_govind@gmx.com> Date: Mon, 23 Jun 2014 16:07:59 +0530 Subject: [PATCH 2/8] enic: fix return value in _vnic_dev_cmd Hardware (in readq(&devcmd->args[0])) returns positive number in case of error. But _vnic_dev_cmd should return a negative value in case of error. Signed-off-by: Govindarajulu Varadarajan <_govind@gmx.com> Signed-off-by: David S. 
Miller --- drivers/net/ethernet/cisco/enic/vnic_dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cisco/enic/vnic_dev.c b/drivers/net/ethernet/cisco/enic/vnic_dev.c index e86a45cb9e68..263081b8e636 100644 --- a/drivers/net/ethernet/cisco/enic/vnic_dev.c +++ b/drivers/net/ethernet/cisco/enic/vnic_dev.c @@ -312,12 +312,12 @@ static int _vnic_dev_cmd(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd, err = (int)readq(&devcmd->args[0]); if (err == ERR_EINVAL && cmd == CMD_CAPABILITY) - return err; + return -err; if (err != ERR_ECMDUNKNOWN || cmd != CMD_CAPABILITY) pr_err("Error %d devcmd %d\n", err, _CMD_N(cmd)); - return err; + return -err; } if (_CMD_DIR(cmd) & _CMD_DIR_READ) { From 631185273b6e1f8e0b5a00c1aca08650b2d18a57 Mon Sep 17 00:00:00 2001 From: Govindarajulu Varadarajan <_govind@gmx.com> Date: Mon, 23 Jun 2014 16:08:00 +0530 Subject: [PATCH 3/8] enic: devcmd for adding IP 5 tuple hardware filters This patch adds interface to add and delete IP 5 tuple filter. This interface is used by Accelerated RFS code to steer a flow to corresponding receive queue. As of now adaptor supports only ipv4 + tcp/udp packet steering. Signed-off-by: Govindarajulu Varadarajan <_govind@gmx.com> Signed-off-by: David S. 
Miller --- drivers/net/ethernet/cisco/enic/Makefile | 2 +- drivers/net/ethernet/cisco/enic/enic_clsf.c | 66 +++++++++++++++++++ drivers/net/ethernet/cisco/enic/enic_clsf.h | 10 +++ drivers/net/ethernet/cisco/enic/vnic_dev.c | 61 +++++++++++++++++ drivers/net/ethernet/cisco/enic/vnic_dev.h | 2 + drivers/net/ethernet/cisco/enic/vnic_devcmd.h | 5 ++ 6 files changed, 145 insertions(+), 1 deletion(-) create mode 100644 drivers/net/ethernet/cisco/enic/enic_clsf.c create mode 100644 drivers/net/ethernet/cisco/enic/enic_clsf.h diff --git a/drivers/net/ethernet/cisco/enic/Makefile b/drivers/net/ethernet/cisco/enic/Makefile index 239e1e46545d..aadcaf7876ce 100644 --- a/drivers/net/ethernet/cisco/enic/Makefile +++ b/drivers/net/ethernet/cisco/enic/Makefile @@ -2,5 +2,5 @@ obj-$(CONFIG_ENIC) := enic.o enic-y := enic_main.o vnic_cq.o vnic_intr.o vnic_wq.o \ enic_res.o enic_dev.o enic_pp.o vnic_dev.o vnic_rq.o vnic_vic.o \ - enic_ethtool.o enic_api.o + enic_ethtool.o enic_api.o enic_clsf.o diff --git a/drivers/net/ethernet/cisco/enic/enic_clsf.c b/drivers/net/ethernet/cisco/enic/enic_clsf.c new file mode 100644 index 000000000000..f6703c4f76a9 --- /dev/null +++ b/drivers/net/ethernet/cisco/enic/enic_clsf.c @@ -0,0 +1,66 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include "enic_res.h" +#include "enic_clsf.h" + +/* enic_addfltr_5t - Add ipv4 5tuple filter + * @enic: enic struct of vnic + * @keys: flow_keys of ipv4 5tuple + * @rq: rq number to steer to + * + * This function returns filter_id(hardware_id) of the filter + * added. In case of error it returns an negative number. 
+ */ +int enic_addfltr_5t(struct enic *enic, struct flow_keys *keys, u16 rq) +{ + int res; + struct filter data; + + switch (keys->ip_proto) { + case IPPROTO_TCP: + data.u.ipv4.protocol = PROTO_TCP; + break; + case IPPROTO_UDP: + data.u.ipv4.protocol = PROTO_UDP; + break; + default: + return -EPROTONOSUPPORT; + }; + data.type = FILTER_IPV4_5TUPLE; + data.u.ipv4.src_addr = ntohl(keys->src); + data.u.ipv4.dst_addr = ntohl(keys->dst); + data.u.ipv4.src_port = ntohs(keys->port16[0]); + data.u.ipv4.dst_port = ntohs(keys->port16[1]); + data.u.ipv4.flags = FILTER_FIELDS_IPV4_5TUPLE; + + spin_lock_bh(&enic->devcmd_lock); + res = vnic_dev_classifier(enic->vdev, CLSF_ADD, &rq, &data); + spin_unlock_bh(&enic->devcmd_lock); + res = (res == 0) ? rq : res; + + return res; +} + +/* enic_delfltr - Delete clsf filter + * @enic: enic struct of vnic + * @filter_id: filter_id(hardware_id) of filter to be deleted + * + * This function returns zero in case of success, negative number in case of + * error. + */ +int enic_delfltr(struct enic *enic, u16 filter_id) +{ + int ret; + + spin_lock_bh(&enic->devcmd_lock); + ret = vnic_dev_classifier(enic->vdev, CLSF_DEL, &filter_id, NULL); + spin_unlock_bh(&enic->devcmd_lock); + + return ret; +} diff --git a/drivers/net/ethernet/cisco/enic/enic_clsf.h b/drivers/net/ethernet/cisco/enic/enic_clsf.h new file mode 100644 index 000000000000..b6925b368b77 --- /dev/null +++ b/drivers/net/ethernet/cisco/enic/enic_clsf.h @@ -0,0 +1,10 @@ +#ifndef _ENIC_CLSF_H_ +#define _ENIC_CLSF_H_ + +#include "vnic_dev.h" +#include "enic.h" + +int enic_addfltr_5t(struct enic *enic, struct flow_keys *keys, u16 rq); +int enic_delfltr(struct enic *enic, u16 filter_id); + +#endif /* _ENIC_CLSF_H_ */ diff --git a/drivers/net/ethernet/cisco/enic/vnic_dev.c b/drivers/net/ethernet/cisco/enic/vnic_dev.c index 263081b8e636..5abc496bcf29 100644 --- a/drivers/net/ethernet/cisco/enic/vnic_dev.c +++ b/drivers/net/ethernet/cisco/enic/vnic_dev.c @@ -1048,3 +1048,64 @@ int 
vnic_dev_set_mac_addr(struct vnic_dev *vdev, u8 *mac_addr) return vnic_dev_cmd(vdev, CMD_SET_MAC_ADDR, &a0, &a1, wait); } + +/* vnic_dev_classifier: Add/Delete classifier entries + * @vdev: vdev of the device + * @cmd: CLSF_ADD for Add filter + * CLSF_DEL for Delete filter + * @entry: In case of ADD filter, the caller passes the RQ number in this + * variable. + * + * This function stores the filter_id returned by the firmware in the + * same variable before return; + * + * In case of DEL filter, the caller passes the filter_id. Return + * value is irrelevant. + * @data: filter data + */ +int vnic_dev_classifier(struct vnic_dev *vdev, u8 cmd, u16 *entry, + struct filter *data) +{ + u64 a0, a1; + int wait = 1000; + dma_addr_t tlv_pa; + int ret = -EINVAL; + struct filter_tlv *tlv, *tlv_va; + struct filter_action *action; + u64 tlv_size; + + if (cmd == CLSF_ADD) { + tlv_size = sizeof(struct filter) + + sizeof(struct filter_action) + + 2 * sizeof(struct filter_tlv); + tlv_va = pci_alloc_consistent(vdev->pdev, tlv_size, &tlv_pa); + if (!tlv_va) + return -ENOMEM; + tlv = tlv_va; + a0 = tlv_pa; + a1 = tlv_size; + memset(tlv, 0, tlv_size); + tlv->type = CLSF_TLV_FILTER; + tlv->length = sizeof(struct filter); + *(struct filter *)&tlv->val = *data; + + tlv = (struct filter_tlv *)((char *)tlv + + sizeof(struct filter_tlv) + + sizeof(struct filter)); + + tlv->type = CLSF_TLV_ACTION; + tlv->length = sizeof(struct filter_action); + action = (struct filter_action *)&tlv->val; + action->type = FILTER_ACTION_RQ_STEERING; + action->u.rq_idx = *entry; + + ret = vnic_dev_cmd(vdev, CMD_ADD_FILTER, &a0, &a1, wait); + *entry = (u16)a0; + pci_free_consistent(vdev->pdev, tlv_size, tlv_va, tlv_pa); + } else if (cmd == CLSF_DEL) { + a0 = *entry; + ret = vnic_dev_cmd(vdev, CMD_DEL_FILTER, &a0, &a1, wait); + } + + return ret; +} diff --git a/drivers/net/ethernet/cisco/enic/vnic_dev.h b/drivers/net/ethernet/cisco/enic/vnic_dev.h index 1f3b301f8225..1fb214efceba 100644 --- 
a/drivers/net/ethernet/cisco/enic/vnic_dev.h +++ b/drivers/net/ethernet/cisco/enic/vnic_dev.h @@ -133,5 +133,7 @@ int vnic_dev_enable2(struct vnic_dev *vdev, int active); int vnic_dev_enable2_done(struct vnic_dev *vdev, int *status); int vnic_dev_deinit_done(struct vnic_dev *vdev, int *status); int vnic_dev_set_mac_addr(struct vnic_dev *vdev, u8 *mac_addr); +int vnic_dev_classifier(struct vnic_dev *vdev, u8 cmd, u16 *entry, + struct filter *data); #endif /* _VNIC_DEV_H_ */ diff --git a/drivers/net/ethernet/cisco/enic/vnic_devcmd.h b/drivers/net/ethernet/cisco/enic/vnic_devcmd.h index b9a0d78fd639..435d0cd96c22 100644 --- a/drivers/net/ethernet/cisco/enic/vnic_devcmd.h +++ b/drivers/net/ethernet/cisco/enic/vnic_devcmd.h @@ -603,6 +603,11 @@ struct filter_tlv { u_int32_t val[0]; }; +enum { + CLSF_ADD = 0, + CLSF_DEL = 1, +}; + /* * Writing cmd register causes STAT_BUSY to get set in status register. * When cmd completes, STAT_BUSY will be cleared. From b6e97c132bbca469d57634622dd7bdacb21f018f Mon Sep 17 00:00:00 2001 From: Govindarajulu Varadarajan <_govind@gmx.com> Date: Mon, 23 Jun 2014 16:08:01 +0530 Subject: [PATCH 4/8] enic: alloc/free rx_cpu_rmap rx_cpu_rmap provides the reverse irq cpu affinity. This patch allocates and sets drivers netdev->rx_cpu_rmap accordingly. rx_cpu_rmap is set in enic_request_intr() which is called by enic_open and rx_cpu_rmap is freed in enic_free_intr() which is called by enic_stop. This is used by Accelerated RFS. Signed-off-by: Govindarajulu Varadarajan <_govind@gmx.com> Signed-off-by: David S. 
Miller --- drivers/net/ethernet/cisco/enic/enic_main.c | 43 +++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index f32f828b7f3d..151b375337a9 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -39,6 +39,9 @@ #include #include #include +#ifdef CONFIG_RFS_ACCEL +#include +#endif #include "cq_enet_desc.h" #include "vnic_dev.h" @@ -1192,6 +1195,44 @@ static void enic_calc_int_moderation(struct enic *enic, struct vnic_rq *rq) pkt_size_counter->small_pkt_bytes_cnt = 0; } +#ifdef CONFIG_RFS_ACCEL +static void enic_free_rx_cpu_rmap(struct enic *enic) +{ + free_irq_cpu_rmap(enic->netdev->rx_cpu_rmap); + enic->netdev->rx_cpu_rmap = NULL; +} + +static void enic_set_rx_cpu_rmap(struct enic *enic) +{ + int i, res; + + if (vnic_dev_get_intr_mode(enic->vdev) == VNIC_DEV_INTR_MODE_MSIX) { + enic->netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(enic->rq_count); + if (unlikely(!enic->netdev->rx_cpu_rmap)) + return; + for (i = 0; i < enic->rq_count; i++) { + res = irq_cpu_rmap_add(enic->netdev->rx_cpu_rmap, + enic->msix_entry[i].vector); + if (unlikely(res)) { + enic_free_rx_cpu_rmap(enic); + return; + } + } + } +} + +#else + +static void enic_free_rx_cpu_rmap(struct enic *enic) +{ +} + +static void enic_set_rx_cpu_rmap(struct enic *enic) +{ +} + +#endif /* CONFIG_RFS_ACCEL */ + static int enic_poll_msix(struct napi_struct *napi, int budget) { struct net_device *netdev = napi->dev; @@ -1267,6 +1308,7 @@ static void enic_free_intr(struct enic *enic) struct net_device *netdev = enic->netdev; unsigned int i; + enic_free_rx_cpu_rmap(enic); switch (vnic_dev_get_intr_mode(enic->vdev)) { case VNIC_DEV_INTR_MODE_INTX: free_irq(enic->pdev->irq, netdev); @@ -1291,6 +1333,7 @@ static int enic_request_intr(struct enic *enic) unsigned int i, intr; int err = 0; + enic_set_rx_cpu_rmap(enic); switch (vnic_dev_get_intr_mode(enic->vdev)) { 
case VNIC_DEV_INTR_MODE_INTX: From a145df23ef32c7b933875f334ba28791ee75766e Mon Sep 17 00:00:00 2001 From: Govindarajulu Varadarajan <_govind@gmx.com> Date: Mon, 23 Jun 2014 16:08:02 +0530 Subject: [PATCH 5/8] enic: Add Accelerated RFS support This patch adds support for Accelerated Receive Flow Steering. When the desired rx queue is different from the current rq for a flow, the kernel calls the driver function enic_rx_flow_steer(). enic_rx_flow_steer() adds an IP-TCP/UDP hardware filter. Driver registers a timer function enic_flow_may_expire. This function is called every HZ/4 jiffies (i.e. every quarter of a second). In this function we check if the added filter has expired by calling rps_may_expire_flow(). If the flow has expired, it removes the hw filter. As of now adaptor supports only IPv4 - TCP/UDP filters. Signed-off-by: Govindarajulu Varadarajan <_govind@gmx.com> Signed-off-by: David S. Miller --- drivers/net/ethernet/cisco/enic/enic.h | 41 ++++ drivers/net/ethernet/cisco/enic/enic_clsf.c | 213 ++++++++++++++++++++ drivers/net/ethernet/cisco/enic/enic_clsf.h | 9 + drivers/net/ethernet/cisco/enic/enic_main.c | 13 ++ drivers/net/ethernet/cisco/enic/enic_res.c | 1 + drivers/net/ethernet/cisco/enic/vnic_enet.h | 2 + 6 files changed, 279 insertions(+) diff --git a/drivers/net/ethernet/cisco/enic/enic.h b/drivers/net/ethernet/cisco/enic/enic.h index 14f465f239d6..b9b9178e174e 100644 --- a/drivers/net/ethernet/cisco/enic/enic.h +++ b/drivers/net/ethernet/cisco/enic/enic.h @@ -99,6 +99,44 @@ struct enic_port_profile { u8 mac_addr[ETH_ALEN]; }; +#ifdef CONFIG_RFS_ACCEL +/* enic_rfs_fltr_node - rfs filter node in hash table + * @keys: IPv4 5 tuple + * @flow_id: flow_id of clsf filter provided by kernel + * @fltr_id: filter id of clsf filter returned by adaptor + * @rq_id: desired rq index + * @node: hlist_node + */ +struct enic_rfs_fltr_node { + struct flow_keys keys; + u32 flow_id; + u16 fltr_id; + u16 rq_id; + struct hlist_node node; +}; + +/* enic_rfs_flw_tbl - rfs flow table + * @max: Maximum number of 
filters vNIC supports + * @free: Number of free filters available + * @toclean: hash table index to clean next + * @ht_head: hash table list head + * @lock: spin lock + * @rfs_may_expire: timer function for enic_rps_may_expire_flow + */ +struct enic_rfs_flw_tbl { + u16 max; + int free; + +#define ENIC_RFS_FLW_BITSHIFT (10) +#define ENIC_RFS_FLW_MASK ((1 << ENIC_RFS_FLW_BITSHIFT) - 1) + u16 toclean:ENIC_RFS_FLW_BITSHIFT; + struct hlist_head ht_head[1 << ENIC_RFS_FLW_BITSHIFT]; + spinlock_t lock; + struct timer_list rfs_may_expire; +}; + +#endif /* CONFIG_RFS_ACCEL */ + /* Per-instance private data structure */ struct enic { struct net_device *netdev; @@ -150,6 +188,9 @@ struct enic { /* completion queue cache line section */ ____cacheline_aligned struct vnic_cq cq[ENIC_CQ_MAX]; unsigned int cq_count; +#ifdef CONFIG_RFS_ACCEL + struct enic_rfs_flw_tbl rfs_h; +#endif }; static inline struct device *enic_get_dev(struct enic *enic) diff --git a/drivers/net/ethernet/cisco/enic/enic_clsf.c b/drivers/net/ethernet/cisco/enic/enic_clsf.c index f6703c4f76a9..7f27a4c7fbfd 100644 --- a/drivers/net/ethernet/cisco/enic/enic_clsf.c +++ b/drivers/net/ethernet/cisco/enic/enic_clsf.c @@ -64,3 +64,216 @@ int enic_delfltr(struct enic *enic, u16 filter_id) return ret; } + +#ifdef CONFIG_RFS_ACCEL +void enic_flow_may_expire(unsigned long data) +{ + struct enic *enic = (struct enic *)data; + bool res; + int j; + + spin_lock(&enic->rfs_h.lock); + for (j = 0; j < ENIC_CLSF_EXPIRE_COUNT; j++) { + struct hlist_head *hhead; + struct hlist_node *tmp; + struct enic_rfs_fltr_node *n; + + hhead = &enic->rfs_h.ht_head[enic->rfs_h.toclean++]; + hlist_for_each_entry_safe(n, tmp, hhead, node) { + res = rps_may_expire_flow(enic->netdev, n->rq_id, + n->flow_id, n->fltr_id); + if (res) { + res = enic_delfltr(enic, n->fltr_id); + if (unlikely(res)) + continue; + hlist_del(&n->node); + kfree(n); + enic->rfs_h.free++; + } + } + } + spin_unlock(&enic->rfs_h.lock); + mod_timer(&enic->rfs_h.rfs_may_expire, 
jiffies + HZ/4); +} + +/* enic_rfs_flw_tbl_init - initialize enic->rfs_h members + * @enic: enic data + */ +void enic_rfs_flw_tbl_init(struct enic *enic) +{ + int i; + + spin_lock_init(&enic->rfs_h.lock); + for (i = 0; i <= ENIC_RFS_FLW_MASK; i++) + INIT_HLIST_HEAD(&enic->rfs_h.ht_head[i]); + enic->rfs_h.max = enic->config.num_arfs; + enic->rfs_h.free = enic->rfs_h.max; + enic->rfs_h.toclean = 0; + init_timer(&enic->rfs_h.rfs_may_expire); + enic->rfs_h.rfs_may_expire.function = enic_flow_may_expire; + enic->rfs_h.rfs_may_expire.data = (unsigned long)enic; + mod_timer(&enic->rfs_h.rfs_may_expire, jiffies + HZ/4); +} + +void enic_rfs_flw_tbl_free(struct enic *enic) +{ + int i, res; + + del_timer_sync(&enic->rfs_h.rfs_may_expire); + spin_lock(&enic->rfs_h.lock); + enic->rfs_h.free = 0; + for (i = 0; i < (1 << ENIC_RFS_FLW_BITSHIFT); i++) { + struct hlist_head *hhead; + struct hlist_node *tmp; + struct enic_rfs_fltr_node *n; + + hhead = &enic->rfs_h.ht_head[i]; + hlist_for_each_entry_safe(n, tmp, hhead, node) { + enic_delfltr(enic, n->fltr_id); + hlist_del(&n->node); + kfree(n); + } + } + spin_unlock(&enic->rfs_h.lock); +} + +static struct enic_rfs_fltr_node *htbl_key_search(struct hlist_head *h, + struct flow_keys *k) +{ + struct enic_rfs_fltr_node *tpos; + + hlist_for_each_entry(tpos, h, node) + if (tpos->keys.src == k->src && + tpos->keys.dst == k->dst && + tpos->keys.ports == k->ports && + tpos->keys.ip_proto == k->ip_proto && + tpos->keys.n_proto == k->n_proto) + return tpos; + return NULL; +} + +int enic_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, + u16 rxq_index, u32 flow_id) +{ + struct flow_keys keys; + struct enic_rfs_fltr_node *n; + struct enic *enic; + u16 tbl_idx; + int res, i; + + enic = netdev_priv(dev); + res = skb_flow_dissect(skb, &keys); + if (!res || keys.n_proto != htons(ETH_P_IP) || + (keys.ip_proto != IPPROTO_TCP && keys.ip_proto != IPPROTO_UDP)) + return -EPROTONOSUPPORT; + + tbl_idx = skb_get_hash_raw(skb) & 
ENIC_RFS_FLW_MASK; + spin_lock(&enic->rfs_h.lock); + n = htbl_key_search(&enic->rfs_h.ht_head[tbl_idx], &keys); + + if (n) { /* entry already present */ + if (rxq_index == n->rq_id) { + res = -EEXIST; + goto ret_unlock; + } + + /* desired rq changed for the flow, we need to delete + * old fltr and add new one + * + * The moment we delete the fltr, the upcoming pkts + * are put it default rq based on rss. When we add + * new filter, upcoming pkts are put in desired queue. + * This could cause ooo pkts. + * + * Lets 1st try adding new fltr and then del old one. + */ + i = --enic->rfs_h.free; + /* clsf tbl is full, we have to del old fltr first*/ + if (unlikely(i < 0)) { + enic->rfs_h.free++; + res = enic_delfltr(enic, n->fltr_id); + if (unlikely(res < 0)) + goto ret_unlock; + res = enic_addfltr_5t(enic, &keys, rxq_index); + if (res < 0) { + hlist_del(&n->node); + enic->rfs_h.free++; + goto ret_unlock; + } + /* add new fltr 1st then del old fltr */ + } else { + int ret; + + res = enic_addfltr_5t(enic, &keys, rxq_index); + if (res < 0) { + enic->rfs_h.free++; + goto ret_unlock; + } + ret = enic_delfltr(enic, n->fltr_id); + /* deleting old fltr failed. Add old fltr to list. + * enic_flow_may_expire() will try to delete it later. 
+ */ + if (unlikely(ret < 0)) { + struct enic_rfs_fltr_node *d; + struct hlist_head *head; + + head = &enic->rfs_h.ht_head[tbl_idx]; + d = kmalloc(sizeof(*d), GFP_ATOMIC); + if (d) { + d->fltr_id = n->fltr_id; + INIT_HLIST_NODE(&d->node); + hlist_add_head(&d->node, head); + } + } else { + enic->rfs_h.free++; + } + } + n->rq_id = rxq_index; + n->fltr_id = res; + n->flow_id = flow_id; + /* entry not present */ + } else { + i = --enic->rfs_h.free; + if (i <= 0) { + enic->rfs_h.free++; + res = -EBUSY; + goto ret_unlock; + } + + n = kmalloc(sizeof(*n), GFP_ATOMIC); + if (!n) { + res = -ENOMEM; + enic->rfs_h.free++; + goto ret_unlock; + } + + res = enic_addfltr_5t(enic, &keys, rxq_index); + if (res < 0) { + kfree(n); + enic->rfs_h.free++; + goto ret_unlock; + } + n->rq_id = rxq_index; + n->fltr_id = res; + n->flow_id = flow_id; + n->keys = keys; + INIT_HLIST_NODE(&n->node); + hlist_add_head(&n->node, &enic->rfs_h.ht_head[tbl_idx]); + } + +ret_unlock: + spin_unlock(&enic->rfs_h.lock); + return res; +} + +#else + +void enic_rfs_flw_tbl_init(struct enic *enic) +{ +} + +void enic_rfs_flw_tbl_free(struct enic *enic) +{ +} + +#endif /* CONFIG_RFS_ACCEL */ diff --git a/drivers/net/ethernet/cisco/enic/enic_clsf.h b/drivers/net/ethernet/cisco/enic/enic_clsf.h index b6925b368b77..76a85bb0bb73 100644 --- a/drivers/net/ethernet/cisco/enic/enic_clsf.h +++ b/drivers/net/ethernet/cisco/enic/enic_clsf.h @@ -4,7 +4,16 @@ #include "vnic_dev.h" #include "enic.h" +#define ENIC_CLSF_EXPIRE_COUNT 128 + int enic_addfltr_5t(struct enic *enic, struct flow_keys *keys, u16 rq); int enic_delfltr(struct enic *enic, u16 filter_id); +#ifdef CONFIG_RFS_ACCEL +void enic_rfs_flw_tbl_init(struct enic *enic); +void enic_rfs_flw_tbl_free(struct enic *enic); +int enic_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, + u16 rxq_index, u32 flow_id); +#endif /* CONFIG_RFS_ACCEL */ + #endif /* _ENIC_CLSF_H_ */ diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c 
b/drivers/net/ethernet/cisco/enic/enic_main.c index 151b375337a9..a302f1b3e8ff 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -52,6 +52,7 @@ #include "enic.h" #include "enic_dev.h" #include "enic_pp.h" +#include "enic_clsf.h" #define ENIC_NOTIFY_TIMER_PERIOD (2 * HZ) #define WQ_ENET_MAX_DESC_LEN (1 << WQ_ENET_LEN_BITS) @@ -1546,6 +1547,7 @@ static int enic_open(struct net_device *netdev) vnic_intr_unmask(&enic->intr[i]); enic_notify_timer_start(enic); + enic_rfs_flw_tbl_init(enic); return 0; @@ -1572,6 +1574,7 @@ static int enic_stop(struct net_device *netdev) enic_synchronize_irqs(enic); del_timer_sync(&enic->notify_timer); + enic_rfs_flw_tbl_free(enic); enic_dev_disable(enic); @@ -2064,6 +2067,9 @@ static const struct net_device_ops enic_netdev_dynamic_ops = { #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = enic_poll_controller, #endif +#ifdef CONFIG_RFS_ACCEL + .ndo_rx_flow_steer = enic_rx_flow_steer, +#endif }; static const struct net_device_ops enic_netdev_ops = { @@ -2084,6 +2090,9 @@ static const struct net_device_ops enic_netdev_ops = { #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = enic_poll_controller, #endif +#ifdef CONFIG_RFS_ACCEL + .ndo_rx_flow_steer = enic_rx_flow_steer, +#endif }; static void enic_dev_deinit(struct enic *enic) @@ -2429,6 +2438,10 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->features |= netdev->hw_features; +#ifdef CONFIG_RFS_ACCEL + netdev->hw_features |= NETIF_F_NTUPLE; +#endif + if (using_dac) netdev->features |= NETIF_F_HIGHDMA; diff --git a/drivers/net/ethernet/cisco/enic/enic_res.c b/drivers/net/ethernet/cisco/enic/enic_res.c index 31d658880c3c..9c96911fb2c8 100644 --- a/drivers/net/ethernet/cisco/enic/enic_res.c +++ b/drivers/net/ethernet/cisco/enic/enic_res.c @@ -71,6 +71,7 @@ int enic_get_vnic_config(struct enic *enic) GET_CONFIG(intr_mode); GET_CONFIG(intr_timer_usec); GET_CONFIG(loop_tag); + 
GET_CONFIG(num_arfs); c->wq_desc_count = min_t(u32, ENIC_MAX_WQ_DESCS, diff --git a/drivers/net/ethernet/cisco/enic/vnic_enet.h b/drivers/net/ethernet/cisco/enic/vnic_enet.h index 609542848e02..75aced2de869 100644 --- a/drivers/net/ethernet/cisco/enic/vnic_enet.h +++ b/drivers/net/ethernet/cisco/enic/vnic_enet.h @@ -32,6 +32,8 @@ struct vnic_enet_config { char devname[16]; u32 intr_timer_usec; u16 loop_tag; + u16 vf_rq_count; + u16 num_arfs; }; #define VENETF_TSO 0x1 /* TSO enabled */ From 8e091340cfcd6f96ca0dddb078ce28c407a6d44c Mon Sep 17 00:00:00 2001 From: Tony Camuso Date: Mon, 23 Jun 2014 16:08:03 +0530 Subject: [PATCH 6/8] enic: fix lockdep around devcmd_lock We were experiencing occasional "BUG: scheduling while atomic" splats in our testing. Enabling DEBUG_SPINLOCK and DEBUG_LOCKDEP in the kernel exposed a lockdep in the enic driver. enic 0000:0b:00.0 eth2: Link UP ====================================================== [ INFO: SOFTIRQ-safe -> SOFTIRQ-unsafe lock order detected ] 3.12.0-rc1.x86_64-dbg+ #2 Tainted: GF W ------------------------------------------------------ NetworkManager/4209 [HC0[0]:SC0[2]:HE1:SE0] is trying to acquire: (&(&enic->devcmd_lock)->rlock){+.+...}, at: [] enic_dev_packet_filter+0x44/0x90 [enic] The fix was to replace spin_lock with spin_lock_bh for the enic devcmd_lock, so that soft irqs would be disabled while the lock is held. Signed-off-by: Sujith Sankar Signed-off-by: Tony Camuso Signed-off-by: Govindarajulu Varadarajan <_govind@gmx.com> Signed-off-by: David S. 
Miller --- drivers/net/ethernet/cisco/enic/enic_api.c | 4 +- drivers/net/ethernet/cisco/enic/enic_dev.c | 80 ++++++++++----------- drivers/net/ethernet/cisco/enic/enic_dev.h | 4 +- drivers/net/ethernet/cisco/enic/enic_main.c | 16 ++--- 4 files changed, 52 insertions(+), 52 deletions(-) diff --git a/drivers/net/ethernet/cisco/enic/enic_api.c b/drivers/net/ethernet/cisco/enic/enic_api.c index e13efbdaa2ed..b161f24522b8 100644 --- a/drivers/net/ethernet/cisco/enic/enic_api.c +++ b/drivers/net/ethernet/cisco/enic/enic_api.c @@ -34,13 +34,13 @@ int enic_api_devcmd_proxy_by_index(struct net_device *netdev, int vf, struct vnic_dev *vdev = enic->vdev; spin_lock(&enic->enic_api_lock); - spin_lock(&enic->devcmd_lock); + spin_lock_bh(&enic->devcmd_lock); vnic_dev_cmd_proxy_by_index_start(vdev, vf); err = vnic_dev_cmd(vdev, cmd, a0, a1, wait); vnic_dev_cmd_proxy_end(vdev); - spin_unlock(&enic->devcmd_lock); + spin_unlock_bh(&enic->devcmd_lock); spin_unlock(&enic->enic_api_lock); return err; diff --git a/drivers/net/ethernet/cisco/enic/enic_dev.c b/drivers/net/ethernet/cisco/enic/enic_dev.c index 3e27df522847..87ddc44b590e 100644 --- a/drivers/net/ethernet/cisco/enic/enic_dev.c +++ b/drivers/net/ethernet/cisco/enic/enic_dev.c @@ -29,9 +29,9 @@ int enic_dev_fw_info(struct enic *enic, struct vnic_devcmd_fw_info **fw_info) { int err; - spin_lock(&enic->devcmd_lock); + spin_lock_bh(&enic->devcmd_lock); err = vnic_dev_fw_info(enic->vdev, fw_info); - spin_unlock(&enic->devcmd_lock); + spin_unlock_bh(&enic->devcmd_lock); return err; } @@ -40,9 +40,9 @@ int enic_dev_stats_dump(struct enic *enic, struct vnic_stats **vstats) { int err; - spin_lock(&enic->devcmd_lock); + spin_lock_bh(&enic->devcmd_lock); err = vnic_dev_stats_dump(enic->vdev, vstats); - spin_unlock(&enic->devcmd_lock); + spin_unlock_bh(&enic->devcmd_lock); return err; } @@ -54,9 +54,9 @@ int enic_dev_add_station_addr(struct enic *enic) if (!is_valid_ether_addr(enic->netdev->dev_addr)) return -EADDRNOTAVAIL; - 
spin_lock(&enic->devcmd_lock); + spin_lock_bh(&enic->devcmd_lock); err = vnic_dev_add_addr(enic->vdev, enic->netdev->dev_addr); - spin_unlock(&enic->devcmd_lock); + spin_unlock_bh(&enic->devcmd_lock); return err; } @@ -68,9 +68,9 @@ int enic_dev_del_station_addr(struct enic *enic) if (!is_valid_ether_addr(enic->netdev->dev_addr)) return -EADDRNOTAVAIL; - spin_lock(&enic->devcmd_lock); + spin_lock_bh(&enic->devcmd_lock); err = vnic_dev_del_addr(enic->vdev, enic->netdev->dev_addr); - spin_unlock(&enic->devcmd_lock); + spin_unlock_bh(&enic->devcmd_lock); return err; } @@ -80,10 +80,10 @@ int enic_dev_packet_filter(struct enic *enic, int directed, int multicast, { int err; - spin_lock(&enic->devcmd_lock); + spin_lock_bh(&enic->devcmd_lock); err = vnic_dev_packet_filter(enic->vdev, directed, multicast, broadcast, promisc, allmulti); - spin_unlock(&enic->devcmd_lock); + spin_unlock_bh(&enic->devcmd_lock); return err; } @@ -92,9 +92,9 @@ int enic_dev_add_addr(struct enic *enic, const u8 *addr) { int err; - spin_lock(&enic->devcmd_lock); + spin_lock_bh(&enic->devcmd_lock); err = vnic_dev_add_addr(enic->vdev, addr); - spin_unlock(&enic->devcmd_lock); + spin_unlock_bh(&enic->devcmd_lock); return err; } @@ -103,9 +103,9 @@ int enic_dev_del_addr(struct enic *enic, const u8 *addr) { int err; - spin_lock(&enic->devcmd_lock); + spin_lock_bh(&enic->devcmd_lock); err = vnic_dev_del_addr(enic->vdev, addr); - spin_unlock(&enic->devcmd_lock); + spin_unlock_bh(&enic->devcmd_lock); return err; } @@ -114,9 +114,9 @@ int enic_dev_notify_unset(struct enic *enic) { int err; - spin_lock(&enic->devcmd_lock); + spin_lock_bh(&enic->devcmd_lock); err = vnic_dev_notify_unset(enic->vdev); - spin_unlock(&enic->devcmd_lock); + spin_unlock_bh(&enic->devcmd_lock); return err; } @@ -125,9 +125,9 @@ int enic_dev_hang_notify(struct enic *enic) { int err; - spin_lock(&enic->devcmd_lock); + spin_lock_bh(&enic->devcmd_lock); err = vnic_dev_hang_notify(enic->vdev); - spin_unlock(&enic->devcmd_lock); + 
spin_unlock_bh(&enic->devcmd_lock); return err; } @@ -136,10 +136,10 @@ int enic_dev_set_ig_vlan_rewrite_mode(struct enic *enic) { int err; - spin_lock(&enic->devcmd_lock); + spin_lock_bh(&enic->devcmd_lock); err = vnic_dev_set_ig_vlan_rewrite_mode(enic->vdev, IG_VLAN_REWRITE_MODE_PRIORITY_TAG_DEFAULT_VLAN); - spin_unlock(&enic->devcmd_lock); + spin_unlock_bh(&enic->devcmd_lock); return err; } @@ -148,9 +148,9 @@ int enic_dev_enable(struct enic *enic) { int err; - spin_lock(&enic->devcmd_lock); + spin_lock_bh(&enic->devcmd_lock); err = vnic_dev_enable_wait(enic->vdev); - spin_unlock(&enic->devcmd_lock); + spin_unlock_bh(&enic->devcmd_lock); return err; } @@ -159,9 +159,9 @@ int enic_dev_disable(struct enic *enic) { int err; - spin_lock(&enic->devcmd_lock); + spin_lock_bh(&enic->devcmd_lock); err = vnic_dev_disable(enic->vdev); - spin_unlock(&enic->devcmd_lock); + spin_unlock_bh(&enic->devcmd_lock); return err; } @@ -170,9 +170,9 @@ int enic_dev_intr_coal_timer_info(struct enic *enic) { int err; - spin_lock(&enic->devcmd_lock); + spin_lock_bh(&enic->devcmd_lock); err = vnic_dev_intr_coal_timer_info(enic->vdev); - spin_unlock(&enic->devcmd_lock); + spin_unlock_bh(&enic->devcmd_lock); return err; } @@ -181,9 +181,9 @@ int enic_vnic_dev_deinit(struct enic *enic) { int err; - spin_lock(&enic->devcmd_lock); + spin_lock_bh(&enic->devcmd_lock); err = vnic_dev_deinit(enic->vdev); - spin_unlock(&enic->devcmd_lock); + spin_unlock_bh(&enic->devcmd_lock); return err; } @@ -192,10 +192,10 @@ int enic_dev_init_prov2(struct enic *enic, struct vic_provinfo *vp) { int err; - spin_lock(&enic->devcmd_lock); + spin_lock_bh(&enic->devcmd_lock); err = vnic_dev_init_prov2(enic->vdev, (u8 *)vp, vic_provinfo_size(vp)); - spin_unlock(&enic->devcmd_lock); + spin_unlock_bh(&enic->devcmd_lock); return err; } @@ -204,9 +204,9 @@ int enic_dev_deinit_done(struct enic *enic, int *status) { int err; - spin_lock(&enic->devcmd_lock); + spin_lock_bh(&enic->devcmd_lock); err = 
vnic_dev_deinit_done(enic->vdev, status); - spin_unlock(&enic->devcmd_lock); + spin_unlock_bh(&enic->devcmd_lock); return err; } @@ -217,9 +217,9 @@ int enic_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid) struct enic *enic = netdev_priv(netdev); int err; - spin_lock(&enic->devcmd_lock); + spin_lock_bh(&enic->devcmd_lock); err = enic_add_vlan(enic, vid); - spin_unlock(&enic->devcmd_lock); + spin_unlock_bh(&enic->devcmd_lock); return err; } @@ -230,9 +230,9 @@ int enic_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid) struct enic *enic = netdev_priv(netdev); int err; - spin_lock(&enic->devcmd_lock); + spin_lock_bh(&enic->devcmd_lock); err = enic_del_vlan(enic, vid); - spin_unlock(&enic->devcmd_lock); + spin_unlock_bh(&enic->devcmd_lock); return err; } @@ -241,9 +241,9 @@ int enic_dev_enable2(struct enic *enic, int active) { int err; - spin_lock(&enic->devcmd_lock); + spin_lock_bh(&enic->devcmd_lock); err = vnic_dev_enable2(enic->vdev, active); - spin_unlock(&enic->devcmd_lock); + spin_unlock_bh(&enic->devcmd_lock); return err; } @@ -252,9 +252,9 @@ int enic_dev_enable2_done(struct enic *enic, int *status) { int err; - spin_lock(&enic->devcmd_lock); + spin_lock_bh(&enic->devcmd_lock); err = vnic_dev_enable2_done(enic->vdev, status); - spin_unlock(&enic->devcmd_lock); + spin_unlock_bh(&enic->devcmd_lock); return err; } diff --git a/drivers/net/ethernet/cisco/enic/enic_dev.h b/drivers/net/ethernet/cisco/enic/enic_dev.h index 36ea1ab25f6a..10bb970b2f35 100644 --- a/drivers/net/ethernet/cisco/enic/enic_dev.h +++ b/drivers/net/ethernet/cisco/enic/enic_dev.h @@ -28,7 +28,7 @@ */ #define ENIC_DEVCMD_PROXY_BY_INDEX(vf, err, enic, vnicdevcmdfn, ...) 
\ do { \ - spin_lock(&enic->devcmd_lock); \ + spin_lock_bh(&enic->devcmd_lock); \ if (enic_is_valid_vf(enic, vf)) { \ vnic_dev_cmd_proxy_by_index_start(enic->vdev, vf); \ err = vnicdevcmdfn(enic->vdev, ##__VA_ARGS__); \ @@ -36,7 +36,7 @@ } else { \ err = vnicdevcmdfn(enic->vdev, ##__VA_ARGS__); \ } \ - spin_unlock(&enic->devcmd_lock); \ + spin_unlock_bh(&enic->devcmd_lock); \ } while (0) int enic_dev_fw_info(struct enic *enic, struct vnic_devcmd_fw_info **fw_info); diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index a302f1b3e8ff..5448df2d78c2 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -1465,7 +1465,7 @@ static int enic_dev_notify_set(struct enic *enic) { int err; - spin_lock(&enic->devcmd_lock); + spin_lock_bh(&enic->devcmd_lock); switch (vnic_dev_get_intr_mode(enic->vdev)) { case VNIC_DEV_INTR_MODE_INTX: err = vnic_dev_notify_set(enic->vdev, @@ -1479,7 +1479,7 @@ static int enic_dev_notify_set(struct enic *enic) err = vnic_dev_notify_set(enic->vdev, -1 /* no intr */); break; } - spin_unlock(&enic->devcmd_lock); + spin_unlock_bh(&enic->devcmd_lock); return err; } @@ -1804,11 +1804,11 @@ static int enic_set_rsskey(struct enic *enic) memcpy(rss_key_buf_va, &rss_key, sizeof(union vnic_rss_key)); - spin_lock(&enic->devcmd_lock); + spin_lock_bh(&enic->devcmd_lock); err = enic_set_rss_key(enic, rss_key_buf_pa, sizeof(union vnic_rss_key)); - spin_unlock(&enic->devcmd_lock); + spin_unlock_bh(&enic->devcmd_lock); pci_free_consistent(enic->pdev, sizeof(union vnic_rss_key), rss_key_buf_va, rss_key_buf_pa); @@ -1831,11 +1831,11 @@ static int enic_set_rsscpu(struct enic *enic, u8 rss_hash_bits) for (i = 0; i < (1 << rss_hash_bits); i++) (*rss_cpu_buf_va).cpu[i/4].b[i%4] = i % enic->rq_count; - spin_lock(&enic->devcmd_lock); + spin_lock_bh(&enic->devcmd_lock); err = enic_set_rss_cpu(enic, rss_cpu_buf_pa, sizeof(union vnic_rss_cpu)); - 
spin_unlock(&enic->devcmd_lock); + spin_unlock_bh(&enic->devcmd_lock); pci_free_consistent(enic->pdev, sizeof(union vnic_rss_cpu), rss_cpu_buf_va, rss_cpu_buf_pa); @@ -1853,13 +1853,13 @@ static int enic_set_niccfg(struct enic *enic, u8 rss_default_cpu, /* Enable VLAN tag stripping. */ - spin_lock(&enic->devcmd_lock); + spin_lock_bh(&enic->devcmd_lock); err = enic_set_nic_cfg(enic, rss_default_cpu, rss_hash_type, rss_hash_bits, rss_base_cpu, rss_enable, tso_ipid_split_en, ig_vlan_strip_en); - spin_unlock(&enic->devcmd_lock); + spin_unlock_bh(&enic->devcmd_lock); return err; } From 14747cd977195a8aae13d0b1ad021e33c8786afe Mon Sep 17 00:00:00 2001 From: Govindarajulu Varadarajan <_govind@gmx.com> Date: Mon, 23 Jun 2014 16:08:04 +0530 Subject: [PATCH 7/8] enic: add low latency socket busy_poll support This patch adds support for low latency busy_poll. * Introduce driver's ndo_busy_poll function enic_busy_poll, which is called by socket waiting for data. * Introduce locking between napi_poll and busy_poll * enic_busy_poll cleans up all the rx pkts possible. While in busy_poll, rq holds the state ENIC_POLL_STATE_POLL. While in napi_poll, rq holds the state ENIC_POLL_STATE_NAPI. * in napi_poll we return if we are in busy_poll. In case of INTx & msix, we just service wq and return if busy_poll is going on. Signed-off-by: Govindarajulu Varadarajan <_govind@gmx.com> Signed-off-by: David S.
Miller --- drivers/net/ethernet/cisco/enic/enic_main.c | 85 ++++++++++++-- drivers/net/ethernet/cisco/enic/vnic_rq.h | 122 ++++++++++++++++++++ 2 files changed, 195 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index 5448df2d78c2..d4918eef5050 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -42,6 +42,9 @@ #ifdef CONFIG_RFS_ACCEL #include #endif +#ifdef CONFIG_NET_RX_BUSY_POLL +#include +#endif #include "cq_enet_desc.h" #include "vnic_dev.h" @@ -1053,10 +1056,12 @@ static void enic_rq_indicate_buf(struct vnic_rq *rq, if (vlan_stripped) __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tci); - if (netdev->features & NETIF_F_GRO) - napi_gro_receive(&enic->napi[q_number], skb); - else + skb_mark_napi_id(skb, &enic->napi[rq->index]); + if (enic_poll_busy_polling(rq) || + !(netdev->features & NETIF_F_GRO)) netif_receive_skb(skb); + else + napi_gro_receive(&enic->napi[q_number], skb); if (enic->rx_coalesce_setting.use_adaptive_rx_coalesce) enic_intr_update_pkt_size(&cq->pkt_size_counter, bytes_written); @@ -1093,16 +1098,22 @@ static int enic_poll(struct napi_struct *napi, int budget) unsigned int work_done, rq_work_done = 0, wq_work_done; int err; - /* Service RQ (first) and WQ - */ + wq_work_done = vnic_cq_service(&enic->cq[cq_wq], wq_work_to_do, + enic_wq_service, NULL); + + if (!enic_poll_lock_napi(&enic->rq[cq_rq])) { + if (wq_work_done > 0) + vnic_intr_return_credits(&enic->intr[intr], + wq_work_done, + 0 /* dont unmask intr */, + 0 /* dont reset intr timer */); + return rq_work_done; + } if (budget > 0) rq_work_done = vnic_cq_service(&enic->cq[cq_rq], rq_work_to_do, enic_rq_service, NULL); - wq_work_done = vnic_cq_service(&enic->cq[cq_wq], - wq_work_to_do, enic_wq_service, NULL); - /* Accumulate intr event credits for this polling * cycle. An intr event is the completion of a * a WQ or RQ packet. 
@@ -1134,6 +1145,7 @@ static int enic_poll(struct napi_struct *napi, int budget) napi_complete(napi); vnic_intr_unmask(&enic->intr[intr]); } + enic_poll_unlock_napi(&enic->rq[cq_rq]); return rq_work_done; } @@ -1234,6 +1246,34 @@ static void enic_set_rx_cpu_rmap(struct enic *enic) #endif /* CONFIG_RFS_ACCEL */ +#ifdef CONFIG_NET_RX_BUSY_POLL +int enic_busy_poll(struct napi_struct *napi) +{ + struct net_device *netdev = napi->dev; + struct enic *enic = netdev_priv(netdev); + unsigned int rq = (napi - &enic->napi[0]); + unsigned int cq = enic_cq_rq(enic, rq); + unsigned int intr = enic_msix_rq_intr(enic, rq); + unsigned int work_to_do = -1; /* clean all pkts possible */ + unsigned int work_done; + + if (!enic_poll_lock_poll(&enic->rq[rq])) + return LL_FLUSH_BUSY; + work_done = vnic_cq_service(&enic->cq[cq], work_to_do, + enic_rq_service, NULL); + + if (work_done > 0) + vnic_intr_return_credits(&enic->intr[intr], + work_done, 0, 0); + vnic_rq_fill(&enic->rq[rq], enic_rq_alloc_buf); + if (enic->rx_coalesce_setting.use_adaptive_rx_coalesce) + enic_calc_int_moderation(enic, &enic->rq[rq]); + enic_poll_unlock_poll(&enic->rq[rq]); + + return work_done; +} +#endif /* CONFIG_NET_RX_BUSY_POLL */ + static int enic_poll_msix(struct napi_struct *napi, int budget) { struct net_device *netdev = napi->dev; @@ -1245,6 +1285,8 @@ static int enic_poll_msix(struct napi_struct *napi, int budget) unsigned int work_done = 0; int err; + if (!enic_poll_lock_napi(&enic->rq[rq])) + return work_done; /* Service RQ */ @@ -1290,6 +1332,7 @@ static int enic_poll_msix(struct napi_struct *napi, int budget) enic_set_int_moderation(enic, &enic->rq[rq]); vnic_intr_unmask(&enic->intr[intr]); } + enic_poll_unlock_napi(&enic->rq[rq]); return work_done; } @@ -1538,8 +1581,10 @@ static int enic_open(struct net_device *netdev) netif_tx_wake_all_queues(netdev); - for (i = 0; i < enic->rq_count; i++) + for (i = 0; i < enic->rq_count; i++) { + enic_busy_poll_init_lock(&enic->rq[i]); 
napi_enable(&enic->napi[i]); + } enic_dev_enable(enic); @@ -1578,8 +1623,13 @@ static int enic_stop(struct net_device *netdev) enic_dev_disable(enic); - for (i = 0; i < enic->rq_count; i++) + local_bh_disable(); + for (i = 0; i < enic->rq_count; i++) { napi_disable(&enic->napi[i]); + while (!enic_poll_lock_napi(&enic->rq[i])) + mdelay(1); + } + local_bh_enable(); netif_carrier_off(netdev); netif_tx_disable(netdev); @@ -2070,6 +2120,9 @@ static const struct net_device_ops enic_netdev_dynamic_ops = { #ifdef CONFIG_RFS_ACCEL .ndo_rx_flow_steer = enic_rx_flow_steer, #endif +#ifdef CONFIG_NET_RX_BUSY_POLL + .ndo_busy_poll = enic_busy_poll, +#endif }; static const struct net_device_ops enic_netdev_ops = { @@ -2093,14 +2146,19 @@ static const struct net_device_ops enic_netdev_ops = { #ifdef CONFIG_RFS_ACCEL .ndo_rx_flow_steer = enic_rx_flow_steer, #endif +#ifdef CONFIG_NET_RX_BUSY_POLL + .ndo_busy_poll = enic_busy_poll, +#endif }; static void enic_dev_deinit(struct enic *enic) { unsigned int i; - for (i = 0; i < enic->rq_count; i++) + for (i = 0; i < enic->rq_count; i++) { + napi_hash_del(&enic->napi[i]); netif_napi_del(&enic->napi[i]); + } enic_free_vnic_resources(enic); enic_clear_intr_mode(enic); @@ -2166,11 +2224,14 @@ static int enic_dev_init(struct enic *enic) switch (vnic_dev_get_intr_mode(enic->vdev)) { default: netif_napi_add(netdev, &enic->napi[0], enic_poll, 64); + napi_hash_add(&enic->napi[0]); break; case VNIC_DEV_INTR_MODE_MSIX: - for (i = 0; i < enic->rq_count; i++) + for (i = 0; i < enic->rq_count; i++) { netif_napi_add(netdev, &enic->napi[i], enic_poll_msix, 64); + napi_hash_add(&enic->napi[i]); + } break; } diff --git a/drivers/net/ethernet/cisco/enic/vnic_rq.h b/drivers/net/ethernet/cisco/enic/vnic_rq.h index ee7bc95af278..8111d5202df2 100644 --- a/drivers/net/ethernet/cisco/enic/vnic_rq.h +++ b/drivers/net/ethernet/cisco/enic/vnic_rq.h @@ -85,6 +85,21 @@ struct vnic_rq { struct vnic_rq_buf *to_clean; void *os_buf_head; unsigned int pkts_outstanding; 
+#ifdef CONFIG_NET_RX_BUSY_POLL +#define ENIC_POLL_STATE_IDLE 0 +#define ENIC_POLL_STATE_NAPI (1 << 0) /* NAPI owns this poll */ +#define ENIC_POLL_STATE_POLL (1 << 1) /* poll owns this poll */ +#define ENIC_POLL_STATE_NAPI_YIELD (1 << 2) /* NAPI yielded this poll */ +#define ENIC_POLL_STATE_POLL_YIELD (1 << 3) /* poll yielded this poll */ +#define ENIC_POLL_YIELD (ENIC_POLL_STATE_NAPI_YIELD | \ + ENIC_POLL_STATE_POLL_YIELD) +#define ENIC_POLL_LOCKED (ENIC_POLL_STATE_NAPI | \ + ENIC_POLL_STATE_POLL) +#define ENIC_POLL_USER_PEND (ENIC_POLL_STATE_POLL | \ + ENIC_POLL_STATE_POLL_YIELD) + unsigned int bpoll_state; + spinlock_t bpoll_lock; +#endif /* CONFIG_NET_RX_BUSY_POLL */ }; static inline unsigned int vnic_rq_desc_avail(struct vnic_rq *rq) @@ -197,6 +212,113 @@ static inline int vnic_rq_fill(struct vnic_rq *rq, return 0; } +#ifdef CONFIG_NET_RX_BUSY_POLL +static inline void enic_busy_poll_init_lock(struct vnic_rq *rq) +{ + spin_lock_init(&rq->bpoll_lock); + rq->bpoll_state = ENIC_POLL_STATE_IDLE; +} + +static inline bool enic_poll_lock_napi(struct vnic_rq *rq) +{ + bool rc = true; + + spin_lock(&rq->bpoll_lock); + if (rq->bpoll_state & ENIC_POLL_LOCKED) { + WARN_ON(rq->bpoll_state & ENIC_POLL_STATE_NAPI); + rq->bpoll_state |= ENIC_POLL_STATE_NAPI_YIELD; + rc = false; + } else { + rq->bpoll_state = ENIC_POLL_STATE_NAPI; + } + spin_unlock(&rq->bpoll_lock); + + return rc; +} + +static inline bool enic_poll_unlock_napi(struct vnic_rq *rq) +{ + bool rc = false; + + spin_lock(&rq->bpoll_lock); + WARN_ON(rq->bpoll_state & + (ENIC_POLL_STATE_POLL | ENIC_POLL_STATE_NAPI_YIELD)); + if (rq->bpoll_state & ENIC_POLL_STATE_POLL_YIELD) + rc = true; + rq->bpoll_state = ENIC_POLL_STATE_IDLE; + spin_unlock(&rq->bpoll_lock); + + return rc; +} + +static inline bool enic_poll_lock_poll(struct vnic_rq *rq) +{ + bool rc = true; + + spin_lock_bh(&rq->bpoll_lock); + if (rq->bpoll_state & ENIC_POLL_LOCKED) { + rq->bpoll_state |= ENIC_POLL_STATE_POLL_YIELD; + rc = false; + } else { + 
rq->bpoll_state |= ENIC_POLL_STATE_POLL; + } + spin_unlock_bh(&rq->bpoll_lock); + + return rc; +} + +static inline bool enic_poll_unlock_poll(struct vnic_rq *rq) +{ + bool rc = false; + + spin_lock_bh(&rq->bpoll_lock); + WARN_ON(rq->bpoll_state & ENIC_POLL_STATE_NAPI); + if (rq->bpoll_state & ENIC_POLL_STATE_POLL_YIELD) + rc = true; + rq->bpoll_state = ENIC_POLL_STATE_IDLE; + spin_unlock_bh(&rq->bpoll_lock); + + return rc; +} + +static inline bool enic_poll_busy_polling(struct vnic_rq *rq) +{ + WARN_ON(!(rq->bpoll_state & ENIC_POLL_LOCKED)); + return rq->bpoll_state & ENIC_POLL_USER_PEND; +} + +#else + +static inline void enic_busy_poll_init_lock(struct vnic_rq *rq) +{ +} + +static inline bool enic_poll_lock_napi(struct vnic_rq *rq) +{ + return true; +} + +static inline bool enic_poll_unlock_napi(struct vnic_rq *rq) +{ + return false; +} + +static inline bool enic_poll_lock_poll(struct vnic_rq *rq) +{ + return false; +} + +static inline bool enic_poll_unlock_poll(struct vnic_rq *rq) +{ + return false; +} + +static inline bool enic_poll_ll_polling(struct vnic_rq *rq) +{ + return false; +} +#endif /* CONFIG_NET_RX_BUSY_POLL */ + void vnic_rq_free(struct vnic_rq *rq); int vnic_rq_alloc(struct vnic_dev *vdev, struct vnic_rq *rq, unsigned int index, unsigned int desc_count, unsigned int desc_size); From 4cfe878537cec0e9c0f84b93cc6aa9526f6942b5 Mon Sep 17 00:00:00 2001 From: Govindarajulu Varadarajan <_govind@gmx.com> Date: Mon, 23 Jun 2014 16:08:05 +0530 Subject: [PATCH 8/8] enic: do tx cleanup in napi poll Till now enic had been doing tx clean in isr. Using napi infrastructure to move the tx clean up out of isr to softirq. Now, wq isr schedules napi poll. In enic_poll_msix_wq we clean up the tx queues. This is applicable only on MSIX. In INTx and MSI we use a single napi to clean both rx & tx queues. Signed-off-by: Govindarajulu Varadarajan <_govind@gmx.com> Signed-off-by: David S.
Miller --- drivers/net/ethernet/cisco/enic/enic.h | 2 +- drivers/net/ethernet/cisco/enic/enic_main.c | 88 +++++++++++++-------- 2 files changed, 54 insertions(+), 36 deletions(-) diff --git a/drivers/net/ethernet/cisco/enic/enic.h b/drivers/net/ethernet/cisco/enic/enic.h index b9b9178e174e..c8aa9fb81d3c 100644 --- a/drivers/net/ethernet/cisco/enic/enic.h +++ b/drivers/net/ethernet/cisco/enic/enic.h @@ -178,7 +178,7 @@ struct enic { unsigned int rq_count; u64 rq_truncated_pkts; u64 rq_bad_fcs; - struct napi_struct napi[ENIC_RQ_MAX]; + struct napi_struct napi[ENIC_RQ_MAX + ENIC_WQ_MAX]; /* interrupt resource cache line section */ ____cacheline_aligned struct vnic_intr intr[ENIC_INTR_MAX]; diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index d4918eef5050..9348febc0743 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -316,40 +316,15 @@ static irqreturn_t enic_isr_msi(int irq, void *data) return IRQ_HANDLED; } -static irqreturn_t enic_isr_msix_rq(int irq, void *data) +static irqreturn_t enic_isr_msix(int irq, void *data) { struct napi_struct *napi = data; - /* schedule NAPI polling for RQ cleanup */ napi_schedule(napi); return IRQ_HANDLED; } -static irqreturn_t enic_isr_msix_wq(int irq, void *data) -{ - struct enic *enic = data; - unsigned int cq; - unsigned int intr; - unsigned int wq_work_to_do = -1; /* no limit */ - unsigned int wq_work_done; - unsigned int wq_irq; - - wq_irq = (u32)irq - enic->msix_entry[enic_msix_wq_intr(enic, 0)].vector; - cq = enic_cq_wq(enic, wq_irq); - intr = enic_msix_wq_intr(enic, wq_irq); - - wq_work_done = vnic_cq_service(&enic->cq[cq], - wq_work_to_do, enic_wq_service, NULL); - - vnic_intr_return_credits(&enic->intr[intr], - wq_work_done, - 1 /* unmask intr */, - 1 /* reset intr timer */); - - return IRQ_HANDLED; -} - static irqreturn_t enic_isr_msix_err(int irq, void *data) { struct enic *enic = data; @@ -1274,7 +1249,36 @@ 
int enic_busy_poll(struct napi_struct *napi) } #endif /* CONFIG_NET_RX_BUSY_POLL */ -static int enic_poll_msix(struct napi_struct *napi, int budget) +static int enic_poll_msix_wq(struct napi_struct *napi, int budget) +{ + struct net_device *netdev = napi->dev; + struct enic *enic = netdev_priv(netdev); + unsigned int wq_index = (napi - &enic->napi[0]) - enic->rq_count; + struct vnic_wq *wq = &enic->wq[wq_index]; + unsigned int cq; + unsigned int intr; + unsigned int wq_work_to_do = -1; /* clean all desc possible */ + unsigned int wq_work_done; + unsigned int wq_irq; + + wq_irq = wq->index; + cq = enic_cq_wq(enic, wq_irq); + intr = enic_msix_wq_intr(enic, wq_irq); + wq_work_done = vnic_cq_service(&enic->cq[cq], wq_work_to_do, + enic_wq_service, NULL); + + vnic_intr_return_credits(&enic->intr[intr], wq_work_done, + 0 /* don't unmask intr */, + 1 /* reset intr timer */); + if (!wq_work_done) { + napi_complete(napi); + vnic_intr_unmask(&enic->intr[intr]); + } + + return 0; +} + +static int enic_poll_msix_rq(struct napi_struct *napi, int budget) { struct net_device *netdev = napi->dev; struct enic *enic = netdev_priv(netdev); @@ -1399,17 +1403,19 @@ static int enic_request_intr(struct enic *enic) snprintf(enic->msix[intr].devname, sizeof(enic->msix[intr].devname), "%.11s-rx-%d", netdev->name, i); - enic->msix[intr].isr = enic_isr_msix_rq; + enic->msix[intr].isr = enic_isr_msix; enic->msix[intr].devid = &enic->napi[i]; } for (i = 0; i < enic->wq_count; i++) { + int wq = enic_cq_wq(enic, i); + intr = enic_msix_wq_intr(enic, i); snprintf(enic->msix[intr].devname, sizeof(enic->msix[intr].devname), "%.11s-tx-%d", netdev->name, i); - enic->msix[intr].isr = enic_isr_msix_wq; - enic->msix[intr].devid = enic; + enic->msix[intr].isr = enic_isr_msix; + enic->msix[intr].devid = &enic->napi[wq]; } intr = enic_msix_err_intr(enic); @@ -1585,7 +1591,9 @@ static int enic_open(struct net_device *netdev) enic_busy_poll_init_lock(&enic->rq[i]); napi_enable(&enic->napi[i]); } - + if 
(vnic_dev_get_intr_mode(enic->vdev) == VNIC_DEV_INTR_MODE_MSIX) + for (i = 0; i < enic->wq_count; i++) + napi_enable(&enic->napi[enic_cq_wq(enic, i)]); enic_dev_enable(enic); for (i = 0; i < enic->intr_count; i++) @@ -1633,6 +1641,9 @@ static int enic_stop(struct net_device *netdev) netif_carrier_off(netdev); netif_tx_disable(netdev); + if (vnic_dev_get_intr_mode(enic->vdev) == VNIC_DEV_INTR_MODE_MSIX) + for (i = 0; i < enic->wq_count; i++) + napi_disable(&enic->napi[enic_cq_wq(enic, i)]); if (!enic_is_dynamic(enic) && !enic_is_sriov_vf(enic)) enic_dev_del_station_addr(enic); @@ -1752,13 +1763,14 @@ static void enic_poll_controller(struct net_device *netdev) case VNIC_DEV_INTR_MODE_MSIX: for (i = 0; i < enic->rq_count; i++) { intr = enic_msix_rq_intr(enic, i); - enic_isr_msix_rq(enic->msix_entry[intr].vector, - &enic->napi[i]); + enic_isr_msix(enic->msix_entry[intr].vector, + &enic->napi[i]); } for (i = 0; i < enic->wq_count; i++) { intr = enic_msix_wq_intr(enic, i); - enic_isr_msix_wq(enic->msix_entry[intr].vector, enic); + enic_isr_msix(enic->msix_entry[intr].vector, + &enic->napi[enic_cq_wq(enic, i)]); } break; @@ -2159,6 +2171,9 @@ static void enic_dev_deinit(struct enic *enic) napi_hash_del(&enic->napi[i]); netif_napi_del(&enic->napi[i]); } + if (vnic_dev_get_intr_mode(enic->vdev) == VNIC_DEV_INTR_MODE_MSIX) + for (i = 0; i < enic->wq_count; i++) + netif_napi_del(&enic->napi[enic_cq_wq(enic, i)]); enic_free_vnic_resources(enic); enic_clear_intr_mode(enic); @@ -2229,9 +2244,12 @@ static int enic_dev_init(struct enic *enic) case VNIC_DEV_INTR_MODE_MSIX: for (i = 0; i < enic->rq_count; i++) { netif_napi_add(netdev, &enic->napi[i], - enic_poll_msix, 64); + enic_poll_msix_rq, NAPI_POLL_WEIGHT); napi_hash_add(&enic->napi[i]); } + for (i = 0; i < enic->wq_count; i++) + netif_napi_add(netdev, &enic->napi[enic_cq_wq(enic, i)], + enic_poll_msix_wq, NAPI_POLL_WEIGHT); break; }