From 26a80f6e542e6b86c9ad9c3d217d74ca8c3fed66 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 6 Aug 2015 16:41:52 +0200 Subject: [PATCH 1/7] mlxsw: Call free_netdev when removing port When removing a port's netdevice we should also free the memory allocated by alloc_etherdev(). Do this by calling free_netdev() at the end of the teardown sequence. Reported-by: Or Gerlitz Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/switchx2.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c index 29b46eef9769..687f5cbef25b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c +++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c @@ -1079,6 +1079,7 @@ static void mlxsw_sx_port_remove(struct mlxsw_sx *mlxsw_sx, u8 local_port) unregister_netdev(mlxsw_sx_port->dev); /* This calls ndo_stop */ mlxsw_sx_port_swid_set(mlxsw_sx_port, MLXSW_PORT_SWID_DISABLED_PORT); free_percpu(mlxsw_sx_port->pcpu_stats); + free_netdev(mlxsw_sx_port->dev); } static void mlxsw_sx_ports_remove(struct mlxsw_sx *mlxsw_sx) From e61011b5e044b15727fc6ee330c17f1fc69faef8 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 6 Aug 2015 16:41:53 +0200 Subject: [PATCH 2/7] mlxsw: Make system port to local port mapping explicit System ports are unique identifiers in a multi-ASIC environment that represent all the available ports in the system. Local ports on the other hand, are unique only within the local ASIC. Since system port to local port mapping is not part of the HW-SW contract and since only single-ASIC configurations are currently supported, set an explicit 1:1 mapping by configuring the Switch System Port Record (SSPR) register. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 60 +++++++++++++++++++ .../net/ethernet/mellanox/mlxsw/switchx2.c | 18 ++++++ 2 files changed, 78 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index b5a72f8e78b1..096e1c12175a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -150,6 +150,64 @@ static inline void mlxsw_reg_smid_pack(char *payload, u16 mid) mlxsw_reg_smid_port_mask_set(payload, MLXSW_PORT_CPU_PORT, 1); } +/* SSPR - Switch System Port Record Register + * ----------------------------------------- + * Configures the system port to local port mapping. + */ +#define MLXSW_REG_SSPR_ID 0x2008 +#define MLXSW_REG_SSPR_LEN 0x8 + +static const struct mlxsw_reg_info mlxsw_reg_sspr = { + .id = MLXSW_REG_SSPR_ID, + .len = MLXSW_REG_SSPR_LEN, +}; + +/* reg_sspr_m + * Master - if set, then the record describes the master system port. + * This is needed in case a local port is mapped into several system ports + * (for multipathing). That number will be reported as the source system + * port when packets are forwarded to the CPU. Only one master port is allowed + * per local port. + * + * Note: Must be set for Spectrum. + * Access: RW + */ +MLXSW_ITEM32(reg, sspr, m, 0x00, 31, 1); + +/* reg_sspr_local_port + * Local port number. + * + * Access: RW + */ +MLXSW_ITEM32(reg, sspr, local_port, 0x00, 16, 8); + +/* reg_sspr_sub_port + * Virtual port within the physical port. + * Should be set to 0 when virtual ports are not enabled on the port. + * + * Access: RW + */ +MLXSW_ITEM32(reg, sspr, sub_port, 0x00, 8, 8); + +/* reg_sspr_system_port + * Unique identifier within the stacking domain that represents all the ports + * that are available in the system (external ports). + * + * Currently, only single-ASIC configurations are supported, so we default to + * 1:1 mapping between system ports and local ports. + * Access: Index + */ +MLXSW_ITEM32(reg, sspr, system_port, 0x04, 0, 16); + +static inline void mlxsw_reg_sspr_pack(char *payload, u8 local_port) +{ + MLXSW_REG_ZERO(sspr, payload); + mlxsw_reg_sspr_m_set(payload, 1); + mlxsw_reg_sspr_local_port_set(payload, local_port); + mlxsw_reg_sspr_sub_port_set(payload, 0); + mlxsw_reg_sspr_system_port_set(payload, local_port); +} + /* SPMS - Switch Port MSTP/RSTP State Register * ------------------------------------------- * Configures the spanning tree state of a physical port. @@ -1216,6 +1274,8 @@ static inline const char *mlxsw_reg_id_str(u16 reg_id) return "SPAD"; case MLXSW_REG_SMID_ID: return "SMID"; + case MLXSW_REG_SSPR_ID: + return "SSPR"; case MLXSW_REG_SPMS_ID: return "SPMS"; case MLXSW_REG_SFGC_ID: diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c index 687f5cbef25b..7eb045ef797a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c +++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c @@ -245,6 +245,16 @@ static int mlxsw_sx_port_swid_set(struct mlxsw_sx_port *mlxsw_sx_port, u8 swid) return mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(pspa), pspa_pl); } +static int +mlxsw_sx_port_system_port_mapping_set(struct mlxsw_sx_port *mlxsw_sx_port) +{ + struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx; + char sspr_pl[MLXSW_REG_SSPR_LEN]; + + mlxsw_reg_sspr_pack(sspr_pl, mlxsw_sx_port->local_port); + return mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(sspr), sspr_pl); +} + static int mlxsw_sx_port_module_check(struct mlxsw_sx_port *mlxsw_sx_port, bool *p_usable) { @@ -1001,6 +1011,13 @@ static int mlxsw_sx_port_create(struct mlxsw_sx *mlxsw_sx, u8 local_port) goto port_not_usable; } + err = mlxsw_sx_port_system_port_mapping_set(mlxsw_sx_port); + if (err) { + dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to set system port mapping\n", + mlxsw_sx_port->local_port); + goto err_port_system_port_mapping_set; + } + err = mlxsw_sx_port_swid_set(mlxsw_sx_port, 0); if (err) { dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to set SWID\n", @@ -1061,6 +1078,7 @@ err_port_stp_state_set: err_port_mtu_set: err_port_speed_set: err_port_swid_set: +err_port_system_port_mapping_set: port_not_usable: err_port_module_check: err_dev_addr_get: From 74ed207e2a82dece0de0849df62cd450dec9d4d7 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 6 Aug 2015 16:41:54 +0200 Subject: [PATCH 3/7] mlxsw: Make pci module dependent on HAS_DMA and HAS_IOMEM This resolves compile errors on um-allyesconfig. Note that there are many other drivers which have the same issue. Reported-by: kbuild test robot Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig b/drivers/net/ethernet/mellanox/mlxsw/Kconfig index 8d1080da49b5..2941d9c5ae48 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Kconfig +++ b/drivers/net/ethernet/mellanox/mlxsw/Kconfig @@ -12,7 +12,7 @@ config MLXSW_CORE config MLXSW_PCI tristate "PCI bus implementation for Mellanox Technologies Switch ASICs" - depends on PCI && MLXSW_CORE + depends on PCI && HAS_DMA && HAS_IOMEM && MLXSW_CORE default m ---help--- This is PCI bus implementation for Mellanox Technologies Switch ASICs. From 7b7b9cff7468fdf38ab189dde6ecb4750b171bc8 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 6 Aug 2015 16:41:55 +0200 Subject: [PATCH 4/7] mlxsw: Strip FCS from incoming packets FCS of incoming packets is already checked by HW. Just strip it out. Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/pci.c | 6 +++++- drivers/net/ethernet/mellanox/mlxsw/pci.h | 6 ++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 298ead5b42ca..3ec52ea10f83 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -667,6 +667,7 @@ static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci, char *wqe; struct sk_buff *skb; struct mlxsw_rx_info rx_info; + u16 byte_count; int err; elem_info = mlxsw_pci_queue_elem_info_consumer_get(q); @@ -686,7 +687,10 @@ static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci, rx_info.sys_port = mlxsw_pci_cqe_system_port_get(cqe); rx_info.trap_id = mlxsw_pci_cqe_trap_id_get(cqe); - skb_put(skb, mlxsw_pci_cqe_byte_count_get(cqe)); + byte_count = mlxsw_pci_cqe_byte_count_get(cqe); + if (mlxsw_pci_cqe_crc_get(cqe)) + byte_count -= ETH_FCS_LEN; + skb_put(skb, byte_count); mlxsw_core_skb_receive(mlxsw_pci->core, skb, &rx_info); put_new_skb: diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.h b/drivers/net/ethernet/mellanox/mlxsw/pci.h index 887af8459149..1ef9664b4512 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.h +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.h @@ -155,6 +155,12 @@ MLXSW_ITEM32(pci, cqe, byte_count, 0x04, 0, 14); */ MLXSW_ITEM32(pci, cqe, trap_id, 0x08, 0, 8); +/* pci_cqe_crc + * Length include CRC. Indicates the length field includes + * the packet's CRC. + */ +MLXSW_ITEM32(pci, cqe, crc, 0x0C, 8, 1); + /* pci_cqe_e * CQE with Error. */ From d003462a50de8605e66be0966e6370ab9821e07e Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 6 Aug 2015 16:41:56 +0200 Subject: [PATCH 5/7] mlxsw: Simplify mlxsw_sx_port_xmit function Previously we only checked if the transmission queue is not full in the middle of the xmit function. This lead to complex logic due to the fact that sometimes we need to reallocate the headroom for our Tx header. Allow the switch driver to know if the transmission queue is not full before sending the packet and remove this complex logic. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/core.c | 10 +++++++ drivers/net/ethernet/mellanox/mlxsw/core.h | 5 ++++ drivers/net/ethernet/mellanox/mlxsw/pci.c | 20 ++++++++++---- .../net/ethernet/mellanox/mlxsw/switchx2.c | 27 ++++++++----------- 4 files changed, 41 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index ad66ae44a0d7..7562802bc6c8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -865,6 +865,16 @@ static struct mlxsw_core *__mlxsw_core_get(void *driver_priv) return container_of(driver_priv, struct mlxsw_core, driver_priv); } +bool mlxsw_core_skb_transmit_busy(void *driver_priv, + const struct mlxsw_tx_info *tx_info) +{ + struct mlxsw_core *mlxsw_core = __mlxsw_core_get(driver_priv); + + return mlxsw_core->bus->skb_transmit_busy(mlxsw_core->bus_priv, + tx_info); +} +EXPORT_SYMBOL(mlxsw_core_skb_transmit_busy); + int mlxsw_core_skb_transmit(void *driver_priv, struct sk_buff *skb, const struct mlxsw_tx_info *tx_info) { diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index 2280b319c362..165808471188 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -73,6 +73,9 @@ struct mlxsw_tx_info { bool is_emad; }; +bool mlxsw_core_skb_transmit_busy(void *driver_priv, + const struct mlxsw_tx_info *tx_info); + int mlxsw_core_skb_transmit(void *driver_priv, struct sk_buff *skb, const struct mlxsw_tx_info *tx_info); @@ -177,6 +180,8 @@ struct mlxsw_bus { int (*init)(void *bus_priv, struct mlxsw_core *mlxsw_core, const struct mlxsw_config_profile *profile); void (*fini)(void *bus_priv); + bool (*skb_transmit_busy)(void *bus_priv, + const struct mlxsw_tx_info *tx_info); int (*skb_transmit)(void *bus_priv, struct sk_buff *skb, const struct mlxsw_tx_info *tx_info); int (*cmd_exec)(void *bus_priv, u16 opcode, u8 opcode_mod, diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 3ec52ea10f83..a34f4742aa00 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -1443,6 +1443,15 @@ mlxsw_pci_sdq_pick(struct mlxsw_pci *mlxsw_pci, return mlxsw_pci_sdq_get(mlxsw_pci, sdqn); } +static bool mlxsw_pci_skb_transmit_busy(void *bus_priv, + const struct mlxsw_tx_info *tx_info) +{ + struct mlxsw_pci *mlxsw_pci = bus_priv; + struct mlxsw_pci_queue *q = mlxsw_pci_sdq_pick(mlxsw_pci, tx_info); + + return !mlxsw_pci_queue_elem_info_producer_get(q); +} + static int mlxsw_pci_skb_transmit(void *bus_priv, struct sk_buff *skb, const struct mlxsw_tx_info *tx_info) { @@ -1625,11 +1634,12 @@ err_in_mbox_map: } static const struct mlxsw_bus mlxsw_pci_bus = { - .kind = "pci", - .init = mlxsw_pci_init, - .fini = mlxsw_pci_fini, - .skb_transmit = mlxsw_pci_skb_transmit, - .cmd_exec = mlxsw_pci_cmd_exec, + .kind = "pci", + .init = mlxsw_pci_init, + .fini = mlxsw_pci_fini, + .skb_transmit_busy = mlxsw_pci_skb_transmit_busy, + .skb_transmit = mlxsw_pci_skb_transmit, + .cmd_exec = mlxsw_pci_cmd_exec, }; static int mlxsw_pci_sw_reset(struct mlxsw_pci *mlxsw_pci) diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c index 7eb045ef797a..fce9b453433b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c +++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c @@ -300,31 +300,26 @@ static netdev_tx_t mlxsw_sx_port_xmit(struct sk_buff *skb, .local_port = mlxsw_sx_port->local_port, .is_emad = false, }; - struct sk_buff *skb_old = NULL; int err; - if (unlikely(skb_headroom(skb) < MLXSW_TXHDR_LEN)) { - struct sk_buff *skb_new; + if (mlxsw_core_skb_transmit_busy(mlxsw_sx, &tx_info)) + return NETDEV_TX_BUSY; - skb_old = skb; - skb_new = skb_realloc_headroom(skb, MLXSW_TXHDR_LEN); - if (!skb_new) { + if (unlikely(skb_headroom(skb) < MLXSW_TXHDR_LEN)) { + struct sk_buff *skb_orig = skb; + + skb = skb_realloc_headroom(skb, MLXSW_TXHDR_LEN); + if (!skb) { this_cpu_inc(mlxsw_sx_port->pcpu_stats->tx_dropped); - dev_kfree_skb_any(skb_old); + dev_kfree_skb_any(skb_orig); return NETDEV_TX_OK; } - skb = skb_new; } mlxsw_sx_txhdr_construct(skb, &tx_info); + /* Due to a race we might fail here because of a full queue. In that + * unlikely case we simply drop the packet. + */ err = mlxsw_core_skb_transmit(mlxsw_sx, skb, &tx_info); - if (err == -EAGAIN) { - if (skb_old) - dev_kfree_skb_any(skb); - return NETDEV_TX_BUSY; - } - - if (skb_old) - dev_kfree_skb_any(skb_old); if (!err) { pcpu_stats = this_cpu_ptr(mlxsw_sx_port->pcpu_stats); From 3bfcd34764cbd3bcb48f6e9009eacf1d19856213 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 6 Aug 2015 16:41:57 +0200 Subject: [PATCH 6/7] mlxsw: Use correct skb length when dumping payload Do not use the length of the transmitted skb (which was freed), but that of the response skb. This issue was discovered using the Kernel Address sanitizer (KASan). Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 7562802bc6c8..09325b72d524 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -1073,7 +1073,7 @@ static int mlxsw_core_reg_access_emad(struct mlxsw_core *mlxsw_core, mlxsw_core->emad.tid - 1); mlxsw_core_buf_dump_dbg(mlxsw_core, mlxsw_core->emad.resp_skb->data, - skb->len); + mlxsw_core->emad.resp_skb->len); dev_kfree_skb(mlxsw_core->emad.resp_skb); } From e577516b9db3e0f19df82b6430fe3b06e05d6304 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 6 Aug 2015 16:41:58 +0200 Subject: [PATCH 7/7] mlxsw: Fix use-after-free bug in mlxsw_sx_port_xmit Store the length of the skb before transmitting it and use it for stats instead of skb->len, since skb might have been freed already. This issue was discovered using the Kernel Address sanitizer (KASan). Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/switchx2.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c index fce9b453433b..3e52ee93438c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c +++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c @@ -300,6 +300,7 @@ static netdev_tx_t mlxsw_sx_port_xmit(struct sk_buff *skb, .local_port = mlxsw_sx_port->local_port, .is_emad = false, }; + u64 len; int err; if (mlxsw_core_skb_transmit_busy(mlxsw_sx, &tx_info)) @@ -316,6 +317,7 @@ static netdev_tx_t mlxsw_sx_port_xmit(struct sk_buff *skb, } } mlxsw_sx_txhdr_construct(skb, &tx_info); + len = skb->len; /* Due to a race we might fail here because of a full queue. In that * unlikely case we simply drop the packet. */ @@ -325,7 +327,7 @@ static netdev_tx_t mlxsw_sx_port_xmit(struct sk_buff *skb, pcpu_stats = this_cpu_ptr(mlxsw_sx_port->pcpu_stats); u64_stats_update_begin(&pcpu_stats->syncp); pcpu_stats->tx_packets++; - pcpu_stats->tx_bytes += skb->len; + pcpu_stats->tx_bytes += len; u64_stats_update_end(&pcpu_stats->syncp); } else { this_cpu_inc(mlxsw_sx_port->pcpu_stats->tx_dropped);