net/mlx4_core: Refactor the catas flow to work per device
Using a WQ per device instead of a single global WQ allows independent reset handling per device, even when SRIOV is used. This comes as a pre-patch for supporting chip reset for both native and SRIOV.

Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent dd0eefe3ab
commit ad9a0bf08f
4 changed files with 49 additions and 45 deletions
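The pattern the patch moves to is a work item plus a dedicated workqueue embedded in each device's persistent structure, so a catastrophic-error reset queued for one device is handled independently of every other device. A minimal sketch of that ownership model, with hypothetical foo_* names standing in for the driver's persist->catas_work and persist->catas_wq (not the driver's actual code):

#include <linux/workqueue.h>

/* Per-device state: each device owns its reset work and its own queue. */
struct foo_device {
        struct work_struct       reset_work;    /* plays the role of catas_work */
        struct workqueue_struct *reset_wq;      /* plays the role of catas_wq   */
};

/*
 * Called when a catastrophic error is detected on this device.  The work
 * is queued on the device's own queue, so a slow or stuck reset on one
 * device never delays the reset of another device.
 */
void foo_trigger_reset(struct foo_device *dev)
{
        queue_work(dev->reset_wq, &dev->reset_work);
}

Under the old scheme every device queued the same global work item on the shared mlx4_wq; with a per-device work item and queue, resets for different PCI functions no longer serialize behind one another.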
--- a/drivers/net/ethernet/mellanox/mlx4/catas.c
+++ b/drivers/net/ethernet/mellanox/mlx4/catas.c
@@ -40,10 +40,7 @@ enum {
         MLX4_CATAS_POLL_INTERVAL = 5 * HZ,
 };
 
-static DEFINE_SPINLOCK(catas_lock);
 
-static LIST_HEAD(catas_list);
-static struct work_struct catas_work;
 
 static int internal_err_reset = 1;
 module_param(internal_err_reset, int, 0644);
@@ -77,13 +74,9 @@ static void poll_catas(unsigned long dev_ptr)
                         dump_err_buf(dev);
                         mlx4_dispatch_event(dev, MLX4_DEV_EVENT_CATASTROPHIC_ERROR, 0);
 
-                        if (internal_err_reset) {
-                                spin_lock(&catas_lock);
-                                list_add(&priv->catas_err.list, &catas_list);
-                                spin_unlock(&catas_lock);
-
-                                queue_work(mlx4_wq, &catas_work);
-                        }
+                        if (internal_err_reset)
+                                queue_work(dev->persist->catas_wq,
+                                           &dev->persist->catas_work);
                 }
         } else
                 mod_timer(&priv->catas_err.timer,
@@ -92,34 +85,23 @@ static void poll_catas(unsigned long dev_ptr)
 
 static void catas_reset(struct work_struct *work)
 {
-        struct mlx4_priv *priv, *tmppriv;
-        struct mlx4_dev *dev;
-        struct mlx4_dev_persistent *persist;
-        LIST_HEAD(tlist);
+        struct mlx4_dev_persistent *persist =
+                container_of(work, struct mlx4_dev_persistent,
+                             catas_work);
+        struct pci_dev *pdev = persist->pdev;
         int ret;
 
-        spin_lock_irq(&catas_lock);
-        list_splice_init(&catas_list, &tlist);
-        spin_unlock_irq(&catas_lock);
+        /* If the device is off-line, we cannot reset it */
+        if (pci_channel_offline(pdev))
+                return;
 
-        list_for_each_entry_safe(priv, tmppriv, &tlist, catas_err.list) {
-                struct pci_dev *pdev = priv->dev.persist->pdev;
-
-                /* If the device is off-line, we cannot reset it */
-                if (pci_channel_offline(pdev))
-                        continue;
-
-                ret = mlx4_restart_one(priv->dev.persist->pdev);
-                /* 'priv' now is not valid */
-                if (ret)
-                        pr_err("mlx4 %s: Reset failed (%d)\n",
-                               pci_name(pdev), ret);
-                else {
-                        persist = pci_get_drvdata(pdev);
-                        mlx4_dbg(persist->dev, "Reset succeeded\n");
-                }
-        }
+        ret = mlx4_restart_one(pdev);
+        /* 'priv' now is not valid */
+        if (ret)
+                pr_err("mlx4 %s: Reset failed (%d)\n",
+                       pci_name(pdev), ret);
+        else
+                mlx4_dbg(persist->dev, "Reset succeeded\n");
 }
 
 void mlx4_start_catas_poll(struct mlx4_dev *dev)
@@ -158,15 +140,26 @@ void mlx4_stop_catas_poll(struct mlx4_dev *dev)
 
         del_timer_sync(&priv->catas_err.timer);
 
-        if (priv->catas_err.map)
+        if (priv->catas_err.map) {
                 iounmap(priv->catas_err.map);
-
-        spin_lock_irq(&catas_lock);
-        list_del(&priv->catas_err.list);
-        spin_unlock_irq(&catas_lock);
+                priv->catas_err.map = NULL;
+        }
 }
 
-void __init mlx4_catas_init(void)
+int mlx4_catas_init(struct mlx4_dev *dev)
 {
-        INIT_WORK(&catas_work, catas_reset);
+        INIT_WORK(&dev->persist->catas_work, catas_reset);
+        dev->persist->catas_wq = create_singlethread_workqueue("mlx4_health");
+        if (!dev->persist->catas_wq)
+                return -ENOMEM;
+
+        return 0;
+}
+
+void mlx4_catas_end(struct mlx4_dev *dev)
+{
+        if (dev->persist->catas_wq) {
+                destroy_workqueue(dev->persist->catas_wq);
+                dev->persist->catas_wq = NULL;
+        }
 }
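Two idioms in the catas.c changes are worth calling out: the work handler recovers its owning mlx4_dev_persistent from the work_struct with container_of() instead of walking a global catas_list under a lock, and mlx4_catas_init()/mlx4_catas_end() tie the queue's lifetime to the device. A self-contained sketch of both, again with hypothetical foo_* names; note that destroy_workqueue() drains any still-pending work before freeing the queue:

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/workqueue.h>

struct foo_device {
        struct work_struct       reset_work;
        struct workqueue_struct *reset_wq;
        /* ... the rest of the per-device state ... */
};

/*
 * The work_struct is embedded in foo_device, so container_of() hands the
 * handler exactly the device that queued this work item -- no global list,
 * no lock, no iteration over other devices.
 */
static void foo_reset_handler(struct work_struct *work)
{
        struct foo_device *dev = container_of(work, struct foo_device,
                                              reset_work);

        /* reset only this device */
        (void)dev;
}

int foo_reset_init(struct foo_device *dev)
{
        INIT_WORK(&dev->reset_work, foo_reset_handler);

        dev->reset_wq = create_singlethread_workqueue("foo_health");
        if (!dev->reset_wq)
                return -ENOMEM;

        return 0;
}

void foo_reset_end(struct foo_device *dev)
{
        if (dev->reset_wq) {
                /* Drains any still-queued reset work, then frees the queue. */
                destroy_workqueue(dev->reset_wq);
                dev->reset_wq = NULL;
        }
}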
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -3064,11 +3064,19 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data,
                 }
         }
 
-        err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv);
+        err = mlx4_catas_init(&priv->dev);
         if (err)
                 goto err_release_regions;
 
+        err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv);
+        if (err)
+                goto err_catas;
+
         return 0;
 
+err_catas:
+        mlx4_catas_end(&priv->dev);
+
 err_release_regions:
         pci_release_regions(pdev);
@@ -3219,6 +3227,7 @@ static void mlx4_remove_one(struct pci_dev *pdev)
         struct mlx4_priv *priv = mlx4_priv(dev);
 
         mlx4_unload_one(pdev);
+        mlx4_catas_end(dev);
         pci_release_regions(pdev);
         pci_disable_device(pdev);
         kfree(dev->persist);
@@ -3403,7 +3412,6 @@ static int __init mlx4_init(void)
         if (mlx4_verify_params())
                 return -EINVAL;
 
-        mlx4_catas_init();
 
         mlx4_wq = create_singlethread_workqueue("mlx4");
         if (!mlx4_wq)
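In __mlx4_init_one() the new call is placed before mlx4_load_one() so that a failed load can unwind it through the new err_catas label, and mlx4_remove_one() tears the queue down only after mlx4_unload_one(). A small sketch of that goto-unwind ordering, using hypothetical helpers (declarations only; foo_reset_init(), foo_load() and foo_reset_end() stand in for mlx4_catas_init(), mlx4_load_one() and mlx4_catas_end()):

struct foo_device;

int foo_reset_init(struct foo_device *dev);
int foo_load(struct foo_device *dev);
void foo_reset_end(struct foo_device *dev);

int foo_probe(struct foo_device *dev)
{
        int err;

        err = foo_reset_init(dev);      /* must exist before anything can queue a reset */
        if (err)
                return err;

        err = foo_load(dev);
        if (err)
                goto err_reset;         /* unwind in reverse order of setup */

        return 0;

err_reset:
        foo_reset_end(dev);
        return err;
}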
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -995,7 +995,8 @@ void __mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn);
 
 void mlx4_start_catas_poll(struct mlx4_dev *dev);
 void mlx4_stop_catas_poll(struct mlx4_dev *dev);
-void mlx4_catas_init(void);
+int mlx4_catas_init(struct mlx4_dev *dev);
+void mlx4_catas_end(struct mlx4_dev *dev);
 int mlx4_restart_one(struct pci_dev *pdev);
 int mlx4_register_device(struct mlx4_dev *dev);
 void mlx4_unregister_device(struct mlx4_dev *dev);
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -751,6 +751,8 @@ struct mlx4_dev_persistent {
         int                      num_vfs;
         enum mlx4_port_type      curr_port_type[MLX4_MAX_PORTS + 1];
         enum mlx4_port_type      curr_port_poss_type[MLX4_MAX_PORTS + 1];
+        struct work_struct       catas_work;
+        struct workqueue_struct *catas_wq;
 };
 
 struct mlx4_dev {