Skip to content

Commit b898ce7

Browse files
author
Saeed Mahameed
committed
net/mlx5: cmdif, Avoid skipping reclaim pages if FW is not accessible
In case the PCI channel is offline, reclaim_pages_cmd() will still try to call the FW to release FW pages; cmd_exec() in this case will return a silent success without actually calling the FW. This is wrong and will cause page leaks. What we should do is detect PCI offline or an unavailable command interface before trying to access the FW, and manually release the FW pages in the driver. In this patch we share the code that checks for FW command interface availability and call it in sensitive places, e.g. reclaim_pages_cmd(). Alternative fix: 1. Remove MLX5_CMD_OP_MANAGE_PAGES from the mlx5_internal_err_ret_value() command success simulation list. 2. Always release FW pages even if cmd_exec() fails in reclaim_pages_cmd(). Reviewed-by: Moshe Shemesh <moshe@nvidia.com> Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
1 parent 410bd75 commit b898ce7

3 files changed

Lines changed: 11 additions & 9 deletions

File tree

drivers/net/ethernet/mellanox/mlx5/core/cmd.c

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -902,6 +902,13 @@ static int cmd_alloc_index_retry(struct mlx5_cmd *cmd)
902902
return idx;
903903
}
904904

905+
/*
 * mlx5_cmd_is_down - report whether the FW command interface is unusable.
 *
 * Returns true if any of the following holds: pci_channel_offline() reports
 * true for dev->pdev, dev->cmd.state is not MLX5_CMDIF_STATE_UP, or
 * dev->state is MLX5_DEVICE_STATE_INTERNAL_ERROR. Returns false otherwise.
 */
bool mlx5_cmd_is_down(struct mlx5_core_dev *dev)
906+
{
907+
return pci_channel_offline(dev->pdev) ||
908+
dev->cmd.state != MLX5_CMDIF_STATE_UP ||
909+
dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR;
910+
}
911+
905912
static void cmd_work_handler(struct work_struct *work)
906913
{
907914
struct mlx5_cmd_work_ent *ent = container_of(work, struct mlx5_cmd_work_ent, work);
@@ -967,10 +974,7 @@ static void cmd_work_handler(struct work_struct *work)
967974
set_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state);
968975

969976
/* Skip sending command to fw if internal error */
970-
if (pci_channel_offline(dev->pdev) ||
971-
dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR ||
972-
cmd->state != MLX5_CMDIF_STATE_UP ||
973-
!opcode_allowed(&dev->cmd, ent->op)) {
977+
if (mlx5_cmd_is_down(dev) || !opcode_allowed(&dev->cmd, ent->op)) {
974978
u8 status = 0;
975979
u32 drv_synd;
976980

@@ -1800,10 +1804,7 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
18001804
u8 token;
18011805

18021806
opcode = MLX5_GET(mbox_in, in, opcode);
1803-
if (pci_channel_offline(dev->pdev) ||
1804-
dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR ||
1805-
dev->cmd.state != MLX5_CMDIF_STATE_UP ||
1806-
!opcode_allowed(&dev->cmd, opcode)) {
1807+
if (mlx5_cmd_is_down(dev) || !opcode_allowed(&dev->cmd, opcode)) {
18071808
err = mlx5_internal_err_ret_value(dev, opcode, &drv_synd, &status);
18081809
MLX5_SET(mbox_out, out, status, status);
18091810
MLX5_SET(mbox_out, out, syndrome, drv_synd);

drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -432,7 +432,7 @@ static int reclaim_pages_cmd(struct mlx5_core_dev *dev,
432432
u32 npages;
433433
u32 i = 0;
434434

435-
if (dev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR)
435+
if (!mlx5_cmd_is_down(dev))
436436
return mlx5_cmd_exec(dev, in, in_size, out, out_size);
437437

438438
/* No hard feelings, we want our pages back! */

include/linux/mlx5/driver.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -935,6 +935,7 @@ int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
935935
int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size,
936936
void *out, int out_size);
937937
void mlx5_cmd_mbox_status(void *out, u8 *status, u32 *syndrome);
938+
bool mlx5_cmd_is_down(struct mlx5_core_dev *dev);
938939

939940
int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type);
940941
int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn);

0 commit comments

Comments
 (0)