Skip to content

Commit

Permalink
net/mlx5: Avoid double clear or set of sync reset requested
Browse files Browse the repository at this point in the history
Double clear of reset requested state can lead to NULL pointer as it
will try to delete the timer twice. This can happen for example on a
race between abort from FW and pci error or reset. Avoid such case using
test_and_clear_bit() to verify only one time reset requested state clear
flow. Similarly use test_and_set_bit() to verify only one time reset
requested state set flow.

Fixes: 7dd6df3 ("net/mlx5: Handle sync reset abort event")
Signed-off-by: Moshe Shemesh <[email protected]>
Reviewed-by: Maher Sanalla <[email protected]>
Reviewed-by: Shay Drory <[email protected]>
Signed-off-by: Saeed Mahameed <[email protected]>
  • Loading branch information
mosheshemesh2 authored and Saeed Mahameed committed May 4, 2022
1 parent cb7786a commit fc3d3db
Showing 1 changed file with 19 additions and 9 deletions.
28 changes: 19 additions & 9 deletions drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
Original file line number Diff line number Diff line change
Expand Up @@ -162,14 +162,19 @@ static void mlx5_stop_sync_reset_poll(struct mlx5_core_dev *dev)
del_timer_sync(&fw_reset->timer);
}

static void mlx5_sync_reset_clear_reset_requested(struct mlx5_core_dev *dev, bool poll_health)
static int mlx5_sync_reset_clear_reset_requested(struct mlx5_core_dev *dev, bool poll_health)
{
struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;

if (!test_and_clear_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags)) {
mlx5_core_warn(dev, "Reset request was already cleared\n");
return -EALREADY;
}

mlx5_stop_sync_reset_poll(dev);
clear_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags);
if (poll_health)
mlx5_start_health_poll(dev);
return 0;
}

static void mlx5_sync_reset_reload_work(struct work_struct *work)
Expand Down Expand Up @@ -229,13 +234,17 @@ static int mlx5_fw_reset_set_reset_sync_nack(struct mlx5_core_dev *dev)
return mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL3, 0, 2, false);
}

static void mlx5_sync_reset_set_reset_requested(struct mlx5_core_dev *dev)
static int mlx5_sync_reset_set_reset_requested(struct mlx5_core_dev *dev)
{
struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;

if (test_and_set_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags)) {
mlx5_core_warn(dev, "Reset request was already set\n");
return -EALREADY;
}
mlx5_stop_health_poll(dev, true);
set_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags);
mlx5_start_sync_reset_poll(dev);
return 0;
}

static void mlx5_fw_live_patch_event(struct work_struct *work)
Expand Down Expand Up @@ -264,7 +273,9 @@ static void mlx5_sync_reset_request_event(struct work_struct *work)
err ? "Failed" : "Sent");
return;
}
mlx5_sync_reset_set_reset_requested(dev);
if (mlx5_sync_reset_set_reset_requested(dev))
return;

err = mlx5_fw_reset_set_reset_sync_ack(dev);
if (err)
mlx5_core_warn(dev, "PCI Sync FW Update Reset Ack Failed. Error code: %d\n", err);
Expand Down Expand Up @@ -362,7 +373,8 @@ static void mlx5_sync_reset_now_event(struct work_struct *work)
struct mlx5_core_dev *dev = fw_reset->dev;
int err;

mlx5_sync_reset_clear_reset_requested(dev, false);
if (mlx5_sync_reset_clear_reset_requested(dev, false))
return;

mlx5_core_warn(dev, "Sync Reset now. Device is going to reset.\n");

Expand Down Expand Up @@ -391,10 +403,8 @@ static void mlx5_sync_reset_abort_event(struct work_struct *work)
reset_abort_work);
struct mlx5_core_dev *dev = fw_reset->dev;

if (!test_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags))
if (mlx5_sync_reset_clear_reset_requested(dev, true))
return;

mlx5_sync_reset_clear_reset_requested(dev, true);
mlx5_core_warn(dev, "PCI Sync FW Update Reset Aborted.\n");
}

Expand Down

0 comments on commit fc3d3db

Please sign in to comment.