Skip to content

Commit

Permalink
vfio/mlx5: Refactor to enable VFs migration in parallel
Browse files Browse the repository at this point in the history
Refactor to enable different VFs to run their commands over the PF
command interface in parallel and to not block one each other.

This is done by not using the global PF lock that was used before but
relying on the VF attach/detach mechanism to sync.

Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Yishai Hadas <[email protected]>
Reviewed-by: Alex Williamson <[email protected]>
Signed-off-by: Leon Romanovsky <[email protected]>
  • Loading branch information
Yishai Hadas authored and rleon committed May 11, 2022
1 parent 61a2f14 commit 8580ad1
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 99 deletions.
103 changes: 44 additions & 59 deletions drivers/vfio/pci/mlx5/cmd.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,70 +5,65 @@

#include "cmd.h"

int mlx5vf_cmd_suspend_vhca(struct pci_dev *pdev, u16 vhca_id, u16 op_mod)
static int mlx5vf_cmd_get_vhca_id(struct mlx5_core_dev *mdev, u16 function_id,
u16 *vhca_id);

int mlx5vf_cmd_suspend_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod)
{
struct mlx5_core_dev *mdev = mlx5_vf_get_core_dev(pdev);
u32 out[MLX5_ST_SZ_DW(suspend_vhca_out)] = {};
u32 in[MLX5_ST_SZ_DW(suspend_vhca_in)] = {};
int ret;

if (!mdev)
lockdep_assert_held(&mvdev->state_mutex);
if (mvdev->mdev_detach)
return -ENOTCONN;

MLX5_SET(suspend_vhca_in, in, opcode, MLX5_CMD_OP_SUSPEND_VHCA);
MLX5_SET(suspend_vhca_in, in, vhca_id, vhca_id);
MLX5_SET(suspend_vhca_in, in, vhca_id, mvdev->vhca_id);
MLX5_SET(suspend_vhca_in, in, op_mod, op_mod);

ret = mlx5_cmd_exec_inout(mdev, suspend_vhca, in, out);
mlx5_vf_put_core_dev(mdev);
return ret;
return mlx5_cmd_exec_inout(mvdev->mdev, suspend_vhca, in, out);
}

int mlx5vf_cmd_resume_vhca(struct pci_dev *pdev, u16 vhca_id, u16 op_mod)
int mlx5vf_cmd_resume_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod)
{
struct mlx5_core_dev *mdev = mlx5_vf_get_core_dev(pdev);
u32 out[MLX5_ST_SZ_DW(resume_vhca_out)] = {};
u32 in[MLX5_ST_SZ_DW(resume_vhca_in)] = {};
int ret;

if (!mdev)
lockdep_assert_held(&mvdev->state_mutex);
if (mvdev->mdev_detach)
return -ENOTCONN;

MLX5_SET(resume_vhca_in, in, opcode, MLX5_CMD_OP_RESUME_VHCA);
MLX5_SET(resume_vhca_in, in, vhca_id, vhca_id);
MLX5_SET(resume_vhca_in, in, vhca_id, mvdev->vhca_id);
MLX5_SET(resume_vhca_in, in, op_mod, op_mod);

ret = mlx5_cmd_exec_inout(mdev, resume_vhca, in, out);
mlx5_vf_put_core_dev(mdev);
return ret;
return mlx5_cmd_exec_inout(mvdev->mdev, resume_vhca, in, out);
}

int mlx5vf_cmd_query_vhca_migration_state(struct pci_dev *pdev, u16 vhca_id,
int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev,
size_t *state_size)
{
struct mlx5_core_dev *mdev = mlx5_vf_get_core_dev(pdev);
u32 out[MLX5_ST_SZ_DW(query_vhca_migration_state_out)] = {};
u32 in[MLX5_ST_SZ_DW(query_vhca_migration_state_in)] = {};
int ret;

if (!mdev)
lockdep_assert_held(&mvdev->state_mutex);
if (mvdev->mdev_detach)
return -ENOTCONN;

MLX5_SET(query_vhca_migration_state_in, in, opcode,
MLX5_CMD_OP_QUERY_VHCA_MIGRATION_STATE);
MLX5_SET(query_vhca_migration_state_in, in, vhca_id, vhca_id);
MLX5_SET(query_vhca_migration_state_in, in, vhca_id, mvdev->vhca_id);
MLX5_SET(query_vhca_migration_state_in, in, op_mod, 0);

ret = mlx5_cmd_exec_inout(mdev, query_vhca_migration_state, in, out);
ret = mlx5_cmd_exec_inout(mvdev->mdev, query_vhca_migration_state, in,
out);
if (ret)
goto end;
return ret;

*state_size = MLX5_GET(query_vhca_migration_state_out, out,
required_umem_size);

end:
mlx5_vf_put_core_dev(mdev);
return ret;
return 0;
}

static int mlx5fv_vf_event(struct notifier_block *nb,
Expand Down Expand Up @@ -120,6 +115,10 @@ void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev)
if (mvdev->vf_id < 0)
goto end;

if (mlx5vf_cmd_get_vhca_id(mvdev->mdev, mvdev->vf_id + 1,
&mvdev->vhca_id))
goto end;

mutex_init(&mvdev->state_mutex);
spin_lock_init(&mvdev->reset_lock);
mvdev->nb.notifier_call = mlx5fv_vf_event;
Expand All @@ -137,23 +136,18 @@ void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev)
mlx5_vf_put_core_dev(mvdev->mdev);
}

int mlx5vf_cmd_get_vhca_id(struct pci_dev *pdev, u16 function_id, u16 *vhca_id)
static int mlx5vf_cmd_get_vhca_id(struct mlx5_core_dev *mdev, u16 function_id,
u16 *vhca_id)
{
struct mlx5_core_dev *mdev = mlx5_vf_get_core_dev(pdev);
u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {};
int out_size;
void *out;
int ret;

if (!mdev)
return -ENOTCONN;

out_size = MLX5_ST_SZ_BYTES(query_hca_cap_out);
out = kzalloc(out_size, GFP_KERNEL);
if (!out) {
ret = -ENOMEM;
goto end;
}
if (!out)
return -ENOMEM;

MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
MLX5_SET(query_hca_cap_in, in, other_function, 1);
Expand All @@ -171,8 +165,6 @@ int mlx5vf_cmd_get_vhca_id(struct pci_dev *pdev, u16 function_id, u16 *vhca_id)

err_exec:
kfree(out);
end:
mlx5_vf_put_core_dev(mdev);
return ret;
}

Expand Down Expand Up @@ -217,21 +209,23 @@ static int _create_state_mkey(struct mlx5_core_dev *mdev, u32 pdn,
return err;
}

int mlx5vf_cmd_save_vhca_state(struct pci_dev *pdev, u16 vhca_id,
int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
struct mlx5_vf_migration_file *migf)
{
struct mlx5_core_dev *mdev = mlx5_vf_get_core_dev(pdev);
u32 out[MLX5_ST_SZ_DW(save_vhca_state_out)] = {};
u32 in[MLX5_ST_SZ_DW(save_vhca_state_in)] = {};
struct mlx5_core_dev *mdev;
u32 pdn, mkey;
int err;

if (!mdev)
lockdep_assert_held(&mvdev->state_mutex);
if (mvdev->mdev_detach)
return -ENOTCONN;

mdev = mvdev->mdev;
err = mlx5_core_alloc_pd(mdev, &pdn);
if (err)
goto end;
return err;

err = dma_map_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE,
0);
Expand All @@ -245,45 +239,36 @@ int mlx5vf_cmd_save_vhca_state(struct pci_dev *pdev, u16 vhca_id,
MLX5_SET(save_vhca_state_in, in, opcode,
MLX5_CMD_OP_SAVE_VHCA_STATE);
MLX5_SET(save_vhca_state_in, in, op_mod, 0);
MLX5_SET(save_vhca_state_in, in, vhca_id, vhca_id);
MLX5_SET(save_vhca_state_in, in, vhca_id, mvdev->vhca_id);
MLX5_SET(save_vhca_state_in, in, mkey, mkey);
MLX5_SET(save_vhca_state_in, in, size, migf->total_length);

err = mlx5_cmd_exec_inout(mdev, save_vhca_state, in, out);
if (err)
goto err_exec;

migf->total_length =
MLX5_GET(save_vhca_state_out, out, actual_image_size);

mlx5_core_destroy_mkey(mdev, mkey);
mlx5_core_dealloc_pd(mdev, pdn);
dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE, 0);
mlx5_vf_put_core_dev(mdev);

return 0;

migf->total_length = MLX5_GET(save_vhca_state_out, out,
actual_image_size);
err_exec:
mlx5_core_destroy_mkey(mdev, mkey);
err_create_mkey:
dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE, 0);
err_dma_map:
mlx5_core_dealloc_pd(mdev, pdn);
end:
mlx5_vf_put_core_dev(mdev);
return err;
}

int mlx5vf_cmd_load_vhca_state(struct pci_dev *pdev, u16 vhca_id,
int mlx5vf_cmd_load_vhca_state(struct mlx5vf_pci_core_device *mvdev,
struct mlx5_vf_migration_file *migf)
{
struct mlx5_core_dev *mdev = mlx5_vf_get_core_dev(pdev);
struct mlx5_core_dev *mdev;
u32 out[MLX5_ST_SZ_DW(save_vhca_state_out)] = {};
u32 in[MLX5_ST_SZ_DW(save_vhca_state_in)] = {};
u32 pdn, mkey;
int err;

if (!mdev)
lockdep_assert_held(&mvdev->state_mutex);
if (mvdev->mdev_detach)
return -ENOTCONN;

mutex_lock(&migf->lock);
Expand All @@ -292,6 +277,7 @@ int mlx5vf_cmd_load_vhca_state(struct pci_dev *pdev, u16 vhca_id,
goto end;
}

mdev = mvdev->mdev;
err = mlx5_core_alloc_pd(mdev, &pdn);
if (err)
goto end;
Expand All @@ -307,7 +293,7 @@ int mlx5vf_cmd_load_vhca_state(struct pci_dev *pdev, u16 vhca_id,
MLX5_SET(load_vhca_state_in, in, opcode,
MLX5_CMD_OP_LOAD_VHCA_STATE);
MLX5_SET(load_vhca_state_in, in, op_mod, 0);
MLX5_SET(load_vhca_state_in, in, vhca_id, vhca_id);
MLX5_SET(load_vhca_state_in, in, vhca_id, mvdev->vhca_id);
MLX5_SET(load_vhca_state_in, in, mkey, mkey);
MLX5_SET(load_vhca_state_in, in, size, migf->total_length);

Expand All @@ -319,7 +305,6 @@ int mlx5vf_cmd_load_vhca_state(struct pci_dev *pdev, u16 vhca_id,
err_reg:
mlx5_core_dealloc_pd(mdev, pdn);
end:
mlx5_vf_put_core_dev(mdev);
mutex_unlock(&migf->lock);
return err;
}
11 changes: 5 additions & 6 deletions drivers/vfio/pci/mlx5/cmd.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,16 +43,15 @@ struct mlx5vf_pci_core_device {
struct mlx5_core_dev *mdev;
};

int mlx5vf_cmd_suspend_vhca(struct pci_dev *pdev, u16 vhca_id, u16 op_mod);
int mlx5vf_cmd_resume_vhca(struct pci_dev *pdev, u16 vhca_id, u16 op_mod);
int mlx5vf_cmd_query_vhca_migration_state(struct pci_dev *pdev, u16 vhca_id,
int mlx5vf_cmd_suspend_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod);
int mlx5vf_cmd_resume_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod);
int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev,
size_t *state_size);
int mlx5vf_cmd_get_vhca_id(struct pci_dev *pdev, u16 function_id, u16 *vhca_id);
void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev);
void mlx5vf_cmd_remove_migratable(struct mlx5vf_pci_core_device *mvdev);
int mlx5vf_cmd_save_vhca_state(struct pci_dev *pdev, u16 vhca_id,
int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
struct mlx5_vf_migration_file *migf);
int mlx5vf_cmd_load_vhca_state(struct pci_dev *pdev, u16 vhca_id,
int mlx5vf_cmd_load_vhca_state(struct mlx5vf_pci_core_device *mvdev,
struct mlx5_vf_migration_file *migf);
void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev);
#endif /* MLX5_VFIO_CMD_H */
44 changes: 10 additions & 34 deletions drivers/vfio/pci/mlx5/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -208,8 +208,8 @@ mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev)
stream_open(migf->filp->f_inode, migf->filp);
mutex_init(&migf->lock);

ret = mlx5vf_cmd_query_vhca_migration_state(
mvdev->core_device.pdev, mvdev->vhca_id, &migf->total_length);
ret = mlx5vf_cmd_query_vhca_migration_state(mvdev,
&migf->total_length);
if (ret)
goto out_free;

Expand All @@ -218,8 +218,7 @@ mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev)
if (ret)
goto out_free;

ret = mlx5vf_cmd_save_vhca_state(mvdev->core_device.pdev,
mvdev->vhca_id, migf);
ret = mlx5vf_cmd_save_vhca_state(mvdev, migf);
if (ret)
goto out_free;
return migf;
Expand Down Expand Up @@ -346,35 +345,31 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev,
int ret;

if (cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_STOP) {
ret = mlx5vf_cmd_suspend_vhca(
mvdev->core_device.pdev, mvdev->vhca_id,
ret = mlx5vf_cmd_suspend_vhca(mvdev,
MLX5_SUSPEND_VHCA_IN_OP_MOD_SUSPEND_RESPONDER);
if (ret)
return ERR_PTR(ret);
return NULL;
}

if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_RUNNING_P2P) {
ret = mlx5vf_cmd_resume_vhca(
mvdev->core_device.pdev, mvdev->vhca_id,
ret = mlx5vf_cmd_resume_vhca(mvdev,
MLX5_RESUME_VHCA_IN_OP_MOD_RESUME_RESPONDER);
if (ret)
return ERR_PTR(ret);
return NULL;
}

if (cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_RUNNING_P2P) {
ret = mlx5vf_cmd_suspend_vhca(
mvdev->core_device.pdev, mvdev->vhca_id,
ret = mlx5vf_cmd_suspend_vhca(mvdev,
MLX5_SUSPEND_VHCA_IN_OP_MOD_SUSPEND_INITIATOR);
if (ret)
return ERR_PTR(ret);
return NULL;
}

if (cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_RUNNING) {
ret = mlx5vf_cmd_resume_vhca(
mvdev->core_device.pdev, mvdev->vhca_id,
ret = mlx5vf_cmd_resume_vhca(mvdev,
MLX5_RESUME_VHCA_IN_OP_MOD_RESUME_INITIATOR);
if (ret)
return ERR_PTR(ret);
Expand Down Expand Up @@ -409,8 +404,7 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev,
}

if (cur == VFIO_DEVICE_STATE_RESUMING && new == VFIO_DEVICE_STATE_STOP) {
ret = mlx5vf_cmd_load_vhca_state(mvdev->core_device.pdev,
mvdev->vhca_id,
ret = mlx5vf_cmd_load_vhca_state(mvdev,
mvdev->resuming_migf);
if (ret)
return ERR_PTR(ret);
Expand Down Expand Up @@ -517,34 +511,16 @@ static int mlx5vf_pci_open_device(struct vfio_device *core_vdev)
struct mlx5vf_pci_core_device *mvdev = container_of(
core_vdev, struct mlx5vf_pci_core_device, core_device.vdev);
struct vfio_pci_core_device *vdev = &mvdev->core_device;
int vf_id;
int ret;

ret = vfio_pci_core_enable(vdev);
if (ret)
return ret;

if (!mvdev->migrate_cap) {
vfio_pci_core_finish_enable(vdev);
return 0;
}

vf_id = pci_iov_vf_id(vdev->pdev);
if (vf_id < 0) {
ret = vf_id;
goto out_disable;
}

ret = mlx5vf_cmd_get_vhca_id(vdev->pdev, vf_id + 1, &mvdev->vhca_id);
if (ret)
goto out_disable;

mvdev->mig_state = VFIO_DEVICE_STATE_RUNNING;
if (mvdev->migrate_cap)
mvdev->mig_state = VFIO_DEVICE_STATE_RUNNING;
vfio_pci_core_finish_enable(vdev);
return 0;
out_disable:
vfio_pci_core_disable(vdev);
return ret;
}

static void mlx5vf_pci_close_device(struct vfio_device *core_vdev)
Expand Down

0 comments on commit 8580ad1

Please sign in to comment.