From d740f4be7cf0faded598fe43e2f472c47230c298 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Fri, 16 Feb 2024 14:29:50 -0800 Subject: [PATCH 1/3] pds_core: add simple AER handler Set up the pci_error_handlers error_detected and resume to be useful in handling AER events. Reviewed-by: Brett Creeley Signed-off-by: Shannon Nelson Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/pds_core/main.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/net/ethernet/amd/pds_core/main.c b/drivers/net/ethernet/amd/pds_core/main.c index 0050c5894563b8..e10451a5a89bb3 100644 --- a/drivers/net/ethernet/amd/pds_core/main.c +++ b/drivers/net/ethernet/amd/pds_core/main.c @@ -45,6 +45,7 @@ static void pdsc_unmap_bars(struct pdsc *pdsc) for (i = 0; i < PDS_CORE_BARS_MAX; i++) { if (bars[i].vaddr) pci_iounmap(pdsc->pdev, bars[i].vaddr); + bars[i].vaddr = NULL; } } @@ -512,10 +513,33 @@ void pdsc_reset_done(struct pci_dev *pdev) pdsc_restart_health_thread(pdsc); } +static pci_ers_result_t pdsc_pci_error_detected(struct pci_dev *pdev, + pci_channel_state_t error) +{ + if (error == pci_channel_io_frozen) { + pdsc_reset_prepare(pdev); + return PCI_ERS_RESULT_NEED_RESET; + } + + return PCI_ERS_RESULT_NONE; +} + +static void pdsc_pci_error_resume(struct pci_dev *pdev) +{ + struct pdsc *pdsc = pci_get_drvdata(pdev); + + if (test_bit(PDSC_S_FW_DEAD, &pdsc->state)) + pci_reset_function_locked(pdev); +} + static const struct pci_error_handlers pdsc_err_handler = { /* FLR handling */ .reset_prepare = pdsc_reset_prepare, .reset_done = pdsc_reset_done, + + /* AER handling */ + .error_detected = pdsc_pci_error_detected, + .resume = pdsc_pci_error_resume, }; static struct pci_driver pdsc_driver = { From 2dac60e062340c1e5c975ad6465192d11c40d47a Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Fri, 16 Feb 2024 14:29:51 -0800 Subject: [PATCH 2/3] pds_core: delete VF dev on reset When the VF is hit with a reset, remove the aux device in the prepare for reset and try to restore it after the reset. The userland mechanics will need to recover and rebuild whatever uses the device afterwards. Reviewed-by: Brett Creeley Signed-off-by: Shannon Nelson Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/pds_core/auxbus.c | 18 +++++++++++++++++- drivers/net/ethernet/amd/pds_core/main.c | 16 ++++++++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/amd/pds_core/auxbus.c b/drivers/net/ethernet/amd/pds_core/auxbus.c index 11c23a7f3172d3..a3c79848a69aaa 100644 --- a/drivers/net/ethernet/amd/pds_core/auxbus.c +++ b/drivers/net/ethernet/amd/pds_core/auxbus.c @@ -184,6 +184,9 @@ int pdsc_auxbus_dev_del(struct pdsc *cf, struct pdsc *pf) struct pds_auxiliary_dev *padev; int err = 0; + if (!cf) + return -ENODEV; + mutex_lock(&pf->config_lock); padev = pf->vfs[cf->vf_id].padev; @@ -202,14 +205,27 @@ int pdsc_auxbus_dev_del(struct pdsc *cf, struct pdsc *pf) int pdsc_auxbus_dev_add(struct pdsc *cf, struct pdsc *pf) { struct pds_auxiliary_dev *padev; - enum pds_core_vif_types vt; char devname[PDS_DEVNAME_LEN]; + enum pds_core_vif_types vt; + unsigned long mask; u16 vt_support; int client_id; int err = 0; + if (!cf) + return -ENODEV; + mutex_lock(&pf->config_lock); + mask = BIT_ULL(PDSC_S_FW_DEAD) | + BIT_ULL(PDSC_S_STOPPING_DRIVER); + if (cf->state & mask) { + dev_err(pf->dev, "%s: can't add dev, VF client in bad state %#lx\n", + __func__, cf->state); + err = -ENXIO; + goto out_unlock; + } + /* We only support vDPA so far, so it is the only one to * be verified that it is available in the Core device and * enabled in the devlink param. In the future this might diff --git a/drivers/net/ethernet/amd/pds_core/main.c b/drivers/net/ethernet/amd/pds_core/main.c index e10451a5a89bb3..82901a8473068e 100644 --- a/drivers/net/ethernet/amd/pds_core/main.c +++ b/drivers/net/ethernet/amd/pds_core/main.c @@ -476,6 +476,14 @@ void pdsc_reset_prepare(struct pci_dev *pdev) pdsc_stop_health_thread(pdsc); pdsc_fw_down(pdsc); + if (pdev->is_virtfn) { + struct pdsc *pf; + + pf = pdsc_get_pf_struct(pdsc->pdev); + if (!IS_ERR(pf)) + pdsc_auxbus_dev_del(pdsc, pf); + } + pdsc_unmap_bars(pdsc); pci_release_regions(pdev); pci_disable_device(pdev); @@ -511,6 +519,14 @@ void pdsc_reset_done(struct pci_dev *pdev) pdsc_fw_up(pdsc); pdsc_restart_health_thread(pdsc); + + if (pdev->is_virtfn) { + struct pdsc *pf; + + pf = pdsc_get_pf_struct(pdsc->pdev); + if (!IS_ERR(pf)) + pdsc_auxbus_dev_add(pdsc, pf); + } } static pci_ers_result_t pdsc_pci_error_detected(struct pci_dev *pdev, From 2cbab3c296f1addd73b40549a2271b30f960df8b Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Fri, 16 Feb 2024 14:29:52 -0800 Subject: [PATCH 3/3] pds_core: use pci_reset_function for health reset We get the benefit of all the PCI reset locking and recovery if we use the existing pci_reset_function() that will call our local reset handlers. Reviewed-by: Brett Creeley Signed-off-by: Shannon Nelson Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/pds_core/core.c | 3 +-- drivers/net/ethernet/amd/pds_core/core.h | 3 --- drivers/net/ethernet/amd/pds_core/main.c | 7 ++++--- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/amd/pds_core/core.c b/drivers/net/ethernet/amd/pds_core/core.c index 1234a4a8a4aed2..9662ee72814c0c 100644 --- a/drivers/net/ethernet/amd/pds_core/core.c +++ b/drivers/net/ethernet/amd/pds_core/core.c @@ -607,8 +607,7 @@ static void pdsc_check_pci_health(struct pdsc *pdsc) if (fw_status != PDS_RC_BAD_PCI) return; - pdsc_reset_prepare(pdsc->pdev); - pdsc_reset_done(pdsc->pdev); + pci_reset_function(pdsc->pdev); } void pdsc_health_thread(struct work_struct *work) diff --git a/drivers/net/ethernet/amd/pds_core/core.h b/drivers/net/ethernet/amd/pds_core/core.h index 3468a63f5ae9a5..92d7657dd6147e 100644 --- a/drivers/net/ethernet/amd/pds_core/core.h +++ b/drivers/net/ethernet/amd/pds_core/core.h @@ -284,9 +284,6 @@ int pdsc_devcmd_reset(struct pdsc *pdsc); int pdsc_dev_init(struct pdsc *pdsc); void pdsc_dev_uninit(struct pdsc *pdsc); -void pdsc_reset_prepare(struct pci_dev *pdev); -void pdsc_reset_done(struct pci_dev *pdev); - int pdsc_intr_alloc(struct pdsc *pdsc, char *name, irq_handler_t handler, void *data); void pdsc_intr_free(struct pdsc *pdsc, int index); diff --git a/drivers/net/ethernet/amd/pds_core/main.c b/drivers/net/ethernet/amd/pds_core/main.c index 82901a8473068e..ab6133e7db422d 100644 --- a/drivers/net/ethernet/amd/pds_core/main.c +++ b/drivers/net/ethernet/amd/pds_core/main.c @@ -469,7 +469,7 @@ static void pdsc_restart_health_thread(struct pdsc *pdsc) mod_timer(&pdsc->wdtimer, jiffies + 1); } -void pdsc_reset_prepare(struct pci_dev *pdev) +static void pdsc_reset_prepare(struct pci_dev *pdev) { struct pdsc *pdsc = pci_get_drvdata(pdev); @@ -486,10 +486,11 @@ void pdsc_reset_prepare(struct pci_dev *pdev) pdsc_unmap_bars(pdsc); pci_release_regions(pdev); - pci_disable_device(pdev); + if (pci_is_enabled(pdev)) + pci_disable_device(pdev); } -void pdsc_reset_done(struct pci_dev *pdev) +static void pdsc_reset_done(struct pci_dev *pdev) { struct pdsc *pdsc = pci_get_drvdata(pdev); struct device *dev = pdsc->dev;