Skip to content

Commit

Permalink
iwlwifi: pcie: remove non-responsive device
Browse files Browse the repository at this point in the history
If we fail to to grab NIC access because the device is not responding
(i.e. CSR_GP_CNTRL returns 0xFFFFFFFF), remove the device from the PCI
bus, to avoid any further damage, and to let the user space rescan.

In order to inform the userspace that a rescan is needed, we send a
kobject uevent with "INACCESSIBLE".

This functionality is disabled by default, but can be enabled via a
new module parameter called "remove_when_gone".  In the future we may
change this module parameter to include 3 modes instead: do nothing;
auto-rescan or; send uevent.

Signed-off-by: Luca Coelho <[email protected]>
Signed-off-by: Rajat Jain <[email protected]>
  • Loading branch information
lucacoelho committed Apr 26, 2018
1 parent de460dd commit 49564a8
Show file tree
Hide file tree
Showing 4 changed files with 84 additions and 3 deletions.
6 changes: 6 additions & 0 deletions drivers/net/wireless/intel/iwlwifi/iwl-drv.c
Original file line number Diff line number Diff line change
Expand Up @@ -1850,3 +1850,9 @@ MODULE_PARM_DESC(d0i3_timeout, "Timeout to D0i3 entry when idle (ms)");

module_param_named(disable_11ac, iwlwifi_mod_params.disable_11ac, bool, 0444);
MODULE_PARM_DESC(disable_11ac, "Disable VHT capabilities (default: false)");

module_param_named(remove_when_gone,
iwlwifi_mod_params.remove_when_gone, bool,
0444);
MODULE_PARM_DESC(remove_when_gone,
"Remove dev from PCIe bus if it is deemed inaccessible (default: false)");
2 changes: 2 additions & 0 deletions drivers/net/wireless/intel/iwlwifi/iwl-modparams.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ enum iwl_uapsd_disable {
* @lar_disable: disable LAR (regulatory), default = 0
* @fw_monitor: allow to use firmware monitor
* @disable_11ac: disable VHT capabilities, default = false.
* @remove_when_gone: remove an inaccessible device from the PCIe bus.
*/
struct iwl_mod_params {
int swcrypto;
Expand All @@ -143,6 +144,7 @@ struct iwl_mod_params {
bool lar_disable;
bool fw_monitor;
bool disable_11ac;
bool remove_when_gone;
};

#endif /* #__iwl_modparams_h__ */
5 changes: 5 additions & 0 deletions drivers/net/wireless/intel/iwlwifi/pcie/internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,8 @@ struct iwl_self_init_dram {
* @hw_init_mask: initial unmasked hw causes
* @fh_mask: current unmasked fh causes
* @hw_mask: current unmasked hw causes
* @in_rescan: true if we have triggered a device rescan
* @scheduled_for_removal: true if we have scheduled a device removal
*/
struct iwl_trans_pcie {
struct iwl_rxq *rxq;
Expand Down Expand Up @@ -464,6 +466,9 @@ struct iwl_trans_pcie {
u32 fh_mask;
u32 hw_mask;
cpumask_t affinity_mask[IWL_MAX_RX_HW_QUEUES];
u16 tx_cmd_queue_size;
bool in_rescan;
bool scheduled_for_removal;
};

static inline struct iwl_trans_pcie *
Expand Down
74 changes: 71 additions & 3 deletions drivers/net/wireless/intel/iwlwifi/pcie/trans.c
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@
#include <linux/gfp.h>
#include <linux/vmalloc.h>
#include <linux/pm_runtime.h>
#include <linux/module.h>

#include "iwl-drv.h"
#include "iwl-trans.h"
Expand Down Expand Up @@ -1935,6 +1936,29 @@ static void iwl_trans_pcie_set_pmi(struct iwl_trans *trans, bool state)
clear_bit(STATUS_TPOWER_PMI, &trans->status);
}

struct iwl_trans_pcie_removal {
struct pci_dev *pdev;
struct work_struct work;
};

static void iwl_trans_pcie_removal_wk(struct work_struct *wk)
{
struct iwl_trans_pcie_removal *removal =
container_of(wk, struct iwl_trans_pcie_removal, work);
struct pci_dev *pdev = removal->pdev;
char *prop[] = {"EVENT=INACCESSIBLE", NULL};

dev_err(&pdev->dev, "Device gone - attempting removal\n");
kobject_uevent_env(&pdev->dev.kobj, KOBJ_CHANGE, prop);
pci_lock_rescan_remove();
pci_dev_put(pdev);
pci_stop_and_remove_bus_device(pdev);
pci_unlock_rescan_remove();

kfree(removal);
module_put(THIS_MODULE);
}

static bool iwl_trans_pcie_grab_nic_access(struct iwl_trans *trans,
unsigned long *flags)
{
Expand Down Expand Up @@ -1977,11 +2001,55 @@ static bool iwl_trans_pcie_grab_nic_access(struct iwl_trans *trans,
(BIT(trans->cfg->csr->flag_mac_clock_ready) |
CSR_GP_CNTRL_REG_FLAG_GOING_TO_SLEEP), 15000);
if (unlikely(ret < 0)) {
iwl_trans_pcie_dump_regs(trans);
iwl_write32(trans, CSR_RESET, CSR_RESET_REG_FLAG_FORCE_NMI);
u32 cntrl = iwl_read32(trans, CSR_GP_CNTRL);

WARN_ONCE(1,
"Timeout waiting for hardware access (CSR_GP_CNTRL 0x%08x)\n",
iwl_read32(trans, CSR_GP_CNTRL));
cntrl);

iwl_trans_pcie_dump_regs(trans);

if (iwlwifi_mod_params.remove_when_gone && cntrl == ~0U) {
struct iwl_trans_pcie_removal *removal;

if (trans_pcie->scheduled_for_removal)
goto err;

IWL_ERR(trans, "Device gone - scheduling removal!\n");

/*
* get a module reference to avoid doing this
* while unloading anyway and to avoid
* scheduling a work with code that's being
* removed.
*/
if (!try_module_get(THIS_MODULE)) {
IWL_ERR(trans,
"Module is being unloaded - abort\n");
goto err;
}

removal = kzalloc(sizeof(*removal), GFP_ATOMIC);
if (!removal) {
module_put(THIS_MODULE);
goto err;
}
/*
* we don't need to clear this flag, because
* the trans will be freed and reallocated.
*/
trans_pcie->scheduled_for_removal = true;

removal->pdev = to_pci_dev(trans->dev);
INIT_WORK(&removal->work, iwl_trans_pcie_removal_wk);
pci_dev_get(removal->pdev);
schedule_work(&removal->work);
} else {
iwl_write32(trans, CSR_RESET,
CSR_RESET_REG_FLAG_FORCE_NMI);
}

err:
spin_unlock_irqrestore(&trans_pcie->reg_lock, *flags);
return false;
}
Expand Down

0 comments on commit 49564a8

Please sign in to comment.