Skip to content

Commit

Permalink
powerpc/powernv/pseries: Rework device adding to IOMMU groups
Browse files Browse the repository at this point in the history
The powernv platform registers IOMMU groups and adds devices to them
from the pci_controller_ops::setup_bridge() hook except one case when
virtual functions (SRIOV VFs) are added from a bus notifier.

The pseries platform registers IOMMU groups from
the pci_controller_ops::dma_bus_setup() hook and adds devices from
the pci_controller_ops::dma_dev_setup() hook. The very same bus notifier
used for powernv does not add devices for pseries though as
__of_scan_bus() adds devices first, then it does the bus/dev DMA setup.

Both platforms use iommu_add_device() which takes a device and expects
it to have a valid IOMMU table struct with an iommu_table_group pointer
which in turn points the iommu_group struct (which represents
an IOMMU group). Although the helper seems easy to use, it relies on
some pre-existing device configuration and associated data structures
which it does not really need.

This simplifies iommu_add_device() to take the table_group pointer
directly. Pseries already has a table_group pointer handy and the bus
notified is not used anyway. For powernv, this copies the existing bus
notifier, makes it work for powernv only which means an easy way of
getting to the table_group pointer. This was tested on VFs but should
also support physical PCI hotplug.

Since iommu_add_device() receives the table_group pointer directly,
pseries does not do TCE cache invalidation (the hypervisor does) nor
allow multiple groups per a VFIO container (in other words sharing
an IOMMU table between partitionable endpoints), this removes
iommu_table_group_link from pseries.

Signed-off-by: Alexey Kardashevskiy <[email protected]>
Reviewed-by: David Gibson <[email protected]>
Signed-off-by: Michael Ellerman <[email protected]>
  • Loading branch information
aik authored and mpe committed Dec 21, 2018
1 parent c409c63 commit c4e9d3c
Show file tree
Hide file tree
Showing 5 changed files with 74 additions and 95 deletions.
12 changes: 4 additions & 8 deletions arch/powerpc/include/asm/iommu.h
Original file line number Diff line number Diff line change
Expand Up @@ -215,9 +215,9 @@ struct iommu_table_group {

extern void iommu_register_group(struct iommu_table_group *table_group,
int pci_domain_number, unsigned long pe_num);
extern int iommu_add_device(struct device *dev);
extern int iommu_add_device(struct iommu_table_group *table_group,
struct device *dev);
extern void iommu_del_device(struct device *dev);
extern int __init tce_iommu_bus_notifier_init(void);
extern long iommu_tce_xchg(struct mm_struct *mm, struct iommu_table *tbl,
unsigned long entry, unsigned long *hpa,
enum dma_data_direction *direction);
Expand All @@ -228,19 +228,15 @@ static inline void iommu_register_group(struct iommu_table_group *table_group,
{
}

static inline int iommu_add_device(struct device *dev)
static inline int iommu_add_device(struct iommu_table_group *table_group,
struct device *dev)
{
return 0;
}

static inline void iommu_del_device(struct device *dev)
{
}

static inline int __init tce_iommu_bus_notifier_init(void)
{
return 0;
}
#endif /* !CONFIG_IOMMU_API */

int dma_iommu_mapping_error(struct device *dev, dma_addr_t dma_addr);
Expand Down
58 changes: 3 additions & 55 deletions arch/powerpc/kernel/iommu.c
Original file line number Diff line number Diff line change
Expand Up @@ -1078,11 +1078,8 @@ void iommu_release_ownership(struct iommu_table *tbl)
}
EXPORT_SYMBOL_GPL(iommu_release_ownership);

int iommu_add_device(struct device *dev)
int iommu_add_device(struct iommu_table_group *table_group, struct device *dev)
{
struct iommu_table *tbl;
struct iommu_table_group_link *tgl;

/*
* The sysfs entries should be populated before
* binding IOMMU group. If sysfs entries isn't
Expand All @@ -1098,32 +1095,10 @@ int iommu_add_device(struct device *dev)
return -EBUSY;
}

tbl = get_iommu_table_base(dev);
if (!tbl) {
pr_debug("%s: Skipping device %s with no tbl\n",
__func__, dev_name(dev));
return 0;
}

tgl = list_first_entry_or_null(&tbl->it_group_list,
struct iommu_table_group_link, next);
if (!tgl) {
pr_debug("%s: Skipping device %s with no group\n",
__func__, dev_name(dev));
return 0;
}
pr_debug("%s: Adding %s to iommu group %d\n",
__func__, dev_name(dev),
iommu_group_id(tgl->table_group->group));

if (PAGE_SIZE < IOMMU_PAGE_SIZE(tbl)) {
pr_err("%s: Invalid IOMMU page size %lx (%lx) on %s\n",
__func__, IOMMU_PAGE_SIZE(tbl),
PAGE_SIZE, dev_name(dev));
return -EINVAL;
}
__func__, dev_name(dev), iommu_group_id(table_group->group));

return iommu_group_add_device(tgl->table_group->group, dev);
return iommu_group_add_device(table_group->group, dev);
}
EXPORT_SYMBOL_GPL(iommu_add_device);

Expand All @@ -1143,31 +1118,4 @@ void iommu_del_device(struct device *dev)
iommu_group_remove_device(dev);
}
EXPORT_SYMBOL_GPL(iommu_del_device);

static int tce_iommu_bus_notifier(struct notifier_block *nb,
unsigned long action, void *data)
{
struct device *dev = data;

switch (action) {
case BUS_NOTIFY_ADD_DEVICE:
return iommu_add_device(dev);
case BUS_NOTIFY_DEL_DEVICE:
if (dev->iommu_group)
iommu_del_device(dev);
return 0;
default:
return 0;
}
}

static struct notifier_block tce_iommu_bus_nb = {
.notifier_call = tce_iommu_bus_notifier,
};

int __init tce_iommu_bus_notifier_init(void)
{
bus_register_notifier(&pci_bus_type, &tce_iommu_bus_nb);
return 0;
}
#endif /* CONFIG_IOMMU_API */
10 changes: 1 addition & 9 deletions arch/powerpc/platforms/powernv/pci-ioda.c
Original file line number Diff line number Diff line change
Expand Up @@ -1935,7 +1935,7 @@ static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe,
set_iommu_table_base(&dev->dev, pe->table_group.tables[0]);
set_dma_offset(&dev->dev, pe->tce_bypass_base);
if (add_to_group)
iommu_add_device(&dev->dev);
iommu_add_device(&pe->table_group, &dev->dev);

if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
pnv_ioda_setup_bus_dma(pe, dev->subordinate,
Expand Down Expand Up @@ -2521,14 +2521,6 @@ static long pnv_pci_ioda2_setup_default_config(struct pnv_ioda_pe *pe)
if (!pnv_iommu_bypass_disabled)
pnv_pci_ioda2_set_bypass(pe, true);

/*
* Setting table base here only for carrying iommu_group
* further down to let iommu_add_device() do the job.
* pnv_pci_ioda_dma_dev_setup will override it later anyway.
*/
if (pe->flags & PNV_IODA_PE_DEV)
set_iommu_table_base(&pe->pdev->dev, tbl);

return 0;
}

Expand Down
43 changes: 42 additions & 1 deletion arch/powerpc/platforms/powernv/pci.c
Original file line number Diff line number Diff line change
Expand Up @@ -1125,4 +1125,45 @@ void __init pnv_pci_init(void)
set_pci_dma_ops(&dma_iommu_ops);
}

machine_subsys_initcall_sync(powernv, tce_iommu_bus_notifier_init);
static int pnv_tce_iommu_bus_notifier(struct notifier_block *nb,
unsigned long action, void *data)
{
struct device *dev = data;
struct pci_dev *pdev;
struct pci_dn *pdn;
struct pnv_ioda_pe *pe;
struct pci_controller *hose;
struct pnv_phb *phb;

switch (action) {
case BUS_NOTIFY_ADD_DEVICE:
pdev = to_pci_dev(dev);
pdn = pci_get_pdn(pdev);
hose = pci_bus_to_host(pdev->bus);
phb = hose->private_data;

WARN_ON_ONCE(!phb);
if (!pdn || pdn->pe_number == IODA_INVALID_PE || !phb)
return 0;

pe = &phb->ioda.pe_array[pdn->pe_number];
iommu_add_device(&pe->table_group, dev);
return 0;
case BUS_NOTIFY_DEL_DEVICE:
iommu_del_device(dev);
return 0;
default:
return 0;
}
}

static struct notifier_block pnv_tce_iommu_bus_nb = {
.notifier_call = pnv_tce_iommu_bus_notifier,
};

static int __init pnv_tce_iommu_bus_notifier_init(void)
{
bus_register_notifier(&pci_bus_type, &pnv_tce_iommu_bus_nb);
return 0;
}
machine_subsys_initcall_sync(powernv, pnv_tce_iommu_bus_notifier_init);
46 changes: 24 additions & 22 deletions arch/powerpc/platforms/pseries/iommu.c
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,6 @@ static struct iommu_table_group *iommu_pseries_alloc_group(int node)
{
struct iommu_table_group *table_group;
struct iommu_table *tbl;
struct iommu_table_group_link *tgl;

table_group = kzalloc_node(sizeof(struct iommu_table_group), GFP_KERNEL,
node);
Expand All @@ -68,22 +67,13 @@ static struct iommu_table_group *iommu_pseries_alloc_group(int node)
if (!tbl)
goto free_group;

tgl = kzalloc_node(sizeof(struct iommu_table_group_link), GFP_KERNEL,
node);
if (!tgl)
goto free_table;

INIT_LIST_HEAD_RCU(&tbl->it_group_list);
kref_init(&tbl->it_kref);
tgl->table_group = table_group;
list_add_rcu(&tgl->next, &tbl->it_group_list);

table_group->tables[0] = tbl;

return table_group;

free_table:
kfree(tbl);
free_group:
kfree(table_group);
return NULL;
Expand All @@ -93,23 +83,12 @@ static void iommu_pseries_free_group(struct iommu_table_group *table_group,
const char *node_name)
{
struct iommu_table *tbl;
#ifdef CONFIG_IOMMU_API
struct iommu_table_group_link *tgl;
#endif

if (!table_group)
return;

tbl = table_group->tables[0];
#ifdef CONFIG_IOMMU_API
tgl = list_first_entry_or_null(&tbl->it_group_list,
struct iommu_table_group_link, next);

WARN_ON_ONCE(!tgl);
if (tgl) {
list_del_rcu(&tgl->next);
kfree(tgl);
}
if (table_group->group) {
iommu_group_put(table_group->group);
BUG_ON(table_group->group);
Expand Down Expand Up @@ -1216,7 +1195,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
}

set_iommu_table_base(&dev->dev, pci->table_group->tables[0]);
iommu_add_device(&dev->dev);
iommu_add_device(pci->table_group, &dev->dev);
}

static int dma_set_mask_pSeriesLP(struct device *dev, u64 dma_mask)
Expand Down Expand Up @@ -1421,4 +1400,27 @@ static int __init disable_multitce(char *str)

__setup("multitce=", disable_multitce);

static int tce_iommu_bus_notifier(struct notifier_block *nb,
unsigned long action, void *data)
{
struct device *dev = data;

switch (action) {
case BUS_NOTIFY_DEL_DEVICE:
iommu_del_device(dev);
return 0;
default:
return 0;
}
}

static struct notifier_block tce_iommu_bus_nb = {
.notifier_call = tce_iommu_bus_notifier,
};

static int __init tce_iommu_bus_notifier_init(void)
{
bus_register_notifier(&pci_bus_type, &tce_iommu_bus_nb);
return 0;
}
machine_subsys_initcall_sync(pseries, tce_iommu_bus_notifier_init);

0 comments on commit c4e9d3c

Please sign in to comment.