Commit a50243b

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe:
 "This has been a slightly more active cycle than normal with ongoing
  core changes and quite a lot of collected driver updates.

   - Various driver fixes for bnxt_re, cxgb4, hns, mlx5, pvrdma, rxe

   - A new data transfer mode for HFI1 giving higher performance

   - Significant functional and bug fix update to the mlx5
     On-Demand-Paging MR feature

   - A chip hang reset recovery system for hns

   - Change mm->pinned_vm to an atomic64

   - Update bnxt_re to support a new 57500 chip

   - A sane netlink 'rdma link add' method for creating rxe devices and
     fixing the various unregistration race conditions in rxe's
     unregister flow

   - Allow looking up objects by an ID over netlink

   - Various reworking of the core to driver interface:
       - drivers should not assume umem SGLs are in PAGE_SIZE chunks
         (see the iteration sketch after this list)
       - ucontext is accessed via udata not other means
       - start to make the core code responsible for object memory
         allocation
       - drivers should convert struct device to struct ib_device via a
         helper
       - drivers have more tools to avoid use after unregister problems"
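
The umem item above is worth a concrete illustration: per the pull message, drivers may no longer assume each umem SGL entry covers exactly PAGE_SIZE, and should walk DMA addresses with the new page iterator instead. A minimal sketch of the pattern, assuming a hypothetical build_page_list() helper (the field names match the post-merge struct ib_umem):

#include <linux/scatterlist.h>
#include <rdma/ib_umem.h>

/* Walk a umem one DMA page at a time; an SGL entry may cover more
 * than one page, so per-entry PAGE_SIZE assumptions no longer hold. */
static void build_page_list(struct ib_umem *umem, u64 *pages)
{
	struct sg_dma_page_iter sg_iter;
	int i = 0;

	for_each_sg_dma_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0)
		pages[i++] = sg_page_iter_dma_address(&sg_iter);
}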

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (280 commits)
  net/mlx5: ODP support for XRC transport is not enabled by default in FW
  IB/hfi1: Close race condition on user context disable and close
  RDMA/umem: Revert broken 'off by one' fix
  RDMA/umem: minor bug fix in error handling path
  RDMA/hns: Use GFP_ATOMIC in hns_roce_v2_modify_qp
  cxgb4: kfree mhp after the debug print
  IB/rdmavt: Fix concurrency panics in QP post_send and modify to error
  IB/rdmavt: Fix loopback send with invalidate ordering
  IB/iser: Fix dma_nents type definition
  IB/mlx5: Set correct write permissions for implicit ODP MR
  bnxt_re: Clean cq for kernel consumers only
  RDMA/uverbs: Don't do double free of allocated PD
  RDMA: Handle ucontext allocations by IB/core
  RDMA/core: Fix a WARN() message
  bnxt_re: fix the regression due to changes in alloc_pbl
  IB/mlx4: Increase the timeout for CM cache
  IB/core: Abort page fault handler silently during owning process exit
  IB/mlx5: Validate correct PD before prefetch MR
  IB/mlx5: Protect against prefetch of invalid MR
  RDMA/uverbs: Store PR pointer before it is overwritten
  ...
torvalds committed Mar 9, 2019
2 parents 2901752 + fca22e7 commit a50243b
Showing 264 changed files with 16,710 additions and 5,014 deletions.

.clang-format: 2 additions, 0 deletions

@@ -240,6 +240,7 @@ ForEachMacros:
- 'for_each_set_bit'
- 'for_each_set_bit_from'
- 'for_each_sg'
- 'for_each_sg_dma_page'
- 'for_each_sg_page'
- 'for_each_sibling_event'
- '__for_each_thread'
@@ -360,6 +361,7 @@ ForEachMacros:
- 'radix_tree_for_each_slot'
- 'radix_tree_for_each_tagged'
- 'rbtree_postorder_for_each_entry_safe'
- 'rdma_for_each_port'
- 'resource_list_for_each_entry'
- 'resource_list_for_each_entry_safe'
- 'rhl_for_each_entry_rcu'

Documentation/infiniband/user_verbs.txt: 2 additions, 2 deletions

@@ -46,11 +46,11 @@ Memory pinning
I/O targets be kept resident at the same physical address. The
ib_uverbs module manages pinning and unpinning memory regions via
get_user_pages() and put_page() calls. It also accounts for the
amount of memory pinned in the process's locked_vm, and checks that
amount of memory pinned in the process's pinned_vm, and checks that
unprivileged processes do not exceed their RLIMIT_MEMLOCK limit.

Pages that are pinned multiple times are counted each time they are
pinned, so the value of locked_vm may be an overestimate of the
pinned, so the value of pinned_vm may be an overestimate of the
number of pages pinned by a process.

/dev files
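
This documentation fix tracks the "mm->pinned_vm to an atomic64" item from the pull message: pinned pages are charged to pinned_vm rather than locked_vm, and the counter is now an atomic64_t, so accounting no longer requires mmap_sem. A sketch of the resulting check-and-account pattern (simplified from the umem code; account_pinned_pages() is an illustrative name):

#include <linux/atomic.h>
#include <linux/capability.h>
#include <linux/mm.h>
#include <linux/sched/signal.h>	/* rlimit() */

static int account_pinned_pages(struct mm_struct *mm, unsigned long npages)
{
	unsigned long lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	s64 new_pinned;

	/* The atomic counter allows add-then-check without mmap_sem. */
	new_pinned = atomic64_add_return(npages, &mm->pinned_vm);
	if (new_pinned > lock_limit && !capable(CAP_IPC_LOCK)) {
		atomic64_sub(npages, &mm->pinned_vm);
		return -ENOMEM;
	}
	return 0;
}
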
drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c: 7 additions, 1 deletion

@@ -311,7 +311,13 @@ static dma_addr_t __vmw_piter_dma_addr(struct vmw_piter *viter)

static dma_addr_t __vmw_piter_sg_addr(struct vmw_piter *viter)
{
return sg_page_iter_dma_address(&viter->iter);
/*
* FIXME: This driver wrongly mixes DMA and CPU SG list iteration and
* needs revision. See
* https://lore.kernel.org/lkml/[email protected]/
*/
return sg_page_iter_dma_address(
container_of(&viter->iter, struct sg_dma_page_iter, base));
}


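
For context on the FIXME: this series splits scatterlist page walking into two iterator types, struct sg_page_iter for CPU pages and struct sg_dma_page_iter (which embeds an sg_page_iter as its base member) for DMA addresses, so type checking catches drivers that mix the two. vmwgfx keeps a single iterator for both uses, hence the container_of() cast above. A sketch of the intended separation, assuming a successfully mapped sg_table:

#include <linux/scatterlist.h>

static void walk_both_sides(struct sg_table *sgt)
{
	struct sg_page_iter piter;	/* CPU-side pages */
	struct sg_dma_page_iter diter;	/* DMA-side addresses */

	/* CPU walk: orig_nents entries, one struct page at a time. */
	for_each_sg_page(sgt->sgl, &piter, sgt->orig_nents, 0) {
		struct page *page = sg_page_iter_page(&piter);
		/* ... touch the CPU page ... */
		(void)page;
	}

	/* DMA walk: nents entries as returned by dma_map_sg(). */
	for_each_sg_dma_page(sgt->sgl, &diter, sgt->nents, 0) {
		dma_addr_t addr = sg_page_iter_dma_address(&diter);
		/* ... program addr into device page tables ... */
		(void)addr;
	}
}
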
drivers/infiniband/Kconfig: 7 additions, 8 deletions

@@ -89,6 +89,7 @@ config INFINIBAND_ADDR_TRANS_CONFIGFS
This allows the user to config the default GID type that the CM
uses for each device, when initiaing new connections.

if INFINIBAND_USER_ACCESS || !INFINIBAND_USER_ACCESS
source "drivers/infiniband/hw/mthca/Kconfig"
source "drivers/infiniband/hw/qib/Kconfig"
source "drivers/infiniband/hw/cxgb3/Kconfig"
@@ -101,6 +102,12 @@ source "drivers/infiniband/hw/ocrdma/Kconfig"
source "drivers/infiniband/hw/vmw_pvrdma/Kconfig"
source "drivers/infiniband/hw/usnic/Kconfig"
source "drivers/infiniband/hw/hns/Kconfig"
source "drivers/infiniband/hw/bnxt_re/Kconfig"
source "drivers/infiniband/hw/hfi1/Kconfig"
source "drivers/infiniband/hw/qedr/Kconfig"
source "drivers/infiniband/sw/rdmavt/Kconfig"
source "drivers/infiniband/sw/rxe/Kconfig"
endif

source "drivers/infiniband/ulp/ipoib/Kconfig"

@@ -111,13 +118,5 @@ source "drivers/infiniband/ulp/iser/Kconfig"
source "drivers/infiniband/ulp/isert/Kconfig"

source "drivers/infiniband/ulp/opa_vnic/Kconfig"
source "drivers/infiniband/sw/rdmavt/Kconfig"
source "drivers/infiniband/sw/rxe/Kconfig"

source "drivers/infiniband/hw/hfi1/Kconfig"

source "drivers/infiniband/hw/qedr/Kconfig"

source "drivers/infiniband/hw/bnxt_re/Kconfig"

endif # INFINIBAND
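
A note on the unusual guard at the top of this hunk: for a tristate symbol, the Kconfig expression INFINIBAND_USER_ACCESS || !INFINIBAND_USER_ACCESS evaluates to m when the symbol is m and to y otherwise, so every driver sourced inside the if/endif block is limited to m whenever user access is modular. That matters because, as the Makefile hunk below shows, umem.o moves from ib_core into ib_uverbs: a built-in driver must not call umem symbols that now live in a module.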

drivers/infiniband/core/Makefile: 2 additions, 2 deletions

@@ -15,8 +15,6 @@ ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \
nldev.o restrack.o

ib_core-$(CONFIG_SECURITY_INFINIBAND) += security.o
ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
ib_core-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o
ib_core-$(CONFIG_CGROUP_RDMA) += cgroup.o

ib_cm-y := cm.o
@@ -39,3 +37,5 @@ ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \
uverbs_std_types_flow_action.o uverbs_std_types_dm.o \
uverbs_std_types_mr.o uverbs_std_types_counters.o \
uverbs_uapi.o uverbs_std_types_device.o
ib_uverbs-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
ib_uverbs-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o

drivers/infiniband/core/cache.c: 46 additions, 72 deletions

@@ -185,7 +185,7 @@ EXPORT_SYMBOL(ib_cache_gid_parse_type_str);

static struct ib_gid_table *rdma_gid_table(struct ib_device *device, u8 port)
{
return device->cache.ports[port - rdma_start_port(device)].gid;
return device->port_data[port].cache.gid;
}

static bool is_gid_entry_free(const struct ib_gid_table_entry *entry)
@@ -547,21 +547,19 @@ int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
unsigned long mask;
int ret;

if (ib_dev->ops.get_netdev) {
idev = ib_dev->ops.get_netdev(ib_dev, port);
if (idev && attr->ndev != idev) {
union ib_gid default_gid;
idev = ib_device_get_netdev(ib_dev, port);
if (idev && attr->ndev != idev) {
union ib_gid default_gid;

/* Adding default GIDs in not permitted */
make_default_gid(idev, &default_gid);
if (!memcmp(gid, &default_gid, sizeof(*gid))) {
dev_put(idev);
return -EPERM;
}
}
if (idev)
/* Adding default GIDs is not permitted */
make_default_gid(idev, &default_gid);
if (!memcmp(gid, &default_gid, sizeof(*gid))) {
dev_put(idev);
return -EPERM;
}
}
if (idev)
dev_put(idev);

mask = GID_ATTR_FIND_MASK_GID |
GID_ATTR_FIND_MASK_GID_TYPE |
@@ -765,7 +763,7 @@ static struct ib_gid_table *alloc_gid_table(int sz)
return NULL;
}

static void release_gid_table(struct ib_device *device, u8 port,
static void release_gid_table(struct ib_device *device,
struct ib_gid_table *table)
{
bool leak = false;
@@ -863,31 +861,27 @@ static void gid_table_reserve_default(struct ib_device *ib_dev, u8 port,

static void gid_table_release_one(struct ib_device *ib_dev)
{
struct ib_gid_table *table;
u8 port;
unsigned int p;

for (port = 0; port < ib_dev->phys_port_cnt; port++) {
table = ib_dev->cache.ports[port].gid;
release_gid_table(ib_dev, port, table);
ib_dev->cache.ports[port].gid = NULL;
rdma_for_each_port (ib_dev, p) {
release_gid_table(ib_dev, ib_dev->port_data[p].cache.gid);
ib_dev->port_data[p].cache.gid = NULL;
}
}

static int _gid_table_setup_one(struct ib_device *ib_dev)
{
u8 port;
struct ib_gid_table *table;
unsigned int rdma_port;

for (port = 0; port < ib_dev->phys_port_cnt; port++) {
u8 rdma_port = port + rdma_start_port(ib_dev);

table = alloc_gid_table(
ib_dev->port_immutable[rdma_port].gid_tbl_len);
rdma_for_each_port (ib_dev, rdma_port) {
table = alloc_gid_table(
ib_dev->port_data[rdma_port].immutable.gid_tbl_len);
if (!table)
goto rollback_table_setup;

gid_table_reserve_default(ib_dev, rdma_port, table);
ib_dev->cache.ports[port].gid = table;
ib_dev->port_data[rdma_port].cache.gid = table;
}
return 0;

@@ -898,14 +892,11 @@ static int _gid_table_setup_one(struct ib_device *ib_dev)

static void gid_table_cleanup_one(struct ib_device *ib_dev)
{
struct ib_gid_table *table;
u8 port;
unsigned int p;

for (port = 0; port < ib_dev->phys_port_cnt; port++) {
table = ib_dev->cache.ports[port].gid;
cleanup_gid_table_port(ib_dev, port + rdma_start_port(ib_dev),
table);
}
rdma_for_each_port (ib_dev, p)
cleanup_gid_table_port(ib_dev, p,
ib_dev->port_data[p].cache.gid);
}

static int gid_table_setup_one(struct ib_device *ib_dev)
@@ -983,17 +974,17 @@ const struct ib_gid_attr *rdma_find_gid(struct ib_device *device,
unsigned long mask = GID_ATTR_FIND_MASK_GID |
GID_ATTR_FIND_MASK_GID_TYPE;
struct ib_gid_attr gid_attr_val = {.ndev = ndev, .gid_type = gid_type};
u8 p;
unsigned int p;

if (ndev)
mask |= GID_ATTR_FIND_MASK_NETDEV;

for (p = 0; p < device->phys_port_cnt; p++) {
rdma_for_each_port(device, p) {
struct ib_gid_table *table;
unsigned long flags;
int index;

table = device->cache.ports[p].gid;
table = device->port_data[p].cache.gid;
read_lock_irqsave(&table->rwlock, flags);
index = find_gid(table, gid, &gid_attr_val, false, mask, NULL);
if (index >= 0) {
@@ -1025,7 +1016,7 @@ int ib_get_cached_pkey(struct ib_device *device,

read_lock_irqsave(&device->cache.lock, flags);

cache = device->cache.ports[port_num - rdma_start_port(device)].pkey;
cache = device->port_data[port_num].cache.pkey;

if (index < 0 || index >= cache->table_len)
ret = -EINVAL;
@@ -1043,14 +1034,12 @@ int ib_get_cached_subnet_prefix(struct ib_device *device,
u64 *sn_pfx)
{
unsigned long flags;
int p;

if (!rdma_is_port_valid(device, port_num))
return -EINVAL;

p = port_num - rdma_start_port(device);
read_lock_irqsave(&device->cache.lock, flags);
*sn_pfx = device->cache.ports[p].subnet_prefix;
*sn_pfx = device->port_data[port_num].cache.subnet_prefix;
read_unlock_irqrestore(&device->cache.lock, flags);

return 0;
@@ -1073,7 +1062,7 @@ int ib_find_cached_pkey(struct ib_device *device,

read_lock_irqsave(&device->cache.lock, flags);

cache = device->cache.ports[port_num - rdma_start_port(device)].pkey;
cache = device->port_data[port_num].cache.pkey;

*index = -1;

@@ -1113,7 +1102,7 @@ int ib_find_exact_cached_pkey(struct ib_device *device,

read_lock_irqsave(&device->cache.lock, flags);

cache = device->cache.ports[port_num - rdma_start_port(device)].pkey;
cache = device->port_data[port_num].cache.pkey;

*index = -1;

@@ -1141,7 +1130,7 @@ int ib_get_cached_lmc(struct ib_device *device,
return -EINVAL;

read_lock_irqsave(&device->cache.lock, flags);
*lmc = device->cache.ports[port_num - rdma_start_port(device)].lmc;
*lmc = device->port_data[port_num].cache.lmc;
read_unlock_irqrestore(&device->cache.lock, flags);

return ret;
@@ -1159,8 +1148,7 @@ int ib_get_cached_port_state(struct ib_device *device,
return -EINVAL;

read_lock_irqsave(&device->cache.lock, flags);
*port_state = device->cache.ports[port_num
- rdma_start_port(device)].port_state;
*port_state = device->port_data[port_num].cache.port_state;
read_unlock_irqrestore(&device->cache.lock, flags);

return ret;
@@ -1361,16 +1349,13 @@ static void ib_cache_update(struct ib_device *device,

write_lock_irq(&device->cache.lock);

old_pkey_cache = device->cache.ports[port -
rdma_start_port(device)].pkey;
old_pkey_cache = device->port_data[port].cache.pkey;

device->cache.ports[port - rdma_start_port(device)].pkey = pkey_cache;
device->cache.ports[port - rdma_start_port(device)].lmc = tprops->lmc;
device->cache.ports[port - rdma_start_port(device)].port_state =
tprops->state;
device->port_data[port].cache.pkey = pkey_cache;
device->port_data[port].cache.lmc = tprops->lmc;
device->port_data[port].cache.port_state = tprops->state;

device->cache.ports[port - rdma_start_port(device)].subnet_prefix =
tprops->subnet_prefix;
device->port_data[port].cache.subnet_prefix = tprops->subnet_prefix;
write_unlock_irq(&device->cache.lock);

if (enforce_security)
@@ -1428,27 +1413,17 @@ static void ib_cache_event(struct ib_event_handler *handler,

int ib_cache_setup_one(struct ib_device *device)
{
int p;
unsigned int p;
int err;

rwlock_init(&device->cache.lock);

device->cache.ports =
kcalloc(rdma_end_port(device) - rdma_start_port(device) + 1,
sizeof(*device->cache.ports),
GFP_KERNEL);
if (!device->cache.ports)
return -ENOMEM;

err = gid_table_setup_one(device);
if (err) {
kfree(device->cache.ports);
device->cache.ports = NULL;
if (err)
return err;
}

for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p)
ib_cache_update(device, p + rdma_start_port(device), true);
rdma_for_each_port (device, p)
ib_cache_update(device, p, true);

INIT_IB_EVENT_HANDLER(&device->cache.event_handler,
device, ib_cache_event);
@@ -1458,19 +1433,18 @@ int ib_cache_setup_one(struct ib_device *device)

void ib_cache_release_one(struct ib_device *device)
{
int p;
unsigned int p;

/*
* The release function frees all the cache elements.
* This function should be called as part of freeing
* all the device's resources when the cache could no
* longer be accessed.
*/
for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p)
kfree(device->cache.ports[p].pkey);
rdma_for_each_port (device, p)
kfree(device->port_data[p].cache.pkey);

gid_table_release_one(device);
kfree(device->cache.ports);
}

void ib_cache_cleanup_one(struct ib_device *device)
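
The pattern repeated throughout cache.c: the old cache.ports[] array was zero-based, so every lookup rebased the 1-based port number with rdma_start_port(), a recurring source of off-by-one bugs; port_data[] is indexed by the port number itself and walked with the new rdma_for_each_port() macro. A small sketch of new-style iteration using the public accessor this file implements (sum_lmc() is illustrative, not from the patch):

#include <rdma/ib_verbs.h>
#include <rdma/ib_cache.h>

/* Visit every valid port; the macro hides the 1-based numbering. */
static int sum_lmc(struct ib_device *dev)
{
	unsigned int port;
	int total = 0;
	u8 lmc;

	rdma_for_each_port(dev, port)
		if (!ib_get_cached_lmc(dev, port, &lmc))
			total += lmc;
	return total;
}
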
drivers/infiniband/core/cgroup.c: 2 additions, 3 deletions

@@ -21,12 +21,11 @@
* Register with the rdma cgroup. Should be called before
* exposing rdma device to user space applications to avoid
* resource accounting leak.
* Returns 0 on success or otherwise failure code.
*/
int ib_device_register_rdmacg(struct ib_device *device)
void ib_device_register_rdmacg(struct ib_device *device)
{
device->cg_device.name = device->name;
return rdmacg_register_device(&device->cg_device);
rdmacg_register_device(&device->cg_device);
}

/**
drivers/infiniband/core/cm.c: 1 addition, 2 deletions

@@ -4052,8 +4052,7 @@ static void cm_recv_handler(struct ib_mad_agent *mad_agent,
atomic_long_inc(&port->counter_group[CM_RECV].
counter[attr_id - CM_ATTR_ID_OFFSET]);

work = kmalloc(sizeof(*work) + sizeof(struct sa_path_rec) * paths,
GFP_KERNEL);
work = kmalloc(struct_size(work, path, paths), GFP_KERNEL);
if (!work) {
ib_free_recv_mad(mad_recv_wc);
return;
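
The cm.c change swaps an open-coded allocation size for struct_size(), which computes the size of a struct with a trailing flexible array member and saturates to SIZE_MAX on overflow instead of wrapping, so kmalloc() fails cleanly. A sketch of the idiom (cm_work_example abbreviates the real struct cm_work):

#include <linux/list.h>
#include <linux/overflow.h>
#include <linux/slab.h>
#include <rdma/ib_sa.h>

struct cm_work_example {
	struct list_head list;
	struct sa_path_rec path[];	/* flexible array member */
};

static struct cm_work_example *alloc_work(unsigned int paths)
{
	struct cm_work_example *work;

	/* struct_size(work, path, paths) ==
	 * sizeof(*work) + paths * sizeof(work->path[0]),
	 * saturating rather than wrapping on overflow. */
	work = kmalloc(struct_size(work, path, paths), GFP_KERNEL);
	return work;
}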