Skip to content

Commit

Permalink
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/gi…
Browse files Browse the repository at this point in the history
…t/rdma/rdma

Pull rdma updates from Jason Gunthorpe:
 "This has been a smaller cycle than normal. One new driver was
  accepted, which is unusual, and at least one more driver remains in
  review on the list.

  Summary:

   - Driver fixes for hns, hfi1, nes, rxe, i40iw, mlx5, cxgb4,
     vmw_pvrdma

   - Many patches from MatthewW converting radix tree and IDR users to
     use xarray

   - Introduction of tracepoints to the MAD layer

   - Build large SGLs at the start for DMA mapping and get the driver to
     split them

   - General cleanup of SGL handling code throughout the subsystem

   - Support for restricting RDMA devices to net namespaces for
     containers

   - Progress to remove object allocation boilerplate code from drivers

   - Change in how the mlx5 driver shows representor ports linked to VFs

   - mlx5 uapi feature to access the on chip SW ICM memory

   - Add a new driver for 'EFA'. This is HW that supports user space
     packet processing through QPs in Amazon's cloud"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (186 commits)
  RDMA/ipoib: Allow user space differentiate between valid dev_port
  IB/core, ipoib: Do not overreact to SM LID change event
  RDMA/device: Don't fire uevent before device is fully initialized
  lib/scatterlist: Remove leftover from sg_page_iter comment
  RDMA/efa: Add driver to Kconfig/Makefile
  RDMA/efa: Add the efa module
  RDMA/efa: Add EFA verbs implementation
  RDMA/efa: Add common command handlers
  RDMA/efa: Implement functions that submit and complete admin commands
  RDMA/efa: Add the ABI definitions
  RDMA/efa: Add the com service API definitions
  RDMA/efa: Add the efa_com.h file
  RDMA/efa: Add the efa.h header file
  RDMA/efa: Add EFA device definitions
  RDMA: Add EFA related definitions
  RDMA/umem: Remove hugetlb flag
  RDMA/bnxt_re: Use core helpers to get aligned DMA address
  RDMA/i40iw: Use core helpers to get aligned DMA address within a supported page size
  RDMA/verbs: Add a DMA iterator to return aligned contiguous memory blocks
  RDMA/umem: Add API to find best driver supported page size in an MR
  ...
  • Loading branch information
torvalds committed May 9, 2019
2 parents 055128e + b79656e commit dce45af
Show file tree
Hide file tree
Showing 251 changed files with 12,535 additions and 4,571 deletions.
17 changes: 15 additions & 2 deletions MAINTAINERS
Original file line number Diff line number Diff line change
Expand Up @@ -745,6 +745,15 @@ S: Supported
F: Documentation/networking/device_drivers/amazon/ena.txt
F: drivers/net/ethernet/amazon/

AMAZON RDMA EFA DRIVER
M: Gal Pressman <[email protected]>
R: Yossi Leybovich <[email protected]>
L: linux-rdma@vger.kernel.org
Q: https://patchwork.kernel.org/project/linux-rdma/list/
S: Supported
F: drivers/infiniband/hw/efa/
F: include/uapi/rdma/efa-abi.h

AMD CRYPTOGRAPHIC COPROCESSOR (CCP) DRIVER
M: Tom Lendacky <[email protected]>
M: Gary Hook <[email protected]>
Expand Down Expand Up @@ -4279,7 +4288,7 @@ S: Supported
F: drivers/scsi/cxgbi/cxgb3i

CXGB3 IWARP RNIC DRIVER (IW_CXGB3)
M: Steve Wise <swise@chelsio.com>
M: Potnuri Bharat Teja <bharat@chelsio.com>
L: [email protected]
W: http://www.openfabrics.org
S: Supported
Expand Down Expand Up @@ -4308,7 +4317,7 @@ S: Supported
F: drivers/scsi/cxgbi/cxgb4i

CXGB4 IWARP RNIC DRIVER (IW_CXGB4)
M: Steve Wise <swise@chelsio.com>
M: Potnuri Bharat Teja <bharat@chelsio.com>
L: [email protected]
W: http://www.openfabrics.org
S: Supported
Expand Down Expand Up @@ -7727,6 +7736,10 @@ F: drivers/infiniband/
F: include/uapi/linux/if_infiniband.h
F: include/uapi/rdma/
F: include/rdma/
F: include/trace/events/ib_mad.h
F: include/trace/events/ib_umad.h
F: samples/bpf/ibumad_kern.c
F: samples/bpf/ibumad_user.c

INGENIC JZ4780 DMA Driver
M: Zubair Lutfullah Kakakhel <[email protected]>
Expand Down
1 change: 1 addition & 0 deletions drivers/infiniband/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ source "drivers/infiniband/hw/mthca/Kconfig"
source "drivers/infiniband/hw/qib/Kconfig"
source "drivers/infiniband/hw/cxgb3/Kconfig"
source "drivers/infiniband/hw/cxgb4/Kconfig"
source "drivers/infiniband/hw/efa/Kconfig"
source "drivers/infiniband/hw/i40iw/Kconfig"
source "drivers/infiniband/hw/mlx4/Kconfig"
source "drivers/infiniband/hw/mlx5/Kconfig"
Expand Down
1 change: 1 addition & 0 deletions drivers/infiniband/core/addr.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
#include <net/ipv6_stubs.h>
#include <net/ip6_route.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_sa.h>
#include <rdma/ib.h>
#include <rdma/rdma_netlink.h>
Expand Down
145 changes: 115 additions & 30 deletions drivers/infiniband/core/cache.c
Original file line number Diff line number Diff line change
Expand Up @@ -78,11 +78,22 @@ enum gid_table_entry_state {
GID_TABLE_ENTRY_PENDING_DEL = 3,
};

/*
 * Separately allocated holder for the netdev reference of a GID entry.
 * It is handed to call_rcu() with put_gid_ndev() as the callback, which
 * drops the netdev reference and frees this structure.
 */
struct roce_gid_ndev_storage {
/* RCU callback head passed to call_rcu() */
struct rcu_head rcu_head;
/* netdev whose reference is released with dev_put() in the callback */
struct net_device *ndev;
};

/*
 * One entry of a GID table: a reference-counted copy of the GID attribute
 * together with the bookkeeping needed to free it.
 */
struct ib_gid_table_entry {
/* Reference count; initialized with kref_init() when the entry is allocated */
struct kref kref;
/* Work item queued on ib_wq to free the entry; runs free_gid_work */
struct work_struct del_work;
/* Copy of the caller-supplied GID attribute */
struct ib_gid_attr attr;
/* Opaque pointer handed to the driver's del_gid() op */
void *context;
/* Store the ndev pointer to release reference later on in
 * call_rcu context because by that time gid_table_entry
 * and attr might be already freed. So keep a copy of it.
 * ndev_storage is freed by rcu callback.
 */
struct roce_gid_ndev_storage *ndev_storage;
/* Entry state; set to GID_TABLE_ENTRY_INVALID on allocation */
enum gid_table_entry_state state;
};

Expand Down Expand Up @@ -206,6 +217,20 @@ static void schedule_free_gid(struct kref *kref)
queue_work(ib_wq, &entry->del_work);
}

/*
 * RCU callback that releases the netdev reference kept in a
 * roce_gid_ndev_storage and then frees the storage itself.
 */
static void put_gid_ndev(struct rcu_head *head)
{
	struct roce_gid_ndev_storage *ndev_storage;

	ndev_storage = container_of(head, struct roce_gid_ndev_storage,
				    rcu_head);

	/* Storage is only expected to exist when it carries a netdev. */
	WARN_ON(!ndev_storage->ndev);

	/* By the time this callback runs, every caller that was working
	 * on gid_attr->ndev has finished with the netdev, so it is safe
	 * to drop the reference now.
	 */
	dev_put(ndev_storage->ndev);
	kfree(ndev_storage);
}

static void free_gid_entry_locked(struct ib_gid_table_entry *entry)
{
struct ib_device *device = entry->attr.device;
Expand All @@ -228,8 +253,8 @@ static void free_gid_entry_locked(struct ib_gid_table_entry *entry)
/* Now this index is ready to be allocated */
write_unlock_irq(&table->rwlock);

if (entry->attr.ndev)
dev_put(entry->attr.ndev);
if (entry->ndev_storage)
call_rcu(&entry->ndev_storage->rcu_head, put_gid_ndev);
kfree(entry);
}

Expand Down Expand Up @@ -266,14 +291,25 @@ static struct ib_gid_table_entry *
alloc_gid_entry(const struct ib_gid_attr *attr)
{
struct ib_gid_table_entry *entry;
struct net_device *ndev;

entry = kzalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
return NULL;

ndev = rcu_dereference_protected(attr->ndev, 1);
if (ndev) {
entry->ndev_storage = kzalloc(sizeof(*entry->ndev_storage),
GFP_KERNEL);
if (!entry->ndev_storage) {
kfree(entry);
return NULL;
}
dev_hold(ndev);
entry->ndev_storage->ndev = ndev;
}
kref_init(&entry->kref);
memcpy(&entry->attr, attr, sizeof(*attr));
if (entry->attr.ndev)
dev_hold(entry->attr.ndev);
INIT_WORK(&entry->del_work, free_gid_work);
entry->state = GID_TABLE_ENTRY_INVALID;
return entry;
Expand Down Expand Up @@ -343,6 +379,7 @@ static int add_roce_gid(struct ib_gid_table_entry *entry)
static void del_gid(struct ib_device *ib_dev, u8 port,
struct ib_gid_table *table, int ix)
{
struct roce_gid_ndev_storage *ndev_storage;
struct ib_gid_table_entry *entry;

lockdep_assert_held(&table->lock);
Expand All @@ -360,6 +397,13 @@ static void del_gid(struct ib_device *ib_dev, u8 port,
table->data_vec[ix] = NULL;
write_unlock_irq(&table->rwlock);

ndev_storage = entry->ndev_storage;
if (ndev_storage) {
entry->ndev_storage = NULL;
rcu_assign_pointer(entry->attr.ndev, NULL);
call_rcu(&ndev_storage->rcu_head, put_gid_ndev);
}

if (rdma_cap_roce_gid_table(ib_dev, port))
ib_dev->ops.del_gid(&entry->attr, &entry->context);

Expand Down Expand Up @@ -543,30 +587,11 @@ static int __ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
union ib_gid *gid, struct ib_gid_attr *attr)
{
struct net_device *idev;
unsigned long mask;
int ret;

idev = ib_device_get_netdev(ib_dev, port);
if (idev && attr->ndev != idev) {
union ib_gid default_gid;

/* Adding default GIDs is not permitted */
make_default_gid(idev, &default_gid);
if (!memcmp(gid, &default_gid, sizeof(*gid))) {
dev_put(idev);
return -EPERM;
}
}
if (idev)
dev_put(idev);

mask = GID_ATTR_FIND_MASK_GID |
GID_ATTR_FIND_MASK_GID_TYPE |
GID_ATTR_FIND_MASK_NETDEV;
unsigned long mask = GID_ATTR_FIND_MASK_GID |
GID_ATTR_FIND_MASK_GID_TYPE |
GID_ATTR_FIND_MASK_NETDEV;

ret = __ib_cache_gid_add(ib_dev, port, gid, attr, mask, false);
return ret;
return __ib_cache_gid_add(ib_dev, port, gid, attr, mask, false);
}

static int
Expand Down Expand Up @@ -1263,11 +1288,72 @@ struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr)

read_lock_irqsave(&table->rwlock, flags);
valid = is_gid_entry_valid(table->data_vec[attr->index]);
if (valid && attr->ndev && (READ_ONCE(attr->ndev->flags) & IFF_UP))
ndev = attr->ndev;
if (valid) {
ndev = rcu_dereference(attr->ndev);
if (!ndev ||
(ndev && ((READ_ONCE(ndev->flags) & IFF_UP) == 0)))
ndev = ERR_PTR(-ENODEV);
}
read_unlock_irqrestore(&table->rwlock, flags);
return ndev;
}
EXPORT_SYMBOL(rdma_read_gid_attr_ndev_rcu);

/*
 * Lower-device walk callback: if @lower_dev is a vlan device, store its
 * vlan id through @data (a u16 pointer supplied by the caller).
 */
static int get_lower_dev_vlan(struct net_device *lower_dev, void *data)
{
	if (is_vlan_dev(lower_dev)) {
		u16 *vid_out = data;

		*vid_out = vlan_dev_vlan_id(lower_dev);
	}

	/* Only the first level lower device is of interest, so always
	 * return 1 to stop the walk from descending to further levels.
	 */
	return 1;
}

/**
 * rdma_read_gid_l2_fields - Read the vlan ID and source MAC address
 *			     of a GID entry.
 *
 * @attr:	GID attribute pointer whose L2 fields are to be read
 * @vlan_id:	Optional. If non-NULL, filled with the vlan id of the GID
 *		entry's netdev, or left at 0xffff when no vlan device is
 *		found.
 * @smac:	Optional. If non-NULL, filled with the MAC address of the
 *		GID entry's netdev.
 *
 * Return: 0 on success, or -ENODEV if the GID entry has no netdev.
 */
int rdma_read_gid_l2_fields(const struct ib_gid_attr *attr,
			    u16 *vlan_id, u8 *smac)
{
	struct net_device *ndev;

	rcu_read_lock();
	ndev = rcu_dereference(attr->ndev);
	if (!ndev) {
		rcu_read_unlock();
		return -ENODEV;
	}
	if (smac)
		ether_addr_copy(smac, ndev->dev_addr);
	if (vlan_id) {
		*vlan_id = 0xffff;
		if (is_vlan_dev(ndev)) {
			*vlan_id = vlan_dev_vlan_id(ndev);
		} else {
			/* If the netdev is an upper device and its lower
			 * device is a vlan device, use the vlan id of the
			 * lower vlan device for this gid entry.
			 *
			 * Walk from the pointer obtained through
			 * rcu_dereference() above rather than loading
			 * attr->ndev a second time: the field can be
			 * cleared concurrently, so a fresh read may
			 * observe NULL.
			 */
			netdev_walk_all_lower_dev_rcu(ndev,
						      get_lower_dev_vlan,
						      vlan_id);
		}
	}
	rcu_read_unlock();
	return 0;
}
EXPORT_SYMBOL(rdma_read_gid_l2_fields);

static int config_non_roce_gid_cache(struct ib_device *device,
u8 port, int gid_tbl_len)
Expand Down Expand Up @@ -1392,7 +1478,6 @@ static void ib_cache_event(struct ib_event_handler *handler,
event->event == IB_EVENT_PORT_ACTIVE ||
event->event == IB_EVENT_LID_CHANGE ||
event->event == IB_EVENT_PKEY_CHANGE ||
event->event == IB_EVENT_SM_CHANGE ||
event->event == IB_EVENT_CLIENT_REREGISTER ||
event->event == IB_EVENT_GID_CHANGE) {
work = kmalloc(sizeof *work, GFP_ATOMIC);
Expand Down
Loading

0 comments on commit dce45af

Please sign in to comment.