Skip to content

Commit

Permalink
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/gi…
Browse files Browse the repository at this point in the history
…t/rdma/rdma

Pull rdma updates from Jason Gunthorpe:
 "This cycle we got a new RDMA driver "ERDMA" for the Alibaba cloud
  environment. Otherwise the changes are dominated by rxe fixes.

  There is another RDMA driver on the list that might get merged next
  cycle, 'MANA' for the Azure cloud environment.

  Summary:

   - Bug fixes and small features for irdma, hns, siw, qedr, hfi1, mlx5

   - General spelling/grammer fixes

   - rdma cm can follow changes in neighbours for control packets

   - Significant amounts of rxe fixes and spec compliance changes

   - Use the modern NAPI API

   - Use the bitmap API instead of open coding

   - Performance improvements for rtrs

   - Add the ERDMA driver for Alibaba cloud

   - Fix a use after free bug in SRP"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (99 commits)
  RDMA/ib_srpt: Unify checking rdma_cm_id condition in srpt_cm_req_recv()
  RDMA/rxe: Fix error unwind in rxe_create_qp()
  RDMA/mlx5: Add missing check for return value in get namespace flow
  RDMA/rxe: Split qp state for requester and completer
  RDMA/rxe: Generate error completion for error requester QP state
  RDMA/rxe: Update wqe_index for each wqe error completion
  RDMA/srpt: Fix a use-after-free
  RDMA/srpt: Introduce a reference count in struct srpt_device
  RDMA/srpt: Duplicate port name members
  IB/qib: Fix repeated "in" within comments
  RDMA/erdma: Add driver to kernel build environment
  RDMA/erdma: Add the ABI definitions
  RDMA/erdma: Add the erdma module
  RDMA/erdma: Add connection management (CM) support
  RDMA/erdma: Add verbs implementation
  RDMA/erdma: Add verbs header file
  RDMA/erdma: Add event queue implementation
  RDMA/erdma: Add cmdq implementation
  RDMA/erdma: Add main include file
  RDMA/erdma: Add the hardware related definitions
  ...
  • Loading branch information
torvalds committed Aug 5, 2022
2 parents 746fc76 + 6b822d4 commit e495274
Show file tree
Hide file tree
Showing 95 changed files with 8,364 additions and 1,003 deletions.
8 changes: 8 additions & 0 deletions MAINTAINERS
Original file line number Diff line number Diff line change
Expand Up @@ -736,6 +736,14 @@ S: Maintained
F: Documentation/i2c/busses/i2c-ali1563.rst
F: drivers/i2c/busses/i2c-ali1563.c

ALIBABA ELASTIC RDMA DRIVER
M: Cheng Xu <[email protected]>
M: Kai Shen <[email protected]>
L: [email protected]
S: Supported
F: drivers/infiniband/hw/erdma
F: include/uapi/rdma/erdma-abi.h

ALIENWARE WMI DRIVER
L: [email protected]
S: Maintained
Expand Down
15 changes: 8 additions & 7 deletions drivers/infiniband/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -78,20 +78,21 @@ config INFINIBAND_VIRT_DMA
def_bool !HIGHMEM

if INFINIBAND_USER_ACCESS || !INFINIBAND_USER_ACCESS
source "drivers/infiniband/hw/mthca/Kconfig"
source "drivers/infiniband/hw/qib/Kconfig"
source "drivers/infiniband/hw/bnxt_re/Kconfig"
source "drivers/infiniband/hw/cxgb4/Kconfig"
source "drivers/infiniband/hw/efa/Kconfig"
source "drivers/infiniband/hw/erdma/Kconfig"
source "drivers/infiniband/hw/hfi1/Kconfig"
source "drivers/infiniband/hw/hns/Kconfig"
source "drivers/infiniband/hw/irdma/Kconfig"
source "drivers/infiniband/hw/mlx4/Kconfig"
source "drivers/infiniband/hw/mlx5/Kconfig"
source "drivers/infiniband/hw/mthca/Kconfig"
source "drivers/infiniband/hw/ocrdma/Kconfig"
source "drivers/infiniband/hw/vmw_pvrdma/Kconfig"
source "drivers/infiniband/hw/usnic/Kconfig"
source "drivers/infiniband/hw/hns/Kconfig"
source "drivers/infiniband/hw/bnxt_re/Kconfig"
source "drivers/infiniband/hw/hfi1/Kconfig"
source "drivers/infiniband/hw/qedr/Kconfig"
source "drivers/infiniband/hw/qib/Kconfig"
source "drivers/infiniband/hw/usnic/Kconfig"
source "drivers/infiniband/hw/vmw_pvrdma/Kconfig"
source "drivers/infiniband/sw/rdmavt/Kconfig"
source "drivers/infiniband/sw/rxe/Kconfig"
source "drivers/infiniband/sw/siw/Kconfig"
Expand Down
230 changes: 218 additions & 12 deletions drivers/infiniband/core/cma.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <linux/in6.h>
#include <linux/mutex.h>
#include <linux/random.h>
#include <linux/rbtree.h>
#include <linux/igmp.h>
#include <linux/xarray.h>
#include <linux/inetdevice.h>
Expand All @@ -20,6 +21,7 @@

#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/netevent.h>
#include <net/tcp.h>
#include <net/ipv6.h>
#include <net/ip_fib.h>
Expand Down Expand Up @@ -168,6 +170,9 @@ static struct ib_sa_client sa_client;
static LIST_HEAD(dev_list);
static LIST_HEAD(listen_any_list);
static DEFINE_MUTEX(lock);
static struct rb_root id_table = RB_ROOT;
/* Serialize operations of id_table tree */
static DEFINE_SPINLOCK(id_table_lock);
static struct workqueue_struct *cma_wq;
static unsigned int cma_pernet_id;

Expand Down Expand Up @@ -202,6 +207,11 @@ struct xarray *cma_pernet_xa(struct net *net, enum rdma_ucm_port_space ps)
}
}

struct id_table_entry {
struct list_head id_list;
struct rb_node rb_node;
};

struct cma_device {
struct list_head list;
struct ib_device *device;
Expand Down Expand Up @@ -420,11 +430,21 @@ static inline u8 cma_get_ip_ver(const struct cma_hdr *hdr)
return hdr->ip_version >> 4;
}

static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
static void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
{
hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
}

static struct sockaddr *cma_src_addr(struct rdma_id_private *id_priv)
{
return (struct sockaddr *)&id_priv->id.route.addr.src_addr;
}

static inline struct sockaddr *cma_dst_addr(struct rdma_id_private *id_priv)
{
return (struct sockaddr *)&id_priv->id.route.addr.dst_addr;
}

static int cma_igmp_send(struct net_device *ndev, union ib_gid *mgid, bool join)
{
struct in_device *in_dev = NULL;
Expand All @@ -445,6 +465,117 @@ static int cma_igmp_send(struct net_device *ndev, union ib_gid *mgid, bool join)
return (in_dev) ? 0 : -ENODEV;
}

static int compare_netdev_and_ip(int ifindex_a, struct sockaddr *sa,
struct id_table_entry *entry_b)
{
struct rdma_id_private *id_priv = list_first_entry(
&entry_b->id_list, struct rdma_id_private, id_list_entry);
int ifindex_b = id_priv->id.route.addr.dev_addr.bound_dev_if;
struct sockaddr *sb = cma_dst_addr(id_priv);

if (ifindex_a != ifindex_b)
return (ifindex_a > ifindex_b) ? 1 : -1;

if (sa->sa_family != sb->sa_family)
return sa->sa_family - sb->sa_family;

if (sa->sa_family == AF_INET)
return memcmp((char *)&((struct sockaddr_in *)sa)->sin_addr,
(char *)&((struct sockaddr_in *)sb)->sin_addr,
sizeof(((struct sockaddr_in *)sa)->sin_addr));

return ipv6_addr_cmp(&((struct sockaddr_in6 *)sa)->sin6_addr,
&((struct sockaddr_in6 *)sb)->sin6_addr);
}

static int cma_add_id_to_tree(struct rdma_id_private *node_id_priv)
{
struct rb_node **new, *parent = NULL;
struct id_table_entry *this, *node;
unsigned long flags;
int result;

node = kzalloc(sizeof(*node), GFP_KERNEL);
if (!node)
return -ENOMEM;

spin_lock_irqsave(&id_table_lock, flags);
new = &id_table.rb_node;
while (*new) {
this = container_of(*new, struct id_table_entry, rb_node);
result = compare_netdev_and_ip(
node_id_priv->id.route.addr.dev_addr.bound_dev_if,
cma_dst_addr(node_id_priv), this);

parent = *new;
if (result < 0)
new = &((*new)->rb_left);
else if (result > 0)
new = &((*new)->rb_right);
else {
list_add_tail(&node_id_priv->id_list_entry,
&this->id_list);
kfree(node);
goto unlock;
}
}

INIT_LIST_HEAD(&node->id_list);
list_add_tail(&node_id_priv->id_list_entry, &node->id_list);

rb_link_node(&node->rb_node, parent, new);
rb_insert_color(&node->rb_node, &id_table);

unlock:
spin_unlock_irqrestore(&id_table_lock, flags);
return 0;
}

static struct id_table_entry *
node_from_ndev_ip(struct rb_root *root, int ifindex, struct sockaddr *sa)
{
struct rb_node *node = root->rb_node;
struct id_table_entry *data;
int result;

while (node) {
data = container_of(node, struct id_table_entry, rb_node);
result = compare_netdev_and_ip(ifindex, sa, data);
if (result < 0)
node = node->rb_left;
else if (result > 0)
node = node->rb_right;
else
return data;
}

return NULL;
}

static void cma_remove_id_from_tree(struct rdma_id_private *id_priv)
{
struct id_table_entry *data;
unsigned long flags;

spin_lock_irqsave(&id_table_lock, flags);
if (list_empty(&id_priv->id_list_entry))
goto out;

data = node_from_ndev_ip(&id_table,
id_priv->id.route.addr.dev_addr.bound_dev_if,
cma_dst_addr(id_priv));
if (!data)
goto out;

list_del_init(&id_priv->id_list_entry);
if (list_empty(&data->id_list)) {
rb_erase(&data->rb_node, &id_table);
kfree(data);
}
out:
spin_unlock_irqrestore(&id_table_lock, flags);
}

static void _cma_attach_to_dev(struct rdma_id_private *id_priv,
struct cma_device *cma_dev)
{
Expand Down Expand Up @@ -481,16 +612,6 @@ static void cma_release_dev(struct rdma_id_private *id_priv)
mutex_unlock(&lock);
}

static inline struct sockaddr *cma_src_addr(struct rdma_id_private *id_priv)
{
return (struct sockaddr *) &id_priv->id.route.addr.src_addr;
}

static inline struct sockaddr *cma_dst_addr(struct rdma_id_private *id_priv)
{
return (struct sockaddr *) &id_priv->id.route.addr.dst_addr;
}

static inline unsigned short cma_family(struct rdma_id_private *id_priv)
{
return id_priv->id.route.addr.src_addr.ss_family;
Expand Down Expand Up @@ -861,6 +982,7 @@ __rdma_create_id(struct net *net, rdma_cm_event_handler event_handler,
refcount_set(&id_priv->refcount, 1);
mutex_init(&id_priv->handler_mutex);
INIT_LIST_HEAD(&id_priv->device_item);
INIT_LIST_HEAD(&id_priv->id_list_entry);
INIT_LIST_HEAD(&id_priv->listen_list);
INIT_LIST_HEAD(&id_priv->mc_list);
get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);
Expand Down Expand Up @@ -1883,6 +2005,7 @@ static void _destroy_id(struct rdma_id_private *id_priv,
cma_cancel_operation(id_priv, state);

rdma_restrack_del(&id_priv->res);
cma_remove_id_from_tree(id_priv);
if (id_priv->cma_dev) {
if (rdma_cap_ib_cm(id_priv->id.device, 1)) {
if (id_priv->cm_id.ib)
Expand Down Expand Up @@ -3172,8 +3295,11 @@ int rdma_resolve_route(struct rdma_cm_id *id, unsigned long timeout_ms)
cma_id_get(id_priv);
if (rdma_cap_ib_sa(id->device, id->port_num))
ret = cma_resolve_ib_route(id_priv, timeout_ms);
else if (rdma_protocol_roce(id->device, id->port_num))
else if (rdma_protocol_roce(id->device, id->port_num)) {
ret = cma_resolve_iboe_route(id_priv);
if (!ret)
cma_add_id_to_tree(id_priv);
}
else if (rdma_protocol_iwarp(id->device, id->port_num))
ret = cma_resolve_iw_route(id_priv);
else
Expand Down Expand Up @@ -4922,10 +5048,87 @@ static int cma_netdev_callback(struct notifier_block *self, unsigned long event,
return ret;
}

static void cma_netevent_work_handler(struct work_struct *_work)
{
struct rdma_id_private *id_priv =
container_of(_work, struct rdma_id_private, id.net_work);
struct rdma_cm_event event = {};

mutex_lock(&id_priv->handler_mutex);

if (READ_ONCE(id_priv->state) == RDMA_CM_DESTROYING ||
READ_ONCE(id_priv->state) == RDMA_CM_DEVICE_REMOVAL)
goto out_unlock;

event.event = RDMA_CM_EVENT_UNREACHABLE;
event.status = -ETIMEDOUT;

if (cma_cm_event_handler(id_priv, &event)) {
__acquire(&id_priv->handler_mutex);
id_priv->cm_id.ib = NULL;
cma_id_put(id_priv);
destroy_id_handler_unlock(id_priv);
return;
}

out_unlock:
mutex_unlock(&id_priv->handler_mutex);
cma_id_put(id_priv);
}

static int cma_netevent_callback(struct notifier_block *self,
unsigned long event, void *ctx)
{
struct id_table_entry *ips_node = NULL;
struct rdma_id_private *current_id;
struct neighbour *neigh = ctx;
unsigned long flags;

if (event != NETEVENT_NEIGH_UPDATE)
return NOTIFY_DONE;

spin_lock_irqsave(&id_table_lock, flags);
if (neigh->tbl->family == AF_INET6) {
struct sockaddr_in6 neigh_sock_6;

neigh_sock_6.sin6_family = AF_INET6;
neigh_sock_6.sin6_addr = *(struct in6_addr *)neigh->primary_key;
ips_node = node_from_ndev_ip(&id_table, neigh->dev->ifindex,
(struct sockaddr *)&neigh_sock_6);
} else if (neigh->tbl->family == AF_INET) {
struct sockaddr_in neigh_sock_4;

neigh_sock_4.sin_family = AF_INET;
neigh_sock_4.sin_addr.s_addr = *(__be32 *)(neigh->primary_key);
ips_node = node_from_ndev_ip(&id_table, neigh->dev->ifindex,
(struct sockaddr *)&neigh_sock_4);
} else
goto out;

if (!ips_node)
goto out;

list_for_each_entry(current_id, &ips_node->id_list, id_list_entry) {
if (!memcmp(current_id->id.route.addr.dev_addr.dst_dev_addr,
neigh->ha, ETH_ALEN))
continue;
INIT_WORK(&current_id->id.net_work, cma_netevent_work_handler);
cma_id_get(current_id);
queue_work(cma_wq, &current_id->id.net_work);
}
out:
spin_unlock_irqrestore(&id_table_lock, flags);
return NOTIFY_DONE;
}

static struct notifier_block cma_nb = {
.notifier_call = cma_netdev_callback
};

static struct notifier_block cma_netevent_cb = {
.notifier_call = cma_netevent_callback
};

static void cma_send_device_removal_put(struct rdma_id_private *id_priv)
{
struct rdma_cm_event event = { .event = RDMA_CM_EVENT_DEVICE_REMOVAL };
Expand Down Expand Up @@ -5148,6 +5351,7 @@ static int __init cma_init(void)

ib_sa_register_client(&sa_client);
register_netdevice_notifier(&cma_nb);
register_netevent_notifier(&cma_netevent_cb);

ret = ib_register_client(&cma_client);
if (ret)
Expand All @@ -5162,6 +5366,7 @@ static int __init cma_init(void)
err_ib:
ib_unregister_client(&cma_client);
err:
unregister_netevent_notifier(&cma_netevent_cb);
unregister_netdevice_notifier(&cma_nb);
ib_sa_unregister_client(&sa_client);
unregister_pernet_subsys(&cma_pernet_operations);
Expand All @@ -5174,6 +5379,7 @@ static void __exit cma_cleanup(void)
{
cma_configfs_exit();
ib_unregister_client(&cma_client);
unregister_netevent_notifier(&cma_netevent_cb);
unregister_netdevice_notifier(&cma_nb);
ib_sa_unregister_client(&sa_client);
unregister_pernet_subsys(&cma_pernet_operations);
Expand Down
1 change: 1 addition & 0 deletions drivers/infiniband/core/cma_priv.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ struct rdma_id_private {
struct list_head listen_item;
struct list_head listen_list;
};
struct list_head id_list_entry;
struct cma_device *cma_dev;
struct list_head mc_list;

Expand Down
2 changes: 1 addition & 1 deletion drivers/infiniband/core/rdma_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ static int uverbs_try_lock_object(struct ib_uobject *uobj,
* In exclusive access mode, we check that the counter is zero (nobody
* claimed this object) and we set it to -1. Releasing a shared access
* lock is done simply by decreasing the counter. As for exclusive
* access locks, since only a single one of them is is allowed
* access locks, since only a single one of them is allowed
* concurrently, setting the counter to zero is enough for releasing
* this lock.
*/
Expand Down
Loading

0 comments on commit e495274

Please sign in to comment.