Skip to content

Commit

Permalink
RDMA: Allow ib_client's to fail when add() is called
Browse files Browse the repository at this point in the history
When a client is added it isn't allowed to fail, but all the client's have
various failure paths within their add routines.

This creates the very fringe condition where the client was added, failed
during add and didn't set the client_data. The core code will then still
call other client_data centric ops like remove(), rename(), get_nl_info(),
and get_net_dev_by_params() with NULL client_data - which is confusing and
unexpected.

If the add() callback fails, then do not call any more client ops for the
device, even remove.

Remove all the now redundant checks for NULL client_data in ops callbacks.

Update all the add() callbacks to return error codes
appropriately. EOPNOTSUPP is used for cases where the ULP does not support
the ib_device - eg because it only works with IB.

Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Leon Romanovsky <[email protected]>
Acked-by: Ursula Braun <[email protected]>
Signed-off-by: Jason Gunthorpe <[email protected]>
  • Loading branch information
jgunthorpe committed May 6, 2020
1 parent 04c349a commit 11a0ae4
Show file tree
Hide file tree
Showing 15 changed files with 142 additions and 124 deletions.
24 changes: 14 additions & 10 deletions drivers/infiniband/core/cm.c
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ const char *__attribute_const__ ibcm_reject_msg(int reason)
EXPORT_SYMBOL(ibcm_reject_msg);

struct cm_id_private;
static void cm_add_one(struct ib_device *device);
static int cm_add_one(struct ib_device *device);
static void cm_remove_one(struct ib_device *device, void *client_data);
static int cm_send_sidr_rep_locked(struct cm_id_private *cm_id_priv,
struct ib_cm_sidr_rep_param *param);
Expand Down Expand Up @@ -4382,7 +4382,7 @@ static void cm_remove_port_fs(struct cm_port *port)

}

static void cm_add_one(struct ib_device *ib_device)
static int cm_add_one(struct ib_device *ib_device)
{
struct cm_device *cm_dev;
struct cm_port *port;
Expand All @@ -4401,7 +4401,7 @@ static void cm_add_one(struct ib_device *ib_device)
cm_dev = kzalloc(struct_size(cm_dev, port, ib_device->phys_port_cnt),
GFP_KERNEL);
if (!cm_dev)
return;
return -ENOMEM;

cm_dev->ib_device = ib_device;
cm_dev->ack_delay = ib_device->attrs.local_ca_ack_delay;
Expand All @@ -4413,8 +4413,10 @@ static void cm_add_one(struct ib_device *ib_device)
continue;

port = kzalloc(sizeof *port, GFP_KERNEL);
if (!port)
if (!port) {
ret = -ENOMEM;
goto error1;
}

cm_dev->port[i-1] = port;
port->cm_dev = cm_dev;
Expand All @@ -4435,8 +4437,10 @@ static void cm_add_one(struct ib_device *ib_device)
cm_recv_handler,
port,
0);
if (IS_ERR(port->mad_agent))
if (IS_ERR(port->mad_agent)) {
ret = PTR_ERR(port->mad_agent);
goto error2;
}

ret = ib_modify_port(ib_device, i, 0, &port_modify);
if (ret)
Expand All @@ -4445,15 +4449,17 @@ static void cm_add_one(struct ib_device *ib_device)
count++;
}

if (!count)
if (!count) {
ret = -EOPNOTSUPP;
goto free;
}

ib_set_client_data(ib_device, &cm_client, cm_dev);

write_lock_irqsave(&cm.device_lock, flags);
list_add_tail(&cm_dev->list, &cm.device_list);
write_unlock_irqrestore(&cm.device_lock, flags);
return;
return 0;

error3:
ib_unregister_mad_agent(port->mad_agent);
Expand All @@ -4475,6 +4481,7 @@ static void cm_add_one(struct ib_device *ib_device)
}
free:
kfree(cm_dev);
return ret;
}

static void cm_remove_one(struct ib_device *ib_device, void *client_data)
Expand All @@ -4489,9 +4496,6 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data)
unsigned long flags;
int i;

if (!cm_dev)
return;

write_lock_irqsave(&cm.device_lock, flags);
list_del(&cm_dev->list);
write_unlock_irqrestore(&cm.device_lock, flags);
Expand Down
23 changes: 12 additions & 11 deletions drivers/infiniband/core/cma.c
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ struct rdma_cm_id *rdma_res_to_id(struct rdma_restrack_entry *res)
}
EXPORT_SYMBOL(rdma_res_to_id);

static void cma_add_one(struct ib_device *device);
static int cma_add_one(struct ib_device *device);
static void cma_remove_one(struct ib_device *device, void *client_data);

static struct ib_client cma_client = {
Expand Down Expand Up @@ -4638,29 +4638,34 @@ static struct notifier_block cma_nb = {
.notifier_call = cma_netdev_callback
};

static void cma_add_one(struct ib_device *device)
static int cma_add_one(struct ib_device *device)
{
struct cma_device *cma_dev;
struct rdma_id_private *id_priv;
unsigned int i;
unsigned long supported_gids = 0;
int ret;

cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL);
if (!cma_dev)
return;
return -ENOMEM;

cma_dev->device = device;
cma_dev->default_gid_type = kcalloc(device->phys_port_cnt,
sizeof(*cma_dev->default_gid_type),
GFP_KERNEL);
if (!cma_dev->default_gid_type)
if (!cma_dev->default_gid_type) {
ret = -ENOMEM;
goto free_cma_dev;
}

cma_dev->default_roce_tos = kcalloc(device->phys_port_cnt,
sizeof(*cma_dev->default_roce_tos),
GFP_KERNEL);
if (!cma_dev->default_roce_tos)
if (!cma_dev->default_roce_tos) {
ret = -ENOMEM;
goto free_gid_type;
}

rdma_for_each_port (device, i) {
supported_gids = roce_gid_type_mask_support(device, i);
Expand All @@ -4686,15 +4691,14 @@ static void cma_add_one(struct ib_device *device)
mutex_unlock(&lock);

trace_cm_add_one(device);
return;
return 0;

free_gid_type:
kfree(cma_dev->default_gid_type);

free_cma_dev:
kfree(cma_dev);

return;
return ret;
}

static int cma_remove_id_dev(struct rdma_id_private *id_priv)
Expand Down Expand Up @@ -4756,9 +4760,6 @@ static void cma_remove_one(struct ib_device *device, void *client_data)

trace_cm_remove_one(device);

if (!cma_dev)
return;

mutex_lock(&lock);
list_del(&cma_dev->list);
mutex_unlock(&lock);
Expand Down
16 changes: 14 additions & 2 deletions drivers/infiniband/core/device.c
Original file line number Diff line number Diff line change
Expand Up @@ -677,8 +677,20 @@ static int add_client_context(struct ib_device *device,
if (ret)
goto out;
downgrade_write(&device->client_data_rwsem);
if (client->add)
client->add(device);
if (client->add) {
if (client->add(device)) {
/*
* If a client fails to add then the error code is
* ignored, but we won't call any more ops on this
* client.
*/
xa_erase(&device->client_data, client->client_id);
up_read(&device->client_data_rwsem);
ib_device_put(device);
ib_client_put(client);
return 0;
}
}

/* Readers shall not see a client until add has been completed */
xa_set_mark(&device->client_data, client->client_id,
Expand Down
17 changes: 13 additions & 4 deletions drivers/infiniband/core/mad.c
Original file line number Diff line number Diff line change
Expand Up @@ -3076,27 +3076,35 @@ static int ib_mad_port_close(struct ib_device *device, int port_num)
return 0;
}

static void ib_mad_init_device(struct ib_device *device)
static int ib_mad_init_device(struct ib_device *device)
{
int start, i;
unsigned int count = 0;
int ret;

start = rdma_start_port(device);

for (i = start; i <= rdma_end_port(device); i++) {
if (!rdma_cap_ib_mad(device, i))
continue;

if (ib_mad_port_open(device, i)) {
ret = ib_mad_port_open(device, i);
if (ret) {
dev_err(&device->dev, "Couldn't open port %d\n", i);
goto error;
}
if (ib_agent_port_open(device, i)) {
ret = ib_agent_port_open(device, i);
if (ret) {
dev_err(&device->dev,
"Couldn't open port %d for agents\n", i);
goto error_agent;
}
count++;
}
return;
if (!count)
return -EOPNOTSUPP;

return 0;

error_agent:
if (ib_mad_port_close(device, i))
Expand All @@ -3113,6 +3121,7 @@ static void ib_mad_init_device(struct ib_device *device)
if (ib_mad_port_close(device, i))
dev_err(&device->dev, "Couldn't close port %d\n", i);
}
return ret;
}

static void ib_mad_remove_device(struct ib_device *device, void *client_data)
Expand Down
12 changes: 5 additions & 7 deletions drivers/infiniband/core/multicast.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
#include <rdma/ib_cache.h>
#include "sa.h"

static void mcast_add_one(struct ib_device *device);
static int mcast_add_one(struct ib_device *device);
static void mcast_remove_one(struct ib_device *device, void *client_data);

static struct ib_client mcast_client = {
Expand Down Expand Up @@ -815,7 +815,7 @@ static void mcast_event_handler(struct ib_event_handler *handler,
}
}

static void mcast_add_one(struct ib_device *device)
static int mcast_add_one(struct ib_device *device)
{
struct mcast_device *dev;
struct mcast_port *port;
Expand All @@ -825,7 +825,7 @@ static void mcast_add_one(struct ib_device *device)
dev = kmalloc(struct_size(dev, port, device->phys_port_cnt),
GFP_KERNEL);
if (!dev)
return;
return -ENOMEM;

dev->start_port = rdma_start_port(device);
dev->end_port = rdma_end_port(device);
Expand All @@ -845,14 +845,15 @@ static void mcast_add_one(struct ib_device *device)

if (!count) {
kfree(dev);
return;
return -EOPNOTSUPP;
}

dev->device = device;
ib_set_client_data(device, &mcast_client, dev);

INIT_IB_EVENT_HANDLER(&dev->event_handler, device, mcast_event_handler);
ib_register_event_handler(&dev->event_handler);
return 0;
}

static void mcast_remove_one(struct ib_device *device, void *client_data)
Expand All @@ -861,9 +862,6 @@ static void mcast_remove_one(struct ib_device *device, void *client_data)
struct mcast_port *port;
int i;

if (!dev)
return;

ib_unregister_event_handler(&dev->event_handler);
flush_workqueue(mcast_wq);

Expand Down
22 changes: 12 additions & 10 deletions drivers/infiniband/core/sa_query.c
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ static const struct nla_policy ib_nl_policy[LS_NLA_TYPE_MAX] = {
};


static void ib_sa_add_one(struct ib_device *device);
static int ib_sa_add_one(struct ib_device *device);
static void ib_sa_remove_one(struct ib_device *device, void *client_data);

static struct ib_client sa_client = {
Expand Down Expand Up @@ -2322,18 +2322,19 @@ static void ib_sa_event(struct ib_event_handler *handler,
}
}

static void ib_sa_add_one(struct ib_device *device)
static int ib_sa_add_one(struct ib_device *device)
{
struct ib_sa_device *sa_dev;
int s, e, i;
int count = 0;
int ret;

s = rdma_start_port(device);
e = rdma_end_port(device);

sa_dev = kzalloc(struct_size(sa_dev, port, e - s + 1), GFP_KERNEL);
if (!sa_dev)
return;
return -ENOMEM;

sa_dev->start_port = s;
sa_dev->end_port = e;
Expand All @@ -2353,8 +2354,10 @@ static void ib_sa_add_one(struct ib_device *device)
ib_register_mad_agent(device, i + s, IB_QPT_GSI,
NULL, 0, send_handler,
recv_handler, sa_dev, 0);
if (IS_ERR(sa_dev->port[i].agent))
if (IS_ERR(sa_dev->port[i].agent)) {
ret = PTR_ERR(sa_dev->port[i].agent);
goto err;
}

INIT_WORK(&sa_dev->port[i].update_task, update_sm_ah);
INIT_DELAYED_WORK(&sa_dev->port[i].ib_cpi_work,
Expand All @@ -2363,8 +2366,10 @@ static void ib_sa_add_one(struct ib_device *device)
count++;
}

if (!count)
if (!count) {
ret = -EOPNOTSUPP;
goto free;
}

ib_set_client_data(device, &sa_client, sa_dev);

Expand All @@ -2383,7 +2388,7 @@ static void ib_sa_add_one(struct ib_device *device)
update_sm_ah(&sa_dev->port[i].update_task);
}

return;
return 0;

err:
while (--i >= 0) {
Expand All @@ -2392,17 +2397,14 @@ static void ib_sa_add_one(struct ib_device *device)
}
free:
kfree(sa_dev);
return;
return ret;
}

static void ib_sa_remove_one(struct ib_device *device, void *client_data)
{
struct ib_sa_device *sa_dev = client_data;
int i;

if (!sa_dev)
return;

ib_unregister_event_handler(&sa_dev->event_handler);
flush_workqueue(ib_wq);

Expand Down
Loading

0 comments on commit 11a0ae4

Please sign in to comment.