Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Pull rdma fixes from Jason Gunthorpe:
 "This is an unusually large bunch of bug fixes for the later rc cycle,
  rxe and mlx5 both dumped a lot of things at once. rxe continues to fix
  itself, and mlx5 is fixing a bunch of "queue counters" related bugs.

  There is one highly notable bug fix regarding the qkey: this small
  security check was missed in the original 2005 implementation, and its
  absence lets unprivileged userspace use privileged qkeys (a short
  illustrative sketch follows this message).

  Summary:

   - Two rtrs fixes for error-unwind bugs

   - Several rxe bug fixes:
      * Incorrect Rx packet validation
      * Using memory without a refcount
      * Syzkaller found use before initialization
      * Regression fix for missing locking with the tasklet conversion
        from this merge window

   - Have bnxt report the correct link properties to userspace; this was
     a regression in v6.3

   - Several mlx5 bug fixes:
      * Kernel crash triggerable by userspace for the RAW ethernet
        profile
      * Defend against steering refcounting issues created by userspace
      * Incorrect change of QP port affinity parameters in some LAG
        configurations

   - Fix mlx5 Q counters:
      * Do not over-allocate Q counters, so userspace can use the full
        port capacity
      * Kernel crash triggered by eswitch due to misuse of Q counters
      * Incorrect mlx5_device used for Q counters in some LAG
        configurations

   - Properly implement the IBA spec restricting privileged qkeys to
     root

   - Always return an error when reading from a disassociated device's
     event queue

   - isert bug fixes:
      * Avoid a deadlock with the CM handler and CM ID destruction
      * Correct list corruption due to incorrect locking
      * Fix a use after free around connection tear down"
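
A minimal userspace-side sketch of the qkey rule behind the uverbs change below (the macro and helper names are illustrative, not kernel identifiers): the IBA treats a qkey whose most-significant bit is set as privileged, and after this series modify_qp() rejects such a qkey with -EPERM unless the caller has CAP_NET_RAW.

#include <stdbool.h>
#include <stdint.h>

/* Assumption for illustration: the MSB marks a privileged (controlled)
 * qkey, per the IBA wording the commit message refers to. */
#define PRIV_QKEY_BIT 0x80000000u

/* Returns true if setting this qkey now requires CAP_NET_RAW. */
static bool qkey_needs_cap_net_raw(uint32_t qkey)
{
        return (qkey & PRIV_QKEY_BIT) != 0;
}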

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma:
  RDMA/rxe: Fix rxe_cq_post
  IB/isert: Fix incorrect release of isert connection
  IB/isert: Fix possible list corruption in CMA handler
  IB/isert: Fix dead lock in ib_isert
  RDMA/mlx5: Fix affinity assignment
  IB/uverbs: Fix to consider event queue closing also upon non-blocking mode
  RDMA/uverbs: Restrict usage of privileged QKEYs
  RDMA/cma: Always set static rate to 0 for RoCE
  RDMA/mlx5: Fix Q-counters query in LAG mode
  RDMA/mlx5: Remove vport Q-counters dependency on normal Q-counters
  RDMA/mlx5: Fix Q-counters per vport allocation
  RDMA/mlx5: Create an indirect flow table for steering anchor
  RDMA/mlx5: Initiate dropless RQ for RAW Ethernet functions
  RDMA/rxe: Fix the use-before-initialization error of resp_pkts
  RDMA/bnxt_re: Fix reporting active_{speed,width} attributes
  RDMA/rxe: Fix ref count error in check_rkey()
  RDMA/rxe: Fix packet length checks
  RDMA/rtrs: Fix rxe_dealloc_pd warning
  RDMA/rtrs: Fix the last iu->buf leak in err path
torvalds committed Jun 16, 2023
2 parents b7feaa4 + 0c7e314 commit 93fd8eb
Showing 22 changed files with 447 additions and 130 deletions.
4 changes: 2 additions & 2 deletions drivers/infiniband/core/cma.c
@@ -3295,7 +3295,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
         route->path_rec->traffic_class = tos;
         route->path_rec->mtu = iboe_get_mtu(ndev->mtu);
         route->path_rec->rate_selector = IB_SA_EQ;
-        route->path_rec->rate = iboe_get_rate(ndev);
+        route->path_rec->rate = IB_RATE_PORT_CURRENT;
         dev_put(ndev);
         route->path_rec->packet_life_time_selector = IB_SA_EQ;
         /* In case ACK timeout is set, use this value to calculate
@@ -4964,7 +4964,7 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
         if (!ndev)
                 return -ENODEV;
 
-        ib.rec.rate = iboe_get_rate(ndev);
+        ib.rec.rate = IB_RATE_PORT_CURRENT;
         ib.rec.hop_limit = 1;
         ib.rec.mtu = iboe_get_mtu(ndev->mtu);
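
For context, a hedged standalone sketch of the pattern being replaced above (illustrative only; this is not the kernel's iboe_get_rate(), whose thresholds and rate codes differ): deriving a static rate from the netdev link speed can cap the path below the port's real capacity, while a rate of 0 (IB_RATE_PORT_CURRENT) applies no static rate limit at all.

/* Hypothetical speed-to-static-rate mapping; the numeric codes are
 * stand-ins, not the real enum ib_rate values. */
static int speed_to_static_rate(unsigned int speed_mbps)
{
        if (speed_mbps == 0)
                return 0;       /* unknown speed: no static limit */
        if (speed_mbps <= 10000)
                return 3;       /* stand-in code for "10 Gb/s" */
        return 7;               /* stand-in cap: faster links get under-rated */
}
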
7 changes: 6 additions & 1 deletion drivers/infiniband/core/uverbs_cmd.c
@@ -1850,8 +1850,13 @@ static int modify_qp(struct uverbs_attr_bundle *attrs,
                 attr->path_mtu = cmd->base.path_mtu;
         if (cmd->base.attr_mask & IB_QP_PATH_MIG_STATE)
                 attr->path_mig_state = cmd->base.path_mig_state;
-        if (cmd->base.attr_mask & IB_QP_QKEY)
+        if (cmd->base.attr_mask & IB_QP_QKEY) {
+                if (cmd->base.qkey & IB_QP_SET_QKEY && !capable(CAP_NET_RAW)) {
+                        ret = -EPERM;
+                        goto release_qp;
+                }
                 attr->qkey = cmd->base.qkey;
+        }
         if (cmd->base.attr_mask & IB_QP_RQ_PSN)
                 attr->rq_psn = cmd->base.rq_psn;
         if (cmd->base.attr_mask & IB_QP_SQ_PSN)
12 changes: 5 additions & 7 deletions drivers/infiniband/core/uverbs_main.c
@@ -222,8 +222,12 @@ static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_queue *ev_queue,
         spin_lock_irq(&ev_queue->lock);
 
         while (list_empty(&ev_queue->event_list)) {
-                spin_unlock_irq(&ev_queue->lock);
+                if (ev_queue->is_closed) {
+                        spin_unlock_irq(&ev_queue->lock);
+                        return -EIO;
+                }
 
+                spin_unlock_irq(&ev_queue->lock);
                 if (filp->f_flags & O_NONBLOCK)
                         return -EAGAIN;
 
@@ -233,12 +237,6 @@ static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_queue *ev_queue,
                         return -ERESTARTSYS;
 
                 spin_lock_irq(&ev_queue->lock);
-
-                /* If device was disassociated and no event exists set an error */
-                if (list_empty(&ev_queue->event_list) && ev_queue->is_closed) {
-                        spin_unlock_irq(&ev_queue->lock);
-                        return -EIO;
-                }
         }
 
         event = list_entry(ev_queue->event_list.next, struct ib_uverbs_event, list);
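
A hedged userspace-side sketch of why the reordering above matters (names are illustrative, not from this diff): with O_NONBLOCK, a reader of the async-event fd could previously see EAGAIN forever after the device was disassociated, because the is_closed check sat after the blocking wait; checking is_closed first makes the read fail fast with EIO.

#include <errno.h>
#include <unistd.h>

/* Consume one event from a uverbs async-event fd opened with
 * O_NONBLOCK; returns 0 on an event, -1 on a hard error.
 * A real reader would poll() instead of spinning on EAGAIN. */
static int read_one_event(int event_fd, void *buf, size_t len)
{
        for (;;) {
                ssize_t n = read(event_fd, buf, len);
                if (n >= 0)
                        return 0;       /* got an event */
                if (errno == EAGAIN)
                        continue;       /* no event yet; with the fix this
                                           no longer hides a dead device */
                return -1;              /* EIO (disassociated) or other error */
        }
}
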
2 changes: 0 additions & 2 deletions drivers/infiniband/hw/bnxt_re/bnxt_re.h
@@ -135,8 +135,6 @@ struct bnxt_re_dev {
 
         struct delayed_work worker;
         u8 cur_prio_map;
-        u16 active_speed;
-        u8 active_width;
 
         /* FP Notification Queue (CQ & SRQ) */
         struct tasklet_struct nq_task;
7 changes: 4 additions & 3 deletions drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -199,6 +199,7 @@ int bnxt_re_query_port(struct ib_device *ibdev, u32 port_num,
 {
         struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
         struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
+        int rc;
 
         memset(port_attr, 0, sizeof(*port_attr));
 
@@ -228,10 +229,10 @@ int bnxt_re_query_port(struct ib_device *ibdev, u32 port_num,
         port_attr->sm_sl = 0;
         port_attr->subnet_timeout = 0;
         port_attr->init_type_reply = 0;
-        port_attr->active_speed = rdev->active_speed;
-        port_attr->active_width = rdev->active_width;
+        rc = ib_get_eth_speed(&rdev->ibdev, port_num, &port_attr->active_speed,
+                              &port_attr->active_width);
 
-        return 0;
+        return rc;
 }
 
 int bnxt_re_get_port_immutable(struct ib_device *ibdev, u32 port_num,
2 changes: 0 additions & 2 deletions drivers/infiniband/hw/bnxt_re/main.c
@@ -1077,8 +1077,6 @@ static int bnxt_re_ib_init(struct bnxt_re_dev *rdev)
                 return rc;
         }
         dev_info(rdev_to_dev(rdev), "Device registered with IB successfully");
-        ib_get_eth_speed(&rdev->ibdev, 1, &rdev->active_speed,
-                         &rdev->active_width);
         set_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, &rdev->flags);
 
         event = netif_running(rdev->netdev) && netif_carrier_ok(rdev->netdev) ?
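
Note that the two bnxt_re hunks above are halves of one fix: main.c stops sampling the link speed and width once at registration time, and ib_verbs.c instead calls ib_get_eth_speed() on every bnxt_re_query_port(), so userspace sees the current link state rather than a snapshot that goes stale after renegotiation.
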
89 changes: 62 additions & 27 deletions drivers/infiniband/hw/mlx5/counters.c
@@ -209,7 +209,8 @@ static const struct mlx5_ib_counters *get_counters(struct mlx5_ib_dev *dev,
              !vport_qcounters_supported(dev)) || !port_num)
                 return &dev->port[0].cnts;
 
-        return &dev->port[port_num - 1].cnts;
+        return is_mdev_switchdev_mode(dev->mdev) ?
+               &dev->port[1].cnts : &dev->port[port_num - 1].cnts;
 }
 
 /**
@@ -262,7 +263,7 @@ static struct rdma_hw_stats *
 mlx5_ib_alloc_hw_port_stats(struct ib_device *ibdev, u32 port_num)
 {
         struct mlx5_ib_dev *dev = to_mdev(ibdev);
-        const struct mlx5_ib_counters *cnts = &dev->port[port_num - 1].cnts;
+        const struct mlx5_ib_counters *cnts = get_counters(dev, port_num);
 
         return do_alloc_stats(cnts);
 }
@@ -329,19 +330,24 @@ static int mlx5_ib_query_q_counters_vport(struct mlx5_ib_dev *dev,
 {
         u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {};
         u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {};
+        struct mlx5_core_dev *mdev;
         __be32 val;
         int ret, i;
 
         if (!dev->port[port_num].rep ||
             dev->port[port_num].rep->vport == MLX5_VPORT_UPLINK)
                 return 0;
 
+        mdev = mlx5_eswitch_get_core_dev(dev->port[port_num].rep->esw);
+        if (!mdev)
+                return -EOPNOTSUPP;
+
         MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER);
         MLX5_SET(query_q_counter_in, in, other_vport, 1);
         MLX5_SET(query_q_counter_in, in, vport_number,
                  dev->port[port_num].rep->vport);
         MLX5_SET(query_q_counter_in, in, aggregate, 1);
-        ret = mlx5_cmd_exec_inout(dev->mdev, query_q_counter, in, out);
+        ret = mlx5_cmd_exec_inout(mdev, query_q_counter, in, out);
         if (ret)
                 return ret;
 
@@ -575,43 +581,53 @@ static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
         bool is_vport = is_mdev_switchdev_mode(dev->mdev) &&
                         port_num != MLX5_VPORT_PF;
         const struct mlx5_ib_counter *names;
-        int j = 0, i;
+        int j = 0, i, size;
 
         names = is_vport ? vport_basic_q_cnts : basic_q_cnts;
-        for (i = 0; i < ARRAY_SIZE(basic_q_cnts); i++, j++) {
+        size = is_vport ? ARRAY_SIZE(vport_basic_q_cnts) :
+               ARRAY_SIZE(basic_q_cnts);
+        for (i = 0; i < size; i++, j++) {
                 descs[j].name = names[i].name;
-                offsets[j] = basic_q_cnts[i].offset;
+                offsets[j] = names[i].offset;
         }
 
         names = is_vport ? vport_out_of_seq_q_cnts : out_of_seq_q_cnts;
+        size = is_vport ? ARRAY_SIZE(vport_out_of_seq_q_cnts) :
+               ARRAY_SIZE(out_of_seq_q_cnts);
         if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) {
-                for (i = 0; i < ARRAY_SIZE(out_of_seq_q_cnts); i++, j++) {
+                for (i = 0; i < size; i++, j++) {
                         descs[j].name = names[i].name;
-                        offsets[j] = out_of_seq_q_cnts[i].offset;
+                        offsets[j] = names[i].offset;
                 }
         }
 
         names = is_vport ? vport_retrans_q_cnts : retrans_q_cnts;
+        size = is_vport ? ARRAY_SIZE(vport_retrans_q_cnts) :
+               ARRAY_SIZE(retrans_q_cnts);
         if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
-                for (i = 0; i < ARRAY_SIZE(retrans_q_cnts); i++, j++) {
+                for (i = 0; i < size; i++, j++) {
                         descs[j].name = names[i].name;
-                        offsets[j] = retrans_q_cnts[i].offset;
+                        offsets[j] = names[i].offset;
                 }
         }
 
         names = is_vport ? vport_extended_err_cnts : extended_err_cnts;
+        size = is_vport ? ARRAY_SIZE(vport_extended_err_cnts) :
+               ARRAY_SIZE(extended_err_cnts);
         if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) {
-                for (i = 0; i < ARRAY_SIZE(extended_err_cnts); i++, j++) {
+                for (i = 0; i < size; i++, j++) {
                         descs[j].name = names[i].name;
-                        offsets[j] = extended_err_cnts[i].offset;
+                        offsets[j] = names[i].offset;
                 }
         }
 
         names = is_vport ? vport_roce_accl_cnts : roce_accl_cnts;
+        size = is_vport ? ARRAY_SIZE(vport_roce_accl_cnts) :
+               ARRAY_SIZE(roce_accl_cnts);
         if (MLX5_CAP_GEN(dev->mdev, roce_accl)) {
-                for (i = 0; i < ARRAY_SIZE(roce_accl_cnts); i++, j++) {
+                for (i = 0; i < size; i++, j++) {
                         descs[j].name = names[i].name;
-                        offsets[j] = roce_accl_cnts[i].offset;
+                        offsets[j] = names[i].offset;
                 }
         }
 
@@ -661,25 +677,37 @@ static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
 static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
                                     struct mlx5_ib_counters *cnts, u32 port_num)
 {
-        u32 num_counters, num_op_counters = 0;
+        bool is_vport = is_mdev_switchdev_mode(dev->mdev) &&
+                        port_num != MLX5_VPORT_PF;
+        u32 num_counters, num_op_counters = 0, size;
 
-        num_counters = ARRAY_SIZE(basic_q_cnts);
+        size = is_vport ? ARRAY_SIZE(vport_basic_q_cnts) :
+               ARRAY_SIZE(basic_q_cnts);
+        num_counters = size;
 
+        size = is_vport ? ARRAY_SIZE(vport_out_of_seq_q_cnts) :
+               ARRAY_SIZE(out_of_seq_q_cnts);
         if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt))
-                num_counters += ARRAY_SIZE(out_of_seq_q_cnts);
+                num_counters += size;
 
+        size = is_vport ? ARRAY_SIZE(vport_retrans_q_cnts) :
+               ARRAY_SIZE(retrans_q_cnts);
         if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
-                num_counters += ARRAY_SIZE(retrans_q_cnts);
+                num_counters += size;
 
+        size = is_vport ? ARRAY_SIZE(vport_extended_err_cnts) :
+               ARRAY_SIZE(extended_err_cnts);
         if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters))
-                num_counters += ARRAY_SIZE(extended_err_cnts);
+                num_counters += size;
 
+        size = is_vport ? ARRAY_SIZE(vport_roce_accl_cnts) :
+               ARRAY_SIZE(roce_accl_cnts);
         if (MLX5_CAP_GEN(dev->mdev, roce_accl))
-                num_counters += ARRAY_SIZE(roce_accl_cnts);
+                num_counters += size;
 
         cnts->num_q_counters = num_counters;
 
-        if (is_mdev_switchdev_mode(dev->mdev) && port_num != MLX5_VPORT_PF)
+        if (is_vport)
                 goto skip_non_qcounters;
 
         if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
@@ -725,11 +753,11 @@ static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
 static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
 {
         u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
-        int num_cnt_ports;
+        int num_cnt_ports = dev->num_ports;
         int i, j;
 
-        num_cnt_ports = (!is_mdev_switchdev_mode(dev->mdev) ||
-                         vport_qcounters_supported(dev)) ? dev->num_ports : 1;
+        if (is_mdev_switchdev_mode(dev->mdev))
+                num_cnt_ports = min(2, num_cnt_ports);
 
         MLX5_SET(dealloc_q_counter_in, in, opcode,
                  MLX5_CMD_OP_DEALLOC_Q_COUNTER);
@@ -761,15 +789,22 @@ static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
 {
         u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
         u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
-        int num_cnt_ports;
+        int num_cnt_ports = dev->num_ports;
         int err = 0;
         int i;
         bool is_shared;
 
         MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER);
         is_shared = MLX5_CAP_GEN(dev->mdev, log_max_uctx) != 0;
-        num_cnt_ports = (!is_mdev_switchdev_mode(dev->mdev) ||
-                         vport_qcounters_supported(dev)) ? dev->num_ports : 1;
+
+        /*
+         * In switchdev we need to allocate two ports, one that is used for
+         * the device Q_counters and it is essentially the real Q_counters of
+         * this device, while the other is used as a helper for PF to be able to
+         * query all other vports.
+         */
+        if (is_mdev_switchdev_mode(dev->mdev))
+                num_cnt_ports = min(2, num_cnt_ports);
 
         for (i = 0; i < num_cnt_ports; i++) {
                 err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts, i);
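
Taken together, the Q-counter hunks give switchdev mode a fixed two-slot layout. A minimal standalone sketch of the resulting selection (the function name is illustrative, and the PF port number is assumed to be 0, as MLX5_VPORT_PF is in the kernel):

#include <stdint.h>

/* Slot 0 backs the device's own (PF) Q-counters; slot 1 is the helper
 * set through which the PF queries every other vport. Non-switchdev
 * devices keep the usual one-counter-set-per-port layout instead. */
static uint32_t switchdev_qcounter_slot(uint32_t port_num)
{
        return port_num == 0 ? 0 : 1;
}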