Skip to content

Commit

Permalink
RDMA/nes: Fix double CLOSE event indication crash
Browse files Browse the repository at this point in the history
During stress testing in a large cluster, multiple CLOSE events are
detected and a BUG() is hit in the iWARP core.  The cause is that the
active node gave up while waiting for an MPA response from the peer
and tried to close the connection by sending a RST.  The passive node
driver receives the RST but is waiting for an MPA response from the user.
When the MPA accept is received, the driver offloads the connection
and sends a CLOSE event.  The driver then gets an AE indicating that a
RESET was received and also sends a CLOSE event, hitting the BUG().

Fix this by correcting RESET handling and sending CLOSE events.

Signed-off-by: Faisal Latif <[email protected]>
Signed-off-by: Roland Dreier <[email protected]>
  • Loading branch information
fsl-latif authored and Roland Dreier committed Sep 8, 2010
1 parent 70c9db0 commit dae5872
Showing 1 changed file with 10 additions and 8 deletions.
18 changes: 10 additions & 8 deletions drivers/infiniband/hw/nes/nes_cm.c
Original file line number Diff line number Diff line change
Expand Up @@ -502,7 +502,9 @@ int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb,
static void nes_retrans_expired(struct nes_cm_node *cm_node)
{
struct iw_cm_id *cm_id = cm_node->cm_id;
switch (cm_node->state) {
enum nes_cm_node_state state = cm_node->state;
cm_node->state = NES_CM_STATE_CLOSED;
switch (state) {
case NES_CM_STATE_SYN_RCVD:
case NES_CM_STATE_CLOSING:
rem_ref_cm_node(cm_node->cm_core, cm_node);
Expand All @@ -511,7 +513,6 @@ static void nes_retrans_expired(struct nes_cm_node *cm_node)
case NES_CM_STATE_FIN_WAIT1:
if (cm_node->cm_id)
cm_id->rem_ref(cm_id);
cm_node->state = NES_CM_STATE_CLOSED;
send_reset(cm_node, NULL);
break;
default:
Expand Down Expand Up @@ -1439,9 +1440,6 @@ static void handle_rst_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
break;
case NES_CM_STATE_MPAREQ_RCVD:
passive_state = atomic_add_return(1, &cm_node->passive_state);
if (passive_state == NES_SEND_RESET_EVENT)
create_event(cm_node, NES_CM_EVENT_RESET);
cm_node->state = NES_CM_STATE_CLOSED;
dev_kfree_skb_any(skb);
break;
case NES_CM_STATE_ESTABLISHED:
Expand All @@ -1456,6 +1454,7 @@ static void handle_rst_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
case NES_CM_STATE_CLOSED:
drop_packet(skb);
break;
case NES_CM_STATE_FIN_WAIT2:
case NES_CM_STATE_FIN_WAIT1:
case NES_CM_STATE_LAST_ACK:
cm_node->cm_id->rem_ref(cm_node->cm_id);
Expand Down Expand Up @@ -2777,6 +2776,12 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
return -EINVAL;
}

passive_state = atomic_add_return(1, &cm_node->passive_state);
if (passive_state == NES_SEND_RESET_EVENT) {
rem_ref_cm_node(cm_node->cm_core, cm_node);
return -ECONNRESET;
}

/* associate the node with the QP */
nesqp->cm_node = (void *)cm_node;
cm_node->nesqp = nesqp;
Expand Down Expand Up @@ -2979,9 +2984,6 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
printk(KERN_ERR "%s[%u] OFA CM event_handler returned, "
"ret=%d\n", __func__, __LINE__, ret);

passive_state = atomic_add_return(1, &cm_node->passive_state);
if (passive_state == NES_SEND_RESET_EVENT)
create_event(cm_node, NES_CM_EVENT_RESET);
return 0;
}

Expand Down

0 comments on commit dae5872

Please sign in to comment.