forked from w180112/RDMA-example
-
Notifications
You must be signed in to change notification settings - Fork 0
/
rdma_write_client.c
246 lines (215 loc) · 6.89 KB
/
rdma_write_client.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
/*
* modified from http://www.digitalvampire.org/rdma-tutorial-2007/notes.pdf
*
* build:
* gcc -o client rdma_write_client.c -lrdmacm -libverbs
*
* usage:
* ./client <servername or ip> <val1> <val2>
*
* Connects to the server, sends two integers, and waits for the server to send back the sum.
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <errno.h>
#include <sys/queue.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netdb.h>
#include <arpa/inet.h>
#include <rdma/rdma_cma.h>
/* Timeout, in milliseconds, handed to rdma_resolve_addr() and
 * rdma_resolve_route() when locating the server (5 seconds).
 */
enum {
    RESOLVE_TIMEOUT_MS = 5 * 1000
};
/* Description of the server's buffer, received as connection private data
 * when the connection is established. main() uses it as the target of the
 * RDMA write. The layout is shared with the peer, so member order and types
 * must not change.
 */
struct pdata {
    uint64_t buf_va;   /* address of the server buffer (remote_addr of the write) */
    uint32_t buf_rkey; /* remote key of the server buffer (rkey of the write) */
};
int main(int argc, char *argv[])
{
struct pdata server_pdata;
struct rdma_event_channel *cm_channel;
struct rdma_cm_id *cm_id;
struct rdma_cm_event *event;
struct rdma_conn_param conn_param = { };
struct ibv_pd *pd;
struct ibv_comp_channel *comp_chan;
struct ibv_cq *cq;
struct ibv_cq *evt_cq;
struct ibv_mr *mr;
struct ibv_qp_init_attr qp_attr = { };
struct ibv_sge sge;
struct ibv_send_wr send_wr = { };
struct ibv_send_wr *bad_send_wr;
struct ibv_recv_wr recv_wr = { };
struct ibv_recv_wr *bad_recv_wr;
struct ibv_wc wc;
void *cq_context;
struct addrinfo *res, *t;
struct addrinfo hints = {
.ai_family = AF_INET,
.ai_socktype = SOCK_STREAM
};
int n;
uint32_t *buf;
int err;
/* We use rdmacm lib to establish rdma connection and ibv lib to write, read, send, receive data here. */
/* In RDMA programming, transmission is a "asychronize" procedure, all the "events" were generated on NIC.
* Programmer should "get" those event and than ack and process them.
*/
/* In rdmacm lib, each event will generated by NIC and we should "get" these events from event channel,
* so we should create an event channel first.
*/
cm_channel = rdma_create_event_channel();
if (!cm_channel)
return 1;
/* Like socket fd in socket porgramming, we need to acquire a rdmacm id.
*/
err = rdma_create_id(cm_channel, &cm_id, NULL, RDMA_PS_TCP);
if (err)
return err;
/* Note: port 20000 doesn't equal to the socket port in TCP/IP,
* in RoCEv2, all of the packets use port 4791,
* port 20000 here indicates a higher level abstraction port
*/
n = getaddrinfo(argv[1], "20000", &hints, &res);
if (n < 0)
return 1;
/* Resolve addr. */
err = rdma_resolve_addr(cm_id, NULL, res->ai_addr, RESOLVE_TIMEOUT_MS);
if (err)
return err;
/* We need to "get" rdmacm event to acquire event occured on NIC. */
err = rdma_get_cm_event(cm_channel, &event);
if (err)
return err;
if (event->event != RDMA_CM_EVENT_ADDR_RESOLVED)
return 1;
/* Each rdmacm event should be acked. */
rdma_ack_cm_event(event);
err = rdma_resolve_route(cm_id, RESOLVE_TIMEOUT_MS);
if (err)
return err;
err = rdma_get_cm_event(cm_channel, &event);
if (err)
return err;
if (event->event != RDMA_CM_EVENT_ROUTE_RESOLVED)
return 1;
rdma_ack_cm_event(event);
/* Allocate protection domain, each pd can be used to create queue pair,
* register memory regien, etc.
* Each pd is a protection of a group of objects,
* it means you can't use these objects between different pd.
*/
pd = ibv_alloc_pd(cm_id->verbs);
if (!pd)
return 1;
/* A completion event channel like rdma_create_event_channel in libibverbs */
comp_chan = ibv_create_comp_channel(cm_id->verbs);
if (!comp_chan)
return 1;
/* create a completion queue, a cq contains a completion work request.
* All the events about NIC, transmission will be in the cq
* Since libibverbs is thread-safe, use multiple cqs to 1 or many completion channels is avaliable.
*/
cq = ibv_create_cq(cm_id->verbs,2,NULL,comp_chan,0);
if (!cq)
return 1;
/* Requests create compiletion notification when any work completion is add to the cq,
* therefore work completion can be "get" by using ibv_get_cq_event()
*/
if (ibv_req_notify_cq(cq,0))
return 1;
buf = calloc(2,sizeof(uint32_t));
if (!buf)
return 1;
/* register a memory region with a specific pd */
mr = ibv_reg_mr(pd, buf,2 * sizeof(uint32_t), IBV_ACCESS_LOCAL_WRITE);
if (!mr)
return 1;
qp_attr.cap.max_send_wr = 4;
qp_attr.cap.max_send_sge = 1;
qp_attr.cap.max_recv_wr = 1;
qp_attr.cap.max_recv_sge = 1;
qp_attr.send_cq = cq;
qp_attr.recv_cq = cq;
qp_attr.qp_type = IBV_QPT_RC;
/* create a queue pair, a qp is for post send/receive.
* If pd is NULL, rdma_create_qp will use default pd on RDMA device
*/
err = rdma_create_qp(cm_id,pd,&qp_attr);
if (err)
return err;
conn_param.initiator_depth = 1;
conn_param.retry_count = 7;
err = rdma_connect(cm_id,&conn_param);
if (err)
return err;
err = rdma_get_cm_event(cm_channel,&event);
if (err)
return err;
if (event->event != RDMA_CM_EVENT_ESTABLISHED)
return 1;
/* event->param.conn.private_data includes the memory info at server */
memcpy(&server_pdata,event->param.conn.private_data,sizeof(server_pdata));
rdma_ack_cm_event(event);
/* We prepare ibv_post_recv() first */
sge.addr = (uintptr_t)buf;
sge.length = sizeof(uint32_t);
sge.lkey = mr->lkey;
/* wr_id is used to identify the recv data when get ibv event */
recv_wr.wr_id = 0;
recv_wr.sg_list = &sge;
recv_wr.num_sge = 1;
if (ibv_post_recv(cm_id->qp,&recv_wr,&bad_recv_wr))
return 1;
buf[0] = strtoul(argv[2],NULL,0);
buf[1] = strtoul(argv[3],NULL,0);
buf[0] = htonl(buf[0]);
buf[1] = htonl(buf[1]);
sge.addr = (uintptr_t)buf;
sge.length = sizeof(buf);
sge.lkey = mr->lkey;
send_wr.wr_id = 1;
send_wr.opcode = IBV_WR_RDMA_WRITE;
/* set IBV_SEND_SIGNALED flag will cause an ibv event recv at sender when data transmit from memory to NIC */
send_wr.send_flags = IBV_SEND_SIGNALED;
send_wr.sg_list = &sge;
send_wr.num_sge = 1;
send_wr.wr.rdma.rkey = ntohl(server_pdata.buf_rkey);
send_wr.wr.rdma.remote_addr = ntohl(server_pdata.buf_va);
if (ibv_post_send(cm_id->qp,&send_wr,&bad_send_wr))
return 1;
while (1) {
if (ibv_get_cq_event(comp_chan,&evt_cq,&cq_context))
return 1;
if (ibv_req_notify_cq(cq,0))
return 1;
if (ibv_poll_cq(cq,1,&wc) != 1)
return 1;
if (wc.status != IBV_WC_SUCCESS)
return 1;
if (wc.wr_id == 0) {
printf("server ans : %d\n", ntohl(buf[0]));
break;
}
}
ibv_ack_cq_events(cq,2);
rdma_disconnect(cm_id);
err = rdma_get_cm_event(cm_channel,&event);
if (err)
return err;
rdma_ack_cm_event(event);
rdma_destroy_qp(cm_id);
ibv_dereg_mr(mr);
free(buf);
err = rdma_destroy_id(cm_id);
if (err) {
perror("rdma_destroy_id");
return err;
}
rdma_destroy_event_channel(cm_channel);
return 0;
}