Skip to content

Commit

Permalink
hw/pvrdma: Add support to allow guest to configure GID table
Browse files Browse the repository at this point in the history
The control over the RDMA device's GID table is done by updating the
device's Ethernet function addresses.
Usually the first GID entry is determined by the MAC address, the second
by the first IPv6 address and the third by the IPv4 address. Other
entries can be added by adding more IP addresses. The reverse also
holds: whenever an address is removed, the corresponding GID entry
is removed.

The process is done by the network and RDMA stacks. Whenever an address
is added, the ib_core driver is notified and calls the device driver's
add_gid function, which in turn updates the device.

To support this in the pvrdma device we need to hook into the create_bind
and destroy_bind HW commands triggered by the pvrdma driver in the guest.
Whenever a change is made to the pvrdma port's GID table, a special QMP
message is sent to be processed by libvirt to update the address of the
backend Ethernet device.

Signed-off-by: Yuval Shaia <[email protected]>
Reviewed-by: Marcel Apfelbaum <[email protected]>
Signed-off-by: Marcel Apfelbaum <[email protected]>
  • Loading branch information
Yuval Shaia authored and marcel-apf committed Dec 22, 2018
1 parent 4a5c990 commit 2b05705
Show file tree
Hide file tree
Showing 11 changed files with 462 additions and 163 deletions.
344 changes: 243 additions & 101 deletions hw/rdma/rdma_backend.c

Large diffs are not rendered by default.

22 changes: 12 additions & 10 deletions hw/rdma/rdma_backend.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,6 @@ enum ibv_special_qp_type {
IBV_QPT_GSI = 1,
};

static inline union ibv_gid *rdma_backend_gid(RdmaBackendDev *dev)
{
return &dev->gid;
}

static inline uint32_t rdma_backend_qpn(const RdmaBackendQP *qp)
{
return qp->ibqp ? qp->ibqp->qp_num : 1;
Expand All @@ -51,9 +46,15 @@ static inline uint32_t rdma_backend_mr_rkey(const RdmaBackendMR *mr)
int rdma_backend_init(RdmaBackendDev *backend_dev, PCIDevice *pdev,
RdmaDeviceResources *rdma_dev_res,
const char *backend_device_name, uint8_t port_num,
uint8_t backend_gid_idx, struct ibv_device_attr *dev_attr,
CharBackend *mad_chr_be, Error **errp);
struct ibv_device_attr *dev_attr, CharBackend *mad_chr_be,
Error **errp);
void rdma_backend_fini(RdmaBackendDev *backend_dev);
int rdma_backend_add_gid(RdmaBackendDev *backend_dev, const char *ifname,
union ibv_gid *gid);
int rdma_backend_del_gid(RdmaBackendDev *backend_dev, const char *ifname,
union ibv_gid *gid);
int rdma_backend_get_gid_index(RdmaBackendDev *backend_dev,
union ibv_gid *gid);
void rdma_backend_start(RdmaBackendDev *backend_dev);
void rdma_backend_stop(RdmaBackendDev *backend_dev);
void rdma_backend_register_comp_handler(void (*handler)(int status,
Expand Down Expand Up @@ -82,9 +83,9 @@ int rdma_backend_create_qp(RdmaBackendQP *qp, uint8_t qp_type,
int rdma_backend_qp_state_init(RdmaBackendDev *backend_dev, RdmaBackendQP *qp,
uint8_t qp_type, uint32_t qkey);
int rdma_backend_qp_state_rtr(RdmaBackendDev *backend_dev, RdmaBackendQP *qp,
uint8_t qp_type, union ibv_gid *dgid,
uint32_t dqpn, uint32_t rq_psn, uint32_t qkey,
bool use_qkey);
uint8_t qp_type, uint8_t sgid_idx,
union ibv_gid *dgid, uint32_t dqpn,
uint32_t rq_psn, uint32_t qkey, bool use_qkey);
int rdma_backend_qp_state_rts(RdmaBackendQP *qp, uint8_t qp_type,
uint32_t sq_psn, uint32_t qkey, bool use_qkey);
int rdma_backend_query_qp(RdmaBackendQP *qp, struct ibv_qp_attr *attr,
Expand All @@ -94,6 +95,7 @@ void rdma_backend_destroy_qp(RdmaBackendQP *qp);
void rdma_backend_post_send(RdmaBackendDev *backend_dev,
RdmaBackendQP *qp, uint8_t qp_type,
struct ibv_sge *sge, uint32_t num_sge,
uint8_t sgid_idx, union ibv_gid *sgid,
union ibv_gid *dgid, uint32_t dqpn, uint32_t dqkey,
void *ctx);
void rdma_backend_post_recv(RdmaBackendDev *backend_dev,
Expand Down
11 changes: 8 additions & 3 deletions hw/rdma/rdma_backend_defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "qemu/thread.h"
#include "chardev/char-fe.h"
#include <infiniband/verbs.h>
#include "contrib/rdmacm-mux/rdmacm-mux.h"

typedef struct RdmaDeviceResources RdmaDeviceResources;

Expand All @@ -34,19 +35,22 @@ typedef struct RecvMadList {
QList *list;
} RecvMadList;

/*
 * Per-device state for the RDMA CM multiplexer connection.
 * NOTE(review): presumably this talks to the contrib/rdmacm-mux helper
 * whose header is included by this file - confirm in rdma_backend.c.
 */
typedef struct RdmaCmMux {
/* Character backend used for the connection; only a pointer is stored here */
CharBackend *chr_be;
/* NOTE(review): looks like a receive-gating flag or byte count - confirm */
int can_receive;
} RdmaCmMux;

typedef struct RdmaBackendDev {
struct ibv_device_attr dev_attr;
RdmaBackendThread comp_thread;
union ibv_gid gid;
PCIDevice *dev;
RdmaDeviceResources *rdma_dev_res;
struct ibv_device *ib_dev;
struct ibv_context *context;
struct ibv_comp_channel *channel;
uint8_t port_num;
uint8_t backend_gid_idx;
RecvMadList recv_mads_list;
CharBackend *mad_chr_be;
RdmaCmMux rdmacm_mux;
} RdmaBackendDev;

typedef struct RdmaBackendPD {
Expand All @@ -66,6 +70,7 @@ typedef struct RdmaBackendCQ {
/* Backend (host verbs) objects and attributes backing one queue pair. */
typedef struct RdmaBackendQP {
/* Host protection domain associated with this QP */
struct ibv_pd *ibpd;
/* Host verbs QP; rdma_backend_qpn() treats NULL as "no host QP" */
struct ibv_qp *ibqp;
/* NOTE(review): presumably the backend source GID index in use - confirm */
uint8_t sgid_idx;
} RdmaBackendQP;

#endif
104 changes: 100 additions & 4 deletions hw/rdma/rdma_rm.c
Original file line number Diff line number Diff line change
Expand Up @@ -391,7 +391,7 @@ int rdma_rm_alloc_qp(RdmaDeviceResources *dev_res, uint32_t pd_handle,
}

int rdma_rm_modify_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
uint32_t qp_handle, uint32_t attr_mask,
uint32_t qp_handle, uint32_t attr_mask, uint8_t sgid_idx,
union ibv_gid *dgid, uint32_t dqpn,
enum ibv_qp_state qp_state, uint32_t qkey,
uint32_t rq_psn, uint32_t sq_psn)
Expand All @@ -400,6 +400,7 @@ int rdma_rm_modify_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
int ret;

pr_dbg("qpn=0x%x\n", qp_handle);
pr_dbg("qkey=0x%x\n", qkey);

qp = rdma_rm_get_qp(dev_res, qp_handle);
if (!qp) {
Expand Down Expand Up @@ -430,9 +431,19 @@ int rdma_rm_modify_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
}

if (qp->qp_state == IBV_QPS_RTR) {
/* Get backend gid index */
pr_dbg("Guest sgid_idx=%d\n", sgid_idx);
sgid_idx = rdma_rm_get_backend_gid_index(dev_res, backend_dev,
sgid_idx);
if (sgid_idx <= 0) { /* TODO check also less than bk.max_sgid */
pr_dbg("Fail to get bk sgid_idx for sgid_idx %d\n", sgid_idx);
return -EIO;
}

ret = rdma_backend_qp_state_rtr(backend_dev, &qp->backend_qp,
qp->qp_type, dgid, dqpn, rq_psn,
qkey, attr_mask & IBV_QP_QKEY);
qp->qp_type, sgid_idx, dgid, dqpn,
rq_psn, qkey,
attr_mask & IBV_QP_QKEY);
if (ret) {
return -EIO;
}
Expand Down Expand Up @@ -523,11 +534,91 @@ void rdma_rm_dealloc_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id)
res_tbl_dealloc(&dev_res->cqe_ctx_tbl, cqe_ctx_id);
}

/*
 * Register a GID with the backend and record it in port 0's GID table.
 *
 * @dev_res:     device resources holding the guest-visible GID table
 * @backend_dev: backend device the GID is registered with
 * @ifname:      name passed through to rdma_backend_add_gid()
 * @gid:         GID to store
 * @gid_idx:     slot in the GID table to populate
 *
 * Returns 0 on success, -EINVAL if @gid_idx is outside the table or the
 * backend refuses the GID. The table is left untouched on failure.
 */
int rdma_rm_add_gid(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
                    const char *ifname, union ibv_gid *gid, int gid_idx)
{
    const size_t tbl_len = sizeof(dev_res->ports[0].gid_tbl) /
                           sizeof(dev_res->ports[0].gid_tbl[0]);
    int rc;

    /* Validate the slot before touching the backend or the table */
    if (gid_idx < 0 || (size_t)gid_idx >= tbl_len) {
        pr_dbg("Got invalid gid_idx %d\n", gid_idx);
        return -EINVAL;
    }

    rc = rdma_backend_add_gid(backend_dev, ifname, gid);
    if (rc) {
        pr_dbg("Fail to add gid\n");
        return -EINVAL;
    }

    memcpy(&dev_res->ports[0].gid_tbl[gid_idx].gid, gid, sizeof(*gid));

    /*
     * Drop any backend index cached for the slot's previous content so
     * rdma_rm_get_backend_gid_index() resolves it again for the new GID.
     */
    dev_res->ports[0].gid_tbl[gid_idx].backend_gid_index = -1;

    return 0;
}

/*
 * Remove the GID stored in slot @gid_idx of port 0 from the backend and
 * clear the slot.
 *
 * @dev_res:     device resources holding the guest-visible GID table
 * @backend_dev: backend device the GID was registered with
 * @ifname:      name passed through to rdma_backend_del_gid()
 * @gid_idx:     slot in the GID table to clear
 *
 * A slot whose GID is all zeroes was never populated (or was already
 * cleared), so the backend is not contacted for it; this keeps teardown,
 * which walks every slot, from issuing deletes for GIDs that were never
 * added.
 *
 * Returns 0 on success, -EINVAL if @gid_idx is outside the table or the
 * backend fails to delete the GID (the slot is then left as is).
 */
int rdma_rm_del_gid(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
                    const char *ifname, int gid_idx)
{
    const size_t tbl_len = sizeof(dev_res->ports[0].gid_tbl) /
                           sizeof(dev_res->ports[0].gid_tbl[0]);
    size_t byte;
    int in_use = 0;
    int rc;

    if (gid_idx < 0 || (size_t)gid_idx >= tbl_len) {
        pr_dbg("Got invalid gid_idx %d\n", gid_idx);
        return -EINVAL;
    }

    for (byte = 0;
         byte < sizeof(dev_res->ports[0].gid_tbl[gid_idx].gid.raw);
         byte++) {
        if (dev_res->ports[0].gid_tbl[gid_idx].gid.raw[byte]) {
            in_use = 1;
            break;
        }
    }

    if (in_use) {
        rc = rdma_backend_del_gid(backend_dev, ifname,
                                  &dev_res->ports[0].gid_tbl[gid_idx].gid);
        if (rc) {
            pr_dbg("Fail to delete gid\n");
            return -EINVAL;
        }

        memset(dev_res->ports[0].gid_tbl[gid_idx].gid.raw, 0,
               sizeof(dev_res->ports[0].gid_tbl[gid_idx].gid));
    }

    /* -1 marks the backend index as "not resolved" */
    dev_res->ports[0].gid_tbl[gid_idx].backend_gid_index = -1;

    return 0;
}

/*
 * Translate a guest GID table index into the backend's GID index.
 *
 * The backend index is looked up through rdma_backend_get_gid_index() the
 * first time a slot is queried (while its cached value is still -1) and is
 * served from the table afterwards.
 *
 * @dev_res:     device resources holding the guest-visible GID table
 * @backend_dev: backend device used for the lookup
 * @sgid_idx:    guest index into port 0's GID table
 *
 * Returns the cached/resolved backend index, or -EINVAL when @sgid_idx is
 * not a valid table index.
 */
int rdma_rm_get_backend_gid_index(RdmaDeviceResources *dev_res,
                                  RdmaBackendDev *backend_dev, int sgid_idx)
{
    /* gid_tbl has MAX_PORT_GIDS entries, so MAX_PORT_GIDS itself is invalid */
    if (unlikely(sgid_idx < 0 || sgid_idx >= MAX_PORT_GIDS)) {
        pr_dbg("Got invalid sgid_idx %d\n", sgid_idx);
        return -EINVAL;
    }

    if (unlikely(dev_res->ports[0].gid_tbl[sgid_idx].backend_gid_index == -1)) {
        dev_res->ports[0].gid_tbl[sgid_idx].backend_gid_index =
            rdma_backend_get_gid_index(backend_dev,
                                       &dev_res->ports[0].gid_tbl[sgid_idx].gid);
    }

    pr_dbg("backend_gid_index=%d\n",
           dev_res->ports[0].gid_tbl[sgid_idx].backend_gid_index);

    return dev_res->ports[0].gid_tbl[sgid_idx].backend_gid_index;
}

/*
 * Release one reference on @data via g_bytes_unref().
 * NOTE(review): presumably installed as the key-destroy callback of the QP
 * hash table, with @data being a GBytes key - confirm at the registration
 * site.
 */
static void destroy_qp_hash_key(gpointer data)
{
g_bytes_unref(data);
}

/*
 * Reset the port table: zero every port, mark each one as down and flag
 * all of its GID slots as having no resolved backend index (-1).
 */
static void init_ports(RdmaDeviceResources *dev_res)
{
    int port_no;
    int slot;

    memset(dev_res->ports, 0, sizeof(dev_res->ports));

    port_no = 0;
    while (port_no < MAX_PORTS) {
        dev_res->ports[port_no].state = IBV_PORT_DOWN;

        slot = 0;
        while (slot < MAX_PORT_GIDS) {
            dev_res->ports[port_no].gid_tbl[slot].backend_gid_index = -1;
            slot++;
        }

        port_no++;
    }
}

/*
 * Tear down port 0: mark it down, then delete every GID slot in ascending
 * order. Results of the individual deletions are not inspected.
 */
static void fini_ports(RdmaDeviceResources *dev_res,
                       RdmaBackendDev *backend_dev, const char *ifname)
{
    int slot = 0;

    dev_res->ports[0].state = IBV_PORT_DOWN;

    while (slot < MAX_PORT_GIDS) {
        rdma_rm_del_gid(dev_res, backend_dev, ifname, slot);
        slot++;
    }
}

int rdma_rm_init(RdmaDeviceResources *dev_res, struct ibv_device_attr *dev_attr,
Error **errp)
{
Expand All @@ -545,11 +636,16 @@ int rdma_rm_init(RdmaDeviceResources *dev_res, struct ibv_device_attr *dev_attr,
dev_attr->max_qp_wr, sizeof(void *));
res_tbl_init("UC", &dev_res->uc_tbl, MAX_UCS, sizeof(RdmaRmUC));

init_ports(dev_res);

return 0;
}

void rdma_rm_fini(RdmaDeviceResources *dev_res)
void rdma_rm_fini(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
const char *ifname)
{
fini_ports(dev_res, backend_dev, ifname);

res_tbl_free(&dev_res->uc_tbl);
res_tbl_free(&dev_res->cqe_ctx_tbl);
res_tbl_free(&dev_res->qp_tbl);
Expand Down
17 changes: 15 additions & 2 deletions hw/rdma/rdma_rm.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@

int rdma_rm_init(RdmaDeviceResources *dev_res, struct ibv_device_attr *dev_attr,
Error **errp);
void rdma_rm_fini(RdmaDeviceResources *dev_res);
void rdma_rm_fini(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
const char *ifname);

int rdma_rm_alloc_pd(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
uint32_t *pd_handle, uint32_t ctx_handle);
Expand Down Expand Up @@ -55,7 +56,7 @@ int rdma_rm_alloc_qp(RdmaDeviceResources *dev_res, uint32_t pd_handle,
uint32_t recv_cq_handle, void *opaque, uint32_t *qpn);
RdmaRmQP *rdma_rm_get_qp(RdmaDeviceResources *dev_res, uint32_t qpn);
int rdma_rm_modify_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
uint32_t qp_handle, uint32_t attr_mask,
uint32_t qp_handle, uint32_t attr_mask, uint8_t sgid_idx,
union ibv_gid *dgid, uint32_t dqpn,
enum ibv_qp_state qp_state, uint32_t qkey,
uint32_t rq_psn, uint32_t sq_psn);
Expand All @@ -69,4 +70,16 @@ int rdma_rm_alloc_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t *cqe_ctx_id,
void *rdma_rm_get_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id);
void rdma_rm_dealloc_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id);

/*
 * Register @gid with the backend and store it in slot @gid_idx of port 0's
 * GID table. Returns 0 on success, -EINVAL if the backend rejects the GID.
 */
int rdma_rm_add_gid(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
const char *ifname, union ibv_gid *gid, int gid_idx);
/*
 * Remove the GID held in slot @gid_idx of port 0 from the backend and clear
 * the slot. Returns 0 on success, -EINVAL if the backend deletion fails.
 */
int rdma_rm_del_gid(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
const char *ifname, int gid_idx);
/*
 * Return the backend GID index for guest slot @sgid_idx of port 0, resolving
 * it through the backend while the slot's cached value is still -1.
 * Returns -EINVAL when @sgid_idx is rejected as invalid.
 */
int rdma_rm_get_backend_gid_index(RdmaDeviceResources *dev_res,
RdmaBackendDev *backend_dev, int sgid_idx);
/*
 * Return a pointer to the GID stored in slot @sgid_idx of port 0.
 * The index is not range-checked here; callers must pass a valid slot.
 */
static inline union ibv_gid *rdma_rm_get_gid(RdmaDeviceResources *dev_res,
                                             int sgid_idx)
{
    union ibv_gid *slot_gid = &dev_res->ports[0].gid_tbl[sgid_idx].gid;

    return slot_gid;
}

#endif
9 changes: 7 additions & 2 deletions hw/rdma/rdma_rm_defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
#include "rdma_backend_defs.h"

#define MAX_PORTS 1
#define MAX_PORT_GIDS 1
#define MAX_PORT_GIDS 255
#define MAX_GIDS MAX_PORT_GIDS
#define MAX_PORT_PKEYS 1
#define MAX_PKEYS MAX_PORT_PKEYS
Expand Down Expand Up @@ -86,8 +86,13 @@ typedef struct RdmaRmQP {
enum ibv_qp_state qp_state;
} RdmaRmQP;

/* One entry of a port's GID table. */
typedef struct RdmaRmGid {
/* GID as stored for this slot; all zeroes after init or deletion */
union ibv_gid gid;
/*
 * Backend GID index for this entry; -1 means it has not been resolved yet
 * (set at init and on deletion, filled in on first lookup).
 */
int backend_gid_index;
} RdmaRmGid;

typedef struct RdmaRmPort {
union ibv_gid gid_tbl[MAX_PORT_GIDS];
RdmaRmGid gid_tbl[MAX_PORT_GIDS];
enum ibv_port_state state;
} RdmaRmPort;

Expand Down
16 changes: 16 additions & 0 deletions hw/rdma/rdma_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

#include "hw/pci/pci.h"
#include "sysemu/dma.h"
#include "stdio.h"

#define pr_info(fmt, ...) \
fprintf(stdout, "%s: %-20s (%3d): " fmt, "rdma", __func__, __LINE__,\
Expand All @@ -39,9 +40,24 @@ extern unsigned long pr_dbg_cnt;
#define pr_dbg(fmt, ...) \
fprintf(stdout, "%lx %ld: %-20s (%3d): " fmt, pthread_self(), pr_dbg_cnt++, \
__func__, __LINE__, ## __VA_ARGS__)

/*
 * Debug helper: print @len bytes of @buf through pr_dbg() as upper-case,
 * space-separated hex, prefixed by @title and the byte count.
 *
 * Wrapped in do { } while (0) so it behaves as a single statement (safe in
 * an unbraced if/else). @len is evaluated once, macro arguments are
 * parenthesised, and the locals carry a suffix so they cannot capture
 * caller variables named i, b or b1. Each byte is formatted directly at
 * its offset instead of re-scanning the string with strcat().
 */
#define pr_dbg_buf(title, buf, len) \
    do { \
        int dbg_len_ = (len); \
        int dbg_i_; \
        char *dbg_hex_ = g_malloc0((dbg_len_ > 0 ? dbg_len_ : 0) * 3 + 1); \
        for (dbg_i_ = 0; dbg_i_ < dbg_len_; dbg_i_++) { \
            /* 3 characters per byte plus the terminating NUL */ \
            snprintf(dbg_hex_ + dbg_i_ * 3, 4, "%.2X ", \
                     (unsigned)((buf)[dbg_i_] & 0x000000FF)); \
        } \
        pr_dbg("%s (%d): %s\n", (title), dbg_len_, dbg_hex_); \
        g_free(dbg_hex_); \
    } while (0)

#else
#define init_pr_dbg(void)
#define pr_dbg(fmt, ...)
#define pr_dbg_buf(title, buf, len)
#endif

void *rdma_pci_dma_map(PCIDevice *dev, dma_addr_t addr, dma_addr_t plen);
Expand Down
2 changes: 1 addition & 1 deletion hw/rdma/vmw/pvrdma.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,8 @@ typedef struct PVRDMADev {
int interrupt_mask;
struct ibv_device_attr dev_attr;
uint64_t node_guid;
char *backend_eth_device_name;
char *backend_device_name;
uint8_t backend_gid_idx;
uint8_t backend_port_num;
RdmaBackendDev backend_dev;
RdmaDeviceResources rdma_dev_res;
Expand Down
Loading

0 comments on commit 2b05705

Please sign in to comment.