Skip to content

Commit

Permalink
ovs-hyperv: make kernel return values netlink socket like
Browse files Browse the repository at this point in the history
In this patch, we make changes to userspace as well as
kernel datapath on hyperv to make it more netlink socket
like. Previously, the kernel datapath did not distinguish
between "transport errors" and other errors. Netlink
semantics dictate that netlink functions should
return an error only in the case of a "transport error",
which is generally something fatal, e.g. failure to
communicate with the OVS module, or an invalid command
altogether. Other errors, such as an unsupported action
or an invalid flow key, are not considered "transport
errors", and in such cases, netlink functions are to return
success with a 'struct nlmsgerr' populated in the output
buffer.

This patch implements these semantics.

Signed-off-by: Nithin Raju <[email protected]>
Acked-by: Sorin Vinturis <[email protected]>
Reported-at: openvswitch/ovs-issues#72
Signed-off-by: Ben Pfaff <[email protected]>
  • Loading branch information
nithinrajub authored and blp committed Apr 29, 2015
1 parent 9719aee commit a51a508
Show file tree
Hide file tree
Showing 3 changed files with 114 additions and 51 deletions.
61 changes: 53 additions & 8 deletions datapath-windows/ovsext/Datapath.c
Original file line number Diff line number Diff line change
Expand Up @@ -78,10 +78,11 @@ typedef struct _NETLINK_CMD {
/* A netlink family is a group of commands. */
typedef struct _NETLINK_FAMILY {
CHAR *name;
UINT32 id;
UINT16 id;
UINT8 version;
UINT8 pad;
UINT8 pad1;
UINT16 maxAttr;
UINT16 pad2;
NETLINK_CMD *cmds; /* Array of netlink commands and handlers. */
UINT16 opsCount;
} NETLINK_FAMILY, *PNETLINK_FAMILY;
Expand Down Expand Up @@ -143,12 +144,12 @@ NETLINK_CMD nlControlFamilyCmdOps[] = {
},
{ .cmd = OVS_CTRL_CMD_EVENT_NOTIFY,
.handler = OvsReadEventCmdHandler,
.supportedDevOp = OVS_READ_EVENT_DEV_OP,
.supportedDevOp = OVS_READ_DEV_OP,
.validateDpIndex = FALSE,
},
{ .cmd = OVS_CTRL_CMD_READ_NOTIFY,
.handler = OvsReadPacketCmdHandler,
.supportedDevOp = OVS_READ_PACKET_DEV_OP,
.supportedDevOp = OVS_READ_DEV_OP,
.validateDpIndex = FALSE,
}
};
Expand Down Expand Up @@ -799,12 +800,17 @@ OvsDeviceControl(PDEVICE_OBJECT deviceObject,
inputBufferLen = 0;

ovsMsg = &ovsMsgReadOp;
ovsMsg->nlMsg.nlmsgType = OVS_WIN_NL_CTRL_FAMILY_ID;
RtlZeroMemory(ovsMsg, sizeof *ovsMsg);
ovsMsg->nlMsg.nlmsgLen = sizeof *ovsMsg;
ovsMsg->nlMsg.nlmsgType = nlControlFamilyOps.id;
ovsMsg->nlMsg.nlmsgPid = instance->pid;

/* An "artificial" command so we can use NL family function table*/
ovsMsg->genlMsg.cmd = (code == OVS_IOCTL_READ_EVENT) ?
OVS_CTRL_CMD_EVENT_NOTIFY :
OVS_CTRL_CMD_READ_NOTIFY;
ovsMsg->genlMsg.version = nlControlFamilyOps.version;

devOp = OVS_READ_DEV_OP;
break;

Expand Down Expand Up @@ -895,8 +901,8 @@ OvsDeviceControl(PDEVICE_OBJECT deviceObject,
}

/*
* For read operation, the netlink command has already been validated
* previously.
* For read operation, avoid duplicate validation since 'ovsMsg' is either
* "artificial" or was copied from a previously validated 'ovsMsg'.
*/
if (devOp != OVS_READ_DEV_OP) {
status = ValidateNetlinkCmd(devOp, instance, ovsMsg, nlFamilyOps);
Expand Down Expand Up @@ -982,7 +988,9 @@ ValidateNetlinkCmd(UINT32 devOp,

/*
* --------------------------------------------------------------------------
* Function to invoke the netlink command handler.
* Function to invoke the netlink command handler. The function also stores
* the return value of the handler function to construct a 'NL_ERROR' message,
* and in turn returns success to the caller.
* --------------------------------------------------------------------------
*/
static NTSTATUS
Expand All @@ -1004,6 +1012,43 @@ InvokeNetlinkCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
}
}

/*
* Netlink socket semantics dictate that the return value of the netlink
* function should be an error ONLY under fatal conditions. If the message
* made it all the way to the handler function, it is not a fatal condition.
* Absorb the error returned by the handler function into a 'struct
* NL_ERROR' and populate the 'output buffer' to return to userspace.
*
* This behavior is obviously applicable only to netlink commands that
* specify an 'output buffer'. For other commands, we return the error as
* is.
*
* 'STATUS_PENDING' is a special return value and userspace is equipped to
* handle it.
*/
if (status != STATUS_SUCCESS && status != STATUS_PENDING) {
if (usrParamsCtx->devOp != OVS_WRITE_DEV_OP && *replyLen == 0) {
NL_ERROR nlError = NlMapStatusToNlErr(status);
POVS_MESSAGE msgIn = (POVS_MESSAGE)usrParamsCtx->inputBuffer;
POVS_MESSAGE_ERROR msgError = (POVS_MESSAGE_ERROR)
usrParamsCtx->outputBuffer;

ASSERT(msgError);
NlBuildErrorMsg(msgIn, msgError, nlError);
*replyLen = msgError->nlMsg.nlmsgLen;
}

if (*replyLen != 0) {
status = STATUS_SUCCESS;
}
}

#ifdef DBG
if (usrParamsCtx->devOp != OVS_WRITE_DEV_OP) {
ASSERT(status == STATUS_PENDING || *replyLen != 0 || status == STATUS_SUCCESS);
}
#endif

return status;
}

Expand Down
2 changes: 0 additions & 2 deletions datapath-windows/ovsext/Datapath.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,6 @@
#define OVS_READ_DEV_OP (1 << 0)
#define OVS_WRITE_DEV_OP (1 << 1)
#define OVS_TRANSACTION_DEV_OP (1 << 2)
#define OVS_READ_EVENT_DEV_OP (1 << 3)
#define OVS_READ_PACKET_DEV_OP (1 << 4)

typedef struct _OVS_DEVICE_EXTENSION {
INT numberOpenInstance;
Expand Down
102 changes: 61 additions & 41 deletions lib/netlink-socket.c
Original file line number Diff line number Diff line change
Expand Up @@ -475,6 +475,8 @@ nl_sock_send__(struct nl_sock *sock, const struct ofpbuf *msg,
retval = -1;
/* XXX: Map to a more appropriate error based on GetLastError(). */
errno = EINVAL;
VLOG_DBG_RL(&rl, "fatal driver failure in write: %s",
ovs_lasterror_to_string());
} else {
retval = msg->size;
}
Expand Down Expand Up @@ -564,7 +566,10 @@ nl_sock_recv__(struct nl_sock *sock, struct ofpbuf *buf, bool wait)
DWORD bytes;
if (!DeviceIoControl(sock->handle, sock->read_ioctl,
NULL, 0, tail, sizeof tail, &bytes, NULL)) {
VLOG_DBG_RL(&rl, "fatal driver failure in transact: %s",
ovs_lasterror_to_string());
retval = -1;
/* XXX: Map to a more appropriate error. */
errno = EINVAL;
} else {
retval = bytes;
Expand Down Expand Up @@ -789,63 +794,78 @@ nl_sock_transact_multiple__(struct nl_sock *sock,
uint8_t reply_buf[65536];
for (i = 0; i < n; i++) {
DWORD reply_len;
bool ret;
struct nl_transaction *txn = transactions[i];
struct nlmsghdr *request_nlmsg, *reply_nlmsg;

if (!DeviceIoControl(sock->handle, OVS_IOCTL_TRANSACT,
txn->request->data,
txn->request->size,
reply_buf, sizeof reply_buf,
&reply_len, NULL)) {
ret = DeviceIoControl(sock->handle, OVS_IOCTL_TRANSACT,
txn->request->data,
txn->request->size,
reply_buf, sizeof reply_buf,
&reply_len, NULL);

if (ret && reply_len == 0) {
/*
* The current transaction did not produce any data to read and that
* is not an error as such. Continue with the remainder of the
* transactions.
*/
txn->error = 0;
if (txn->reply) {
ofpbuf_clear(txn->reply);
}
} else if (!ret) {
/* XXX: Map to a more appropriate error. */
error = EINVAL;
VLOG_DBG_RL(&rl, "fatal driver failure: %s",
ovs_lasterror_to_string());
break;
}

if (reply_len < sizeof *reply_nlmsg) {
nl_sock_record_errors__(transactions, n, 0);
VLOG_DBG_RL(&rl, "insufficient length of reply %#"PRIu32
" for seq: %#"PRIx32, reply_len, request_nlmsg->nlmsg_seq);
break;
}

/* Validate the sequence number in the reply. */
request_nlmsg = nl_msg_nlmsghdr(txn->request);
reply_nlmsg = (struct nlmsghdr *)reply_buf;
if (reply_len != 0) {
if (reply_len < sizeof *reply_nlmsg) {
nl_sock_record_errors__(transactions, n, 0);
VLOG_DBG_RL(&rl, "insufficient length of reply %#"PRIu32
" for seq: %#"PRIx32, reply_len, request_nlmsg->nlmsg_seq);
break;
}

if (request_nlmsg->nlmsg_seq != reply_nlmsg->nlmsg_seq) {
ovs_assert(request_nlmsg->nlmsg_seq == reply_nlmsg->nlmsg_seq);
VLOG_DBG_RL(&rl, "mismatched seq request %#"PRIx32
", reply %#"PRIx32, request_nlmsg->nlmsg_seq,
reply_nlmsg->nlmsg_seq);
break;
}
/* Validate the sequence number in the reply. */
request_nlmsg = nl_msg_nlmsghdr(txn->request);
reply_nlmsg = (struct nlmsghdr *)reply_buf;

/* Handle errors embedded within the netlink message. */
ofpbuf_use_stub(&tmp_reply, reply_buf, sizeof reply_buf);
tmp_reply.size = sizeof reply_buf;
if (nl_msg_nlmsgerr(&tmp_reply, &txn->error)) {
if (txn->reply) {
ofpbuf_clear(txn->reply);
}
if (txn->error) {
VLOG_DBG_RL(&rl, "received NAK error=%d (%s)",
error, ovs_strerror(txn->error));
if (request_nlmsg->nlmsg_seq != reply_nlmsg->nlmsg_seq) {
ovs_assert(request_nlmsg->nlmsg_seq == reply_nlmsg->nlmsg_seq);
VLOG_DBG_RL(&rl, "mismatched seq request %#"PRIx32
", reply %#"PRIx32, request_nlmsg->nlmsg_seq,
reply_nlmsg->nlmsg_seq);
break;
}
} else {
txn->error = 0;
if (txn->reply) {
/* Copy the reply to the buffer specified by the caller. */
if (reply_len > txn->reply->allocated) {
ofpbuf_reinit(txn->reply, reply_len);

/* Handle errors embedded within the netlink message. */
ofpbuf_use_stub(&tmp_reply, reply_buf, sizeof reply_buf);
tmp_reply.size = sizeof reply_buf;
if (nl_msg_nlmsgerr(&tmp_reply, &txn->error)) {
if (txn->reply) {
ofpbuf_clear(txn->reply);
}
if (txn->error) {
VLOG_DBG_RL(&rl, "received NAK error=%d (%s)",
error, ovs_strerror(txn->error));
}
} else {
txn->error = 0;
if (txn->reply) {
/* Copy the reply to the buffer specified by the caller. */
if (reply_len > txn->reply->allocated) {
ofpbuf_reinit(txn->reply, reply_len);
}
memcpy(txn->reply->data, reply_buf, reply_len);
txn->reply->size = reply_len;
}
memcpy(txn->reply->data, reply_buf, reply_len);
txn->reply->size = reply_len;
}
ofpbuf_uninit(&tmp_reply);
}
ofpbuf_uninit(&tmp_reply);

/* Count the number of successful transactions. */
(*done)++;
Expand Down

0 comments on commit a51a508

Please sign in to comment.