From 6ace578e5b5b58165ae86400e8b68c127c259928 Mon Sep 17 00:00:00 2001 From: Tatsuhiro Tsujikawa Date: Wed, 2 Aug 2023 17:47:51 +0900 Subject: [PATCH] nghttpx: Rework sending and receiving ECN bits Now ECN is sent along with sendmsg ancillary message. With this commit, ECN should work fine with the recent Mac OS. --- CMakeLists.txt | 1 + cmakeconfig.h.in | 3 +++ configure.ac | 1 + src/shrpx_quic.cc | 29 ++++++++++++++++++++++----- src/shrpx_quic_listener.cc | 3 +-- src/util.cc | 41 +++++++++++++++----------------------- src/util.h | 2 -- 7 files changed, 46 insertions(+), 34 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4cf6535a67..0f5a1ea0fd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -273,6 +273,7 @@ check_include_file("inttypes.h" HAVE_INTTYPES_H) check_include_file("limits.h" HAVE_LIMITS_H) check_include_file("netdb.h" HAVE_NETDB_H) check_include_file("netinet/in.h" HAVE_NETINET_IN_H) +check_include_file("netinet/ip.h" HAVE_NETINET_IP_H) check_include_file("pwd.h" HAVE_PWD_H) check_include_file("sys/socket.h" HAVE_SYS_SOCKET_H) check_include_file("sys/time.h" HAVE_SYS_TIME_H) diff --git a/cmakeconfig.h.in b/cmakeconfig.h.in index 49d19a31a0..8e7583e0d6 100644 --- a/cmakeconfig.h.in +++ b/cmakeconfig.h.in @@ -61,6 +61,9 @@ /* Define to 1 if you have the header file. */ #cmakedefine HAVE_NETINET_IN_H 1 +/* Define to 1 if you have the header file. */ +#cmakedefine HAVE_NETINET_IP_H 1 + /* Define to 1 if you have the header file. */ #cmakedefine HAVE_PWD_H 1 diff --git a/configure.ac b/configure.ac index 315f61b72b..04afdbc112 100644 --- a/configure.ac +++ b/configure.ac @@ -847,6 +847,7 @@ AC_CHECK_HEADERS([ \ limits.h \ netdb.h \ netinet/in.h \ + netinet/ip.h \ pwd.h \ stddef.h \ stdint.h \ diff --git a/src/shrpx_quic.cc b/src/shrpx_quic.cc index ffc2fec924..2ef3fa4cab 100644 --- a/src/shrpx_quic.cc +++ b/src/shrpx_quic.cc @@ -66,11 +66,11 @@ int quic_send_packet(const UpstreamAddr *faddr, const sockaddr *remote_sa, msg.msg_iov = &msg_iov; msg.msg_iovlen = 1; - uint8_t msg_ctrl[ + uint8_t msg_ctrl[CMSG_SPACE(sizeof(int)) + #ifdef UDP_SEGMENT - CMSG_SPACE(sizeof(uint16_t)) + + CMSG_SPACE(sizeof(uint16_t)) + #endif // UDP_SEGMENT - CMSG_SPACE(sizeof(in6_pktinfo))]; + CMSG_SPACE(sizeof(in6_pktinfo))]; memset(msg_ctrl, 0, sizeof(msg_ctrl)); @@ -121,9 +121,28 @@ int quic_send_packet(const UpstreamAddr *faddr, const sockaddr *remote_sa, } #endif // UDP_SEGMENT - msg.msg_controllen = controllen; + controllen += CMSG_SPACE(sizeof(int)); + cm = CMSG_NXTHDR(&msg, cm); + cm->cmsg_len = CMSG_LEN(sizeof(int)); + unsigned int tos = pi.ecn; + memcpy(CMSG_DATA(cm), &tos, sizeof(tos)); + + switch (local_sa->sa_family) { + case AF_INET: + cm->cmsg_level = IPPROTO_IP; + cm->cmsg_type = IP_TOS; + + break; + case AF_INET6: + cm->cmsg_level = IPPROTO_IPV6; + cm->cmsg_type = IPV6_TCLASS; - util::fd_set_send_ecn(faddr->fd, local_sa->sa_family, pi.ecn); + break; + default: + assert(0); + } + + msg.msg_controllen = controllen; ssize_t nwrite; diff --git a/src/shrpx_quic_listener.cc b/src/shrpx_quic_listener.cc index 9607ba1e9c..9b9f1203aa 100644 --- a/src/shrpx_quic_listener.cc +++ b/src/shrpx_quic_listener.cc @@ -59,8 +59,7 @@ void QUICListener::on_read() { msg.msg_iov = &msg_iov; msg.msg_iovlen = 1; - uint8_t msg_ctrl[CMSG_SPACE(sizeof(uint8_t)) + - CMSG_SPACE(sizeof(in6_pktinfo)) + + uint8_t msg_ctrl[CMSG_SPACE(sizeof(int)) + CMSG_SPACE(sizeof(in6_pktinfo)) + CMSG_SPACE(sizeof(uint16_t))]; msg.msg_control = msg_ctrl; diff --git a/src/util.cc b/src/util.cc index 87bbd0fb98..bfeb2051d3 100644 --- a/src/util.cc +++ b/src/util.cc @@ -41,6 +41,9 @@ #ifdef HAVE_NETINET_IN_H # include #endif // HAVE_NETINET_IN_H +#ifdef HAVE_NETINET_IP_H +# include +#endif // HAVE_NETINET_IP_H #include #ifdef _WIN32 # include @@ -1713,9 +1716,14 @@ unsigned int msghdr_get_ecn(msghdr *msg, int family) { switch (family) { case AF_INET: for (auto cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) { - if (cmsg->cmsg_level == IPPROTO_IP && cmsg->cmsg_type == IP_TOS && - cmsg->cmsg_len) { - return *reinterpret_cast(CMSG_DATA(cmsg)); + if (cmsg->cmsg_level == IPPROTO_IP && +# ifdef __APPLE__ + cmsg->cmsg_type == IP_RECVTOS +# else // !__APPLE__ + cmsg->cmsg_type == IP_TOS +# endif // !__APPLE__ + && cmsg->cmsg_len) { + return *reinterpret_cast(CMSG_DATA(cmsg)) & IPTOS_ECN_MASK; } } @@ -1724,7 +1732,11 @@ unsigned int msghdr_get_ecn(msghdr *msg, int family) { for (auto cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) { if (cmsg->cmsg_level == IPPROTO_IPV6 && cmsg->cmsg_type == IPV6_TCLASS && cmsg->cmsg_len) { - return *reinterpret_cast(CMSG_DATA(cmsg)); + unsigned int tos; + + memcpy(&tos, CMSG_DATA(cmsg), sizeof(tos)); + + return tos & IPTOS_ECN_MASK; } } @@ -1749,27 +1761,6 @@ size_t msghdr_get_udp_gro(msghdr *msg) { return gso_size; } - -int fd_set_send_ecn(int fd, int family, unsigned int ecn) { - switch (family) { - case AF_INET: - if (setsockopt(fd, IPPROTO_IP, IP_TOS, &ecn, - static_cast(sizeof(ecn))) == -1) { - return -1; - } - - return 0; - case AF_INET6: - if (setsockopt(fd, IPPROTO_IPV6, IPV6_TCLASS, &ecn, - static_cast(sizeof(ecn))) == -1) { - return -1; - } - - return 0; - } - - return -1; -} #endif // ENABLE_HTTP3 } // namespace util diff --git a/src/util.h b/src/util.h index c5575d85af..2cc576b568 100644 --- a/src/util.h +++ b/src/util.h @@ -962,8 +962,6 @@ unsigned int msghdr_get_ecn(msghdr *msg, int family); // msghdr_get_udp_gro returns UDP_GRO value from |msg|. If UDP_GRO is // not found, or UDP_GRO is not supported, this function returns 0. size_t msghdr_get_udp_gro(msghdr *msg); - -int fd_set_send_ecn(int fd, int family, unsigned int ecn); #endif // ENABLE_HTTP3 } // namespace util