From 365254427e5358a0c390aa5a60df5f449b9c0f00 Mon Sep 17 00:00:00 2001 From: "Michael C. Toren" Date: Wed, 22 May 2013 10:35:09 -0700 Subject: [PATCH] Merge BitTorrent libutp changes from uTorrent 3.4 (alpha) --- .gitignore | 6 + LICENSE | 2 +- Makefile | 51 +- libutp-2012.vcxproj | 146 + libutp-2012.vcxproj.filters | 65 + prop_sheets/debug-2012.props | 45 + prop_sheets/release-2012.props | 50 + prop_sheets/win32-2012.props | 15 + prop_sheets/x64-2012.props | 8 + ucat.c | 627 ++++ utp.h | 182 + utp_api.cpp | 139 + utp_callbacks.cpp | 208 ++ utp_callbacks.h | 47 + utp_hash.cpp | 239 ++ utp_hash.h | 146 + utp_internal.cpp | 6227 +++++++++++++++++--------------- utp_internal.h | 304 +- utp_packedsockaddr.cpp | 141 + utp_packedsockaddr.h | 60 + utp_templates.h | 381 +- utp_types.h | 162 +- utp_utils.cpp | 464 +-- utp_utils.h | 43 +- win32_inet_ntop.cpp | 192 +- win32_inet_ntop.h | 40 +- 26 files changed, 6405 insertions(+), 3585 deletions(-) create mode 100644 .gitignore create mode 100644 libutp-2012.vcxproj create mode 100644 libutp-2012.vcxproj.filters create mode 100644 prop_sheets/debug-2012.props create mode 100644 prop_sheets/release-2012.props create mode 100644 prop_sheets/win32-2012.props create mode 100644 prop_sheets/x64-2012.props create mode 100644 ucat.c create mode 100644 utp.h create mode 100644 utp_api.cpp create mode 100644 utp_callbacks.cpp create mode 100644 utp_callbacks.h create mode 100644 utp_hash.cpp create mode 100644 utp_hash.h create mode 100644 utp_packedsockaddr.cpp create mode 100644 utp_packedsockaddr.h diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..51e78ba --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +*.o +*.a +*.so +ucat +ucat-static +tags diff --git a/LICENSE b/LICENSE index 73acb81..7f6e16c 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ -Copyright (c) 2010 BitTorrent, Inc. +Copyright (c) 2010-2013 BitTorrent, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/Makefile b/Makefile index a1d9417..0c7520e 100644 --- a/Makefile +++ b/Makefile @@ -1,18 +1,47 @@ -SRCS = utp.cpp utp_utils.cpp -OBJS = utp.o utp_utils.o -CXXFLAGS = -fno-exceptions -fno-rtti -Wall -g +OBJS = utp_internal.o utp_utils.o utp_hash.o utp_callbacks.o utp_api.o utp_packedsockaddr.o +CFLAGS = -Wall -DPOSIX -g -fno-exceptions +CXXFLAGS = $(CFLAGS) -fPIC -fno-rtti +CC = gcc +CXX = g++ -all: libutp.a +CXXFLAGS += -Wno-sign-compare +CXXFLAGS += -fpermissive + +# Uncomment to enable utp_get_stats(), and a few extra sanity checks +CFLAGS += -D_DEBUG + +# Uncomment to enable debug logging +CFLAGS += -DUTP_DEBUG_LOGGING + +# Dynamically determine if librt is available. If so, assume we need to link +# against it for clock_gettime(2). This is required for clean builds on OSX; +# see for more. This should +# probably be ported to CMake at some point, but is suitable for now. +lrt := $(shell echo 'int main() {}' | $(CC) -xc -o /dev/null - -lrt >/dev/null 2>&1; echo $$?) +ifeq ($(strip $(lrt)),0) + LDFLAGS += -lrt +endif + +all: libutp.so libutp.a ucat ucat-static + +libutp.so: $(OBJS) + $(CXX) $(CXXFLAGS) -o libutp.so -shared $(OBJS) libutp.a: $(OBJS) - -rm -f libutp.a - ar q libutp.a $(OBJS) - ranlib libutp.a + ar rvs libutp.a $(OBJS) -.cpp.o: - g++ -c -DPOSIX -I . -I utp_config_lib $(CXXFLAGS) $< +ucat: ucat.o libutp.so + $(CC) $(CFLAGS) -o ucat ucat.o -L. 
-lutp $(LDFLAGS) -.PHONY: clean +ucat-static: ucat.o libutp.a + $(CXX) $(CXXFLAGS) -o ucat-static ucat.o libutp.a $(LDFLAGS) clean: - -rm -f $(OBJS) libutp.a + rm -f *.o libutp.so libutp.a ucat ucat-static + +tags: $(shell ls *.cpp *.h) + rm -f tags + ctags *.cpp *.h + +anyway: clean all +.PHONY: clean all anyway diff --git a/libutp-2012.vcxproj b/libutp-2012.vcxproj new file mode 100644 index 0000000..71f51f7 --- /dev/null +++ b/libutp-2012.vcxproj @@ -0,0 +1,146 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Release + Win32 + + + Release + x64 + + + + + + + + + + + + + + + + + + + + + + + + {5984D5CD-6ADD-4EB7-82E7-A555888FBBBD} + libutp2012 + libutp + + + + StaticLibrary + true + v110 + Unicode + + + StaticLibrary + true + v110 + Unicode + + + StaticLibrary + false + true + Unicode + v110_xp + + + StaticLibrary + false + v110 + true + Unicode + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Level3 + Disabled + + + true + + + + + Level3 + Disabled + + + true + + + + + Level3 + MaxSpeed + true + true + _WIN32_WINNT=0x501;%(PreprocessorDefinitions) + + + true + true + true + + + + + Level3 + MaxSpeed + true + true + + + true + true + true + + + + + + diff --git a/libutp-2012.vcxproj.filters b/libutp-2012.vcxproj.filters new file mode 100644 index 0000000..8e57840 --- /dev/null +++ b/libutp-2012.vcxproj.filters @@ -0,0 +1,65 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hpp;hxx;hm;inl;inc;xsd + + + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + diff --git a/prop_sheets/debug-2012.props b/prop_sheets/debug-2012.props new file mode 100644 index 0000000..60517a2 --- /dev/null +++ 
b/prop_sheets/debug-2012.props @@ -0,0 +1,45 @@ + + + + + + $(SolutionDir)Build\$(PlatformName)\$(Configuration)\ + $(OutDir)$(ProjectName)\ + + + + Level3 + _DEBUG;WIN32;ENABLE_I18N;ENABLE_SRP=1;%(PreprocessorDefinitions) + MultiThreadedDebug + false + false + false + Default + $(SolutionDir);$(SolutionDir)\yajl\src;$(SolutionDir)\ut_core\src;$(SolutionDir)\verification_lib;$(SolutionDir)\libtomcrypt\src\headers + true + false + false + c:\temp\$(ProjectName)-$(ConfigurationName)-$(PlatformName)-master.pch + true + false + + + true + + + true + false + + + true + + + BRANDED_UTORRENT;%(PreprocessorDefinitions) + + + + $(SolutionDir)\ut_core\src;%(AdditionalIncludeDirectories) + + + + \ No newline at end of file diff --git a/prop_sheets/release-2012.props b/prop_sheets/release-2012.props new file mode 100644 index 0000000..1e0fb81 --- /dev/null +++ b/prop_sheets/release-2012.props @@ -0,0 +1,50 @@ + + + + + + $(SolutionDir)Build\$(PlatformName)\$(Configuration)\ + $(OutDir)$(ProjectName)\ + + + + Level3 + $(SolutionDir);$(SolutionDir)\yajl\src;$(SolutionDir)\ut_core\src;$(SolutionDir)\verification_lib;$(SolutionDir)\libtomcrypt\src\headers + MinSpace + AnySuitable + true + Size + true + true + false + false + false + true + MultiThreaded + false + true + FastCall + Default + false + NDEBUG;WIN32;ENABLE_I18N;ENABLE_SRP=1;%(PreprocessorDefinitions) + false + c:\temp\$(ProjectName)-$(ConfigurationName)-$(PlatformName)-master.pch + true + + + + true + + + BRANDED_UTORRENT;%(PreprocessorDefinitions) + + + + $(SolutionDir)\ut_core\src;%(AdditionalIncludeDirectories) + + + false + + + + \ No newline at end of file diff --git a/prop_sheets/win32-2012.props b/prop_sheets/win32-2012.props new file mode 100644 index 0000000..21f6081 --- /dev/null +++ b/prop_sheets/win32-2012.props @@ -0,0 +1,15 @@ + + + + + + + + MachineX86 + + + 4Bytes + + + + \ No newline at end of file diff --git a/prop_sheets/x64-2012.props b/prop_sheets/x64-2012.props new file mode 100644 index 
0000000..08b2b3e --- /dev/null +++ b/prop_sheets/x64-2012.props @@ -0,0 +1,8 @@ + + + + + + + + diff --git a/ucat.c b/ucat.c new file mode 100644 index 0000000..3030c94 --- /dev/null +++ b/ucat.c @@ -0,0 +1,627 @@ +// vim:set ts=4 sw=4 ai: + +/* + * Copyright (c) 2010-2013 BitTorrent, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef __linux__ + #include + #include +#endif + +#include "utp.h" + +// options +int o_debug; +char *o_local_address, *o_local_port, + *o_remote_address, *o_remote_port; +int o_listen; +int o_buf_size = 4096; +int o_numeric; + +utp_context *ctx; +utp_socket *s; + +int fd; +int buf_len = 0; +unsigned char *buf, *p; +int eof_flag, quit_flag, exit_code; + +void die(char *fmt, ...) 
+{ + va_list ap; + fflush(stdout); + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + exit(1); +} + +void debug(char *fmt, ...) +{ + va_list ap; + if (o_debug) { + fflush(stdout); + fprintf(stderr, "debug: "); + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + fflush(stderr); + } +} + +void pdie(char *err) +{ + debug("errno %d\n", errno); + fflush(stdout); + perror(err); + exit(1); +} + +void hexdump(const void *p, size_t len) +{ + int count = 1; + + while (len--) { + if (count == 1) + fprintf(stderr, " %p: ", p); + + fprintf(stderr, " %02x", *(unsigned char*)p++ & 0xff); + + if (count++ == 16) { + fprintf(stderr, "\n"); + count = 1; + } + } + + if (count != 1) + fprintf(stderr, "\n"); +} + +void handler(int number) +{ + debug("caught signal\n"); + if (s) + utp_close(s); + quit_flag = 1; + exit_code++; +} + +void write_data(void) +{ + if (! s) + goto out; + + while (p < buf+buf_len) { + size_t sent; + + sent = utp_write(s, p, buf+buf_len-p); + if (sent == 0) { + debug("socket no longer writable\n"); + return; + } + + p += sent; + + if (p == buf+buf_len) { + debug("wrote %zd bytes; buffer now empty\n", sent); + p = buf; + buf_len = 0; + } + else + debug("wrote %zd bytes; %d bytes left in buffer\n", sent, buf+buf_len-p); + } + +out: + if (buf_len == 0 && eof_flag) { + if (s) { + debug("Buffer empty, and previously found EOF. Closing socket\n"); + utp_close(s); + } + else { + quit_flag = 1; + } + } +} + +uint64 callback_on_read(utp_callback_arguments *a) +{ + const unsigned char *p; + ssize_t len, left; + + left = a->len; + p = a->buf; + + while (left) { + len = write(STDOUT_FILENO, p, left); + left -= len; + p += len; + debug("Wrote %d bytes, %d left\n", len, left); + } + utp_read_drained(a->socket); + return 0; +} + +uint64 callback_on_firewall(utp_callback_arguments *a) +{ + if (! 
o_listen) { + debug("Firewalling unexpected inbound connection in non-listen mode\n"); + return 1; + } + + if (s) { + debug("Firewalling unexpected second inbound connection\n"); + return 1; + } + + debug("Firewall allowing inbound connection\n"); + return 0; +} + +uint64 callback_on_accept(utp_callback_arguments *a) +{ + assert(!s); + s = a->socket; + debug("Accepted inbound socket %p\n", s); + write_data(); + return 0; +} + +uint64 callback_on_error(utp_callback_arguments *a) +{ + fprintf(stderr, "Error: %s\n", utp_error_code_names[a->error_code]); + utp_close(s); + s = NULL; + quit_flag = 1; + exit_code++; + return 0; +} + +uint64 callback_on_state_change(utp_callback_arguments *a) +{ + debug("state %d: %s\n", a->state, utp_state_names[a->state]); + utp_socket_stats *stats; + + switch (a->state) { + case UTP_STATE_CONNECT: + case UTP_STATE_WRITABLE: + write_data(); + break; + + case UTP_STATE_EOF: + debug("Received EOF from socket; closing\n"); + utp_close(a->socket); + break; + + case UTP_STATE_DESTROYING: + debug("UTP socket is being destroyed; exiting\n"); + + stats = utp_get_stats(a->socket); + if (stats) { + debug("Socket Statistics:\n"); + debug(" Bytes sent: %d\n", stats->nbytes_xmit); + debug(" Bytes received: %d\n", stats->nbytes_recv); + debug(" Packets received: %d\n", stats->nrecv); + debug(" Packets sent: %d\n", stats->nxmit); + debug(" Duplicate receives: %d\n", stats->nduprecv); + debug(" Retransmits: %d\n", stats->rexmit); + debug(" Fast Retransmits: %d\n", stats->fastrexmit); + debug(" Best guess at MTU: %d\n", stats->mtu_guess); + } + else { + debug("No socket statistics available\n"); + } + + s = NULL; + quit_flag = 1; + break; + } + + return 0; +} + +uint64 callback_sendto(utp_callback_arguments *a) +{ + struct sockaddr_in *sin = (struct sockaddr_in *) a->address; + + debug("sendto: %zd byte packet to %s:%d%s\n", a->address_len, inet_ntoa(sin->sin_addr), ntohs(sin->sin_port), + (a->flags & UTP_UDP_DONTFRAG) ? 
" (DF bit requested, but not yet implemented)" : ""); + + if (o_debug >= 3) + hexdump(a->buf, a->len); + + sendto(fd, a->buf, a->len, 0, a->address, a->address_len); + return 0; +} + +uint64 callback_log(utp_callback_arguments *a) +{ + fprintf(stderr, "log: %s\n", a->buf); + return 0; +} + +void setup(void) +{ + struct addrinfo hints, *res; + struct sockaddr_in sin, *sinp; + int error; + struct sigaction sigIntHandler; + + sigIntHandler.sa_handler = handler; + sigemptyset(&sigIntHandler.sa_mask); + sigIntHandler.sa_flags = 0; + + sigaction(SIGINT, &sigIntHandler, NULL); + + p = buf = malloc(o_buf_size); + if (!buf) + pdie("malloc"); + debug("Allocatd %d buffer\n", o_buf_size); + + fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); + if (fd < 0) + pdie("socket"); + + #ifdef __linux__ + int on = 1; + if (setsockopt(fd, SOL_IP, IP_RECVERR, &on, sizeof(on)) != 0) + pdie("setsockopt"); + #endif + + memset(&hints, 0, sizeof(hints)); + hints.ai_family = AF_INET; + hints.ai_socktype = SOCK_DGRAM; + hints.ai_protocol = IPPROTO_UDP; + if (o_numeric) + hints.ai_flags |= AI_NUMERICHOST; + + if ((error = getaddrinfo(o_local_address, o_local_port, &hints, &res))) + die("getaddrinfo: %s\n", gai_strerror(error)); + + if (bind(fd, res->ai_addr, res->ai_addrlen) != 0) + pdie("bind"); + + freeaddrinfo(res); + + socklen_t len = sizeof(sin); + if (getsockname(fd, (struct sockaddr *) &sin, &len) != 0) + pdie("getsockname"); + debug("Bound to local %s:%d\n", inet_ntoa(sin.sin_addr), ntohs(sin.sin_port)); + + ctx = utp_init(2); + assert(ctx); + debug("UTP context %p\n", ctx); + + utp_set_callback(ctx, UTP_LOG, &callback_log); + utp_set_callback(ctx, UTP_SENDTO, &callback_sendto); + utp_set_callback(ctx, UTP_ON_ERROR, &callback_on_error); + utp_set_callback(ctx, UTP_ON_STATE_CHANGE, &callback_on_state_change); + utp_set_callback(ctx, UTP_ON_READ, &callback_on_read); + utp_set_callback(ctx, UTP_ON_FIREWALL, &callback_on_firewall); + utp_set_callback(ctx, UTP_ON_ACCEPT, &callback_on_accept); + + 
if (o_debug >= 2) { + utp_context_set_option(ctx, UTP_LOG_NORMAL, 1); + utp_context_set_option(ctx, UTP_LOG_MTU, 1); + utp_context_set_option(ctx, UTP_LOG_DEBUG, 1); + } + + if (! o_listen) { + s = utp_create_socket(ctx); + assert(s); + debug("UTP socket %p\n", s); + + if ((error = getaddrinfo(o_remote_address, o_remote_port, &hints, &res))) + die("getaddrinfo: %s\n", gai_strerror(error)); + + sinp = (struct sockaddr_in *)res->ai_addr; + debug("Connecting to %s:%d\n", inet_ntoa(sinp->sin_addr), ntohs(sinp->sin_port)); + + utp_connect(s, res->ai_addr, res->ai_addrlen); + freeaddrinfo(res); + } +} + +#ifdef __linux__ +void handle_icmp() +{ + while (1) { + unsigned char vec_buf[4096], ancillary_buf[4096]; + struct iovec iov = { vec_buf, sizeof(vec_buf) }; + struct sockaddr_in remote; + struct msghdr msg; + ssize_t len; + struct cmsghdr *cmsg; + struct sock_extended_err *e; + struct sockaddr *icmp_addr; + struct sockaddr_in *icmp_sin; + + memset(&msg, 0, sizeof(msg)); + + msg.msg_name = &remote; + msg.msg_namelen = sizeof(remote); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_flags = 0; + msg.msg_control = ancillary_buf; + msg.msg_controllen = sizeof(ancillary_buf); + + len = recvmsg(fd, &msg, MSG_ERRQUEUE | MSG_DONTWAIT); + + if (len < 0) { + if (errno == EAGAIN || errno == EWOULDBLOCK) + break; + else + pdie("recvmsg"); + } + + for (cmsg = CMSG_FIRSTHDR(&msg); + cmsg; + cmsg = CMSG_NXTHDR(&msg, cmsg)) + { + if (cmsg->cmsg_type != IP_RECVERR) { + debug("Unhandled errqueue type: %d\n", cmsg->cmsg_type); + continue; + } + + if (cmsg->cmsg_level != SOL_IP) { + debug("Unhandled errqueue level: %d\n", cmsg->cmsg_level); + continue; + } + + debug("errqueue: IP_RECVERR, SOL_IP, len %zd\n", cmsg->cmsg_len); + + if (remote.sin_family != AF_INET) { + debug("Address family is %d, not AF_INET? 
Ignoring\n", remote.sin_family); + continue; + } + + debug("Remote host: %s:%d\n", inet_ntoa(remote.sin_addr), ntohs(remote.sin_port)); + + e = (struct sock_extended_err *) CMSG_DATA(cmsg); + + if (!e) { + debug("errqueue: sock_extended_err is NULL?\n"); + continue; + } + + if (e->ee_origin != SO_EE_ORIGIN_ICMP) { + debug("errqueue: Unexpected origin: %d\n", e->ee_origin); + continue; + } + + debug(" ee_errno: %d\n", e->ee_errno); + debug(" ee_origin: %d\n", e->ee_origin); + debug(" ee_type: %d\n", e->ee_type); + debug(" ee_code: %d\n", e->ee_code); + debug(" ee_info: %d\n", e->ee_info); // discovered MTU for EMSGSIZE errors + debug(" ee_data: %d\n", e->ee_data); + + // "Node that caused the error" + // "Node that generated the error" + icmp_addr = (struct sockaddr *) SO_EE_OFFENDER(e); + icmp_sin = (struct sockaddr_in *) icmp_addr; + + if (icmp_addr->sa_family != AF_INET) { + debug("ICMP's address family is %d, not AF_INET?\n", icmp_addr->sa_family); + continue; + } + + if (icmp_sin->sin_port != 0) { + debug("ICMP's 'port' is not 0?\n"); + continue; + } + + debug("msg_flags: %d", msg.msg_flags); + if (o_debug) { + if (msg.msg_flags & MSG_TRUNC) fprintf(stderr, " MSG_TRUNC"); + if (msg.msg_flags & MSG_CTRUNC) fprintf(stderr, " MSG_CTRUNC"); + if (msg.msg_flags & MSG_EOR) fprintf(stderr, " MSG_EOR"); + if (msg.msg_flags & MSG_OOB) fprintf(stderr, " MSG_OOB"); + if (msg.msg_flags & MSG_ERRQUEUE) fprintf(stderr, " MSG_ERRQUEUE"); + fprintf(stderr, "\n"); + } + + if (o_debug >= 3) + hexdump(vec_buf, len); + + if (e->ee_type == 3 && e->ee_code == 4) { + debug("ICMP type 3, code 4: Fragmentation error, discovered MTU %d\n", e->ee_info); + utp_process_icmp_fragmentation(ctx, vec_buf, len, (struct sockaddr *)&remote, sizeof(remote), e->ee_info); + } + else { + debug("ICMP type %d, code %d\n", e->ee_type, e->ee_code); + utp_process_icmp_error(ctx, vec_buf, len, (struct sockaddr *)&remote, sizeof(remote)); + } + } + } +} +#endif + +void network_loop(void) +{ + unsigned char 
socket_data[4096]; + struct sockaddr_in src_addr; + socklen_t addrlen = sizeof(src_addr); + ssize_t len; + int ret; + + struct pollfd p[2]; + + p[0].fd = STDIN_FILENO; + p[0].events = (o_buf_size-buf_len && !eof_flag) ? POLLIN : 0; + + p[1].fd = fd; + p[1].events = POLLIN; + + ret = poll(p, 2, 500); + if (ret < 0) { + if (errno == EINTR) + debug("poll() returned EINTR\n"); + else + pdie("poll"); + } + else if (ret == 0) { + if (o_debug >= 3) + debug("poll() timeout\n"); + } + else { + if ((p[0].revents & POLLIN) == POLLIN) { + len = read(STDIN_FILENO, buf+buf_len, o_buf_size-buf_len); + if (len < 0 && errno != EINTR) + pdie("read stdin"); + if (len == 0) { + debug("EOF from file\n"); + eof_flag = 1; + close(STDIN_FILENO); + } + else { + buf_len += len; + debug("Read %d bytes, buffer now %d bytes long\n", len, buf_len); + } + write_data(); + } + + #ifdef __linux__ + if ((p[1].revents & POLLERR) == POLLERR) + handle_icmp(); + #endif + + if ((p[1].revents & POLLIN) == POLLIN) { + while (1) { + len = recvfrom(fd, socket_data, sizeof(socket_data), MSG_DONTWAIT, (struct sockaddr *)&src_addr, &addrlen); + if (len < 0) { + if (errno == EAGAIN || errno == EWOULDBLOCK) { + utp_issue_deferred_acks(ctx); + break; + } + else + pdie("recv"); + } + + debug("Received %zd byte UDP packet from %s:%d\n", len, inet_ntoa(src_addr.sin_addr), ntohs(src_addr.sin_port)); + if (o_debug >= 3) + hexdump(socket_data, len); + + if (! utp_process_udp(ctx, socket_data, len, (struct sockaddr *)&src_addr, addrlen)) + debug("UDP packet not handled by UTP. 
Ignoring.\n"); + } + } + } + + utp_check_timeouts(ctx); +} + +void usage(char *name) +{ + fprintf(stderr, "\nUsage:\n"); + fprintf(stderr, " %s [options] \n", name); + fprintf(stderr, " %s [options] -l -p \n", name); + fprintf(stderr, "\n"); + fprintf(stderr, "Options:\n"); + fprintf(stderr, " -h Help\n"); + fprintf(stderr, " -d Debug mode; use multiple times to increase verbosity.\n"); + fprintf(stderr, " -l Listen mode\n"); + fprintf(stderr, " -p Local port\n"); + fprintf(stderr, " -s Source IP\n"); + fprintf(stderr, " -B Buffer size\n"); + fprintf(stderr, " -n Don't resolve hostnames\n"); + fprintf(stderr, "\n"); + exit(1); +} + +int main(int argc, char *argv[]) +{ + int i; + + o_local_address = "0.0.0.0"; + + while (1) { + int c = getopt (argc, argv, "hdlp:B:s:n"); + if (c == -1) break; + switch(c) { + case 'h': usage(argv[0]); break; + case 'd': o_debug++; break; + case 'l': o_listen++; break; + case 'p': o_local_port = optarg; break; + case 'B': o_buf_size = atoi(optarg); break; + case 's': o_local_address = optarg; break; + case 'n': o_numeric++; break; + //case 'w': break; // timeout for connects and final net reads + default: + die("Unhandled argument: %c\n", c); + } + } + + for (i = optind; i < argc; i++) { + switch(i - optind) { + case 0: o_remote_address = argv[i]; break; + case 1: o_remote_port = argv[i]; break; + } + } + + if (o_listen && (o_remote_port || o_remote_address)) + usage(argv[0]); + + if (! 
o_listen && (!o_remote_port || !o_remote_address)) + usage(argv[0]); + + setup(); + while (!quit_flag) + network_loop(); + + if (buf_len) { + fprintf(stderr, "Warning: send buffer not empty\n"); + exit_code++; + } + + utp_context_stats *stats = utp_get_context_stats(ctx); + + if (stats) { + debug(" Bucket size: <23 <373 <723 <1400 >1400\n"); + debug("Number of packets sent: %5d %5d %5d %5d %5d\n", + stats->_nraw_send[0], stats->_nraw_send[1], stats->_nraw_send[2], stats->_nraw_send[3], stats->_nraw_send[4]); + debug("Number of packets recv: %5d %5d %5d %5d %5d\n", + stats->_nraw_recv[0], stats->_nraw_recv[1], stats->_nraw_recv[2], stats->_nraw_recv[3], stats->_nraw_recv[4]); + } + else { + debug("utp_get_context_stats() failed?\n"); + } + + debug("Destorying context\n"); + utp_destroy(ctx); + return exit_code; +} diff --git a/utp.h b/utp.h new file mode 100644 index 0000000..9d4ed40 --- /dev/null +++ b/utp.h @@ -0,0 +1,182 @@ +/* + * Copyright (c) 2010-2013 BitTorrent, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef __UTP_H__ +#define __UTP_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include "utp_types.h" + +typedef struct UTPSocket utp_socket; +typedef struct struct_utp_context utp_context; + +enum { + UTP_UDP_DONTFRAG = 2, // Used to be a #define as UDP_IP_DONTFRAG +}; + +enum { + // socket has reveived syn-ack (notification only for outgoing connection completion) + // this implies writability + UTP_STATE_CONNECT = 1, + + // socket is able to send more data + UTP_STATE_WRITABLE = 2, + + // connection closed + UTP_STATE_EOF = 3, + + // socket is being destroyed, meaning all data has been sent if possible. + // it is not valid to refer to the socket after this state change occurs + UTP_STATE_DESTROYING = 4, +}; + +extern const char *utp_state_names[]; + +// Errors codes that can be passed to UTP_ON_ERROR callback +enum { + UTP_ECONNREFUSED = 0, + UTP_ECONNRESET, + UTP_ETIMEDOUT, +}; + +extern const char *utp_error_code_names[]; + +enum { + // callback names + UTP_ON_FIREWALL = 0, + UTP_ON_ACCEPT, + UTP_ON_CONNECT, + UTP_ON_ERROR, + UTP_ON_READ, + UTP_ON_OVERHEAD_STATISTICS, + UTP_ON_STATE_CHANGE, + UTP_GET_READ_BUFFER_SIZE, + UTP_ON_DELAY_SAMPLE, + UTP_GET_UDP_MTU, + UTP_GET_UDP_OVERHEAD, + UTP_GET_MILLISECONDS, + UTP_GET_MICROSECONDS, + UTP_GET_RANDOM, + UTP_LOG, + UTP_SENDTO, + + // context and socket options that may be set/queried + UTP_LOG_NORMAL, + UTP_LOG_MTU, + UTP_LOG_DEBUG, + UTP_SNDBUF, + UTP_RCVBUF, + UTP_TARGET_DELAY, + + UTP_ARRAY_SIZE, // must be last +}; + +extern const char *utp_callback_names[]; + +typedef struct { + utp_context *context; + utp_socket *socket; + size_t len; + uint32 flags; + int callback_type; + const byte *buf; + + union { + const struct 
sockaddr *address; + int send; + int sample_ms; + int error_code; + int state; + }; + + union { + socklen_t address_len; + int type; + }; +} utp_callback_arguments; + +typedef uint64 utp_callback_t(utp_callback_arguments *); + +// Returned by utp_get_context_stats() +typedef struct { + uint32 _nraw_recv[5]; // total packets recieved less than 300/600/1200/MTU bytes fpr all connections (context-wide) + uint32 _nraw_send[5]; // total packets sent less than 300/600/1200/MTU bytes for all connections (context-wide) +} utp_context_stats; + +// Returned by utp_get_stats() +typedef struct { + uint64 nbytes_recv; // total bytes received + uint64 nbytes_xmit; // total bytes transmitted + uint32 rexmit; // retransmit counter + uint32 fastrexmit; // fast retransmit counter + uint32 nxmit; // transmit counter + uint32 nrecv; // receive counter (total) + uint32 nduprecv; // duplicate receive counter + uint32 mtu_guess; // Best guess at MTU +} utp_socket_stats; + +#define UTP_IOV_MAX 1024 + +// For utp_writev, to writes data from multiple buffers +struct utp_iovec { + void *iov_base; + size_t iov_len; +}; + +// Public Functions +utp_context* utp_init (int version); +void utp_destroy (utp_context *ctx); +void utp_set_callback (utp_context *ctx, int callback_name, utp_callback_t *proc); +void* utp_context_set_userdata (utp_context *ctx, void *userdata); +void* utp_context_get_userdata (utp_context *ctx); +int utp_context_set_option (utp_context *ctx, int opt, int val); +int utp_context_get_option (utp_context *ctx, int opt); +int utp_process_udp (utp_context *ctx, const byte *buf, size_t len, const struct sockaddr *to, socklen_t tolen); +int utp_process_icmp_error (utp_context *ctx, const byte *buffer, size_t len, const struct sockaddr *to, socklen_t tolen); +int utp_process_icmp_fragmentation (utp_context *ctx, const byte *buffer, size_t len, const struct sockaddr *to, socklen_t tolen, uint16 next_hop_mtu); +void utp_check_timeouts (utp_context *ctx); +void 
utp_issue_deferred_acks (utp_context *ctx); +utp_context_stats* utp_get_context_stats (utp_context *ctx); +utp_socket* utp_create_socket (utp_context *ctx); +void* utp_set_userdata (utp_socket *s, void *userdata); +void* utp_get_userdata (utp_socket *s); +int utp_setsockopt (utp_socket *s, int opt, int val); +int utp_getsockopt (utp_socket *s, int opt); +int utp_connect (utp_socket *s, const struct sockaddr *to, socklen_t tolen); +ssize_t utp_write (utp_socket *s, void *buf, size_t count); +ssize_t utp_writev (utp_socket *s, struct utp_iovec *iovec, size_t num_iovecs); +int utp_getpeername (utp_socket *s, struct sockaddr *addr, socklen_t *addrlen); +void utp_read_drained (utp_socket *s); +int utp_get_delays (utp_socket *s, uint32 *ours, uint32 *theirs, uint32 *age); +utp_socket_stats* utp_get_stats (utp_socket *s); +utp_context* utp_get_context (utp_socket *s); +void utp_close (utp_socket *s); + +#ifdef __cplusplus +} +#endif + +#endif //__UTP_H__ diff --git a/utp_api.cpp b/utp_api.cpp new file mode 100644 index 0000000..63aff18 --- /dev/null +++ b/utp_api.cpp @@ -0,0 +1,139 @@ +// vim:set ts=4 sw=4 ai: + +/* + * Copyright (c) 2010-2013 BitTorrent, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include +#include "utp_internal.h" +#include "utp_utils.h" + +extern "C" { + +const char * utp_callback_names[] = { + "UTP_ON_FIREWALL", + "UTP_ON_ACCEPT", + "UTP_ON_CONNECT", + "UTP_ON_ERROR", + "UTP_ON_READ", + "UTP_ON_OVERHEAD_STATISTICS", + "UTP_ON_STATE_CHANGE", + "UTP_GET_READ_BUFFER_SIZE", + "UTP_ON_DELAY_SAMPLE", + "UTP_GET_UDP_MTU", + "UTP_GET_UDP_OVERHEAD", + "UTP_GET_MILLISECONDS", + "UTP_GET_MICROSECONDS", + "UTP_GET_RANDOM", + "UTP_LOG", + "UTP_SENDTO", +}; + +const char * utp_error_code_names[] = { + "UTP_ECONNREFUSED", + "UTP_ECONNRESET", + "UTP_ETIMEDOUT", +}; + +const char *utp_state_names[] = { + NULL, + "UTP_STATE_CONNECT", + "UTP_STATE_WRITABLE", + "UTP_STATE_EOF", + "UTP_STATE_DESTROYING", +}; + +struct_utp_context::struct_utp_context() + : userdata(NULL) + , current_ms(0) + , last_utp_socket(NULL) + , log_normal(false) + , log_mtu(false) + , log_debug(false) +{ + memset(&context_stats, 0, sizeof(context_stats)); + memset(callbacks, 0, sizeof(callbacks)); + target_delay = CCONTROL_TARGET; + utp_sockets = new UTPSocketHT; + + callbacks[UTP_GET_UDP_MTU] = &utp_default_get_udp_mtu; + callbacks[UTP_GET_UDP_OVERHEAD] = &utp_default_get_udp_overhead; + callbacks[UTP_GET_MILLISECONDS] = &utp_default_get_milliseconds; + callbacks[UTP_GET_MICROSECONDS] = &utp_default_get_microseconds; + callbacks[UTP_GET_RANDOM] = &utp_default_get_random; + + // 1 MB of receive buffer (i.e. max bandwidth delay product) + // means that from a peer with 200 ms RTT, we cannot receive + // faster than 5 MB/s + // from a peer with 10 ms RTT, we cannot receive faster than + // 100 MB/s. 
This is assumed to be good enough, since bandwidth + // often is proportional to RTT anyway + // when setting a download rate limit, all sockets should have + // their receive buffer set much lower, to say 60 kiB or so + opt_rcvbuf = opt_sndbuf = 1024 * 1024; + last_check = 0; +} + +struct_utp_context::~struct_utp_context() { + delete this->utp_sockets; +} + +utp_context* utp_init (int version) +{ + assert(version == 2); + if (version != 2) + return NULL; + utp_context *ctx = new utp_context; + return ctx; +} + +void utp_destroy(utp_context *ctx) { + assert(ctx); + if (ctx) delete ctx; +} + +void utp_set_callback(utp_context *ctx, int callback_name, utp_callback_t *proc) { + assert(ctx); + if (ctx) ctx->callbacks[callback_name] = proc; +} + +void* utp_context_set_userdata(utp_context *ctx, void *userdata) { + assert(ctx); + if (ctx) ctx->userdata = userdata; + return ctx ? ctx->userdata : NULL; +} + +void* utp_context_get_userdata(utp_context *ctx) { + assert(ctx); + return ctx ? ctx->userdata : NULL; +} + +utp_context_stats* utp_get_context_stats(utp_context *ctx) { + assert(ctx); + return ctx ? &ctx->context_stats : NULL; +} + +ssize_t utp_write(utp_socket *socket, void *buf, size_t len) { + struct utp_iovec iovec = { buf, len }; + return utp_writev(socket, &iovec, 1); +} + +} diff --git a/utp_callbacks.cpp b/utp_callbacks.cpp new file mode 100644 index 0000000..d084814 --- /dev/null +++ b/utp_callbacks.cpp @@ -0,0 +1,208 @@ +// vim:set ts=4 sw=4 ai: + +/* + * Copyright (c) 2010-2013 BitTorrent, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include "utp_callbacks.h" + +int utp_call_on_firewall(utp_context *ctx, const struct sockaddr *address, socklen_t address_len) +{ + utp_callback_arguments args; + if (!ctx->callbacks[UTP_ON_FIREWALL]) return 0; + args.callback_type = UTP_ON_FIREWALL; + args.context = ctx; + args.socket = NULL; + args.address = address; + args.address_len = address_len; + return ctx->callbacks[UTP_ON_FIREWALL](&args); +} + +void utp_call_on_accept(utp_context *ctx, utp_socket *socket, const struct sockaddr *address, socklen_t address_len) +{ + utp_callback_arguments args; + if (!ctx->callbacks[UTP_ON_ACCEPT]) return; + args.callback_type = UTP_ON_ACCEPT; + args.context = ctx; + args.socket = socket; + args.address = address; + args.address_len = address_len; + ctx->callbacks[UTP_ON_ACCEPT](&args); +} + +void utp_call_on_connect(utp_context *ctx, utp_socket *socket) +{ + utp_callback_arguments args; + if (!ctx->callbacks[UTP_ON_CONNECT]) return; + args.callback_type = UTP_ON_CONNECT; + args.context = ctx; + args.socket = socket; + ctx->callbacks[UTP_ON_CONNECT](&args); +} + +void utp_call_on_error(utp_context *ctx, utp_socket *socket, int error_code) +{ + utp_callback_arguments args; + if (!ctx->callbacks[UTP_ON_ERROR]) return; + args.callback_type = UTP_ON_ERROR; + args.context = ctx; + args.socket = socket; + args.error_code = error_code; + ctx->callbacks[UTP_ON_ERROR](&args); +} + +void utp_call_on_read(utp_context *ctx, utp_socket *socket, const byte *buf, size_t len) +{ + utp_callback_arguments args; + if (!ctx->callbacks[UTP_ON_READ]) return; + args.callback_type = UTP_ON_READ; + args.context = ctx; + args.socket = socket; + args.buf = buf; + args.len = len; + ctx->callbacks[UTP_ON_READ](&args); +} + +void utp_call_on_overhead_statistics(utp_context *ctx, utp_socket *socket, int send, size_t len, int type) +{ + utp_callback_arguments args; + if (!ctx->callbacks[UTP_ON_OVERHEAD_STATISTICS]) return; + args.callback_type = UTP_ON_OVERHEAD_STATISTICS; + args.context = ctx; 
+ args.socket = socket; + args.send = send; + args.len = len; + args.type = type; + ctx->callbacks[UTP_ON_OVERHEAD_STATISTICS](&args); +} + +void utp_call_on_delay_sample(utp_context *ctx, utp_socket *socket, int sample_ms) +{ + utp_callback_arguments args; + if (!ctx->callbacks[UTP_ON_DELAY_SAMPLE]) return; + args.callback_type = UTP_ON_DELAY_SAMPLE; + args.context = ctx; + args.socket = socket; + args.sample_ms = sample_ms; + ctx->callbacks[UTP_ON_DELAY_SAMPLE](&args); +} + +void utp_call_on_state_change(utp_context *ctx, utp_socket *socket, int state) +{ + utp_callback_arguments args; + if (!ctx->callbacks[UTP_ON_STATE_CHANGE]) return; + args.callback_type = UTP_ON_STATE_CHANGE; + args.context = ctx; + args.socket = socket; + args.state = state; + ctx->callbacks[UTP_ON_STATE_CHANGE](&args); +} + +uint16 utp_call_get_udp_mtu(utp_context *ctx, utp_socket *socket, const struct sockaddr *address, socklen_t address_len) +{ + utp_callback_arguments args; + if (!ctx->callbacks[UTP_GET_UDP_MTU]) return 0; + args.callback_type = UTP_GET_UDP_MTU; + args.context = ctx; + args.socket = socket; + args.address = address; + args.address_len = address_len; + return ctx->callbacks[UTP_GET_UDP_MTU](&args); +} + +uint16 utp_call_get_udp_overhead(utp_context *ctx, utp_socket *socket, const struct sockaddr *address, socklen_t address_len) +{ + utp_callback_arguments args; + if (!ctx->callbacks[UTP_GET_UDP_OVERHEAD]) return 0; + args.callback_type = UTP_GET_UDP_OVERHEAD; + args.context = ctx; + args.socket = socket; + args.address = address; + args.address_len = address_len; + return ctx->callbacks[UTP_GET_UDP_OVERHEAD](&args); +} + +uint64 utp_call_get_milliseconds(utp_context *ctx, utp_socket *socket) +{ + utp_callback_arguments args; + if (!ctx->callbacks[UTP_GET_MILLISECONDS]) return 0; + args.callback_type = UTP_GET_MILLISECONDS; + args.context = ctx; + args.socket = socket; + return ctx->callbacks[UTP_GET_MILLISECONDS](&args); +} + +uint64 utp_call_get_microseconds(utp_context 
*ctx, utp_socket *socket) +{ + utp_callback_arguments args; + if (!ctx->callbacks[UTP_GET_MICROSECONDS]) return 0; + args.callback_type = UTP_GET_MICROSECONDS; + args.context = ctx; + args.socket = socket; + return ctx->callbacks[UTP_GET_MICROSECONDS](&args); +} + +uint32 utp_call_get_random(utp_context *ctx, utp_socket *socket) +{ + utp_callback_arguments args; + if (!ctx->callbacks[UTP_GET_RANDOM]) return 0; + args.callback_type = UTP_GET_RANDOM; + args.context = ctx; + args.socket = socket; + return ctx->callbacks[UTP_GET_RANDOM](&args); +} + +size_t utp_call_get_read_buffer_size(utp_context *ctx, utp_socket *socket) +{ + utp_callback_arguments args; + if (!ctx->callbacks[UTP_GET_READ_BUFFER_SIZE]) return 0; + args.callback_type = UTP_GET_READ_BUFFER_SIZE; + args.context = ctx; + args.socket = socket; + return ctx->callbacks[UTP_GET_READ_BUFFER_SIZE](&args); +} + +void utp_call_log(utp_context *ctx, utp_socket *socket, const byte *buf) +{ + utp_callback_arguments args; + if (!ctx->callbacks[UTP_LOG]) return; + args.callback_type = UTP_LOG; + args.context = ctx; + args.socket = socket; + args.buf = buf; + ctx->callbacks[UTP_LOG](&args); +} + +void utp_call_sendto(utp_context *ctx, utp_socket *socket, const byte *buf, size_t len, const struct sockaddr *address, socklen_t address_len, uint32 flags) +{ + utp_callback_arguments args; + if (!ctx->callbacks[UTP_SENDTO]) return; + args.callback_type = UTP_SENDTO; + args.context = ctx; + args.socket = socket; + args.buf = buf; + args.len = len; + args.address = address; + args.address_len = address_len; + args.flags = flags; + ctx->callbacks[UTP_SENDTO](&args); +} + diff --git a/utp_callbacks.h b/utp_callbacks.h new file mode 100644 index 0000000..649e7e1 --- /dev/null +++ b/utp_callbacks.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2010-2013 BitTorrent, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#ifndef __UTP_CALLBACKS_H__ +#define __UTP_CALLBACKS_H__ + +#include "utp.h" +#include "utp_internal.h" + +// Generated by running: grep ^[a-z] utp_callbacks.cpp | sed 's/$/;/' +int utp_call_on_firewall(utp_context *ctx, const struct sockaddr *address, socklen_t address_len); +void utp_call_on_accept(utp_context *ctx, utp_socket *s, const struct sockaddr *address, socklen_t address_len); +void utp_call_on_connect(utp_context *ctx, utp_socket *s); +void utp_call_on_error(utp_context *ctx, utp_socket *s, int error_code); +void utp_call_on_read(utp_context *ctx, utp_socket *s, const byte *buf, size_t len); +void utp_call_on_overhead_statistics(utp_context *ctx, utp_socket *s, int send, size_t len, int type); +void utp_call_on_delay_sample(utp_context *ctx, utp_socket *s, int sample_ms); +void utp_call_on_state_change(utp_context *ctx, utp_socket *s, int state); +uint16 utp_call_get_udp_mtu(utp_context *ctx, utp_socket *s, const struct sockaddr *address, socklen_t address_len); +uint16 utp_call_get_udp_overhead(utp_context *ctx, utp_socket *s, const struct sockaddr *address, socklen_t address_len); +uint64 utp_call_get_milliseconds(utp_context *ctx, utp_socket *s); +uint64 utp_call_get_microseconds(utp_context *ctx, utp_socket *s); +uint32 utp_call_get_random(utp_context *ctx, utp_socket *s); +size_t utp_call_get_read_buffer_size(utp_context *ctx, utp_socket *s); +void utp_call_log(utp_context *ctx, utp_socket *s, const byte *buf); +void utp_call_sendto(utp_context *ctx, utp_socket *s, const byte *buf, size_t len, const struct sockaddr *address, socklen_t address_len, uint32 flags); + +#endif // __UTP_CALLBACKS_H__ diff --git a/utp_hash.cpp b/utp_hash.cpp new file mode 100644 index 0000000..e061a41 --- /dev/null +++ b/utp_hash.cpp @@ -0,0 +1,239 @@ +/* + * Copyright (c) 2010-2013 BitTorrent, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "utp_hash.h" +#include "utp_types.h" + +#ifdef STRICT_ALIGN +inline uint32 Read32(const void *p) +{ + uint32 tmp; + memcpy(&tmp, p, sizeof tmp); + return tmp; +} + +#else +inline uint32 Read32(const void *p) { return *(uint32*)p; } +#endif + + +// Get the amount of memory required for the hash parameters and the bucket set +// Waste a space for an unused bucket in order to ensure the following managed memory have 32-bit aligned addresses +// TODO: make this 64-bit clean +#define BASE_SIZE(bc) (sizeof(utp_hash_t) + sizeof(utp_link_t) * ((bc) + 1)) + +// Get a pointer to the base of the structure array managed by the hash table +#define get_bep(h) ((byte*)(h)) + BASE_SIZE((h)->N) + +// Get the address of the information associated with a specific structure in the array, +// given the address of the base of the structure. 
+// This assumes a utp_link_t link member is at the end of the structure. +// Given compilers filling out the memory to a 32-bit clean value, this may mean that +// the location named in the structure may not be the location actually used by the hash table, +// since the compiler may have padded the end of the structure with 2 bytes after the utp_link_t member. +// TODO: this macro should not require that the variable pointing at the hash table be named 'hash' +#define ptr_to_link(p) (utp_link_t *) (((byte *) (p)) + hash->E - sizeof(utp_link_t)) + +// Calculate how much to allocate for a hash table with bucket count, total size, and structure count +// TODO: make this 64-bit clean +#define ALLOCATION_SIZE(bc, ts, sc) (BASE_SIZE((bc)) + (ts) * (sc)) + +utp_hash_t *utp_hash_create(int N, int key_size, int total_size, int initial, utp_hash_compute_t hashfun, utp_hash_equal_t compfun) +{ + // Must have odd number of hash buckets (prime number is best) + assert(N % 2); + // Ensure structures will be at aligned memory addresses + // TODO: make this 64-bit clean + assert(0 == (total_size % 4)); + + int size = ALLOCATION_SIZE(N, total_size, initial); + utp_hash_t *hash = (utp_hash_t *) malloc( size ); + memset( hash, 0, size ); + + for (int i = 0; i < N + 1; ++i) + hash->inits[i] = HASH_UNUSED; + hash->N = N; + hash->K = key_size; + hash->E = total_size; + hash->hash_compute = hashfun; + hash->hash_equal = compfun; + hash->allocated = initial; + hash->count = 0; + hash->used = 0; + hash->free = HASH_UNUSED; + return hash; +} + +uint utp_hash_mem(const void *keyp, size_t keysize) +{ + uint hash = 0; + uint n = keysize; + while (n >= 4) { + hash ^= Read32(keyp); + keyp = (byte*)keyp + sizeof(uint32); + hash = (hash << 13) | (hash >> 19); + n -= 4; + } + while (n != 0) { + hash ^= *(byte*)keyp; + keyp = (byte*)keyp + sizeof(byte); + hash = (hash << 8) | (hash >> 24); + n--; + } + return hash; +} + +uint utp_hash_mkidx(utp_hash_t *hash, const void *keyp) +{ + // Generate a key 
from the hash + return hash->hash_compute(keyp, hash->K) % hash->N; +} + +static inline bool compare(byte *a, byte *b,int n) +{ + assert(n >= 4); + if (Read32(a) != Read32(b)) return false; + return memcmp(a+4, b+4, n-4) == 0; +} + +#define COMPARE(h,k1,k2,ks) (((h)->hash_equal) ? (h)->hash_equal((void*)k1,(void*)k2,ks) : compare(k1,k2,ks)) + +// Look-up a key in the hash table. +// Returns NULL if not found +void *utp_hash_lookup(utp_hash_t *hash, const void *key) +{ + utp_link_t idx = utp_hash_mkidx(hash, key); + + // base pointer + byte *bep = get_bep(hash); + + utp_link_t cur = hash->inits[idx]; + while (cur != HASH_UNUSED) { + byte *key2 = bep + (cur * hash->E); + if (COMPARE(hash, (byte*)key, key2, hash->K)) + return key2; + cur = *ptr_to_link(key2); + } + + return NULL; +} + +// Add a new element to the hash table. +// Returns a pointer to the new element. +// This assumes the element is not already present! +void *utp_hash_add(utp_hash_t **hashp, const void *key) +{ + //Allocate a new entry + byte *elemp; + utp_link_t elem; + utp_hash_t *hash = *hashp; + utp_link_t idx = utp_hash_mkidx(hash, key); + + if ((elem=hash->free) == HASH_UNUSED) { + utp_link_t all = hash->allocated; + if (hash->used == all) { + utp_hash_t *nhash; + if (all <= (HASH_UNUSED/2)) { + all *= 2; + } else if (all != HASH_UNUSED) { + all = HASH_UNUSED; + } else { + // too many items! can't grow! + assert(0); + return NULL; + } + // otherwise need to allocate. 
+ nhash = (utp_hash_t*)realloc(hash, ALLOCATION_SIZE(hash->N, hash->E, all)); + if (!nhash) { + // out of memory (or too big to allocate) + assert(nhash); + return NULL; + } + hash = *hashp = nhash; + hash->allocated = all; + } + + elem = hash->used++; + elemp = get_bep(hash) + elem * hash->E; + } else { + elemp = get_bep(hash) + elem * hash->E; + hash->free = *ptr_to_link(elemp); + } + + *ptr_to_link(elemp) = hash->inits[idx]; + hash->inits[idx] = elem; + hash->count++; + + // copy key into it + memcpy(elemp, key, hash->K); + return elemp; +} + +// Delete an element from the utp_hash_t +// Returns a pointer to the already deleted element. +void *utp_hash_del(utp_hash_t *hash, const void *key) +{ + utp_link_t idx = utp_hash_mkidx(hash, key); + + // base pointer + byte *bep = get_bep(hash); + + utp_link_t *curp = &hash->inits[idx]; + utp_link_t cur; + while ((cur=*curp) != HASH_UNUSED) { + byte *key2 = bep + (cur * hash->E); + if (COMPARE(hash,(byte*)key,(byte*)key2, hash->K )) { + // found an item that matched. unlink it + *curp = *ptr_to_link(key2); + // Insert into freelist + *ptr_to_link(key2) = hash->free; + hash->free = cur; + hash->count--; + return key2; + } + curp = ptr_to_link(key2); + } + + return NULL; +} + +void *utp_hash_iterate(utp_hash_t *hash, utp_hash_iterator_t *iter) +{ + utp_link_t elem; + + if ((elem=iter->elem) == HASH_UNUSED) { + // Find a bucket with an element + utp_link_t buck = iter->bucket + 1; + for(;;) { + if (buck >= hash->N) + return NULL; + if ((elem = hash->inits[buck]) != HASH_UNUSED) + break; + buck++; + } + iter->bucket = buck; + } + + byte *elemp = get_bep(hash) + (elem * hash->E); + iter->elem = *ptr_to_link(elemp); + return elemp; +} diff --git a/utp_hash.h b/utp_hash.h new file mode 100644 index 0000000..922f9a8 --- /dev/null +++ b/utp_hash.h @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2010-2013 BitTorrent, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef __UTP_HASH_H__ +#define __UTP_HASH_H__ + +#include // memset +#include // malloc + +#include "utp_types.h" +#include "utp_templates.h" + +// TODO: make utp_link_t a template parameter to HashTable +typedef uint32 utp_link_t; +#define HASH_UNUSED ((utp_link_t)-1) + +#ifdef _MSC_VER +// Silence the warning about the C99-compliant zero-length array at the end of the structure +#pragma warning (disable: 4200) +#endif + +typedef uint32 (*utp_hash_compute_t)(const void *keyp, size_t keysize); +typedef uint (*utp_hash_equal_t)(const void *key_a, const void *key_b, size_t keysize); + +// In memory the HashTable is laid out as follows: +// ---------------------------- low +// | hash table data members | +// ---------------------------- _ +// | indices | ^ +// | . | | utp_link_t indices into the key-values. +// | . | . 
+// ---------------------------- - <----- bep +// | keys and values | each key-value pair has size total_size +// | . | +// | . | +// ---------------------------- high +// +// The code depends on the ability of the compiler to pad the length +// of the hash table data members structure to +// a length divisible by 32-bits with no remainder. +// +// Since the number of hash buckets (indices) should be odd, the code +// asserts this and adds one to the hash bucket count to ensure that the +// following key-value pairs array starts on a 32-bit boundary. +// +// The key-value pairs array should start on a 32-bit boundary, otherwise +// processors like the ARM will silently mangle 32-bit data in these structures +// (e.g., turning 0xABCD into 0XCDAB when moving a value from memory to register +// when the memory address is 16 bits offset from a 32-bit boundary), +// also, the value will be stored at an address two bytes lower than the address +// value would ordinarily indicate. +// +// The key-value pair is of type T. The first field in T must +// be the key, i.e., the first K bytes of T contains the key. +// total_size = sizeof(T) and thus sizeof(T) >= sizeof(K) +// +// N is the number of buckets. 
+// +struct utp_hash_t { + utp_link_t N; + byte K; + byte E; + size_t count; + utp_hash_compute_t hash_compute; + utp_hash_equal_t hash_equal; + utp_link_t allocated; + utp_link_t used; + utp_link_t free; + utp_link_t inits[0]; +}; + +#ifdef _MSC_VER +#pragma warning (default: 4200) +#endif + +struct utp_hash_iterator_t { + utp_link_t bucket; + utp_link_t elem; + + utp_hash_iterator_t() : bucket(0xffffffff), elem(0xffffffff) {} +}; + +uint utp_hash_mem(const void *keyp, size_t keysize); +uint utp_hash_comp(const void *key_a, const void *key_b, size_t keysize); + +utp_hash_t *utp_hash_create(int N, int key_size, int total_size, int initial, utp_hash_compute_t hashfun = utp_hash_mem, utp_hash_equal_t eqfun = NULL); +void *utp_hash_lookup(utp_hash_t *hash, const void *key); +void *utp_hash_add(utp_hash_t **hashp, const void *key); +void *utp_hash_del(utp_hash_t *hash, const void *key); + +void *utp_hash_iterate(utp_hash_t *hash, utp_hash_iterator_t *iter); + +/* + This HashTable requires that T have at least sizeof(K)+sizeof(utp_link_t) bytes. 
+ Usually done like this: + + struct K { + int whatever; + }; + + struct T { + K wtf; + utp_link_t link; // also wtf + }; +*/ + +template class utpHashTable { + utp_hash_t *hash; +public: + static uint compare(const void *k1, const void *k2, size_t ks) { + return *((K*)k1) == *((K*)k2); + } + static uint32 compute_hash(const void *k, size_t ks) { + return ((K*)k)->compute_hash(); + } + void Init() { hash = NULL; } + bool Allocated() { return (hash != NULL); } + void Free() { free(hash); hash = NULL; } + void Create(int N, int initial) { hash = utp_hash_create(N, sizeof(K), sizeof(T), initial, &compute_hash, &compare); } + T *Lookup(const K &key) { return (T*)utp_hash_lookup(hash, &key); } + T *Add(const K &key) { return (T*)utp_hash_add(&hash, &key); } + T *Delete(const K &key) { return (T*)utp_hash_del(hash, &key); } + T *Iterate(utp_hash_iterator_t &iterator) { return (T*)utp_hash_iterate(hash, &iterator); } + size_t GetCount() { return hash->count; } +}; + +#endif //__UTP_HASH_H__ diff --git a/utp_internal.cpp b/utp_internal.cpp index ccadcf9..2102abe 100644 --- a/utp_internal.cpp +++ b/utp_internal.cpp @@ -1,2860 +1,3367 @@ -#include - -#include "utp.h" -#include "templates.h" - -#include -#include -#include -#include -#include -#include -#include // for UINT_MAX - -#ifdef WIN32 -#include "win32_inet_ntop.h" - -// newer versions of MSVC define these in errno.h -#ifndef ECONNRESET -#define ECONNRESET WSAECONNRESET -#define EMSGSIZE WSAEMSGSIZE -#define ECONNREFUSED WSAECONNREFUSED -#define ETIMEDOUT WSAETIMEDOUT -#endif -#endif - -#ifdef POSIX -typedef sockaddr_storage SOCKADDR_STORAGE; -#endif // POSIX - -// number of bytes to increase max window size by, per RTT. This is -// scaled down linearly proportional to off_target. i.e. if all packets -// in one window have 0 delay, window size will increase by this number. -// Typically it's less. 
TCP increases one MSS per RTT, which is 1500 -#define MAX_CWND_INCREASE_BYTES_PER_RTT 3000 -#define CUR_DELAY_SIZE 3 -// experiments suggest that a clock skew of 10 ms per 325 seconds -// is not impossible. Reset delay_base every 13 minutes. The clock -// skew is dealt with by observing the delay base in the other -// direction, and adjusting our own upwards if the opposite direction -// delay base keeps going down -#define DELAY_BASE_HISTORY 13 -#define MAX_WINDOW_DECAY 100 // ms - -#define REORDER_BUFFER_SIZE 32 -#define REORDER_BUFFER_MAX_SIZE 511 -#define OUTGOING_BUFFER_MAX_SIZE 511 - -#define PACKET_SIZE 350 - -// this is the minimum max_window value. It can never drop below this -#define MIN_WINDOW_SIZE 10 - -// when window sizes are smaller than one packet_size, this -// will pace the packets to average at the given window size -// if it's not set, it will simply not send anything until -// there's a timeout -#define USE_PACKET_PACING 1 - -// if we receive 4 or more duplicate acks, we resend the packet -// that hasn't been acked yet -#define DUPLICATE_ACKS_BEFORE_RESEND 3 - -#define DELAYED_ACK_BYTE_THRESHOLD 2400 // bytes -#define DELAYED_ACK_TIME_THRESHOLD 100 // milliseconds - -#define RST_INFO_TIMEOUT 10000 -#define RST_INFO_LIMIT 1000 -// 29 seconds determined from measuring many home NAT devices -#define KEEPALIVE_INTERVAL 29000 - - -#define SEQ_NR_MASK 0xFFFF -#define ACK_NR_MASK 0xFFFF - -#define DIV_ROUND_UP(num, denom) ((num + denom - 1) / denom) - -#include "utp_utils.h" -#include "utp_config.h" - -#define LOG_UTP if (g_log_utp) utp_log -#define LOG_UTPV if (g_log_utp_verbose) utp_log - -uint32 g_current_ms; - -// The totals are derived from the following data: -// 45: IPv6 address including embedded IPv4 address -// 11: Scope Id -// 2: Brackets around IPv6 address when port is present -// 6: Port (including colon) -// 1: Terminating null byte -char addrbuf[65]; -char addrbuf2[65]; -#define addrfmt(x, s) x.fmt(s, sizeof(s)) - -#if 
(defined(__SVR4) && defined(__sun)) -#pragma pack(1) -#else -#pragma pack(push,1) -#endif - -struct PACKED_ATTRIBUTE PackedSockAddr { - - // The values are always stored here in network byte order - union { - byte _in6[16]; // IPv6 - uint16 _in6w[8]; // IPv6, word based (for convenience) - uint32 _in6d[4]; // Dword access - in6_addr _in6addr; // For convenience - } _in; - - // Host byte order - uint16 _port; - -#define _sin4 _in._in6d[3] // IPv4 is stored where it goes if mapped - -#define _sin6 _in._in6 -#define _sin6w _in._in6w -#define _sin6d _in._in6d - - byte get_family() const - { - return (IN6_IS_ADDR_V4MAPPED(&_in._in6addr) != 0) ? AF_INET : AF_INET6; - } - - bool operator==(const PackedSockAddr& rhs) const - { - if (&rhs == this) - return true; - if (_port != rhs._port) - return false; - return memcmp(_sin6, rhs._sin6, sizeof(_sin6)) == 0; - } - bool operator!=(const PackedSockAddr& rhs) const { return !(*this == rhs); } - - PackedSockAddr(const SOCKADDR_STORAGE* sa, socklen_t len) - { - if (sa->ss_family == AF_INET) { - assert(len >= sizeof(sockaddr_in)); - const sockaddr_in *sin = (sockaddr_in*)sa; - _sin6w[0] = 0; - _sin6w[1] = 0; - _sin6w[2] = 0; - _sin6w[3] = 0; - _sin6w[4] = 0; - _sin6w[5] = 0xffff; - _sin4 = sin->sin_addr.s_addr; - _port = ntohs(sin->sin_port); - } else { - assert(len >= sizeof(sockaddr_in6)); - const sockaddr_in6 *sin6 = (sockaddr_in6*)sa; - _in._in6addr = sin6->sin6_addr; - _port = ntohs(sin6->sin6_port); - } - } - - SOCKADDR_STORAGE get_sockaddr_storage(socklen_t *len = NULL) const - { - SOCKADDR_STORAGE sa; - const byte family = get_family(); - if (family == AF_INET) { - sockaddr_in *sin = (sockaddr_in*)&sa; - if (len) *len = sizeof(sockaddr_in); - memset(sin, 0, sizeof(sockaddr_in)); - sin->sin_family = family; - sin->sin_port = htons(_port); - sin->sin_addr.s_addr = _sin4; - } else { - sockaddr_in6 *sin6 = (sockaddr_in6*)&sa; - memset(sin6, 0, sizeof(sockaddr_in6)); - if (len) *len = sizeof(sockaddr_in6); - sin6->sin6_family = 
family; - sin6->sin6_addr = _in._in6addr; - sin6->sin6_port = htons(_port); - } - return sa; - } - - cstr fmt(str s, size_t len) const - { - memset(s, 0, len); - const byte family = get_family(); - str i; - if (family == AF_INET) { - inet_ntop(family, (uint32*)&_sin4, s, len); - i = s; - while (*++i) {} - } else { - i = s; - *i++ = '['; - inet_ntop(family, (in6_addr*)&_in._in6addr, i, len-1); - while (*++i) {} - *i++ = ']'; - } - snprintf(i, len - (i-s), ":%u", _port); - return s; - } -} ALIGNED_ATTRIBUTE(4); - -struct PACKED_ATTRIBUTE RST_Info { - PackedSockAddr addr; - uint32 connid; - uint32 timestamp; - uint16 ack_nr; -}; - -// these packet sizes are including the uTP header wich -// is either 20 or 23 bytes depending on version -#define PACKET_SIZE_EMPTY_BUCKET 0 -#define PACKET_SIZE_EMPTY 23 -#define PACKET_SIZE_SMALL_BUCKET 1 -#define PACKET_SIZE_SMALL 373 -#define PACKET_SIZE_MID_BUCKET 2 -#define PACKET_SIZE_MID 723 -#define PACKET_SIZE_BIG_BUCKET 3 -#define PACKET_SIZE_BIG 1400 -#define PACKET_SIZE_HUGE_BUCKET 4 - -struct PACKED_ATTRIBUTE PacketFormat { - // connection ID - uint32_big connid; - uint32_big tv_sec; - uint32_big tv_usec; - uint32_big reply_micro; - // receive window size in PACKET_SIZE chunks - byte windowsize; - // Type of the first extension header - byte ext; - // Flags - byte flags; - // Sequence number - uint16_big seq_nr; - // Acknowledgment number - uint16_big ack_nr; -}; - -struct PACKED_ATTRIBUTE PacketFormatAck { - PacketFormat pf; - byte ext_next; - byte ext_len; - byte acks[4]; -}; - -struct PACKED_ATTRIBUTE PacketFormatExtensions { - PacketFormat pf; - byte ext_next; - byte ext_len; - byte extensions[8]; -}; - -struct PACKED_ATTRIBUTE PacketFormatV1 { - // packet_type (4 high bits) - // protocol version (4 low bits) - byte ver_type; - byte version() const { return ver_type & 0xf; } - byte type() const { return ver_type >> 4; } - void set_version(byte v) { ver_type = (ver_type & 0xf0) | (v & 0xf); } - void set_type(byte t) { 
ver_type = (ver_type & 0xf) | (t << 4); } - - // Type of the first extension header - byte ext; - // connection ID - uint16_big connid; - uint32_big tv_usec; - uint32_big reply_micro; - // receive window size in bytes - uint32_big windowsize; - // Sequence number - uint16_big seq_nr; - // Acknowledgment number - uint16_big ack_nr; -}; - -struct PACKED_ATTRIBUTE PacketFormatAckV1 { - PacketFormatV1 pf; - byte ext_next; - byte ext_len; - byte acks[4]; -}; - -struct PACKED_ATTRIBUTE PacketFormatExtensionsV1 { - PacketFormatV1 pf; - byte ext_next; - byte ext_len; - byte extensions[8]; -}; - -#if (defined(__SVR4) && defined(__sun)) -#pragma pack(0) -#else -#pragma pack(pop) -#endif - -enum { - ST_DATA = 0, // Data packet. - ST_FIN = 1, // Finalize the connection. This is the last packet. - ST_STATE = 2, // State packet. Used to transmit an ACK with no data. - ST_RESET = 3, // Terminate connection forcefully. - ST_SYN = 4, // Connect SYN - ST_NUM_STATES, // used for bounds checking -}; - -static const cstr flagnames[] = { - "ST_DATA","ST_FIN","ST_STATE","ST_RESET","ST_SYN" -}; - -enum CONN_STATE { - CS_IDLE = 0, - CS_SYN_SENT = 1, - CS_CONNECTED = 2, - CS_CONNECTED_FULL = 3, - CS_GOT_FIN = 4, - CS_DESTROY_DELAY = 5, - CS_FIN_SENT = 6, - CS_RESET = 7, - CS_DESTROY = 8, -}; - -static const cstr statenames[] = { - "IDLE","SYN_SENT","CONNECTED","CONNECTED_FULL","GOT_FIN","DESTROY_DELAY","FIN_SENT","RESET","DESTROY" -}; - -struct OutgoingPacket { - size_t length; - size_t payload; - uint64 time_sent; // microseconds - uint transmissions:31; - bool need_resend:1; - byte data[1]; -}; - -void no_read(void *socket, const byte *bytes, size_t count) {} -void no_write(void *socket, byte *bytes, size_t count) {} -size_t no_rb_size(void *socket) { return 0; } -void no_state(void *socket, int state) {} -void no_error(void *socket, int errcode) {} -void no_overhead(void *socket, bool send, size_t count, int type) {} - -UTPFunctionTable zero_funcs = { - &no_read, - &no_write, - 
&no_rb_size, - &no_state, - &no_error, - &no_overhead, -}; - -struct SizableCircularBuffer { - // This is the mask. Since it's always a power of 2, adding 1 to this value will return the size. - size_t mask; - // This is the elements that the circular buffer points to - void **elements; - - void *get(size_t i) { assert(elements); return elements ? elements[i & mask] : NULL; } - void put(size_t i, void *data) { assert(elements); elements[i&mask] = data; } - - void grow(size_t item, size_t index); - void ensure_size(size_t item, size_t index) { if (index > mask) grow(item, index); } - size_t size() { return mask + 1; } -}; - -static struct UTPGlobalStats _global_stats; - -// Item contains the element we want to make space for -// index is the index in the list. -void SizableCircularBuffer::grow(size_t item, size_t index) -{ - // Figure out the new size. - size_t size = mask + 1; - do size *= 2; while (index >= size); - - // Allocate the new buffer - void **buf = (void**)calloc(size, sizeof(void*)); - - size--; - - // Copy elements from the old buffer to the new buffer - for (size_t i = 0; i <= mask; i++) { - buf[(item - index + i) & size] = get(item - index + i); - } - - // Swap to the newly allocated buffer - mask = size; - free(elements); - elements = buf; -} - -// compare if lhs is less than rhs, taking wrapping -// into account. if lhs is close to UINT_MAX and rhs -// is close to 0, lhs is assumed to have wrapped and -// considered smaller -bool wrapping_compare_less(uint32 lhs, uint32 rhs) -{ - // distance walking from lhs to rhs, downwards - const uint32 dist_down = lhs - rhs; - // distance walking from lhs to rhs, upwards - const uint32 dist_up = rhs - lhs; - - // if the distance walking up is shorter, lhs - // is less than rhs. If the distance walking down - // is shorter, then rhs is less than lhs - return dist_up < dist_down; -} - -struct DelayHist { - uint32 delay_base; - - // this is the history of delay samples, - // normalized by using the delay_base. 
These - // values are always greater than 0 and measures - // the queuing delay in microseconds - uint32 cur_delay_hist[CUR_DELAY_SIZE]; - size_t cur_delay_idx; - - // this is the history of delay_base. It's - // a number that doesn't have an absolute meaning - // only relative. It doesn't make sense to initialize - // it to anything other than values relative to - // what's been seen in the real world. - uint32 delay_base_hist[DELAY_BASE_HISTORY]; - size_t delay_base_idx; - // the time when we last stepped the delay_base_idx - uint32 delay_base_time; - - bool delay_base_initialized; - - void clear() - { - delay_base_initialized = false; - delay_base = 0; - cur_delay_idx = 0; - delay_base_idx = 0; - delay_base_time = g_current_ms; - for (size_t i = 0; i < CUR_DELAY_SIZE; i++) { - cur_delay_hist[i] = 0; - } - for (size_t i = 0; i < DELAY_BASE_HISTORY; i++) { - delay_base_hist[i] = 0; - } - } - - void shift(const uint32 offset) - { - // the offset should never be "negative" - // assert(offset < 0x10000000); - - // increase all of our base delays by this amount - // this is used to take clock skew into account - // by observing the other side's changes in its base_delay - for (size_t i = 0; i < DELAY_BASE_HISTORY; i++) { - delay_base_hist[i] += offset; - } - delay_base += offset; - } - - void add_sample(const uint32 sample) - { - // The two clocks (in the two peers) are assumed not to - // progress at the exact same rate. They are assumed to be - // drifting, which causes the delay samples to contain - // a systematic error, either they are under- - // estimated or over-estimated. This is why we update the - // delay_base every two minutes, to adjust for this. - - // This means the values will keep drifting and eventually wrap. - // We can cross the wrapping boundry in two directions, either - // going up, crossing the highest value, or going down, crossing 0. 
- - // if the delay_base is close to the max value and sample actually - // wrapped on the other end we would see something like this: - // delay_base = 0xffffff00, sample = 0x00000400 - // sample - delay_base = 0x500 which is the correct difference - - // if the delay_base is instead close to 0, and we got an even lower - // sample (that will eventually update the delay_base), we may see - // something like this: - // delay_base = 0x00000400, sample = 0xffffff00 - // sample - delay_base = 0xfffffb00 - // this needs to be interpreted as a negative number and the actual - // recorded delay should be 0. - - // It is important that all arithmetic that assume wrapping - // is done with unsigned intergers. Signed integers are not guaranteed - // to wrap the way unsigned integers do. At least GCC takes advantage - // of this relaxed rule and won't necessarily wrap signed ints. - - // remove the clock offset and propagation delay. - // delay base is min of the sample and the current - // delay base. This min-operation is subject to wrapping - // and care needs to be taken to correctly choose the - // true minimum. - - // specifically the problem case is when delay_base is very small - // and sample is very large (because it wrapped past zero), sample - // needs to be considered the smaller - - if (!delay_base_initialized) { - // delay_base being 0 suggests that we haven't initialized - // it or its history with any real measurements yet. Initialize - // everything with this sample. - for (size_t i = 0; i < DELAY_BASE_HISTORY; i++) { - // if we don't have a value, set it to the current sample - delay_base_hist[i] = sample; - continue; - } - delay_base = sample; - delay_base_initialized = true; - } - - if (wrapping_compare_less(sample, delay_base_hist[delay_base_idx])) { - // sample is smaller than the current delay_base_hist entry - // update it - delay_base_hist[delay_base_idx] = sample; - } - - // is sample lower than delay_base? 
If so, update delay_base - if (wrapping_compare_less(sample, delay_base)) { - // sample is smaller than the current delay_base - // update it - delay_base = sample; - } - - // this operation may wrap, and is supposed to - const uint32 delay = sample - delay_base; - // sanity check. If this is triggered, something fishy is going on - // it means the measured sample was greater than 32 seconds! -// assert(delay < 0x2000000); - - cur_delay_hist[cur_delay_idx] = delay; - cur_delay_idx = (cur_delay_idx + 1) % CUR_DELAY_SIZE; - - // once every minute - if (g_current_ms - delay_base_time > 60 * 1000) { - delay_base_time = g_current_ms; - delay_base_idx = (delay_base_idx + 1) % DELAY_BASE_HISTORY; - // clear up the new delay base history spot by initializing - // it to the current sample, then update it - delay_base_hist[delay_base_idx] = sample; - delay_base = delay_base_hist[0]; - // Assign the lowest delay in the last 2 minutes to delay_base - for (size_t i = 0; i < DELAY_BASE_HISTORY; i++) { - if (wrapping_compare_less(delay_base_hist[i], delay_base)) - delay_base = delay_base_hist[i]; - } - } - } - - uint32 get_value() - { - uint32 value = UINT_MAX; - for (size_t i = 0; i < CUR_DELAY_SIZE; i++) { - value = min(cur_delay_hist[i], value); - } - // value could be UINT_MAX if we have no samples yet... - return value; - } -}; - -struct UTPSocket { - PackedSockAddr addr; - - size_t idx; - - uint16 reorder_count; - byte duplicate_ack; - - // the number of bytes we've received but not acked yet - size_t bytes_since_ack; - - // the number of packets in the send queue. 
Packets that haven't - // yet been sent count as well as packets marked as needing resend - // the oldest un-acked packet in the send queue is seq_nr - cur_window_packets - uint16 cur_window_packets; - - // how much of the window is used, number of bytes in-flight - // packets that have not yet been sent do not count, packets - // that are marked as needing to be re-sent (due to a timeout) - // don't count either - size_t cur_window; - // maximum window size, in bytes - size_t max_window; - // SO_SNDBUF setting, in bytes - size_t opt_sndbuf; - // SO_RCVBUF setting, in bytes - size_t opt_rcvbuf; - - // Is a FIN packet in the reassembly buffer? - bool got_fin:1; - // Timeout procedure - bool fast_timeout:1; - - // max receive window for other end, in bytes - size_t max_window_user; - // 0 = original uTP header, 1 = second revision - byte version; - CONN_STATE state; - // TickCount when we last decayed window (wraps) - int32 last_rwin_decay; - - // the sequence number of the FIN packet. This field is only set - // when we have received a FIN, and the flag field has the FIN flag set. - // it is used to know when it is safe to destroy the socket, we must have - // received all packets up to this sequence number first. - uint16 eof_pkt; - - // All sequence numbers up to including this have been properly received - // by us - uint16 ack_nr; - // This is the sequence number for the next packet to be sent. - uint16 seq_nr; - - uint16 timeout_seq_nr; - - // This is the sequence number of the next packet we're allowed to - // do a fast resend with. This makes sure we only do a fast-resend - // once per packet. We can resend the packet with this sequence number - // or any later packet (with a higher sequence number). - uint16 fast_resend_seq_nr; - - uint32 reply_micro; - - // the time when we need to send another ack. 
If there's - // nothing to ack, this is a very large number - uint32 ack_time; - - uint32 last_got_packet; - uint32 last_sent_packet; - uint32 last_measured_delay; - uint32 last_maxed_out_window; - - // the last time we added send quota to the connection - // when adding send quota, this is subtracted from the - // current time multiplied by max_window / rtt - // which is the current allowed send rate. - int32 last_send_quota; - - // the number of bytes we are allowed to send on - // this connection. If this is more than one packet - // size when we run out of data to send, it is clamped - // to the packet size - // this value is multiplied by 100 in order to get - // higher accuracy when dealing with low rates - int32 send_quota; - - SendToProc *send_to_proc; - void *send_to_userdata; - UTPFunctionTable func; - void *userdata; - - // Round trip time - uint rtt; - // Round trip time variance - uint rtt_var; - // Round trip timeout - uint rto; - DelayHist rtt_hist; - uint retransmit_timeout; - // The RTO timer will timeout here. - uint rto_timeout; - // When the window size is set to zero, start this timer. It will send a new packet every 30secs. - uint32 zerowindow_time; - - uint32 conn_seed; - // Connection ID for packets I receive - uint32 conn_id_recv; - // Connection ID for packets I send - uint32 conn_id_send; - // Last rcv window we advertised, in bytes - size_t last_rcv_win; - - DelayHist our_hist; - DelayHist their_hist; - - // extension bytes from SYN packet - byte extensions[8]; - - SizableCircularBuffer inbuf, outbuf; - -#ifdef _DEBUG - // Public stats, returned by UTP_GetStats(). See utp.h - UTPStats _stats; -#endif // _DEBUG - - // Calculates the current receive window - size_t get_rcv_window() const - { - // If we don't have a connection (such as during connection - // establishment, always act as if we have an empty buffer). - if (!userdata) return opt_rcvbuf; - - // Trim window down according to what's already in buffer. 
- const size_t numbuf = func.get_rb_size(userdata); - assert((int)numbuf >= 0); - return opt_rcvbuf > numbuf ? opt_rcvbuf - numbuf : 0; - } - - // Test if we're ready to decay max_window - // XXX this breaks when spaced by > INT_MAX/2, which is 49 - // days; the failure mode in that case is we do an extra decay - // or fail to do one when we really shouldn't. - bool can_decay_win(int32 msec) const - { - return msec - last_rwin_decay >= MAX_WINDOW_DECAY; - } - - // If we can, decay max window, returns true if we actually did so - void maybe_decay_win() - { - if (can_decay_win(g_current_ms)) { - // TCP uses 0.5 - max_window = (size_t)(max_window * .5); - last_rwin_decay = g_current_ms; - if (max_window < MIN_WINDOW_SIZE) - max_window = MIN_WINDOW_SIZE; - } - } - - size_t get_header_size() const - { - return (version ? sizeof(PacketFormatV1) : sizeof(PacketFormat)); - } - - size_t get_header_extensions_size() const - { - return (version ? sizeof(PacketFormatExtensionsV1) : sizeof(PacketFormatExtensions)); - } - - void sent_ack() - { - ack_time = g_current_ms + 0x70000000; - bytes_since_ack = 0; - } - - size_t get_udp_mtu() const - { - socklen_t len; - SOCKADDR_STORAGE sa = addr.get_sockaddr_storage(&len); - return UTP_GetUDPMTU((const struct sockaddr *)&sa, len); - } - - size_t get_udp_overhead() const - { - socklen_t len; - SOCKADDR_STORAGE sa = addr.get_sockaddr_storage(&len); - return UTP_GetUDPOverhead((const struct sockaddr *)&sa, len); - } - - uint64 get_global_utp_bytes_sent() const - { - socklen_t len; - SOCKADDR_STORAGE sa = addr.get_sockaddr_storage(&len); - return UTP_GetGlobalUTPBytesSent((const struct sockaddr *)&sa, len); - } - - size_t get_overhead() const - { - return get_udp_overhead() + get_header_size(); - } - - void send_data(PacketFormat* b, size_t length, bandwidth_type_t type); - - void send_ack(bool synack = false); - - void send_keep_alive(); - - static void send_rst(SendToProc *send_to_proc, void *send_to_userdata, - const PackedSockAddr 
&addr, uint32 conn_id_send, - uint16 ack_nr, uint16 seq_nr, byte version); - - void send_packet(OutgoingPacket *pkt); - - bool is_writable(size_t to_write); - - bool flush_packets(); - - void write_outgoing_packet(size_t payload, uint flags); - - void update_send_quota(); - -#ifdef _DEBUG - void check_invariant(); -#endif - - void check_timeouts(); - - int ack_packet(uint16 seq); - - size_t selective_ack_bytes(uint base, const byte* mask, byte len, int64& min_rtt); - - void selective_ack(uint base, const byte *mask, byte len); - - void apply_ledbat_ccontrol(size_t bytes_acked, uint32 actual_delay, int64 min_rtt); - - size_t get_packet_size(); -}; - -Array g_rst_info; -Array g_utp_sockets; - -static void UTP_RegisterSentPacket(size_t length) { - if (length <= PACKET_SIZE_MID) { - if (length <= PACKET_SIZE_EMPTY) { - _global_stats._nraw_send[PACKET_SIZE_EMPTY_BUCKET]++; - } else if (length <= PACKET_SIZE_SMALL) { - _global_stats._nraw_send[PACKET_SIZE_SMALL_BUCKET]++; - } else - _global_stats._nraw_send[PACKET_SIZE_MID_BUCKET]++; - } else { - if (length <= PACKET_SIZE_BIG) { - _global_stats._nraw_send[PACKET_SIZE_BIG_BUCKET]++; - } else - _global_stats._nraw_send[PACKET_SIZE_HUGE_BUCKET]++; - } -} - -void send_to_addr(SendToProc *send_to_proc, void *send_to_userdata, const byte *p, size_t len, const PackedSockAddr &addr) -{ - socklen_t tolen; - SOCKADDR_STORAGE to = addr.get_sockaddr_storage(&tolen); - UTP_RegisterSentPacket(len); - send_to_proc(send_to_userdata, p, len, (const struct sockaddr *)&to, tolen); -} - -void UTPSocket::send_data(PacketFormat* b, size_t length, bandwidth_type_t type) -{ - // time stamp this packet with local time, the stamp goes into - // the header of every packet at the 8th byte for 8 bytes : - // two integers, check packet.h for more - uint64 time = UTP_GetMicroseconds(); - - PacketFormatV1* b1 = (PacketFormatV1*)b; - if (version == 0) { - b->tv_sec = (uint32)(time / 1000000); - b->tv_usec = time % 1000000; - b->reply_micro = 
reply_micro; - } else { - b1->tv_usec = (uint32)time; - b1->reply_micro = reply_micro; - } - - last_sent_packet = g_current_ms; - -#ifdef _DEBUG - _stats._nbytes_xmit += length; - ++_stats._nxmit; -#endif - if (userdata) { - size_t n; - if (type == payload_bandwidth) { - // if this packet carries payload, just - // count the header as overhead - type = header_overhead; - n = get_overhead(); - } else { - n = length + get_udp_overhead(); - } - func.on_overhead(userdata, true, n, type); - } -#if g_log_utp_verbose - int flags = version == 0 ? b->flags : b1->type(); - uint16 seq_nr = version == 0 ? b->seq_nr : b1->seq_nr; - uint16 ack_nr = version == 0 ? b->ack_nr : b1->ack_nr; - LOG_UTPV("0x%08x: send %s len:%u id:%u timestamp:"I64u" reply_micro:%u flags:%s seq_nr:%u ack_nr:%u", - this, addrfmt(addr, addrbuf), (uint)length, conn_id_send, time, reply_micro, flagnames[flags], - seq_nr, ack_nr); -#endif - send_to_addr(send_to_proc, send_to_userdata, (const byte*)b, length, addr); -} - -void UTPSocket::send_ack(bool synack) -{ - PacketFormatExtensions pfe; - zeromem(&pfe); - PacketFormatExtensionsV1& pfe1 = (PacketFormatExtensionsV1&)pfe; - PacketFormatAck& pfa = (PacketFormatAck&)pfe1; - PacketFormatAckV1& pfa1 = (PacketFormatAckV1&)pfe1; - - size_t len; - last_rcv_win = get_rcv_window(); - if (version == 0) { - pfa.pf.connid = conn_id_send; - pfa.pf.ack_nr = (uint16)ack_nr; - pfa.pf.seq_nr = (uint16)seq_nr; - pfa.pf.flags = ST_STATE; - pfa.pf.ext = 0; - pfa.pf.windowsize = (byte)DIV_ROUND_UP(last_rcv_win, PACKET_SIZE); - len = sizeof(PacketFormat); - } else { - pfa1.pf.set_version(1); - pfa1.pf.set_type(ST_STATE); - pfa1.pf.ext = 0; - pfa1.pf.connid = conn_id_send; - pfa1.pf.ack_nr = ack_nr; - pfa1.pf.seq_nr = seq_nr; - pfa1.pf.windowsize = (uint32)last_rcv_win; - len = sizeof(PacketFormatV1); - } - - // we never need to send EACK for connections - // that are shutting down - if (reorder_count != 0 && state < CS_GOT_FIN) { - // if reorder count > 0, send an EACK. 
- // reorder count should always be 0 - // for synacks, so this should not be - // as synack - assert(!synack); - if (version == 0) { - pfa.pf.ext = 1; - pfa.ext_next = 0; - pfa.ext_len = 4; - } else { - pfa1.pf.ext = 1; - pfa1.ext_next = 0; - pfa1.ext_len = 4; - } - uint m = 0; - - // reorder count should only be non-zero - // if the packet ack_nr + 1 has not yet - // been received - assert(inbuf.get(ack_nr + 1) == NULL); - size_t window = min(14+16, inbuf.size()); - // Generate bit mask of segments received. - for (size_t i = 0; i < window; i++) { - if (inbuf.get(ack_nr + i + 2) != NULL) { - m |= 1 << i; - LOG_UTPV("0x%08x: EACK packet [%u]", this, ack_nr + i + 2); - } - } - if (version == 0) { - pfa.acks[0] = (byte)m; - pfa.acks[1] = (byte)(m >> 8); - pfa.acks[2] = (byte)(m >> 16); - pfa.acks[3] = (byte)(m >> 24); - } else { - pfa1.acks[0] = (byte)m; - pfa1.acks[1] = (byte)(m >> 8); - pfa1.acks[2] = (byte)(m >> 16); - pfa1.acks[3] = (byte)(m >> 24); - } - len += 4 + 2; - LOG_UTPV("0x%08x: Sending EACK %u [%u] bits:[%032b]", this, ack_nr, conn_id_send, m); - } else if (synack) { - // we only send "extensions" in response to SYN - // and the reorder count is 0 in that state - - LOG_UTPV("0x%08x: Sending ACK %u [%u] with extension bits", this, ack_nr, conn_id_send); - if (version == 0) { - pfe.pf.ext = 2; - pfe.ext_next = 0; - pfe.ext_len = 8; - memset(pfe.extensions, 0, 8); - } else { - pfe1.pf.ext = 2; - pfe1.ext_next = 0; - pfe1.ext_len = 8; - memset(pfe1.extensions, 0, 8); - } - len += 8 + 2; - } else { - LOG_UTPV("0x%08x: Sending ACK %u [%u]", this, ack_nr, conn_id_send); - } - - sent_ack(); - send_data((PacketFormat*)&pfe, len, ack_overhead); -} - -void UTPSocket::send_keep_alive() -{ - ack_nr--; - LOG_UTPV("0x%08x: Sending KeepAlive ACK %u [%u]", this, ack_nr, conn_id_send); - send_ack(); - ack_nr++; -} - -void UTPSocket::send_rst(SendToProc *send_to_proc, void *send_to_userdata, - const PackedSockAddr &addr, uint32 conn_id_send, uint16 ack_nr, uint16 
seq_nr, byte version) -{ - PacketFormat pf; - zeromem(&pf); - PacketFormatV1& pf1 = (PacketFormatV1&)pf; - - size_t len; - if (version == 0) { - pf.connid = conn_id_send; - pf.ack_nr = ack_nr; - pf.seq_nr = seq_nr; - pf.flags = ST_RESET; - pf.ext = 0; - pf.windowsize = 0; - len = sizeof(PacketFormat); - } else { - pf1.set_version(1); - pf1.set_type(ST_RESET); - pf1.ext = 0; - pf1.connid = conn_id_send; - pf1.ack_nr = ack_nr; - pf1.seq_nr = seq_nr; - pf1.windowsize = 0; - len = sizeof(PacketFormatV1); - } - - LOG_UTPV("%s: Sending RST id:%u seq_nr:%u ack_nr:%u", addrfmt(addr, addrbuf), conn_id_send, seq_nr, ack_nr); - LOG_UTPV("send %s len:%u id:%u", addrfmt(addr, addrbuf), (uint)len, conn_id_send); - send_to_addr(send_to_proc, send_to_userdata, (const byte*)&pf1, len, addr); -} - -void UTPSocket::send_packet(OutgoingPacket *pkt) -{ - // only count against the quota the first time we - // send the packet. Don't enforce quota when closing - // a socket. Only enforce the quota when we're sending - // at slow rates (max window < packet size) - size_t max_send = min(max_window, opt_sndbuf, max_window_user); - - if (pkt->transmissions == 0 || pkt->need_resend) { - cur_window += pkt->payload; - } - - size_t packet_size = get_packet_size(); - if (pkt->transmissions == 0 && max_send < packet_size) { - assert(state == CS_FIN_SENT || - (int32)pkt->payload <= send_quota / 100); - send_quota = send_quota - (int32)(pkt->payload * 100); - } - - pkt->need_resend = false; - - PacketFormatV1* p1 = (PacketFormatV1*)pkt->data; - PacketFormat* p = (PacketFormat*)pkt->data; - if (version == 0) { - p->ack_nr = ack_nr; - } else { - p1->ack_nr = ack_nr; - } - pkt->time_sent = UTP_GetMicroseconds(); - pkt->transmissions++; - sent_ack(); - send_data((PacketFormat*)pkt->data, pkt->length, - (state == CS_SYN_SENT) ? connect_overhead - : (pkt->transmissions == 1) ? 
payload_bandwidth - : retransmit_overhead); -} - -bool UTPSocket::is_writable(size_t to_write) -{ - // return true if it's OK to stuff another packet into the - // outgoing queue. Since we may be using packet pacing, we - // might not actually send the packet right away to affect the - // cur_window. The only thing that happens when we add another - // packet is that cur_window_packets is increased. - size_t max_send = min(max_window, opt_sndbuf, max_window_user); - - size_t packet_size = get_packet_size(); - - if (cur_window + packet_size >= max_window) - last_maxed_out_window = g_current_ms; - - // if we don't have enough quota, we can't write regardless - if (USE_PACKET_PACING) { - if (send_quota / 100 < (int32)to_write) return false; - } - - // subtract one to save space for the FIN packet - if (cur_window_packets >= OUTGOING_BUFFER_MAX_SIZE - 1) return false; - - // if sending another packet would not make the window exceed - // the max_window, we can write - if (cur_window + packet_size <= max_send) return true; - - // if the window size is less than a packet, and we have enough - // quota to send a packet, we can write, even though it would - // make the window exceed the max size - // the last condition is needed to not put too many packets - // in the send buffer. 
cur_window isn't updated until we flush - // the send buffer, so we need to take the number of packets - // into account - if (USE_PACKET_PACING) { - if (max_window < to_write && - cur_window < max_window && - cur_window_packets == 0) { - return true; - } - } - - return false; -} - -bool UTPSocket::flush_packets() -{ - size_t packet_size = get_packet_size(); - - // send packets that are waiting on the pacer to be sent - // i has to be an unsigned 16 bit counter to wrap correctly - // signed types are not guaranteed to wrap the way you expect - for (uint16 i = seq_nr - cur_window_packets; i != seq_nr; ++i) { - OutgoingPacket *pkt = (OutgoingPacket*)outbuf.get(i); - if (pkt == 0 || (pkt->transmissions > 0 && pkt->need_resend == false)) continue; - // have we run out of quota? - if (!is_writable(pkt->payload)) { - return true; - } - - // Nagle check - // don't send the last packet if we have one packet in-flight - // and the current packet is still smaller than packet_size. - if (i != ((seq_nr - 1) & ACK_NR_MASK) || - cur_window_packets == 1 || - pkt->payload >= packet_size) { - send_packet(pkt); - - // No need to send another ack if there is nothing to reorder. 
- if (reorder_count == 0) { - sent_ack(); - } - } - } - return false; -} - -void UTPSocket::write_outgoing_packet(size_t payload, uint flags) -{ - // Setup initial timeout timer - if (cur_window_packets == 0) { - retransmit_timeout = rto; - rto_timeout = g_current_ms + retransmit_timeout; - assert(cur_window == 0); - } - - size_t packet_size = get_packet_size(); - do { - assert(cur_window_packets < OUTGOING_BUFFER_MAX_SIZE); - assert(flags == ST_DATA || flags == ST_FIN); - - size_t added = 0; - - OutgoingPacket *pkt = NULL; - - if (cur_window_packets > 0) { - pkt = (OutgoingPacket*)outbuf.get(seq_nr - 1); - } - - const size_t header_size = get_header_size(); - bool append = true; - - // if there's any room left in the last packet in the window - // and it hasn't been sent yet, fill that frame first - if (payload && pkt && !pkt->transmissions && pkt->payload < packet_size) { - // Use the previous unsent packet - added = min(payload + pkt->payload, max(packet_size, pkt->payload)) - pkt->payload; - pkt = (OutgoingPacket*)realloc(pkt, - (sizeof(OutgoingPacket) - 1) + - header_size + - pkt->payload + added); - outbuf.put(seq_nr - 1, pkt); - append = false; - assert(!pkt->need_resend); - } else { - // Create the packet to send. - added = payload; - pkt = (OutgoingPacket*)malloc((sizeof(OutgoingPacket) - 1) + - header_size + - added); - pkt->payload = 0; - pkt->transmissions = 0; - pkt->need_resend = false; - } - - if (added) { - // Fill it with data from the upper layer. 
- func.on_write(userdata, pkt->data + header_size + pkt->payload, added); - } - pkt->payload += added; - pkt->length = header_size + pkt->payload; - - last_rcv_win = get_rcv_window(); - - PacketFormat* p = (PacketFormat*)pkt->data; - PacketFormatV1* p1 = (PacketFormatV1*)pkt->data; - if (version == 0) { - p->connid = conn_id_send; - p->ext = 0; - p->windowsize = (byte)DIV_ROUND_UP(last_rcv_win, PACKET_SIZE); - p->ack_nr = ack_nr; - p->flags = flags; - } else { - p1->set_version(1); - p1->set_type(flags); - p1->ext = 0; - p1->connid = conn_id_send; - p1->windowsize = (uint32)last_rcv_win; - p1->ack_nr = ack_nr; - } - - if (append) { - // Remember the message in the outgoing queue. - outbuf.ensure_size(seq_nr, cur_window_packets); - outbuf.put(seq_nr, pkt); - if (version == 0) p->seq_nr = seq_nr; - else p1->seq_nr = seq_nr; - seq_nr++; - cur_window_packets++; - } - - payload -= added; - - } while (payload); - - flush_packets(); -} - -void UTPSocket::update_send_quota() -{ - int dt = g_current_ms - last_send_quota; - if (dt == 0) return; - last_send_quota = g_current_ms; - size_t add = max_window * dt * 100 / (rtt_hist.delay_base?rtt_hist.delay_base:50); - if (add > max_window * 100 && add > MAX_CWND_INCREASE_BYTES_PER_RTT * 100) add = max_window; - send_quota += (int32)add; -// LOG_UTPV("0x%08x: UTPSocket::update_send_quota dt:%d rtt:%u max_window:%u quota:%d", -// this, dt, rtt, (uint)max_window, send_quota / 100); -} - -#ifdef _DEBUG -void UTPSocket::check_invariant() -{ - if (reorder_count > 0) { - assert(inbuf.get(ack_nr + 1) == NULL); - } - - size_t outstanding_bytes = 0; - for (int i = 0; i < cur_window_packets; ++i) { - OutgoingPacket *pkt = (OutgoingPacket*)outbuf.get(seq_nr - i - 1); - if (pkt == 0 || pkt->transmissions == 0 || pkt->need_resend) continue; - outstanding_bytes += pkt->payload; - } - assert(outstanding_bytes == cur_window); -} -#endif - -void UTPSocket::check_timeouts() -{ -#ifdef _DEBUG - check_invariant(); -#endif - - // this invariant should 
always be true - assert(cur_window_packets == 0 || outbuf.get(seq_nr - cur_window_packets)); - - LOG_UTPV("0x%08x: CheckTimeouts timeout:%d max_window:%u cur_window:%u quota:%d " - "state:%s cur_window_packets:%u bytes_since_ack:%u ack_time:%d", - this, (int)(rto_timeout - g_current_ms), (uint)max_window, (uint)cur_window, - send_quota / 100, statenames[state], cur_window_packets, - (uint)bytes_since_ack, (int)(g_current_ms - ack_time)); - - update_send_quota(); - flush_packets(); - - - if (USE_PACKET_PACING) { - // In case the new send quota made it possible to send another packet - // Mark the socket as writable. If we don't use pacing, the send - // quota does not affect if the socket is writeable - // if we don't use packet pacing, the writable event is triggered - // whenever the cur_window falls below the max_window, so we don't - // need this check then - if (state == CS_CONNECTED_FULL && is_writable(get_packet_size())) { - state = CS_CONNECTED; - LOG_UTPV("0x%08x: Socket writable. max_window:%u cur_window:%u quota:%d packet_size:%u", - this, (uint)max_window, (uint)cur_window, send_quota / 100, (uint)get_packet_size()); - func.on_state(userdata, UTP_STATE_WRITABLE); - } - } - - switch (state) { - case CS_SYN_SENT: - case CS_CONNECTED_FULL: - case CS_CONNECTED: - case CS_FIN_SENT: { - - // Reset max window... - if ((int)(g_current_ms - zerowindow_time) >= 0 && max_window_user == 0) { - max_window_user = PACKET_SIZE; - } - - if ((int)(g_current_ms - rto_timeout) >= 0 && - (!(USE_PACKET_PACING) || cur_window_packets > 0) && - rto_timeout > 0) { - - /* - OutgoingPacket *pkt = (OutgoingPacket*)outbuf.get(seq_nr - cur_window_packets); - - // If there were a lot of retransmissions, force recomputation of round trip time - if (pkt->transmissions >= 4) - rtt = 0; - */ - - // Increase RTO - const uint new_timeout = retransmit_timeout * 2; - if (new_timeout >= 30000 || (state == CS_SYN_SENT && new_timeout > 6000)) { - // more than 30 seconds with no reply. kill it. 
- // if we haven't even connected yet, give up sooner. 6 seconds - // means 2 tries at the following timeouts: 3, 6 seconds - if (state == CS_FIN_SENT) - state = CS_DESTROY; - else - state = CS_RESET; - func.on_error(userdata, ETIMEDOUT); - goto getout; - } - - retransmit_timeout = new_timeout; - rto_timeout = g_current_ms + new_timeout; - - // On Timeout - duplicate_ack = 0; - - // rate = min_rate - max_window = get_packet_size(); - send_quota = max((int32)max_window * 100, send_quota); - - // every packet should be considered lost - for (int i = 0; i < cur_window_packets; ++i) { - OutgoingPacket *pkt = (OutgoingPacket*)outbuf.get(seq_nr - i - 1); - if (pkt == 0 || pkt->transmissions == 0 || pkt->need_resend) continue; - pkt->need_resend = true; - assert(cur_window >= pkt->payload); - cur_window -= pkt->payload; - } - - // used in parse_log.py - LOG_UTP("0x%08x: Packet timeout. Resend. seq_nr:%u. timeout:%u max_window:%u", - this, seq_nr - cur_window_packets, retransmit_timeout, (uint)max_window); - - fast_timeout = true; - timeout_seq_nr = seq_nr; - - if (cur_window_packets > 0) { - OutgoingPacket *pkt = (OutgoingPacket*)outbuf.get(seq_nr - cur_window_packets); - assert(pkt); - send_quota = max((int32)pkt->length * 100, send_quota); - - // Re-send the packet. - send_packet(pkt); - } - } - - // Mark the socket as writable - if (state == CS_CONNECTED_FULL && is_writable(get_packet_size())) { - state = CS_CONNECTED; - LOG_UTPV("0x%08x: Socket writable. 
max_window:%u cur_window:%u quota:%d packet_size:%u", - this, (uint)max_window, (uint)cur_window, send_quota / 100, (uint)get_packet_size()); - func.on_state(userdata, UTP_STATE_WRITABLE); - } - - if (state >= CS_CONNECTED && state <= CS_FIN_SENT) { - // Send acknowledgment packets periodically, or when the threshold is reached - if (bytes_since_ack > DELAYED_ACK_BYTE_THRESHOLD || - (int)(g_current_ms - ack_time) >= 0) { - send_ack(); - } - - if ((int)(g_current_ms - last_sent_packet) >= KEEPALIVE_INTERVAL) { - send_keep_alive(); - } - } - - break; - } - - // Close? - case CS_GOT_FIN: - case CS_DESTROY_DELAY: - if ((int)(g_current_ms - rto_timeout) >= 0) { - state = (state == CS_DESTROY_DELAY) ? CS_DESTROY : CS_RESET; - if (cur_window_packets > 0 && userdata) { - func.on_error(userdata, ECONNRESET); - } - } - break; - // prevent warning - case CS_IDLE: - case CS_RESET: - case CS_DESTROY: - break; - } - - getout: - - // make sure we don't accumulate quota when we don't have - // anything to send - int32 limit = max((int32)max_window / 2, 5 * (int32)get_packet_size()) * 100; - if (send_quota > limit) send_quota = limit; -} - -// returns: -// 0: the packet was acked. -// 1: it means that the packet had already been acked -// 2: the packet has not been sent yet -int UTPSocket::ack_packet(uint16 seq) -{ - OutgoingPacket *pkt = (OutgoingPacket*)outbuf.get(seq); - - // the packet has already been acked (or not sent) - if (pkt == NULL) { - LOG_UTPV("0x%08x: got ack for:%u (already acked, or never sent)", this, seq); - return 1; - } - - // can't ack packets that haven't been sent yet! 
- if (pkt->transmissions == 0) { - LOG_UTPV("0x%08x: got ack for:%u (never sent, pkt_size:%u need_resend:%u)", - this, seq, (uint)pkt->payload, pkt->need_resend); - return 2; - } - - LOG_UTPV("0x%08x: got ack for:%u (pkt_size:%u need_resend:%u)", - this, seq, (uint)pkt->payload, pkt->need_resend); - - outbuf.put(seq, NULL); - - // if we never re-sent the packet, update the RTT estimate - if (pkt->transmissions == 1) { - // Estimate the round trip time. - const uint32 ertt = (uint32)((UTP_GetMicroseconds() - pkt->time_sent) / 1000); - if (rtt == 0) { - // First round trip time sample - rtt = ertt; - rtt_var = ertt / 2; - // sanity check. rtt should never be more than 6 seconds -// assert(rtt < 6000); - } else { - // Compute new round trip times - const int delta = (int)rtt - ertt; - rtt_var = rtt_var + (int)(abs(delta) - rtt_var) / 4; - rtt = rtt - rtt/8 + ertt/8; - // sanity check. rtt should never be more than 6 seconds -// assert(rtt < 6000); - rtt_hist.add_sample(ertt); - } - rto = max(rtt + rtt_var * 4, 500); - LOG_UTPV("0x%08x: rtt:%u avg:%u var:%u rto:%u", - this, ertt, rtt, rtt_var, rto); - } - retransmit_timeout = rto; - rto_timeout = g_current_ms + rto; - // if need_resend is set, this packet has already - // been considered timed-out, and is not included in - // the cur_window anymore - if (!pkt->need_resend) { - assert(cur_window >= pkt->payload); - cur_window -= pkt->payload; - } - free(pkt); - return 0; -} - -// count the number of bytes that were acked by the EACK header -size_t UTPSocket::selective_ack_bytes(uint base, const byte* mask, byte len, int64& min_rtt) -{ - if (cur_window_packets == 0) return 0; - - size_t acked_bytes = 0; - int bits = len * 8; - - do { - uint v = base + bits; - - // ignore bits that haven't been sent yet - // see comment in UTPSocket::selective_ack - if (((seq_nr - v - 1) & ACK_NR_MASK) >= (uint16)(cur_window_packets - 1)) - continue; - - // ignore bits that represents packets we haven't sent yet - // or packets that have 
already been acked - OutgoingPacket *pkt = (OutgoingPacket*)outbuf.get(v); - if (!pkt || pkt->transmissions == 0) - continue; - - // Count the number of segments that were successfully received past it. - if (bits >= 0 && mask[bits>>3] & (1 << (bits & 7))) { - assert((int)(pkt->payload) >= 0); - acked_bytes += pkt->payload; - min_rtt = min(min_rtt, UTP_GetMicroseconds() - pkt->time_sent); - continue; - } - } while (--bits >= -1); - return acked_bytes; -} - -void UTPSocket::selective_ack(uint base, const byte *mask, byte len) -{ - if (cur_window_packets == 0) return; - - // the range is inclusive [0, 31] bits - int bits = len * 8 - 1; - - int count = 0; - - // resends is a stack of sequence numbers we need to resend. Since we - // iterate in reverse over the acked packets, at the end, the top packets - // are the ones we want to resend - int resends[32]; - int nr = 0; - - LOG_UTPV("0x%08x: Got EACK [%032b] base:%u", this, *(uint32*)mask, base); - do { - // we're iterating over the bits from higher sequence numbers - // to lower (kind of in reverse order, wich might not be very - // intuitive) - uint v = base + bits; - - // ignore bits that haven't been sent yet - // and bits that fall below the ACKed sequence number - // this can happen if an EACK message gets - // reordered and arrives after a packet that ACKs up past - // the base for thie EACK message - - // this is essentially the same as: - // if v >= seq_nr || v <= seq_nr - cur_window_packets - // but it takes wrapping into account - - // if v == seq_nr the -1 will make it wrap. if v > seq_nr - // it will also wrap (since it will fall further below 0) - // and be > cur_window_packets. - // if v == seq_nr - cur_window_packets, the result will be - // seq_nr - (seq_nr - cur_window_packets) - 1 - // == seq_nr - seq_nr + cur_window_packets - 1 - // == cur_window_packets - 1 which will be caught by the - // test. 
If v < seq_nr - cur_window_packets the result will grow - // fall furhter outside of the cur_window_packets range. - - // sequence number space: - // - // rejected < accepted > rejected - // <============+--------------+============> - // ^ ^ - // | | - // (seq_nr-wnd) seq_nr - - if (((seq_nr - v - 1) & ACK_NR_MASK) >= (uint16)(cur_window_packets - 1)) - continue; - - // this counts as a duplicate ack, even though we might have - // received an ack for this packet previously (in another EACK - // message for instance) - bool bit_set = bits >= 0 && mask[bits>>3] & (1 << (bits & 7)); - - // if this packet is acked, it counts towards the duplicate ack counter - if (bit_set) count++; - - // ignore bits that represents packets we haven't sent yet - // or packets that have already been acked - OutgoingPacket *pkt = (OutgoingPacket*)outbuf.get(v); - if (!pkt || pkt->transmissions == 0) { - LOG_UTPV("0x%08x: skipping %u. pkt:%08x transmissions:%u %s", - this, v, pkt, pkt?pkt->transmissions:0, pkt?"(not sent yet?)":"(already acked?)"); - continue; - } - - // Count the number of segments that were successfully received past it. - if (bit_set) { - // the selective ack should never ACK the packet we're waiting for to decrement cur_window_packets - assert((v & outbuf.mask) != ((seq_nr - cur_window_packets) & outbuf.mask)); - ack_packet(v); - continue; - } - - // Resend segments - // if count is less than our re-send limit, we haven't seen enough - // acked packets in front of this one to warrant a re-send. 
- // if count == 0, we're still going through the tail of zeroes - if (((v - fast_resend_seq_nr) & ACK_NR_MASK) <= OUTGOING_BUFFER_MAX_SIZE && - count >= DUPLICATE_ACKS_BEFORE_RESEND && - duplicate_ack < DUPLICATE_ACKS_BEFORE_RESEND) { - resends[nr++] = v; - LOG_UTPV("0x%08x: no ack for %u", this, v); - } else { - LOG_UTPV("0x%08x: not resending %u count:%d dup_ack:%u fast_resend_seq_nr:%u", - this, v, count, duplicate_ack, fast_resend_seq_nr); - } - } while (--bits >= -1); - - if (((base - 1 - fast_resend_seq_nr) & ACK_NR_MASK) < 256 && - count >= DUPLICATE_ACKS_BEFORE_RESEND && - duplicate_ack < DUPLICATE_ACKS_BEFORE_RESEND) { - // if we get enough duplicate acks to start - // resending, the first packet we should resend - // is base-1 - resends[nr++] = base - 1; - } else { - LOG_UTPV("0x%08x: not resending %u count:%d dup_ack:%u fast_resend_seq_nr:%u", - this, base - 1, count, duplicate_ack, fast_resend_seq_nr); - } - - bool back_off = false; - int i = 0; - while (nr > 0) { - uint v = resends[--nr]; - // don't consider the tail of 0:es to be lost packets - // only unacked packets with acked packets after should - // be considered lost - OutgoingPacket *pkt = (OutgoingPacket*)outbuf.get(v); - - // this may be an old (re-ordered) packet, and some of the - // packets in here may have been acked already. In which - // case they will not be in the send queue anymore - if (!pkt) continue; - - // used in parse_log.py - LOG_UTP("0x%08x: Packet %u lost. Resending", this, v); - - // On Loss - back_off = true; -#ifdef _DEBUG - ++_stats._rexmit; -#endif - send_packet(pkt); - fast_resend_seq_nr = v + 1; - - // Re-send max 4 packets. - if (++i >= 4) break; - } - - if (back_off) - maybe_decay_win(); - - duplicate_ack = count; -} - -void UTPSocket::apply_ledbat_ccontrol(size_t bytes_acked, uint32 actual_delay, int64 min_rtt) -{ - // the delay can never be greater than the rtt. 
The min_rtt - // variable is the RTT in microseconds - - assert(min_rtt >= 0); - int32 our_delay = min(our_hist.get_value(), uint32(min_rtt)); - assert(our_delay != INT_MAX); - assert(our_delay >= 0); - - SOCKADDR_STORAGE sa = addr.get_sockaddr_storage(); - UTP_DelaySample((sockaddr*)&sa, our_delay / 1000); - - // This test the connection under heavy load from foreground - // traffic. Pretend that our delays are very high to force the - // connection to use sub-packet size window sizes - //our_delay *= 4; - - // target is microseconds - int target = CCONTROL_TARGET; - if (target <= 0) target = 100000; - - double off_target = target - our_delay; - - // this is the same as: - // - // (min(off_target, target) / target) * (bytes_acked / max_window) * MAX_CWND_INCREASE_BYTES_PER_RTT - // - // so, it's scaling the max increase by the fraction of the window this ack represents, and the fraction - // of the target delay the current delay represents. - // The min() around off_target protects against crazy values of our_delay, which may happen when th - // timestamps wraps, or by just having a malicious peer sending garbage. This caps the increase - // of the window size to MAX_CWND_INCREASE_BYTES_PER_RTT per rtt. - // as for large negative numbers, this direction is already capped at the min packet size further down - // the min around the bytes_acked protects against the case where the window size was recently - // shrunk and the number of acked bytes exceeds that. This is considered no more than one full - // window, in order to keep the gain within sane boundries. 
- - assert(bytes_acked > 0); - double window_factor = (double)min(bytes_acked, max_window) / (double)max(max_window, bytes_acked); - double delay_factor = off_target / target; - double scaled_gain = MAX_CWND_INCREASE_BYTES_PER_RTT * window_factor * delay_factor; - - // since MAX_CWND_INCREASE_BYTES_PER_RTT is a cap on how much the window size (max_window) - // may increase per RTT, we may not increase the window size more than that proportional - // to the number of bytes that were acked, so that once one window has been acked (one rtt) - // the increase limit is not exceeded - // the +1. is to allow for floating point imprecision - assert(scaled_gain <= 1. + MAX_CWND_INCREASE_BYTES_PER_RTT * (int)min(bytes_acked, max_window) / (double)max(max_window, bytes_acked)); - - if (scaled_gain > 0 && g_current_ms - last_maxed_out_window > 300) { - // if it was more than 300 milliseconds since we tried to send a packet - // and stopped because we hit the max window, we're most likely rate - // limited (which prevents us from ever hitting the window size) - // if this is the case, we cannot let the max_window grow indefinitely - scaled_gain = 0; - } - - if (scaled_gain + max_window < MIN_WINDOW_SIZE) { - max_window = MIN_WINDOW_SIZE; - } else { - max_window = (size_t)(max_window + scaled_gain); - } - - // make sure that the congestion window is below max - // make sure that we don't shrink our window too small - max_window = clamp(max_window, MIN_WINDOW_SIZE, opt_sndbuf); - - // used in parse_log.py - LOG_UTP("0x%08x: actual_delay:%u our_delay:%d their_delay:%u off_target:%d max_window:%u " - "delay_base:%u delay_sum:%d target_delay:%d acked_bytes:%u cur_window:%u " - "scaled_gain:%f rtt:%u rate:%u quota:%d wnduser:%u rto:%u timeout:%d get_microseconds:"I64u" " - "cur_window_packets:%u packet_size:%u their_delay_base:%u their_actual_delay:%u", - this, actual_delay, our_delay / 1000, their_hist.get_value() / 1000, - (int)off_target / 1000, (uint)(max_window), 
our_hist.delay_base, - (our_delay + their_hist.get_value()) / 1000, target / 1000, (uint)bytes_acked, - (uint)(cur_window - bytes_acked), (float)(scaled_gain), rtt, - (uint)(max_window * 1000 / (rtt_hist.delay_base?rtt_hist.delay_base:50)), - send_quota / 100, (uint)max_window_user, rto, (int)(rto_timeout - g_current_ms), - UTP_GetMicroseconds(), cur_window_packets, (uint)get_packet_size(), - their_hist.delay_base, their_hist.delay_base + their_hist.get_value()); -} - -static void UTP_RegisterRecvPacket(UTPSocket *conn, size_t len) -{ -#ifdef _DEBUG - ++conn->_stats._nrecv; - conn->_stats._nbytes_recv += len; -#endif - - if (len <= PACKET_SIZE_MID) { - if (len <= PACKET_SIZE_EMPTY) { - _global_stats._nraw_recv[PACKET_SIZE_EMPTY_BUCKET]++; - } else if (len <= PACKET_SIZE_SMALL) { - _global_stats._nraw_recv[PACKET_SIZE_SMALL_BUCKET]++; - } else - _global_stats._nraw_recv[PACKET_SIZE_MID_BUCKET]++; - } else { - if (len <= PACKET_SIZE_BIG) { - _global_stats._nraw_recv[PACKET_SIZE_BIG_BUCKET]++; - } else - _global_stats._nraw_recv[PACKET_SIZE_HUGE_BUCKET]++; - } -} - -// returns the max number of bytes of payload the uTP -// connection is allowed to send -size_t UTPSocket::get_packet_size() -{ - int header_size = version == 1 - ? sizeof(PacketFormatV1) - : sizeof(PacketFormat); - - size_t mtu = get_udp_mtu(); - - if (DYNAMIC_PACKET_SIZE_ENABLED) { - SOCKADDR_STORAGE sa = addr.get_sockaddr_storage(); - size_t max_packet_size = UTP_GetPacketSize((sockaddr*)&sa); - return min(mtu - header_size, max_packet_size); - } - else - { - return mtu - header_size; - } -} - -// Process an incoming packet -// syn is true if this is the first packet received. 
It will cut off parsing -// as soon as the header is done -size_t UTP_ProcessIncoming(UTPSocket *conn, const byte *packet, size_t len, bool syn = false) -{ - UTP_RegisterRecvPacket(conn, len); - - g_current_ms = UTP_GetMilliseconds(); - - conn->update_send_quota(); - - const PacketFormat *pf = (PacketFormat*)packet; - const PacketFormatV1 *pf1 = (PacketFormatV1*)packet; - const byte *packet_end = packet + len; - - uint16 pk_seq_nr; - uint16 pk_ack_nr; - uint8 pk_flags; - if (conn->version == 0) { - pk_seq_nr = pf->seq_nr; - pk_ack_nr = pf->ack_nr; - pk_flags = pf->flags; - } else { - pk_seq_nr = pf1->seq_nr; - pk_ack_nr = pf1->ack_nr; - pk_flags = pf1->type(); - } - - if (pk_flags >= ST_NUM_STATES) return 0; - - LOG_UTPV("0x%08x: Got %s. seq_nr:%u ack_nr:%u state:%s version:%u timestamp:"I64u" reply_micro:%u", - conn, flagnames[pk_flags], pk_seq_nr, pk_ack_nr, statenames[conn->state], conn->version, - conn->version == 0?(uint64)(pf->tv_sec) * 1000000 + pf->tv_usec:uint64(pf1->tv_usec), - conn->version == 0?(uint32)(pf->reply_micro):(uint32)(pf1->reply_micro)); - - // mark receipt time - uint64 time = UTP_GetMicroseconds(); - - // RSTs are handled earlier, since the connid matches the send id not the recv id - assert(pk_flags != ST_RESET); - - // TODO: maybe send a ST_RESET if we're in CS_RESET? - - const byte *selack_ptr = NULL; - - // Unpack UTP packet options - // Data pointer - const byte *data = (const byte*)pf + conn->get_header_size(); - if (conn->get_header_size() > len) { - LOG_UTPV("0x%08x: Invalid packet size (less than header size)", conn); - return 0; - } - // Skip the extension headers - uint extension = conn->version == 0 ? pf->ext : pf1->ext; - if (extension != 0) { - do { - // Verify that the packet is valid. 
- data += 2; - - if ((int)(packet_end - data) < 0 || (int)(packet_end - data) < data[-1]) { - LOG_UTPV("0x%08x: Invalid len of extensions", conn); - return 0; - } - - switch(extension) { - case 1: // Selective Acknowledgment - selack_ptr = data; - break; - case 2: // extension bits - if (data[-1] != 8) { - LOG_UTPV("0x%08x: Invalid len of extension bits header", conn); - return 0; - } - memcpy(conn->extensions, data, 8); - LOG_UTPV("0x%08x: got extension bits:%02x%02x%02x%02x%02x%02x%02x%02x", conn, - conn->extensions[0], conn->extensions[1], conn->extensions[2], conn->extensions[3], - conn->extensions[4], conn->extensions[5], conn->extensions[6], conn->extensions[7]); - } - extension = data[-2]; - data += data[-1]; - } while (extension); - } - - if (conn->state == CS_SYN_SENT) { - // if this is a syn-ack, initialize our ack_nr - // to match the sequence number we got from - // the other end - conn->ack_nr = (pk_seq_nr - 1) & SEQ_NR_MASK; - } - - g_current_ms = UTP_GetMilliseconds(); - conn->last_got_packet = g_current_ms; - - if (syn) { - return 0; - } - - // seqnr is the number of packets past the expected - // packet this is. ack_nr is the last acked, seq_nr is the - // current. Subtracring 1 makes 0 mean "this is the next - // expected packet". - const uint seqnr = (pk_seq_nr - conn->ack_nr - 1) & SEQ_NR_MASK; - - // Getting an invalid sequence number? 
- if (seqnr >= REORDER_BUFFER_MAX_SIZE) { - if (seqnr >= (SEQ_NR_MASK + 1) - REORDER_BUFFER_MAX_SIZE && pk_flags != ST_STATE) { - conn->ack_time = g_current_ms + min(conn->ack_time - g_current_ms, DELAYED_ACK_TIME_THRESHOLD); - } - LOG_UTPV(" Got old Packet/Ack (%u/%u)=%u!", pk_seq_nr, conn->ack_nr, seqnr); - return 0; - } - - // Process acknowledgment - // acks is the number of packets that was acked - int acks = (pk_ack_nr - (conn->seq_nr - 1 - conn->cur_window_packets)) & ACK_NR_MASK; - - // this happens when we receive an old ack nr - if (acks > conn->cur_window_packets) acks = 0; - - // if we get the same ack_nr as in the last packet - // increase the duplicate_ack counter, otherwise reset - // it to 0 - if (conn->cur_window_packets > 0) { - if (pk_ack_nr == ((conn->seq_nr - conn->cur_window_packets - 1) & ACK_NR_MASK) && - conn->cur_window_packets > 0) { - //++conn->duplicate_ack; - } else { - conn->duplicate_ack = 0; - } - - // TODO: if duplicate_ack == DUPLICATE_ACK_BEFORE_RESEND - // and fast_resend_seq_nr <= ack_nr + 1 - // resend ack_nr + 1 - } - - // figure out how many bytes were acked - size_t acked_bytes = 0; - - // the minimum rtt of all acks - // this is the upper limit on the delay we get back - // from the other peer. Our delay cannot exceed - // the rtt of the packet. If it does, clamp it. 
- // this is done in apply_ledbat_ccontrol() - int64 min_rtt = INT64_MAX; - - for (int i = 0; i < acks; ++i) { - int seq = conn->seq_nr - conn->cur_window_packets + i; - OutgoingPacket *pkt = (OutgoingPacket*)conn->outbuf.get(seq); - if (pkt == 0 || pkt->transmissions == 0) continue; - assert((int)(pkt->payload) >= 0); - acked_bytes += pkt->payload; - min_rtt = min(min_rtt, UTP_GetMicroseconds() - pkt->time_sent); - } - - // count bytes acked by EACK - if (selack_ptr != NULL) { - acked_bytes += conn->selective_ack_bytes((pk_ack_nr + 2) & ACK_NR_MASK, - selack_ptr, selack_ptr[-1], min_rtt); - } - - LOG_UTPV("0x%08x: acks:%d acked_bytes:%u seq_nr:%d cur_window:%u cur_window_packets:%u relative_seqnr:%u max_window:%u min_rtt:%u rtt:%u", - conn, acks, (uint)acked_bytes, conn->seq_nr, (uint)conn->cur_window, conn->cur_window_packets, - seqnr, (uint)conn->max_window, (uint)(min_rtt / 1000), conn->rtt); - - uint64 p; - - if (conn->version == 0) { - p = uint64(pf->tv_sec) * 1000000 + pf->tv_usec; - } else { - p = pf1->tv_usec; - } - - conn->last_measured_delay = g_current_ms; - - // get delay in both directions - // record the delay to report back - const uint32 their_delay = (uint32)(p == 0 ? 
0 : time - p); - conn->reply_micro = their_delay; - uint32 prev_delay_base = conn->their_hist.delay_base; - if (their_delay != 0) conn->their_hist.add_sample(their_delay); - - // if their new delay base is less than their previous one - // we should shift our delay base in the other direction in order - // to take the clock skew into account - if (prev_delay_base != 0 && - wrapping_compare_less(conn->their_hist.delay_base, prev_delay_base)) { - // never adjust more than 10 milliseconds - if (prev_delay_base - conn->their_hist.delay_base <= 10000) { - conn->our_hist.shift(prev_delay_base - conn->their_hist.delay_base); - } - } - - const uint32 actual_delay = conn->version==0 - ?(pf->reply_micro==INT_MAX?0:uint32(pf->reply_micro)) - :(uint32(pf1->reply_micro)==INT_MAX?0:uint32(pf1->reply_micro)); - - // if the actual delay is 0, it means the other end - // hasn't received a sample from us yet, and doesn't - // know what it is. We can't update out history unless - // we have a true measured sample - prev_delay_base = conn->our_hist.delay_base; - if (actual_delay != 0) conn->our_hist.add_sample(actual_delay); - - // if our new delay base is less than our previous one - // we should shift the other end's delay base in the other - // direction in order to take the clock skew into account - // This is commented out because it creates bad interactions - // with our adjustment in the other direction. We don't really - // need our estimates of the other peer to be very accurate - // anyway. The problem with shifting here is that we're more - // likely shift it back later because of a low latency. 
This - // second shift back would cause us to shift our delay base - // which then get's into a death spiral of shifting delay bases -/* if (prev_delay_base != 0 && - wrapping_compare_less(conn->our_hist.delay_base, prev_delay_base)) { - // never adjust more than 10 milliseconds - if (prev_delay_base - conn->our_hist.delay_base <= 10000) { - conn->their_hist.Shift(prev_delay_base - conn->our_hist.delay_base); - } - } -*/ - - // if the delay estimate exceeds the RTT, adjust the base_delay to - // compensate - if (conn->our_hist.get_value() > uint32(min_rtt)) { - conn->our_hist.shift(conn->our_hist.get_value() - min_rtt); - } - - // only apply the congestion controller on acks - // if we don't have a delay measurement, there's - // no point in invoking the congestion control - if (actual_delay != 0 && acked_bytes >= 1) - conn->apply_ledbat_ccontrol(acked_bytes, actual_delay, min_rtt); - - // sanity check, the other end should never ack packets - // past the point we've sent - if (acks <= conn->cur_window_packets) { - conn->max_window_user = conn->version == 0 - ? pf->windowsize * PACKET_SIZE : pf1->windowsize; - - // If max user window is set to 0, then we startup a timer - // That will reset it to 1 after 15 seconds. - if (conn->max_window_user == 0) - // Reset max_window_user to 1 every 15 seconds. - conn->zerowindow_time = g_current_ms + 15000; - - // Respond to connect message - // Switch to CONNECTED state. - if (conn->state == CS_SYN_SENT) { - conn->state = CS_CONNECTED; - conn->func.on_state(conn->userdata, UTP_STATE_CONNECT); - - // We've sent a fin, and everything was ACKed (including the FIN), - // it's safe to destroy the socket. cur_window_packets == acks - // means that this packet acked all the remaining packets that - // were in-flight. 
- } else if (conn->state == CS_FIN_SENT && conn->cur_window_packets == acks) { - conn->state = CS_DESTROY; - } - - // Update fast resend counter - if (wrapping_compare_less(conn->fast_resend_seq_nr, (pk_ack_nr + 1) & ACK_NR_MASK)) - conn->fast_resend_seq_nr = pk_ack_nr + 1; - - LOG_UTPV("0x%08x: fast_resend_seq_nr:%u", conn, conn->fast_resend_seq_nr); - - for (int i = 0; i < acks; ++i) { - int ack_status = conn->ack_packet(conn->seq_nr - conn->cur_window_packets); - // if ack_status is 0, the packet was acked. - // if acl_stauts is 1, it means that the packet had already been acked - // if it's 2, the packet has not been sent yet - // We need to break this loop in the latter case. This could potentially - // happen if we get an ack_nr that does not exceed what we have stuffed - // into the outgoing buffer, but does exceed what we have sent - if (ack_status == 2) { -#ifdef _DEBUG - OutgoingPacket* pkt = (OutgoingPacket*)conn->outbuf.get(conn->seq_nr - conn->cur_window_packets); - assert(pkt->transmissions == 0); -#endif - break; - } - conn->cur_window_packets--; - } -#ifdef _DEBUG - if (conn->cur_window_packets == 0) assert(conn->cur_window == 0); -#endif - - // packets in front of this may have been acked by a - // selective ack (EACK). 
Keep decreasing the window packet size - // until we hit a packet that is still waiting to be acked - // in the send queue - // this is especially likely to happen when the other end - // has the EACK send bug older versions of uTP had - while (conn->cur_window_packets > 0 && !conn->outbuf.get(conn->seq_nr - conn->cur_window_packets)) - conn->cur_window_packets--; - -#ifdef _DEBUG - if (conn->cur_window_packets == 0) assert(conn->cur_window == 0); -#endif - - // this invariant should always be true - assert(conn->cur_window_packets == 0 || conn->outbuf.get(conn->seq_nr - conn->cur_window_packets)); - - // flush Nagle - if (conn->cur_window_packets == 1) { - OutgoingPacket *pkt = (OutgoingPacket*)conn->outbuf.get(conn->seq_nr - 1); - // do we still have quota? - if (pkt->transmissions == 0 && - (!(USE_PACKET_PACING) || conn->send_quota / 100 >= (int32)pkt->length)) { - conn->send_packet(pkt); - - // No need to send another ack if there is nothing to reorder. - if (conn->reorder_count == 0) { - conn->sent_ack(); - } - } - } - - // Fast timeout-retry - if (conn->fast_timeout) { - LOG_UTPV("Fast timeout %u,%u,%u?", (uint)conn->cur_window, conn->seq_nr - conn->timeout_seq_nr, conn->timeout_seq_nr); - // if the fast_resend_seq_nr is not pointing to the oldest outstanding packet, it suggests that we've already - // resent the packet that timed out, and we should leave the fast-timeout mode. 
- if (((conn->seq_nr - conn->cur_window_packets) & ACK_NR_MASK) != conn->fast_resend_seq_nr) { - conn->fast_timeout = false; - } else { - // resend the oldest packet and increment fast_resend_seq_nr - // to not allow another fast resend on it again - OutgoingPacket *pkt = (OutgoingPacket*)conn->outbuf.get(conn->seq_nr - conn->cur_window_packets); - if (pkt && pkt->transmissions > 0) { - LOG_UTPV("0x%08x: Packet %u fast timeout-retry.", conn, conn->seq_nr - conn->cur_window_packets); -#ifdef _DEBUG - ++conn->_stats._fastrexmit; -#endif - conn->fast_resend_seq_nr++; - conn->send_packet(pkt); - } - } - } - } - - // Process selective acknowledgent - if (selack_ptr != NULL) { - conn->selective_ack(pk_ack_nr + 2, selack_ptr, selack_ptr[-1]); - } - - // this invariant should always be true - assert(conn->cur_window_packets == 0 || conn->outbuf.get(conn->seq_nr - conn->cur_window_packets)); - - LOG_UTPV("0x%08x: acks:%d acked_bytes:%u seq_nr:%u cur_window:%u cur_window_packets:%u quota:%d", - conn, acks, (uint)acked_bytes, conn->seq_nr, (uint)conn->cur_window, conn->cur_window_packets, - conn->send_quota / 100); - - // In case the ack dropped the current window below - // the max_window size, Mark the socket as writable - if (conn->state == CS_CONNECTED_FULL && conn->is_writable(conn->get_packet_size())) { - conn->state = CS_CONNECTED; - LOG_UTPV("0x%08x: Socket writable. max_window:%u cur_window:%u quota:%d packet_size:%u", - conn, (uint)conn->max_window, (uint)conn->cur_window, conn->send_quota / 100, (uint)conn->get_packet_size()); - conn->func.on_state(conn->userdata, UTP_STATE_WRITABLE); - } - - if (pk_flags == ST_STATE) { - // This is a state packet only. - return 0; - } - - // The connection is not in a state that can accept data? - if (conn->state != CS_CONNECTED && - conn->state != CS_CONNECTED_FULL && - conn->state != CS_FIN_SENT) { - return 0; - } - - // Is this a finalize packet? 
- if (pk_flags == ST_FIN && !conn->got_fin) { - LOG_UTPV("Got FIN eof_pkt:%u", pk_seq_nr); - conn->got_fin = true; - conn->eof_pkt = pk_seq_nr; - // at this point, it is possible for the - // other end to have sent packets with - // sequence numbers higher than seq_nr. - // if this is the case, our reorder_count - // is out of sync. This case is dealt with - // when we re-order and hit the eof_pkt. - // we'll just ignore any packets with - // sequence numbers past this - } - - // Getting an in-order packet? - if (seqnr == 0) { - size_t count = packet_end - data; - if (count > 0 && conn->state != CS_FIN_SENT) { - LOG_UTPV("0x%08x: Got Data len:%u (rb:%u)", conn, (uint)count, (uint)conn->func.get_rb_size(conn->userdata)); - // Post bytes to the upper layer - conn->func.on_read(conn->userdata, data, count); - } - conn->ack_nr++; - conn->bytes_since_ack += count; - - // Check if the next packet has been received too, but waiting - // in the reorder buffer. - for (;;) { - - if (conn->got_fin && conn->eof_pkt == conn->ack_nr) { - if (conn->state != CS_FIN_SENT) { - conn->state = CS_GOT_FIN; - conn->rto_timeout = g_current_ms + min(conn->rto * 3, 60); - - LOG_UTPV("0x%08x: Posting EOF", conn); - conn->func.on_state(conn->userdata, UTP_STATE_EOF); - } - - // if the other end wants to close, ack immediately - conn->send_ack(); - - // reorder_count is not necessarily 0 at this point. - // even though it is most of the time, the other end - // may have sent packets with higher sequence numbers - // than what later end up being eof_pkt - // since we have received all packets up to eof_pkt - // just ignore the ones after it. - conn->reorder_count = 0; - } - - // Quick get-out in case there is nothing to reorder - if (conn->reorder_count == 0) - break; - - // Check if there are additional buffers in the reorder buffers - // that need delivery. 
- byte *p = (byte*)conn->inbuf.get(conn->ack_nr+1); - if (p == NULL) - break; - conn->inbuf.put(conn->ack_nr+1, NULL); - count = *(uint*)p; - if (count > 0 && conn->state != CS_FIN_SENT) { - // Pass the bytes to the upper layer - conn->func.on_read(conn->userdata, p + sizeof(uint), count); - } - conn->ack_nr++; - conn->bytes_since_ack += count; - - // Free the element from the reorder buffer - free(p); - assert(conn->reorder_count > 0); - conn->reorder_count--; - } - - // start the delayed ACK timer - conn->ack_time = g_current_ms + min(conn->ack_time - g_current_ms, DELAYED_ACK_TIME_THRESHOLD); - } else { - // Getting an out of order packet. - // The packet needs to be remembered and rearranged later. - - // if we have received a FIN packet, and the EOF-sequence number - // is lower than the sequence number of the packet we just received - // something is wrong. - if (conn->got_fin && pk_seq_nr > conn->eof_pkt) { - LOG_UTPV("0x%08x: Got an invalid packet sequence number, past EOF " - "reorder_count:%u len:%u (rb:%u)", - conn, conn->reorder_count, (uint)(packet_end - data), (uint)conn->func.get_rb_size(conn->userdata)); - return 0; - } - - // if the sequence number is entirely off the expected - // one, just drop it. We can't allocate buffer space in - // the inbuf entirely based on untrusted input - if (seqnr > 0x3ff) { - LOG_UTPV("0x%08x: Got an invalid packet sequence number, too far off " - "reorder_count:%u len:%u (rb:%u)", - conn, conn->reorder_count, (uint)(packet_end - data), (uint)conn->func.get_rb_size(conn->userdata)); - return 0; - } - - // we need to grow the circle buffer before we - // check if the packet is already in here, so that - // we don't end up looking at an older packet (since - // the indices wraps around). - conn->inbuf.ensure_size(pk_seq_nr + 1, seqnr + 1); - - // Has this packet already been received? (i.e. a duplicate) - // If that is the case, just discard it. 
- if (conn->inbuf.get(pk_seq_nr) != NULL) { -#ifdef _DEBUG - ++conn->_stats._nduprecv; -#endif - return 0; - } - - // Allocate memory to fit the packet that needs to re-ordered - byte *mem = (byte*)malloc((packet_end - data) + sizeof(uint)); - *(uint*)mem = (uint)(packet_end - data); - memcpy(mem + sizeof(uint), data, packet_end - data); - - // Insert into reorder buffer and increment the count - // of # of packets to be reordered. - // we add one to seqnr in order to leave the last - // entry empty, that way the assert in send_ack - // is valid. we have to add one to seqnr too, in order - // to make the circular buffer grow around the correct - // point (which is conn->ack_nr + 1). - assert(conn->inbuf.get(pk_seq_nr) == NULL); - assert((pk_seq_nr & conn->inbuf.mask) != ((conn->ack_nr+1) & conn->inbuf.mask)); - conn->inbuf.put(pk_seq_nr, mem); - conn->reorder_count++; - - LOG_UTPV("0x%08x: Got out of order data reorder_count:%u len:%u (rb:%u)", - conn, conn->reorder_count, (uint)(packet_end - data), (uint)conn->func.get_rb_size(conn->userdata)); - - // Setup so the partial ACK message will get sent immediately. 
- conn->ack_time = g_current_ms + min(conn->ack_time - g_current_ms, 1); - } - - // If ack_time or ack_bytes indicate that we need to send and ack, send one - // here instead of waiting for the timer to trigger - LOG_UTPV("bytes_since_ack:%u ack_time:%d", - (uint)conn->bytes_since_ack, (int)(g_current_ms - conn->ack_time)); - if (conn->state == CS_CONNECTED || conn->state == CS_CONNECTED_FULL) { - if (conn->bytes_since_ack > DELAYED_ACK_BYTE_THRESHOLD || - (int)(g_current_ms - conn->ack_time) >= 0) { - conn->send_ack(); - } - } - return (size_t)(packet_end - data); -} - -inline bool UTP_IsV1(PacketFormatV1 const* pf) -{ - return pf->version() == 1 && pf->type() < ST_NUM_STATES && pf->ext < 3; -} - -void UTP_Free(UTPSocket *conn) -{ - LOG_UTPV("0x%08x: Killing socket", conn); - - conn->func.on_state(conn->userdata, UTP_STATE_DESTROYING); - UTP_SetCallbacks(conn, NULL, NULL); - - assert(conn->idx < g_utp_sockets.GetCount()); - assert(g_utp_sockets[conn->idx] == conn); - - // Unlink object from the global list - assert(g_utp_sockets.GetCount() > 0); - - UTPSocket *last = g_utp_sockets[g_utp_sockets.GetCount() - 1]; - - assert(last->idx < g_utp_sockets.GetCount()); - assert(g_utp_sockets[last->idx] == last); - - last->idx = conn->idx; - - g_utp_sockets[conn->idx] = last; - - // Decrease the count - g_utp_sockets.SetCount(g_utp_sockets.GetCount() - 1); - - // Free all memory occupied by the socket object. 
- for (size_t i = 0; i <= conn->inbuf.mask; i++) { - free(conn->inbuf.elements[i]); - } - for (size_t i = 0; i <= conn->outbuf.mask; i++) { - free(conn->outbuf.elements[i]); - } - free(conn->inbuf.elements); - free(conn->outbuf.elements); - - // Finally free the socket object - free(conn); -} - - -// Public functions: -/////////////////////////////////////////////////////////////////////////////// - -// Create a UTP socket -UTPSocket *UTP_Create(SendToProc *send_to_proc, void *send_to_userdata, const struct sockaddr *addr, socklen_t addrlen) -{ - UTPSocket *conn = (UTPSocket*)calloc(1, sizeof(UTPSocket)); - - g_current_ms = UTP_GetMilliseconds(); - - UTP_SetCallbacks(conn, NULL, NULL); - conn->our_hist.clear(); - conn->their_hist.clear(); - conn->rto = 3000; - conn->rtt_var = 800; - conn->seq_nr = 1; - conn->ack_nr = 0; - conn->max_window_user = 255 * PACKET_SIZE; - conn->addr = PackedSockAddr((const SOCKADDR_STORAGE*)addr, addrlen); - conn->send_to_proc = send_to_proc; - conn->send_to_userdata = send_to_userdata; - conn->ack_time = g_current_ms + 0x70000000; - conn->last_got_packet = g_current_ms; - conn->last_sent_packet = g_current_ms; - conn->last_measured_delay = g_current_ms + 0x70000000; - conn->last_rwin_decay = int32(g_current_ms) - MAX_WINDOW_DECAY; - conn->last_send_quota = g_current_ms; - conn->send_quota = PACKET_SIZE * 100; - conn->cur_window_packets = 0; - conn->fast_resend_seq_nr = conn->seq_nr; - - // default to version 1 - UTP_SetSockopt(conn, SO_UTPVERSION, 1); - - // we need to fit one packet in the window - // when we start the connection - conn->max_window = conn->get_packet_size(); - conn->state = CS_IDLE; - - conn->outbuf.mask = 15; - conn->inbuf.mask = 15; - - conn->outbuf.elements = (void**)calloc(16, sizeof(void*)); - conn->inbuf.elements = (void**)calloc(16, sizeof(void*)); - - conn->idx = g_utp_sockets.Append(conn); - - LOG_UTPV("0x%08x: UTP_Create", conn); - - return conn; -} - -void UTP_SetCallbacks(UTPSocket *conn, UTPFunctionTable 
*funcs, void *userdata) -{ - assert(conn); - - if (funcs == NULL) { - funcs = &zero_funcs; - } - conn->func = *funcs; - conn->userdata = userdata; -} - -bool UTP_SetSockopt(UTPSocket* conn, int opt, int val) -{ - assert(conn); - - switch (opt) { - case SO_SNDBUF: - assert(val >= 1); - conn->opt_sndbuf = val; - return true; - case SO_RCVBUF: - conn->opt_rcvbuf = val; - return true; - case SO_UTPVERSION: - assert(conn->state == CS_IDLE); - if (conn->state != CS_IDLE) { - // too late - return false; - } - if (conn->version == 1 && val == 0) { - conn->reply_micro = INT_MAX; - conn->opt_rcvbuf = 200 * 1024; - conn->opt_sndbuf = OUTGOING_BUFFER_MAX_SIZE * PACKET_SIZE; - } else if (conn->version == 0 && val == 1) { - conn->reply_micro = 0; - conn->opt_rcvbuf = 3 * 1024 * 1024 + 512 * 1024; - conn->opt_sndbuf = conn->opt_rcvbuf; - } - conn->version = val; - return true; - } - - return false; -} - -// Try to connect to a specified host. -// 'initial' is the number of data bytes to send in the connect packet. -void UTP_Connect(UTPSocket *conn) -{ - assert(conn); - - assert(conn->state == CS_IDLE); - assert(conn->cur_window_packets == 0); - assert(conn->outbuf.get(conn->seq_nr) == NULL); - assert(sizeof(PacketFormatV1) == 20); - - conn->state = CS_SYN_SENT; - - g_current_ms = UTP_GetMilliseconds(); - - // Create and send a connect message - uint32 conn_seed = UTP_Random(); - - // we identify newer versions by setting the - // first two bytes to 0x0001 - if (conn->version > 0) { - conn_seed &= 0xffff; - } - - // used in parse_log.py - LOG_UTP("0x%08x: UTP_Connect conn_seed:%u packet_size:%u (B) " - "target_delay:%u (ms) delay_history:%u " - "delay_base_history:%u (minutes)", - conn, conn_seed, PACKET_SIZE, CCONTROL_TARGET / 1000, - CUR_DELAY_SIZE, DELAY_BASE_HISTORY); - - // Setup initial timeout timer. 
- conn->retransmit_timeout = 3000; - conn->rto_timeout = g_current_ms + conn->retransmit_timeout; - conn->last_rcv_win = conn->get_rcv_window(); - - conn->conn_seed = conn_seed; - conn->conn_id_recv = conn_seed; - conn->conn_id_send = conn_seed+1; - // if you need compatibiltiy with 1.8.1, use this. it increases attackability though. - //conn->seq_nr = 1; - conn->seq_nr = UTP_Random(); - - // Create the connect packet. - const size_t header_ext_size = conn->get_header_extensions_size(); - - OutgoingPacket *pkt = (OutgoingPacket*)malloc(sizeof(OutgoingPacket) - 1 + header_ext_size); - - PacketFormatExtensions* p = (PacketFormatExtensions*)pkt->data; - PacketFormatExtensionsV1* p1 = (PacketFormatExtensionsV1*)pkt->data; - - memset(p, 0, header_ext_size); - // SYN packets are special, and have the receive ID in the connid field, - // instead of conn_id_send. - if (conn->version == 0) { - p->pf.connid = conn->conn_id_recv; - p->pf.ext = 2; - p->pf.windowsize = (byte)DIV_ROUND_UP(conn->last_rcv_win, PACKET_SIZE); - p->pf.seq_nr = conn->seq_nr; - p->pf.flags = ST_SYN; - p->ext_next = 0; - p->ext_len = 8; - memset(p->extensions, 0, 8); - } else { - p1->pf.set_version(1); - p1->pf.set_type(ST_SYN); - p1->pf.ext = 2; - p1->pf.connid = conn->conn_id_recv; - p1->pf.windowsize = (uint32)conn->last_rcv_win; - p1->pf.seq_nr = conn->seq_nr; - p1->ext_next = 0; - p1->ext_len = 8; - memset(p1->extensions, 0, 8); - } - pkt->transmissions = 0; - pkt->length = header_ext_size; - pkt->payload = 0; - - //LOG_UTPV("0x%08x: Sending connect %s [%u].", - // conn, addrfmt(conn->addr, addrbuf), conn_seed); - - // Remember the message in the outgoing queue. 
- conn->outbuf.ensure_size(conn->seq_nr, conn->cur_window_packets); - conn->outbuf.put(conn->seq_nr, pkt); - conn->seq_nr++; - conn->cur_window_packets++; - - conn->send_packet(pkt); -} - -bool UTP_IsIncomingUTP(UTPGotIncomingConnection *incoming_proc, - SendToProc *send_to_proc, void *send_to_userdata, - const byte *buffer, size_t len, const struct sockaddr *to, socklen_t tolen) -{ - const PackedSockAddr addr((const SOCKADDR_STORAGE*)to, tolen); - - if (len < sizeof(PacketFormat) && len < sizeof(PacketFormatV1)) { - LOG_UTPV("recv %s len:%u too small", addrfmt(addr, addrbuf), (uint)len); - return false; - } - - const PacketFormat* p = (PacketFormat*)buffer; - const PacketFormatV1* p1 = (PacketFormatV1*)buffer; - - const byte version = UTP_IsV1(p1); - const uint32 id = (version == 0) ? p->connid : uint32(p1->connid); - - if (version == 0 && len < sizeof(PacketFormat)) { - LOG_UTPV("recv %s len:%u version:%u too small", addrfmt(addr, addrbuf), (uint)len, version); - return false; - } - - if (version == 1 && len < sizeof(PacketFormatV1)) { - LOG_UTPV("recv %s len:%u version:%u too small", addrfmt(addr, addrbuf), (uint)len, version); - return false; - } - - LOG_UTPV("recv %s len:%u id:%u", addrfmt(addr, addrbuf), (uint)len, id); - - const PacketFormat *pf = (PacketFormat*)p; - const PacketFormatV1 *pf1 = (PacketFormatV1*)p; - - if (version == 0) { - LOG_UTPV("recv id:%u seq_nr:%u ack_nr:%u", id, (uint)pf->seq_nr, (uint)pf->ack_nr); - } else { - LOG_UTPV("recv id:%u seq_nr:%u ack_nr:%u", id, (uint)pf1->seq_nr, (uint)pf1->ack_nr); - } - - const byte flags = version == 0 ? 
pf->flags : pf1->type(); - - for (size_t i = 0; i < g_utp_sockets.GetCount(); i++) { - UTPSocket *conn = g_utp_sockets[i]; - //LOG_UTPV("Examining UTPSocket %s for %s and (seed:%u s:%u r:%u) for %u", - // addrfmt(conn->addr, addrbuf), addrfmt(addr, addrbuf2), conn->conn_seed, conn->conn_id_send, conn->conn_id_recv, id); - if (conn->addr != addr) - continue; - - if (flags == ST_RESET && (conn->conn_id_send == id || conn->conn_id_recv == id)) { - LOG_UTPV("0x%08x: recv RST for existing connection", conn); - if (!conn->userdata || conn->state == CS_FIN_SENT) { - conn->state = CS_DESTROY; - } else { - conn->state = CS_RESET; - } - if (conn->userdata) { - conn->func.on_overhead(conn->userdata, false, len + conn->get_udp_overhead(), - close_overhead); - const int err = conn->state == CS_SYN_SENT ? - ECONNREFUSED : - ECONNRESET; - conn->func.on_error(conn->userdata, err); - } - return true; - } else if (flags != ST_SYN && conn->conn_id_recv == id) { - LOG_UTPV("0x%08x: recv processing", conn); - const size_t read = UTP_ProcessIncoming(conn, buffer, len); - if (conn->userdata) { - conn->func.on_overhead(conn->userdata, false, - (len - read) + conn->get_udp_overhead(), - header_overhead); - } - return true; - } - } - - if (flags == ST_RESET) { - LOG_UTPV("recv RST for unknown connection"); - return true; - } - - const uint32 seq_nr = version == 0 ? 
pf->seq_nr : pf1->seq_nr; - if (flags != ST_SYN) { - for (size_t i = 0; i < g_rst_info.GetCount(); i++) { - if (g_rst_info[i].connid != id) - continue; - if (g_rst_info[i].addr != addr) - continue; - if (seq_nr != g_rst_info[i].ack_nr) - continue; - g_rst_info[i].timestamp = UTP_GetMilliseconds(); - LOG_UTPV("recv not sending RST to non-SYN (stored)"); - return true; - } - if (g_rst_info.GetCount() > RST_INFO_LIMIT) { - LOG_UTPV("recv not sending RST to non-SYN (limit at %u stored)", (uint)g_rst_info.GetCount()); - return true; - } - LOG_UTPV("recv send RST to non-SYN (%u stored)", (uint)g_rst_info.GetCount()); - RST_Info &r = g_rst_info.Append(); - r.addr = addr; - r.connid = id; - r.ack_nr = seq_nr; - r.timestamp = UTP_GetMilliseconds(); - - UTPSocket::send_rst(send_to_proc, send_to_userdata, addr, id, seq_nr, UTP_Random(), version); - return true; - } - - if (incoming_proc) { - LOG_UTPV("Incoming connection from %s uTP version:%u", addrfmt(addr, addrbuf), version); - - // Create a new UTP socket to handle this new connection - UTPSocket *conn = UTP_Create(send_to_proc, send_to_userdata, to, tolen); - // Need to track this value to be able to detect duplicate CONNECTs - conn->conn_seed = id; - // This is value that identifies this connection for them. - conn->conn_id_send = id; - // This is value that identifies this connection for us. 
- conn->conn_id_recv = id+1; - conn->ack_nr = seq_nr; - conn->seq_nr = UTP_Random(); - conn->fast_resend_seq_nr = conn->seq_nr; - - UTP_SetSockopt(conn, SO_UTPVERSION, version); - conn->state = CS_CONNECTED; - - const size_t read = UTP_ProcessIncoming(conn, buffer, len, true); - - LOG_UTPV("0x%08x: recv send connect ACK", conn); - conn->send_ack(true); - - incoming_proc(send_to_userdata, conn); - - // we report overhead after incoming_proc, because the callbacks are setup now - if (conn->userdata) { - // SYN - conn->func.on_overhead(conn->userdata, false, (len - read) + conn->get_udp_overhead(), - header_overhead); - // SYNACK - conn->func.on_overhead(conn->userdata, true, conn->get_overhead(), - ack_overhead); - } - } - - return true; -} - -bool UTP_HandleICMP(const byte* buffer, size_t len, const struct sockaddr *to, socklen_t tolen) -{ - const PackedSockAddr addr((const SOCKADDR_STORAGE*)to, tolen); - - // Want the whole packet so we have connection ID - if (len < sizeof(PacketFormat)) { - return false; - } - - const PacketFormat* p = (PacketFormat*)buffer; - const PacketFormatV1* p1 = (PacketFormatV1*)buffer; - - const byte version = UTP_IsV1(p1); - const uint32 id = (version == 0) ? p->connid : uint32(p1->connid); - - for (size_t i = 0; i < g_utp_sockets.GetCount(); ++i) { - UTPSocket *conn = g_utp_sockets[i]; - if (conn->addr == addr && - conn->conn_id_recv == id) { - // Don't pass on errors for idle/closed connections - if (conn->state != CS_IDLE) { - if (!conn->userdata || conn->state == CS_FIN_SENT) { - LOG_UTPV("0x%08x: icmp packet causing socket destruction", conn); - conn->state = CS_DESTROY; - } else { - conn->state = CS_RESET; - } - if (conn->userdata) { - const int err = conn->state == CS_SYN_SENT ? - ECONNREFUSED : - ECONNRESET; - LOG_UTPV("0x%08x: icmp packet causing error on socket:%d", conn, err); - conn->func.on_error(conn->userdata, err); - } - } - return true; - } - } - return false; -} - -// Write bytes to the UTP socket. 
-// Returns true if the socket is still writable. -bool UTP_Write(UTPSocket *conn, size_t bytes) -{ - assert(conn); - -#ifdef g_log_utp_verbose - size_t param = bytes; -#endif - - if (conn->state != CS_CONNECTED) { - LOG_UTPV("0x%08x: UTP_Write %u bytes = false (not CS_CONNECTED)", conn, (uint)bytes); - return false; - } - - g_current_ms = UTP_GetMilliseconds(); - - conn->update_send_quota(); - - // don't send unless it will all fit in the window - size_t packet_size = conn->get_packet_size(); - size_t num_to_send = min(bytes, packet_size); - while (conn->is_writable(num_to_send)) { - // Send an outgoing packet. - // Also add it to the outgoing of packets that have been sent but not ACKed. - - if (num_to_send == 0) { - LOG_UTPV("0x%08x: UTP_Write %u bytes = true", conn, (uint)param); - return true; - } - bytes -= num_to_send; - - LOG_UTPV("0x%08x: Sending packet. seq_nr:%u ack_nr:%u wnd:%u/%u/%u rcv_win:%u size:%u quota:%d cur_window_packets:%u", - conn, conn->seq_nr, conn->ack_nr, - (uint)(conn->cur_window + num_to_send), - (uint)conn->max_window, (uint)conn->max_window_user, - (uint)conn->last_rcv_win, num_to_send, conn->send_quota / 100, - conn->cur_window_packets); - conn->write_outgoing_packet(num_to_send, ST_DATA); - num_to_send = min(bytes, packet_size); - } - - // mark the socket as not being writable. 
- conn->state = CS_CONNECTED_FULL; - LOG_UTPV("0x%08x: UTP_Write %u bytes = false", conn, (uint)bytes); - return false; -} - -void UTP_RBDrained(UTPSocket *conn) -{ - assert(conn); - - const size_t rcvwin = conn->get_rcv_window(); - - if (rcvwin > conn->last_rcv_win) { - // If last window was 0 send ACK immediately, otherwise should set timer - if (conn->last_rcv_win == 0) { - conn->send_ack(); - } else { - conn->ack_time = g_current_ms + min(conn->ack_time - g_current_ms, DELAYED_ACK_TIME_THRESHOLD); - } - } -} - -void UTP_CheckTimeouts() -{ - g_current_ms = UTP_GetMilliseconds(); - - for (size_t i = 0; i < g_rst_info.GetCount(); i++) { - if ((int)(g_current_ms - g_rst_info[i].timestamp) >= RST_INFO_TIMEOUT) { - g_rst_info.MoveUpLast(i); - i--; - } - } - if (g_rst_info.GetCount() != g_rst_info.GetAlloc()) { - g_rst_info.Compact(); - } - - for (size_t i = 0; i != g_utp_sockets.GetCount(); i++) { - UTPSocket *conn = g_utp_sockets[i]; - conn->check_timeouts(); - - // Check if the object was deleted - if (conn->state == CS_DESTROY) { - LOG_UTPV("0x%08x: Destroying", conn); - UTP_Free(conn); - i--; - } - } -} - -size_t UTP_GetPacketSize(UTPSocket *socket) -{ - return socket->get_packet_size(); -} - -void UTP_GetPeerName(UTPSocket *conn, struct sockaddr *addr, socklen_t *addrlen) -{ - assert(conn); - - socklen_t len; - const SOCKADDR_STORAGE sa = conn->addr.get_sockaddr_storage(&len); - *addrlen = min(len, *addrlen); - memcpy(addr, &sa, *addrlen); -} - -void UTP_GetDelays(UTPSocket *conn, int32 *ours, int32 *theirs, uint32 *age) -{ - assert(conn); - - if (ours) *ours = conn->our_hist.get_value(); - if (theirs) *theirs = conn->their_hist.get_value(); - if (age) *age = g_current_ms - conn->last_measured_delay; -} - -#ifdef _DEBUG -void UTP_GetStats(UTPSocket *conn, UTPStats *stats) -{ - assert(conn); - - *stats = conn->_stats; -} -#endif // _DEBUG - -void UTP_GetGlobalStats(UTPGlobalStats *stats) -{ - *stats = _global_stats; -} - -// Close the UTP socket. 
-// It is not valid for the upper layer to refer to socket after it is closed. -// Data will keep to try being delivered after the close. -void UTP_Close(UTPSocket *conn) -{ - assert(conn); - - assert(conn->state != CS_DESTROY_DELAY && conn->state != CS_FIN_SENT && conn->state != CS_DESTROY); - - LOG_UTPV("0x%08x: UTP_Close in state:%s", conn, statenames[conn->state]); - - switch(conn->state) { - case CS_CONNECTED: - case CS_CONNECTED_FULL: - conn->state = CS_FIN_SENT; - conn->write_outgoing_packet(0, ST_FIN); - break; - - case CS_SYN_SENT: - conn->rto_timeout = UTP_GetMilliseconds() + min(conn->rto * 2, 60); - case CS_GOT_FIN: - conn->state = CS_DESTROY_DELAY; - break; - - default: - conn->state = CS_DESTROY; - break; - } -} +/* + * Copyright (c) 2010-2013 BitTorrent, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include // for UINT_MAX + +#include "utp_types.h" +#include "utp_packedsockaddr.h" +#include "utp_internal.h" +#include "utp_hash.h" + +#define TIMEOUT_CHECK_INTERVAL 500 + +// number of bytes to increase max window size by, per RTT. This is +// scaled down linearly proportional to off_target. i.e. if all packets +// in one window have 0 delay, window size will increase by this number. +// Typically it's less. TCP increases one MSS per RTT, which is 1500 +#define MAX_CWND_INCREASE_BYTES_PER_RTT 3000 +#define CUR_DELAY_SIZE 3 +// experiments suggest that a clock skew of 10 ms per 325 seconds +// is not impossible. Reset delay_base every 13 minutes. The clock +// skew is dealt with by observing the delay base in the other +// direction, and adjusting our own upwards if the opposite direction +// delay base keeps going down +#define DELAY_BASE_HISTORY 13 +#define MAX_WINDOW_DECAY 100 // ms + +#define REORDER_BUFFER_SIZE 32 +#define REORDER_BUFFER_MAX_SIZE 1024 +#define OUTGOING_BUFFER_MAX_SIZE 1024 + +#define PACKET_SIZE 1435 + +// this is the minimum max_window value. 
It can never drop below this +#define MIN_WINDOW_SIZE 10 + +// if we receive 4 or more duplicate acks, we resend the packet +// that hasn't been acked yet +#define DUPLICATE_ACKS_BEFORE_RESEND 3 + +#define RST_INFO_TIMEOUT 10000 +#define RST_INFO_LIMIT 1000 +// 29 seconds determined from measuring many home NAT devices +#define KEEPALIVE_INTERVAL 29000 + + +#define SEQ_NR_MASK 0xFFFF +#define ACK_NR_MASK 0xFFFF +#define TIMESTAMP_MASK 0xFFFFFFFF + +#define DIV_ROUND_UP(num, denom) ((num + denom - 1) / denom) + +// The totals are derived from the following data: +// 45: IPv6 address including embedded IPv4 address +// 11: Scope Id +// 2: Brackets around IPv6 address when port is present +// 6: Port (including colon) +// 1: Terminating null byte +char addrbuf[65]; +#define addrfmt(x, s) x.fmt(s, sizeof(s)) + + +#if (defined(__SVR4) && defined(__sun)) + #pragma pack(1) +#else + #pragma pack(push,1) +#endif + + +// these packet sizes are including the uTP header wich +// is either 20 or 23 bytes depending on version +#define PACKET_SIZE_EMPTY_BUCKET 0 +#define PACKET_SIZE_EMPTY 23 +#define PACKET_SIZE_SMALL_BUCKET 1 +#define PACKET_SIZE_SMALL 373 +#define PACKET_SIZE_MID_BUCKET 2 +#define PACKET_SIZE_MID 723 +#define PACKET_SIZE_BIG_BUCKET 3 +#define PACKET_SIZE_BIG 1400 +#define PACKET_SIZE_HUGE_BUCKET 4 + +struct PACKED_ATTRIBUTE PacketFormatV1 { + // packet_type (4 high bits) + // protocol version (4 low bits) + byte ver_type; + byte version() const { return ver_type & 0xf; } + byte type() const { return ver_type >> 4; } + void set_version(byte v) { ver_type = (ver_type & 0xf0) | (v & 0xf); } + void set_type(byte t) { ver_type = (ver_type & 0xf) | (t << 4); } + + // Type of the first extension header + byte ext; + // connection ID + uint16_big connid; + uint32_big tv_usec; + uint32_big reply_micro; + // receive window size in bytes + uint32_big windowsize; + // Sequence number + uint16_big seq_nr; + // Acknowledgment number + uint16_big ack_nr; +}; + +struct 
PACKED_ATTRIBUTE PacketFormatAckV1 { + PacketFormatV1 pf; + byte ext_next; + byte ext_len; + byte acks[4]; +}; + +#if (defined(__SVR4) && defined(__sun)) + #pragma pack(0) +#else + #pragma pack(pop) +#endif + +enum { + ST_DATA = 0, // Data packet. + ST_FIN = 1, // Finalize the connection. This is the last packet. + ST_STATE = 2, // State packet. Used to transmit an ACK with no data. + ST_RESET = 3, // Terminate connection forcefully. + ST_SYN = 4, // Connect SYN + ST_NUM_STATES, // used for bounds checking +}; + +static const cstr flagnames[] = { + "ST_DATA","ST_FIN","ST_STATE","ST_RESET","ST_SYN" +}; + +enum CONN_STATE { + CS_UNINITIALIZED = 0, + CS_IDLE, + CS_SYN_SENT, + CS_CONNECTED, + CS_CONNECTED_FULL, + CS_GOT_FIN, + CS_DESTROY_DELAY, + CS_FIN_SENT, + CS_RESET, + CS_DESTROY +}; + +static const cstr statenames[] = { + "UNINITIALIZED", "IDLE","SYN_SENT","CONNECTED","CONNECTED_FULL","GOT_FIN","DESTROY_DELAY","FIN_SENT","RESET","DESTROY" +}; + +struct OutgoingPacket { + size_t length; + size_t payload; + uint64 time_sent; // microseconds + uint transmissions:31; + bool need_resend:1; + byte data[1]; +}; + +struct SizableCircularBuffer { + // This is the mask. Since it's always a power of 2, adding 1 to this value will return the size. + size_t mask; + // This is the elements that the circular buffer points to + void **elements; + + void *get(size_t i) { assert(elements); return elements ? elements[i & mask] : NULL; } + void put(size_t i, void *data) { assert(elements); elements[i&mask] = data; } + + void grow(size_t item, size_t index); + void ensure_size(size_t item, size_t index) { if (index > mask) grow(item, index); } + size_t size() { return mask + 1; } +}; + +// Item contains the element we want to make space for +// index is the index in the list. +void SizableCircularBuffer::grow(size_t item, size_t index) +{ + // Figure out the new size. 
+ size_t size = mask + 1; + do size *= 2; while (index >= size); + + // Allocate the new buffer + void **buf = (void**)calloc(size, sizeof(void*)); + + size--; + + // Copy elements from the old buffer to the new buffer + for (size_t i = 0; i <= mask; i++) { + buf[(item - index + i) & size] = get(item - index + i); + } + + // Swap to the newly allocated buffer + mask = size; + free(elements); + elements = buf; +} + +// compare if lhs is less than rhs, taking wrapping +// into account. if lhs is close to UINT_MAX and rhs +// is close to 0, lhs is assumed to have wrapped and +// considered smaller +bool wrapping_compare_less(uint32 lhs, uint32 rhs, uint32 mask) +{ + // distance walking from lhs to rhs, downwards + const uint32 dist_down = (lhs - rhs) & mask; + // distance walking from lhs to rhs, upwards + const uint32 dist_up = (rhs - lhs) & mask; + + // if the distance walking up is shorter, lhs + // is less than rhs. If the distance walking down + // is shorter, then rhs is less than lhs + return dist_up < dist_down; +} + +struct DelayHist { + uint32 delay_base; + + // this is the history of delay samples, + // normalized by using the delay_base. These + // values are always greater than 0 and measures + // the queuing delay in microseconds + uint32 cur_delay_hist[CUR_DELAY_SIZE]; + size_t cur_delay_idx; + + // this is the history of delay_base. It's + // a number that doesn't have an absolute meaning + // only relative. It doesn't make sense to initialize + // it to anything other than values relative to + // what's been seen in the real world. 
+ uint32 delay_base_hist[DELAY_BASE_HISTORY]; + size_t delay_base_idx; + // the time when we last stepped the delay_base_idx + uint64 delay_base_time; + + bool delay_base_initialized; + + void clear(uint64 current_ms) + { + delay_base_initialized = false; + delay_base = 0; + cur_delay_idx = 0; + delay_base_idx = 0; + delay_base_time = current_ms; + for (size_t i = 0; i < CUR_DELAY_SIZE; i++) { + cur_delay_hist[i] = 0; + } + for (size_t i = 0; i < DELAY_BASE_HISTORY; i++) { + delay_base_hist[i] = 0; + } + } + + void shift(const uint32 offset) + { + // the offset should never be "negative" + // assert(offset < 0x10000000); + + // increase all of our base delays by this amount + // this is used to take clock skew into account + // by observing the other side's changes in its base_delay + for (size_t i = 0; i < DELAY_BASE_HISTORY; i++) { + delay_base_hist[i] += offset; + } + delay_base += offset; + } + + void add_sample(const uint32 sample, uint64 current_ms) + { + // The two clocks (in the two peers) are assumed not to + // progress at the exact same rate. They are assumed to be + // drifting, which causes the delay samples to contain + // a systematic error, either they are under- + // estimated or over-estimated. This is why we update the + // delay_base every two minutes, to adjust for this. + + // This means the values will keep drifting and eventually wrap. + // We can cross the wrapping boundry in two directions, either + // going up, crossing the highest value, or going down, crossing 0. 
+ + // if the delay_base is close to the max value and sample actually + // wrapped on the other end we would see something like this: + // delay_base = 0xffffff00, sample = 0x00000400 + // sample - delay_base = 0x500 which is the correct difference + + // if the delay_base is instead close to 0, and we got an even lower + // sample (that will eventually update the delay_base), we may see + // something like this: + // delay_base = 0x00000400, sample = 0xffffff00 + // sample - delay_base = 0xfffffb00 + // this needs to be interpreted as a negative number and the actual + // recorded delay should be 0. + + // It is important that all arithmetic that assume wrapping + // is done with unsigned intergers. Signed integers are not guaranteed + // to wrap the way unsigned integers do. At least GCC takes advantage + // of this relaxed rule and won't necessarily wrap signed ints. + + // remove the clock offset and propagation delay. + // delay base is min of the sample and the current + // delay base. This min-operation is subject to wrapping + // and care needs to be taken to correctly choose the + // true minimum. + + // specifically the problem case is when delay_base is very small + // and sample is very large (because it wrapped past zero), sample + // needs to be considered the smaller + + if (!delay_base_initialized) { + // delay_base being 0 suggests that we haven't initialized + // it or its history with any real measurements yet. Initialize + // everything with this sample. + for (size_t i = 0; i < DELAY_BASE_HISTORY; i++) { + // if we don't have a value, set it to the current sample + delay_base_hist[i] = sample; + continue; + } + delay_base = sample; + delay_base_initialized = true; + } + + if (wrapping_compare_less(sample, delay_base_hist[delay_base_idx], TIMESTAMP_MASK)) { + // sample is smaller than the current delay_base_hist entry + // update it + delay_base_hist[delay_base_idx] = sample; + } + + // is sample lower than delay_base? 
If so, update delay_base + if (wrapping_compare_less(sample, delay_base, TIMESTAMP_MASK)) { + // sample is smaller than the current delay_base + // update it + delay_base = sample; + } + + // this operation may wrap, and is supposed to + const uint32 delay = sample - delay_base; + // sanity check. If this is triggered, something fishy is going on + // it means the measured sample was greater than 32 seconds! + //assert(delay < 0x2000000); + + cur_delay_hist[cur_delay_idx] = delay; + cur_delay_idx = (cur_delay_idx + 1) % CUR_DELAY_SIZE; + + // once every minute + if (current_ms - delay_base_time > 60 * 1000) { + delay_base_time = current_ms; + delay_base_idx = (delay_base_idx + 1) % DELAY_BASE_HISTORY; + // clear up the new delay base history spot by initializing + // it to the current sample, then update it + delay_base_hist[delay_base_idx] = sample; + delay_base = delay_base_hist[0]; + // Assign the lowest delay in the last 2 minutes to delay_base + for (size_t i = 0; i < DELAY_BASE_HISTORY; i++) { + if (wrapping_compare_less(delay_base_hist[i], delay_base, TIMESTAMP_MASK)) + delay_base = delay_base_hist[i]; + } + } + } + + uint32 get_value() + { + uint32 value = UINT_MAX; + for (size_t i = 0; i < CUR_DELAY_SIZE; i++) { + value = min(cur_delay_hist[i], value); + } + // value could be UINT_MAX if we have no samples yet... + return value; + } +}; + +struct UTPSocket { + ~UTPSocket(); + + PackedSockAddr addr; + utp_context *ctx; + + int ida; //for ack socket list + + uint16 retransmit_count; + + uint16 reorder_count; + byte duplicate_ack; + + // the number of packets in the send queue. 
Packets that haven't + // yet been sent count as well as packets marked as needing resend + // the oldest un-acked packet in the send queue is seq_nr - cur_window_packets + uint16 cur_window_packets; + + // how much of the window is used, number of bytes in-flight + // packets that have not yet been sent do not count, packets + // that are marked as needing to be re-sent (due to a timeout) + // don't count either + size_t cur_window; + // maximum window size, in bytes + size_t max_window; + // UTP_SNDBUF setting, in bytes + size_t opt_sndbuf; + // UTP_RCVBUF setting, in bytes + size_t opt_rcvbuf; + + // this is the target delay, in microseconds + // for this socket. defaults to 100000. + size_t target_delay; + + // Is a FIN packet in the reassembly buffer? + bool got_fin:1; + // Timeout procedure + bool fast_timeout:1; + + // max receive window for other end, in bytes + size_t max_window_user; + CONN_STATE state; + // TickCount when we last decayed window (wraps) + int32 last_rwin_decay; + + // the sequence number of the FIN packet. This field is only set + // when we have received a FIN, and the flag field has the FIN flag set. + // it is used to know when it is safe to destroy the socket, we must have + // received all packets up to this sequence number first. + uint16 eof_pkt; + + // All sequence numbers up to including this have been properly received + // by us + uint16 ack_nr; + // This is the sequence number for the next packet to be sent. + uint16 seq_nr; + + uint16 timeout_seq_nr; + + // This is the sequence number of the next packet we're allowed to + // do a fast resend with. This makes sure we only do a fast-resend + // once per packet. We can resend the packet with this sequence number + // or any later packet (with a higher sequence number). 
+ uint16 fast_resend_seq_nr;
+
+ uint32 reply_micro;
+
+ uint64 last_got_packet;
+ uint64 last_sent_packet;
+ uint64 last_measured_delay;
+
+ // timestamp of the last time the cwnd was full
+ // this is used to prevent the congestion window
+ // from growing when we're not sending at capacity
+ mutable uint64 last_maxed_out_window;
+
+ void *userdata;
+
+ // Round trip time
+ uint rtt;
+ // Round trip time variance
+ uint rtt_var;
+ // Round trip timeout
+ uint rto;
+ DelayHist rtt_hist;
+ uint retransmit_timeout;
+ // The RTO timer will timeout here.
+ uint64 rto_timeout;
+ // When the window size is set to zero, start this timer. It will send a new packet every 30secs.
+ uint64 zerowindow_time;
+
+ uint32 conn_seed;
+ // Connection ID for packets I receive
+ uint32 conn_id_recv;
+ // Connection ID for packets I send
+ uint32 conn_id_send;
+ // Last rcv window we advertised, in bytes
+ size_t last_rcv_win;
+
+ DelayHist our_hist;
+ DelayHist their_hist;
+
+ // extension bytes from SYN packet
+ byte extensions[8];
+
+ // MTU Discovery
+ // time when we should restart the MTU discovery
+ uint64 mtu_discover_time;
+ // ceiling and floor of binary search. last is the mtu size
+ // we're currently using
+ uint32 mtu_ceiling, mtu_floor, mtu_last;
+ // we only ever have a single probe in flight at any given time.
+ // this is the sequence number of that probe, and the size of
+ // that packet
+ uint32 mtu_probe_seq, mtu_probe_size;
+
+ // this is the average delay samples, as compared to the initial
+ // sample. It's averaged over 5 seconds
+ int32 average_delay;
+ // this is the sum of all the delay samples
+ // we've made recently. The important distinction
+ // of these samples is that they are all made compared
+ // to the initial sample, this is to deal with
+ // wrapping in a simple way.
+ int64 current_delay_sum;
+ // number of samples in current_delay_sum
+ int current_delay_samples;
+ // initialized to 0, set to the first raw delay sample
+ // each sample that's added to current_delay_sum
+ // is subtracted from the value first, to make it
+ // a delay relative to this sample
+ uint32 average_delay_base;
+ // the next time we should add an average delay
+ // sample into average_delay_hist
+ uint64 average_sample_time;
+ // the estimated clock drift between our computer
+ // and the endpoint computer. The unit is microseconds
+ // per 5 seconds
+ int32 clock_drift;
+ // just used for logging
+ int32 clock_drift_raw;
+
+ SizableCircularBuffer inbuf, outbuf;
+
+ #ifdef _DEBUG
+ // Public per-socket statistics, returned by utp_get_stats()
+ utp_socket_stats _stats;
+ #endif
+
+ // true if we're in slow-start (exponential growth) phase
+ bool slow_start;
+
+ // the slow-start threshold, in bytes
+ size_t ssthresh;
+
+ // printf-style per-socket logger; prefixes the socket pointer,
+ // peer address and conn_id_recv before forwarding to ctx->log()
+ void log(int level, char const *fmt, ...)
+ {
+ va_list va;
+ char buf[4096], buf2[4096];
+
+ va_start(va, fmt);
+ vsnprintf(buf, 4096, fmt, va);
+ va_end(va);
+ buf[4095] = '\0';
+
+ snprintf(buf2, 4096, "%p %s %06d %s", this, addrfmt(addr, addrbuf), conn_id_recv, buf);
+ buf2[4095] = '\0';
+
+ ctx->log(level, this, buf2);
+ }
+
+ void schedule_ack();
+
+ // called every time mtu_floor or mtu_ceiling are adjusted
+ void mtu_search_update();
+ void mtu_reset();
+
+ // Calculates the current receive window
+ size_t get_rcv_window()
+ {
+ // Trim window down according to what's already in buffer.
+ const size_t numbuf = utp_call_get_read_buffer_size(this->ctx, this);
+ assert((int)numbuf >= 0);
+ return opt_rcvbuf > numbuf ? opt_rcvbuf - numbuf : 0;
+ }
+
+ // Test if we're ready to decay max_window
+ // XXX this breaks when spaced by > INT_MAX/2, which is 49
+ // days; the failure mode in that case is we do an extra decay
+ // or fail to do one when we really shouldn't.
+ bool can_decay_win(int32 msec) const
+ {
+ return msec - last_rwin_decay >= MAX_WINDOW_DECAY;
+ }
+
+ // If we can, decay max window: halve it (clamped to
+ // MIN_WINDOW_SIZE), leave slow-start, and record the decay time
+ void maybe_decay_win(uint64 current_ms)
+ {
+ if (can_decay_win(current_ms)) {
+ // TCP uses 0.5
+ max_window = (size_t)(max_window * .5);
+ last_rwin_decay = current_ms;
+ if (max_window < MIN_WINDOW_SIZE)
+ max_window = MIN_WINDOW_SIZE;
+ slow_start = false;
+ ssthresh = max_window;
+ }
+ }
+
+ size_t get_header_size() const
+ {
+ return sizeof(PacketFormatV1);
+ }
+
+ // Ask the application for the UDP MTU toward this peer
+ size_t get_udp_mtu()
+ {
+ socklen_t len;
+ SOCKADDR_STORAGE sa = addr.get_sockaddr_storage(&len);
+ return utp_call_get_udp_mtu(this->ctx, this, (const struct sockaddr *)&sa, len);
+ }
+
+ size_t get_udp_overhead()
+ {
+ socklen_t len;
+ SOCKADDR_STORAGE sa = addr.get_sockaddr_storage(&len);
+ return utp_call_get_udp_overhead(this->ctx, this, (const struct sockaddr *)&sa, len);
+ }
+
+ size_t get_overhead()
+ {
+ return get_udp_overhead() + get_header_size();
+ }
+
+ void send_data(byte* b, size_t length, bandwidth_type_t type, uint32 flags = 0);
+
+ void send_ack(bool synack = false);
+
+ void send_keep_alive();
+
+ static void send_rst(utp_context *ctx,
+ const PackedSockAddr &addr, uint32 conn_id_send,
+ uint16 ack_nr, uint16 seq_nr);
+
+ void send_packet(OutgoingPacket *pkt);
+
+ bool is_full(int bytes = -1);
+ bool flush_packets();
+ void write_outgoing_packet(size_t payload, uint flags, struct utp_iovec *iovec, size_t num_iovecs);
+
+ #ifdef _DEBUG
+ void check_invariant();
+ #endif
+
+ void check_timeouts();
+ int ack_packet(uint16 seq);
+ size_t selective_ack_bytes(uint base, const byte* mask, byte len, int64& min_rtt);
+ void selective_ack(uint base, const byte *mask, byte len);
+ void apply_ccontrol(size_t bytes_acked, uint32 actual_delay, int64 min_rtt);
+ size_t get_packet_size() const;
+};
+
+// Remove conn from its context's pending-ACK list; no-op when
+// conn->ida < 0 (i.e. the socket is not currently in the list)
+void removeSocketFromAckList(UTPSocket *conn)
+{
+ if (conn->ida >= 0)
+ {
+ UTPSocket *last =
conn->ctx->ack_sockets[conn->ctx->ack_sockets.GetCount() - 1];
+
+ assert(last->ida < conn->ctx->ack_sockets.GetCount());
+ assert(conn->ctx->ack_sockets[last->ida] == last);
+ // O(1) removal: move the last element into the vacated slot
+ last->ida = conn->ida;
+ conn->ctx->ack_sockets[conn->ida] = last;
+ conn->ida = -1;
+
+ // Decrease the count
+ conn->ctx->ack_sockets.SetCount(conn->ctx->ack_sockets.GetCount() - 1);
+ }
+}
+
+// Bucket an outgoing datagram size into the context-wide send histogram
+static void utp_register_sent_packet(utp_context *ctx, size_t length)
+{
+ if (length <= PACKET_SIZE_MID) {
+ if (length <= PACKET_SIZE_EMPTY) {
+ ctx->context_stats._nraw_send[PACKET_SIZE_EMPTY_BUCKET]++;
+ } else if (length <= PACKET_SIZE_SMALL) {
+ ctx->context_stats._nraw_send[PACKET_SIZE_SMALL_BUCKET]++;
+ } else
+ ctx->context_stats._nraw_send[PACKET_SIZE_MID_BUCKET]++;
+ } else {
+ if (length <= PACKET_SIZE_BIG) {
+ ctx->context_stats._nraw_send[PACKET_SIZE_BIG_BUCKET]++;
+ } else
+ ctx->context_stats._nraw_send[PACKET_SIZE_HUGE_BUCKET]++;
+ }
+}
+
+// Record stats and hand the datagram to the application's sendto callback
+void send_to_addr(utp_context *ctx, const byte *p, size_t len, const PackedSockAddr &addr, int flags = 0)
+{
+ socklen_t tolen;
+ SOCKADDR_STORAGE to = addr.get_sockaddr_storage(&tolen);
+ utp_register_sent_packet(ctx, len);
+ utp_call_sendto(ctx, NULL, p, len, (const struct sockaddr *)&to, tolen, flags);
+}
+
+// Queue this socket for a deferred ACK (idempotent; ida tracks list slot)
+void UTPSocket::schedule_ack()
+{
+ if (ida == -1){
+ #if UTP_DEBUG_LOGGING
+ log(UTP_LOG_DEBUG, "schedule_ack");
+ #endif
+ ida = ctx->ack_sockets.Append(this);
+ } else {
+ #if UTP_DEBUG_LOGGING
+ log(UTP_LOG_DEBUG, "schedule_ack: already in list");
+ #endif
+ }
+}
+
+// Timestamp an already-built packet and transmit it; also cancels any
+// pending deferred ACK since every outgoing packet carries ack_nr
+void UTPSocket::send_data(byte* b, size_t length, bandwidth_type_t type, uint32 flags)
+{
+ // time stamp this packet with local time, the stamp goes into
+ // the header of every packet at the 8th byte for 8 bytes :
+ // two integers, check packet.h for more
+ uint64 time = utp_call_get_microseconds(ctx, this);
+
+ PacketFormatV1* b1 = (PacketFormatV1*)b;
+ b1->tv_usec = (uint32)time;
+ b1->reply_micro = reply_micro;
+
+ last_sent_packet = ctx->current_ms;
+
+ #ifdef _DEBUG
+ _stats.nbytes_xmit += length;
+ ++_stats.nxmit;
+ #endif
+
+ if (ctx->callbacks[UTP_ON_OVERHEAD_STATISTICS]) {
+ size_t n;
+ if (type == payload_bandwidth) {
+ // if this packet carries payload, just
+ // count the header as overhead
+ type = header_overhead;
+ n = get_overhead();
+ } else {
+ n = length + get_udp_overhead();
+ }
+ utp_call_on_overhead_statistics(ctx, this, true, n, type);
+ }
+#if UTP_DEBUG_LOGGING
+ int flags2 = b1->type();
+ uint16 seq_nr = b1->seq_nr;
+ uint16 ack_nr = b1->ack_nr;
+ log(UTP_LOG_DEBUG, "send %s len:%u id:%u timestamp:"I64u" reply_micro:%u flags:%s seq_nr:%u ack_nr:%u",
+ addrfmt(addr, addrbuf), (uint)length, conn_id_send, time, reply_micro, flagnames[flags2],
+ seq_nr, ack_nr);
+#endif
+ send_to_addr(ctx, b, length, addr, flags);
+ removeSocketFromAckList(this);
+}
+
+// Send an ST_STATE (ACK) packet; uses the EACK extension when packets
+// have been received out of order (reorder_count != 0)
+void UTPSocket::send_ack(bool synack)
+{
+ PacketFormatAckV1 pfa;
+ zeromem(&pfa);
+
+ size_t len;
+ last_rcv_win = get_rcv_window();
+ pfa.pf.set_version(1);
+ pfa.pf.set_type(ST_STATE);
+ pfa.pf.ext = 0;
+ pfa.pf.connid = conn_id_send;
+ pfa.pf.ack_nr = ack_nr;
+ pfa.pf.seq_nr = seq_nr;
+ pfa.pf.windowsize = (uint32)last_rcv_win;
+ len = sizeof(PacketFormatV1);
+
+ // we never need to send EACK for connections
+ // that are shutting down
+ if (reorder_count != 0 && state < CS_GOT_FIN) {
+ // if reorder count > 0, send an EACK.
+ // reorder count should always be 0
+ // for synacks, so this should not be
+ // sent as a synack
+ assert(!synack);
+ pfa.pf.ext = 1;
+ pfa.ext_next = 0;
+ pfa.ext_len = 4;
+ uint m = 0;
+
+ // reorder count should only be non-zero
+ // if the packet ack_nr + 1 has not yet
+ // been received
+ assert(inbuf.get(ack_nr + 1) == NULL);
+ size_t window = min(14+16, inbuf.size());
+ // Generate bit mask of segments received.
+ for (size_t i = 0; i < window; i++) {
+ if (inbuf.get(ack_nr + i + 2) != NULL) {
+ m |= 1 << i;
+
+ #if UTP_DEBUG_LOGGING
+ log(UTP_LOG_DEBUG, "EACK packet [%u]", ack_nr + i + 2);
+ #endif
+ }
+ }
+ pfa.acks[0] = (byte)m;
+ pfa.acks[1] = (byte)(m >> 8);
+ pfa.acks[2] = (byte)(m >> 16);
+ pfa.acks[3] = (byte)(m >> 24);
+ len += 4 + 2;
+
+ #if UTP_DEBUG_LOGGING
+ log(UTP_LOG_DEBUG, "Sending EACK %u [%u] bits:[%032b]", ack_nr, conn_id_send, m);
+ #endif
+ } else {
+ #if UTP_DEBUG_LOGGING
+ log(UTP_LOG_DEBUG, "Sending ACK %u [%u]", ack_nr, conn_id_send);
+ #endif
+ }
+
+ send_data((byte*)&pfa, len, ack_overhead);
+ removeSocketFromAckList(this);
+}
+
+// Keep-alive: send an ACK for (ack_nr - 1), which the peer treats as a
+// duplicate ACK, proving liveness without advancing any state
+void UTPSocket::send_keep_alive()
+{
+ ack_nr--;
+
+ #if UTP_DEBUG_LOGGING
+ log(UTP_LOG_DEBUG, "Sending KeepAlive ACK %u [%u]", ack_nr, conn_id_send);
+ #endif
+
+ send_ack();
+ ack_nr++;
+}
+
+// Send an ST_RESET packet (static: usable without an established socket)
+void UTPSocket::send_rst(utp_context *ctx,
+ const PackedSockAddr &addr, uint32 conn_id_send, uint16 ack_nr, uint16 seq_nr)
+{
+ PacketFormatV1 pf1;
+ zeromem(&pf1);
+
+ size_t len;
+ pf1.set_version(1);
+ pf1.set_type(ST_RESET);
+ pf1.ext = 0;
+ pf1.connid = conn_id_send;
+ pf1.ack_nr = ack_nr;
+ pf1.seq_nr = seq_nr;
+ pf1.windowsize = 0;
+ len = sizeof(PacketFormatV1);
+
+// LOG_DEBUG("%s: Sending RST id:%u seq_nr:%u ack_nr:%u", addrfmt(addr, addrbuf), conn_id_send, seq_nr, ack_nr);
+// LOG_DEBUG("send %s len:%u id:%u", addrfmt(addr, addrbuf), (uint)len, conn_id_send);
+ send_to_addr(ctx, (const byte*)&pf1, len, addr);
+}
+
+// Transmit (or retransmit) one queued packet, possibly as an MTU probe
+void UTPSocket::send_packet(OutgoingPacket *pkt)
+{
+ // only count against the quota the first time we
+ // send the packet. Don't enforce quota when closing
+ // a socket.
Only enforce the quota when we're sending + // at slow rates (max window < packet size) + + //size_t max_send = min(max_window, opt_sndbuf, max_window_user); + time_t cur_time = utp_call_get_milliseconds(this->ctx, this); + + if (pkt->transmissions == 0 || pkt->need_resend) { + cur_window += pkt->payload; + } + + pkt->need_resend = false; + + PacketFormatV1* p1 = (PacketFormatV1*)pkt->data; + p1->ack_nr = ack_nr; + pkt->time_sent = utp_call_get_microseconds(this->ctx, this); + + //socklen_t salen; + //SOCKADDR_STORAGE sa = addr.get_sockaddr_storage(&salen); + bool use_as_mtu_probe = false; + + // TODO: this is subject to nasty wrapping issues! Below as well + if (mtu_discover_time < cur_time) { + // it's time to reset our MTU assupmtions + // and trigger a new search + mtu_reset(); + } + + // don't use packets that are larger then mtu_ceiling + // as probes, since they were probably used as probes + // already and failed, now we need it to fragment + // just to get it through + // if seq_nr == 1, the probe would end up being 0 + // which is a magic number representing no-probe + // that why we don't send a probe for a packet with + // sequence number 0 + if (mtu_floor < mtu_ceiling + && pkt->length > mtu_floor + && pkt->length <= mtu_ceiling + && mtu_probe_seq == 0 + && seq_nr != 1 + && pkt->transmissions == 0) { + + // we've already incremented seq_nr + // for this packet + mtu_probe_seq = (seq_nr - 1) & ACK_NR_MASK; + mtu_probe_size = pkt->length; + assert(pkt->length >= mtu_floor); + assert(pkt->length <= mtu_ceiling); + use_as_mtu_probe = true; + log(UTP_LOG_MTU, "MTU [PROBE] floor:%d ceiling:%d current:%d" + , mtu_floor, mtu_ceiling, mtu_probe_size); + } + + pkt->transmissions++; + send_data((byte*)pkt->data, pkt->length, + (state == CS_SYN_SENT) ? connect_overhead + : (pkt->transmissions == 1) ? payload_bandwidth + : retransmit_overhead, use_as_mtu_probe ? 
UTP_UDP_DONTFRAG : 0); +} + +bool UTPSocket::is_full(int bytes) +{ + size_t packet_size = get_packet_size(); + if (bytes < 0) bytes = packet_size; + else if (bytes > packet_size) bytes = packet_size; + size_t max_send = min(max_window, opt_sndbuf, max_window_user); + + // subtract one to save space for the FIN packet + if (cur_window_packets >= OUTGOING_BUFFER_MAX_SIZE - 1) { + + #if UTP_DEBUG_LOGGING + log(UTP_LOG_DEBUG, "is_full:false cur_window_packets:%d MAX:%d", cur_window_packets, OUTGOING_BUFFER_MAX_SIZE - 1); + #endif + + last_maxed_out_window = ctx->current_ms; + return true; + } + + #if UTP_DEBUG_LOGGING + log(UTP_LOG_DEBUG, "is_full:%s. cur_window:%u pkt:%u max:%u cur_window_packets:%u max_window:%u" + , (cur_window + bytes > max_send) ? "true" : "false" + , cur_window, bytes, max_send, cur_window_packets + , max_window); + #endif + + if (cur_window + bytes > max_send) { + last_maxed_out_window = ctx->current_ms; + return true; + } + return false; +} + +bool UTPSocket::flush_packets() +{ + size_t packet_size = get_packet_size(); + + // send packets that are waiting on the pacer to be sent + // i has to be an unsigned 16 bit counter to wrap correctly + // signed types are not guaranteed to wrap the way you expect + for (uint16 i = seq_nr - cur_window_packets; i != seq_nr; ++i) { + OutgoingPacket *pkt = (OutgoingPacket*)outbuf.get(i); + if (pkt == 0 || (pkt->transmissions > 0 && pkt->need_resend == false)) continue; + // have we run out of quota? + if (is_full()) return true; + + // Nagle check + // don't send the last packet if we have one packet in-flight + // and the current packet is still smaller than packet_size. 
+ if (i != ((seq_nr - 1) & ACK_NR_MASK) ||
+ cur_window_packets == 1 ||
+ pkt->payload >= packet_size) {
+ send_packet(pkt);
+ }
+ }
+ return false;
+}
+
+// @payload: number of bytes to send
+// @flags: either ST_DATA, or ST_FIN
+// @iovec: base address of iovec array
+// @num_iovecs: number of iovecs in array
+void UTPSocket::write_outgoing_packet(size_t payload, uint flags, struct utp_iovec *iovec, size_t num_iovecs)
+{
+ // Setup initial timeout timer
+ if (cur_window_packets == 0) {
+ retransmit_timeout = rto;
+ rto_timeout = ctx->current_ms + retransmit_timeout;
+ assert(cur_window == 0);
+ }
+
+ size_t packet_size = get_packet_size();
+ do {
+ assert(cur_window_packets < OUTGOING_BUFFER_MAX_SIZE);
+ assert(flags == ST_DATA || flags == ST_FIN);
+
+ size_t added = 0;
+
+ OutgoingPacket *pkt = NULL;
+
+ if (cur_window_packets > 0) {
+ pkt = (OutgoingPacket*)outbuf.get(seq_nr - 1);
+ }
+
+ const size_t header_size = get_header_size();
+ bool append = true;
+
+ // if there's any room left in the last packet in the window
+ // and it hasn't been sent yet, fill that frame first
+ if (payload && pkt && !pkt->transmissions && pkt->payload < packet_size) {
+ // Use the previous unsent packet
+ added = min(payload + pkt->payload, max(packet_size, pkt->payload)) - pkt->payload;
+ // NOTE(review): realloc result overwrites pkt directly; on OOM
+ // this would lose the original pointer — upstream style, left as-is
+ pkt = (OutgoingPacket*)realloc(pkt,
+ (sizeof(OutgoingPacket) - 1) +
+ header_size +
+ pkt->payload + added);
+ outbuf.put(seq_nr - 1, pkt);
+ append = false;
+ assert(!pkt->need_resend);
+ } else {
+ // Create the packet to send.
+ added = payload;
+ pkt = (OutgoingPacket*)malloc((sizeof(OutgoingPacket) - 1) +
+ header_size +
+ added);
+ pkt->payload = 0;
+ pkt->transmissions = 0;
+ pkt->need_resend = false;
+ }
+
+ if (added) {
+ assert(flags == ST_DATA);
+
+ // Fill it with data from the upper layer.
+ unsigned char *p = pkt->data + header_size + pkt->payload;
+ size_t needed = added;
+
+ /*
+ while (needed) {
+ *p = *(char*)iovec[0].iov_base;
+ p++;
+ iovec[0].iov_base = (char *)iovec[0].iov_base + 1;
+ needed--;
+ }
+ */
+
+ // consume the caller's iovec array in place (len/base advance)
+ for (size_t i = 0; i < num_iovecs && needed; i++) {
+ if (iovec[i].iov_len == 0)
+ continue;
+
+ size_t num = min(needed, iovec[i].iov_len);
+ memcpy(p, iovec[i].iov_base, num);
+
+ p += num;
+
+ iovec[i].iov_len -= num;
+ iovec[i].iov_base = (byte*)iovec[i].iov_base + num; // iovec[i].iov_base += num, but without void* pointers
+ needed -= num;
+ }
+
+ assert(needed == 0);
+ }
+ pkt->payload += added;
+ pkt->length = header_size + pkt->payload;
+
+ last_rcv_win = get_rcv_window();
+
+ PacketFormatV1* p1 = (PacketFormatV1*)pkt->data;
+ p1->set_version(1);
+ p1->set_type(flags);
+ p1->ext = 0;
+ p1->connid = conn_id_send;
+ p1->windowsize = (uint32)last_rcv_win;
+ p1->ack_nr = ack_nr;
+
+ if (append) {
+ // Remember the message in the outgoing queue.
+ outbuf.ensure_size(seq_nr, cur_window_packets);
+ outbuf.put(seq_nr, pkt);
+ p1->seq_nr = seq_nr;
+ seq_nr++;
+ cur_window_packets++;
+ }
+
+ payload -= added;
+
+ } while (payload);
+
+ flush_packets();
+}
+
+#ifdef _DEBUG
+// Debug-only consistency check: cur_window must equal the sum of
+// payloads of sent, not-timed-out packets in the outgoing buffer
+void UTPSocket::check_invariant()
+{
+ if (reorder_count > 0) {
+ assert(inbuf.get(ack_nr + 1) == NULL);
+ }
+
+ size_t outstanding_bytes = 0;
+ for (int i = 0; i < cur_window_packets; ++i) {
+ OutgoingPacket *pkt = (OutgoingPacket*)outbuf.get(seq_nr - i - 1);
+ if (pkt == 0 || pkt->transmissions == 0 || pkt->need_resend) continue;
+ outstanding_bytes += pkt->payload;
+ }
+ assert(outstanding_bytes == cur_window);
+}
+#endif
+
+// Periodic timer tick: flushes pending packets and drives the RTO,
+// zero-window, keep-alive and teardown timers / state transitions
+void UTPSocket::check_timeouts()
+{
+ #ifdef _DEBUG
+ check_invariant();
+ #endif
+
+ // this invariant should always be true
+ assert(cur_window_packets == 0 || outbuf.get(seq_nr - cur_window_packets));
+
+ #if UTP_DEBUG_LOGGING
+ log(UTP_LOG_DEBUG, "CheckTimeouts timeout:%d max_window:%u cur_window:%u "
+ "state:%s cur_window_packets:%u",
+
(int)(rto_timeout - ctx->current_ms), (uint)max_window, (uint)cur_window,
+ statenames[state], cur_window_packets);
+ #endif
+
+ if (state != CS_DESTROY) flush_packets();
+
+ switch (state) {
+ case CS_SYN_SENT:
+ case CS_CONNECTED_FULL:
+ case CS_CONNECTED:
+ case CS_FIN_SENT: {
+
+ // Reset max window...
+ if ((int)(ctx->current_ms - zerowindow_time) >= 0 && max_window_user == 0) {
+ max_window_user = PACKET_SIZE;
+ }
+
+ if ((int)(ctx->current_ms - rto_timeout) >= 0
+ && rto_timeout > 0) {
+
+ bool ignore_loss = false;
+
+ if (cur_window_packets == 1
+ && ((seq_nr - 1) & ACK_NR_MASK) == mtu_probe_seq
+ && mtu_probe_seq != 0) {
+ // we only had a single outstanding packet that timed out, and it was the probe
+ mtu_ceiling = mtu_probe_size - 1;
+ mtu_search_update();
+ // this packet was most likely dropped because the packet size being
+ // too big and not because congestion. To accelerate the binary search for
+ // the MTU, resend immediately and don't reset the window size
+ ignore_loss = true;
+ log(UTP_LOG_MTU, "MTU [PROBE-TIMEOUT] floor:%d ceiling:%d current:%d"
+ , mtu_floor, mtu_ceiling, mtu_last);
+ }
+ // we dropped the probe, clear these fields to
+ // allow us to send a new one
+ mtu_probe_seq = mtu_probe_size = 0;
+ log(UTP_LOG_MTU, "MTU [TIMEOUT]");
+
+ /*
+ OutgoingPacket *pkt = (OutgoingPacket*)outbuf.get(seq_nr - cur_window_packets);
+
+ // If there were a lot of retransmissions, force recomputation of round trip time
+ if (pkt->transmissions >= 4)
+ rtt = 0;
+ */
+
+ // Increase RTO (exponential backoff, unless the loss is ignored)
+ const uint new_timeout = ignore_loss ? retransmit_timeout : retransmit_timeout * 2;
+
+ if (retransmit_count >= 4 || (state == CS_SYN_SENT && retransmit_count >= 2)) {
+ // 4 consecutive transmissions have timed out. Kill it. If we
+ // haven't even connected yet, give up after only 2 consecutive
+ // failed transmissions.
+ if (state == CS_FIN_SENT)
+ state = CS_DESTROY;
+ else
+ state = CS_RESET;
+ utp_call_on_error(ctx, this, UTP_ETIMEDOUT);
+ return;
+ }
+
+ retransmit_timeout = new_timeout;
+ rto_timeout = ctx->current_ms + new_timeout;
+
+ if (!ignore_loss) {
+ // On Timeout
+ duplicate_ack = 0;
+
+ int packet_size = get_packet_size();
+
+ if (cur_window_packets == 0 && max_window > packet_size) {
+ // we don't have any packets in-flight, even though
+ // we could. This implies that the connection is just
+ // idling. No need to be aggressive about resetting the
+ // congestion window. Just let it decay by a 3:rd.
+ // don't set it any lower than the packet size though
+ max_window = max(max_window * 2 / 3, size_t(packet_size));
+ } else {
+ // our delay was so high that our congestion window
+ // was shrunk below one packet, preventing us from
+ // sending anything for one time-out period. Now, reset
+ // the congestion window to fit one packet, to start over
+ // again
+ max_window = packet_size;
+ slow_start = true;
+ }
+ }
+
+ // every packet should be considered lost
+ for (int i = 0; i < cur_window_packets; ++i) {
+ OutgoingPacket *pkt = (OutgoingPacket*)outbuf.get(seq_nr - i - 1);
+ if (pkt == 0 || pkt->transmissions == 0 || pkt->need_resend) continue;
+ pkt->need_resend = true;
+ assert(cur_window >= pkt->payload);
+ cur_window -= pkt->payload;
+ }
+
+ if (cur_window_packets > 0) {
+ retransmit_count++;
+ // used in parse_log.py
+ log(UTP_LOG_NORMAL, "Packet timeout. Resend. seq_nr:%u. timeout:%u "
+ "max_window:%u cur_window_packets:%d"
+ , seq_nr - cur_window_packets, retransmit_timeout
+ , (uint)max_window, int(cur_window_packets));
+
+ fast_timeout = true;
+ timeout_seq_nr = seq_nr;
+
+ OutgoingPacket *pkt = (OutgoingPacket*)outbuf.get(seq_nr - cur_window_packets);
+ assert(pkt);
+
+ // Re-send the packet.
+ send_packet(pkt);
+ }
+ }
+
+ // Mark the socket as writable.
If the cwnd has grown, or if the number of
+ // bytes in-flight is lower than cwnd, we need to make the socket writable again
+ // in case it isn't
+ if (state == CS_CONNECTED_FULL && !is_full()) {
+ state = CS_CONNECTED;
+
+ #if UTP_DEBUG_LOGGING
+ log(UTP_LOG_DEBUG, "Socket writable. max_window:%u cur_window:%u packet_size:%u",
+ (uint)max_window, (uint)cur_window, (uint)get_packet_size());
+ #endif
+ utp_call_on_state_change(this->ctx, this, UTP_STATE_WRITABLE);
+ }
+
+ if (state >= CS_CONNECTED && state <= CS_FIN_SENT) {
+ if ((int)(ctx->current_ms - last_sent_packet) >= KEEPALIVE_INTERVAL) {
+ send_keep_alive();
+ }
+ }
+ break;
+ }
+
+ // Close?
+ case CS_GOT_FIN:
+ case CS_DESTROY_DELAY:
+ if ((int)(ctx->current_ms - rto_timeout) >= 0) {
+ state = (state == CS_DESTROY_DELAY) ? CS_DESTROY : CS_RESET;
+ if (cur_window_packets > 0) {
+ utp_call_on_error(ctx, this, UTP_ECONNRESET);
+ }
+ }
+ break;
+ // prevent warning
+ case CS_UNINITIALIZED:
+ case CS_IDLE:
+ case CS_RESET:
+ case CS_DESTROY:
+ break;
+ }
+}
+
+// this should be called every time we change mtu_floor or mtu_ceiling
+void UTPSocket::mtu_search_update()
+{
+ assert(mtu_floor <= mtu_ceiling);
+
+ // binary search
+ mtu_last = (mtu_floor + mtu_ceiling) / 2;
+
+ // enable a new probe to be sent
+ mtu_probe_seq = mtu_probe_size = 0;
+
+ // if the floor and ceiling are close enough, consider the
+ // MTU binary search complete.
We set the current value
+ // to floor since that's the only size we know can go through
+ // also set the ceiling to floor to terminate the searching
+ if (mtu_ceiling - mtu_floor <= 16) {
+ mtu_last = mtu_floor;
+ log(UTP_LOG_MTU, "MTU [DONE] floor:%d ceiling:%d current:%d"
+ , mtu_floor, mtu_ceiling, mtu_last);
+ mtu_ceiling = mtu_floor;
+ assert(mtu_floor <= mtu_ceiling);
+ // Do another search in 30 minutes
+ mtu_discover_time = utp_call_get_milliseconds(this->ctx, this) + 30 * 60 * 1000;
+ }
+}
+
+// Restart MTU discovery: ceiling from the application's reported UDP
+// MTU, floor at the classic 576-byte minimum; search again in 30 min
+void UTPSocket::mtu_reset()
+{
+ mtu_ceiling = get_udp_mtu();
+ // Less would not pass TCP...
+ mtu_floor = 576;
+ log(UTP_LOG_MTU, "MTU [RESET] floor:%d ceiling:%d current:%d"
+ , mtu_floor, mtu_ceiling, mtu_last);
+ assert(mtu_floor <= mtu_ceiling);
+ mtu_discover_time = utp_call_get_milliseconds(this->ctx, this) + 30 * 60 * 1000;
+}
+
+// returns:
+// 0: the packet was acked.
+// 1: it means that the packet had already been acked
+// 2: the packet has not been sent yet
+int UTPSocket::ack_packet(uint16 seq)
+{
+ OutgoingPacket *pkt = (OutgoingPacket*)outbuf.get(seq);
+
+ // the packet has already been acked (or not sent)
+ if (pkt == NULL) {
+
+ #if UTP_DEBUG_LOGGING
+ log(UTP_LOG_DEBUG, "got ack for:%u (already acked, or never sent)", seq);
+ #endif
+
+ return 1;
+ }
+
+ // can't ack packets that haven't been sent yet!
+ if (pkt->transmissions == 0) {
+
+ #if UTP_DEBUG_LOGGING
+ log(UTP_LOG_DEBUG, "got ack for:%u (never sent, pkt_size:%u need_resend:%u)",
+ seq, (uint)pkt->payload, pkt->need_resend);
+ #endif
+
+ return 2;
+ }
+
+ #if UTP_DEBUG_LOGGING
+ log(UTP_LOG_DEBUG, "got ack for:%u (pkt_size:%u need_resend:%u)",
+ seq, (uint)pkt->payload, pkt->need_resend);
+ #endif
+
+ outbuf.put(seq, NULL);
+
+ // if we never re-sent the packet, update the RTT estimate
+ // (Karn's rule: retransmitted packets give ambiguous samples)
+ if (pkt->transmissions == 1) {
+ // Estimate the round trip time.
+ const uint32 ertt = (uint32)((utp_call_get_microseconds(this->ctx, this) - pkt->time_sent) / 1000);
+ if (rtt == 0) {
+ // First round trip time sample
+ rtt = ertt;
+ rtt_var = ertt / 2;
+ // sanity check. rtt should never be more than 6 seconds
+// assert(rtt < 6000);
+ } else {
+ // Compute new round trip times
+ const int delta = (int)rtt - ertt;
+ rtt_var = rtt_var + (int)(abs(delta) - rtt_var) / 4;
+ rtt = rtt - rtt/8 + ertt/8;
+ // sanity check. rtt should never be more than 6 seconds
+// assert(rtt < 6000);
+ rtt_hist.add_sample(ertt, ctx->current_ms);
+ }
+ // RTO = SRTT + 4*RTTVAR, floored at 1000 ms (cf. RFC 6298)
+ rto = max(rtt + rtt_var * 4, 1000);
+
+ #if UTP_DEBUG_LOGGING
+ log(UTP_LOG_DEBUG, "rtt:%u avg:%u var:%u rto:%u",
+ ertt, rtt, rtt_var, rto);
+ #endif
+
+ }
+ retransmit_timeout = rto;
+ rto_timeout = ctx->current_ms + rto;
+ // if need_resend is set, this packet has already
+ // been considered timed-out, and is not included in
+ // the cur_window anymore
+ if (!pkt->need_resend) {
+ assert(cur_window >= pkt->payload);
+ cur_window -= pkt->payload;
+ }
+ free(pkt);
+ retransmit_count = 0;
+ return 0;
+}
+
+// count the number of bytes that were acked by the EACK header
+size_t UTPSocket::selective_ack_bytes(uint base, const byte* mask, byte len, int64& min_rtt)
+{
+ if (cur_window_packets == 0) return 0;
+
+ size_t acked_bytes = 0;
+ int bits = len * 8;
+ uint64 now = utp_call_get_microseconds(this->ctx, this);
+
+ do {
+ uint v = base + bits;
+
+ // ignore bits that haven't been sent yet
+ // see comment in UTPSocket::selective_ack
+ if (((seq_nr - v - 1) & ACK_NR_MASK) >= (uint16)(cur_window_packets - 1))
+ continue;
+
+ // ignore bits that represents packets we haven't sent yet
+ // or packets that have already been acked
+ OutgoingPacket *pkt = (OutgoingPacket*)outbuf.get(v);
+ if (!pkt || pkt->transmissions == 0)
+ continue;
+
+ // Count the number of segments that were successfully received past it.
+ if (bits >= 0 && mask[bits>>3] & (1 << (bits & 7))) {
+ assert((int)(pkt->payload) >= 0);
+ acked_bytes += pkt->payload;
+ if (pkt->time_sent < now)
+ min_rtt = min(min_rtt, now - pkt->time_sent);
+ else
+ min_rtt = min(min_rtt, 50000);
+ continue;
+ }
+ } while (--bits >= -1);
+ return acked_bytes;
+}
+
+enum { MAX_EACK = 128 };
+
+// Process an EACK bitmask: ack the set bits, count duplicate acks, and
+// fast-resend (up to 4) packets that have enough acked packets after them
+void UTPSocket::selective_ack(uint base, const byte *mask, byte len)
+{
+ if (cur_window_packets == 0) return;
+
+ // the range is inclusive [0, 31] bits
+ int bits = len * 8 - 1;
+
+ int count = 0;
+
+ // resends is a stack of sequence numbers we need to resend. Since we
+ // iterate in reverse over the acked packets, at the end, the top packets
+ // are the ones we want to resend
+ int resends[MAX_EACK];
+ int nr = 0;
+
+#if UTP_DEBUG_LOGGING
+ char bitmask[1024] = {0};
+ int counter = bits;
+ for (int i = 0; i <= bits; ++i) {
+ bool bit_set = counter >= 0 && mask[counter>>3] & (1 << (counter & 7));
+ bitmask[i] = bit_set ? '1' : '0';
+ --counter;
+ }
+
+ log(UTP_LOG_DEBUG, "Got EACK [%s] base:%u", bitmask, base);
+#endif
+
+ do {
+ // we're iterating over the bits from higher sequence numbers
+ // to lower (kind of in reverse order, which might not be very
+ // intuitive)
+ uint v = base + bits;
+
+ // ignore bits that haven't been sent yet
+ // and bits that fall below the ACKed sequence number
+ // this can happen if an EACK message gets
+ // reordered and arrives after a packet that ACKs up past
+ // the base for this EACK message
+
+ // this is essentially the same as:
+ // if v >= seq_nr || v <= seq_nr - cur_window_packets
+ // but it takes wrapping into account
+
+ // if v == seq_nr the -1 will make it wrap. if v > seq_nr
+ // it will also wrap (since it will fall further below 0)
+ // and be > cur_window_packets.
+ // if v == seq_nr - cur_window_packets, the result will be
+ // seq_nr - (seq_nr - cur_window_packets) - 1
+ // == seq_nr - seq_nr + cur_window_packets - 1
+ // == cur_window_packets - 1 which will be caught by the
+ // test. If v < seq_nr - cur_window_packets the result will grow
+ // fall further outside of the cur_window_packets range.
+
+ // sequence number space:
+ //
+ // rejected < accepted > rejected
+ // <============+--------------+============>
+ // ^ ^
+ // | |
+ // (seq_nr-wnd) seq_nr
+
+ if (((seq_nr - v - 1) & ACK_NR_MASK) >= (uint16)(cur_window_packets - 1))
+ continue;
+
+ // this counts as a duplicate ack, even though we might have
+ // received an ack for this packet previously (in another EACK
+ // message for instance)
+ bool bit_set = bits >= 0 && mask[bits>>3] & (1 << (bits & 7));
+
+ // if this packet is acked, it counts towards the duplicate ack counter
+ if (bit_set) count++;
+
+ // ignore bits that represents packets we haven't sent yet
+ // or packets that have already been acked
+ OutgoingPacket *pkt = (OutgoingPacket*)outbuf.get(v);
+ if (!pkt || pkt->transmissions == 0) {
+
+ #if UTP_DEBUG_LOGGING
+ log(UTP_LOG_DEBUG, "skipping %u. pkt:%08x transmissions:%u %s",
+ v, pkt, pkt?pkt->transmissions:0, pkt?"(not sent yet?)":"(already acked?)");
+ #endif
+ continue;
+ }
+
+ // Count the number of segments that were successfully received past it.
+ if (bit_set) {
+ // the selective ack should never ACK the packet we're waiting for to decrement cur_window_packets
+ assert((v & outbuf.mask) != ((seq_nr - cur_window_packets) & outbuf.mask));
+ ack_packet(v);
+ continue;
+ }
+
+ // Resend segments
+ // if count is less than our re-send limit, we haven't seen enough
+ // acked packets in front of this one to warrant a re-send.
+ // if count == 0, we're still going through the tail of zeroes
+ if (((v - fast_resend_seq_nr) & ACK_NR_MASK) <= OUTGOING_BUFFER_MAX_SIZE &&
+ count >= DUPLICATE_ACKS_BEFORE_RESEND) {
+ // resends is a stack, and we're mostly interested in the top of it
+ // if we're full, just throw away the lower half
+ if (nr >= MAX_EACK - 2) {
+ memmove(resends, &resends[MAX_EACK/2], MAX_EACK/2 * sizeof(resends[0]));
+ nr -= MAX_EACK / 2;
+ }
+ resends[nr++] = v;
+
+ #if UTP_DEBUG_LOGGING
+ log(UTP_LOG_DEBUG, "no ack for %u", v);
+ #endif
+
+ } else {
+
+ #if UTP_DEBUG_LOGGING
+ log(UTP_LOG_DEBUG, "not resending %u count:%d dup_ack:%u fast_resend_seq_nr:%u",
+ v, count, duplicate_ack, fast_resend_seq_nr);
+ #endif
+ }
+ } while (--bits >= -1);
+
+ if (((base - 1 - fast_resend_seq_nr) & ACK_NR_MASK) <= OUTGOING_BUFFER_MAX_SIZE &&
+ count >= DUPLICATE_ACKS_BEFORE_RESEND) {
+ // if we get enough duplicate acks to start
+ // resending, the first packet we should resend
+ // is base-1
+ resends[nr++] = (base - 1) & ACK_NR_MASK;
+
+ #if UTP_DEBUG_LOGGING
+ log(UTP_LOG_DEBUG, "no ack for %u", (base - 1) & ACK_NR_MASK);
+ #endif
+
+ } else {
+ #if UTP_DEBUG_LOGGING
+ log(UTP_LOG_DEBUG, "not resending %u count:%d dup_ack:%u fast_resend_seq_nr:%u",
+ base - 1, count, duplicate_ack, fast_resend_seq_nr);
+ #endif
+ }
+
+ bool back_off = false;
+ int i = 0;
+ while (nr > 0) {
+ uint v = resends[--nr];
+ // don't consider the tail of 0:es to be lost packets
+ // only unacked packets with acked packets after should
+ // be considered lost
+ OutgoingPacket *pkt = (OutgoingPacket*)outbuf.get(v);
+
+ // this may be an old (re-ordered) packet, and some of the
+ // packets in here may have been acked already. In which
+ // case they will not be in the send queue anymore
+ if (!pkt) continue;
+
+ // used in parse_log.py
+ log(UTP_LOG_NORMAL, "Packet %u lost.
Resending", v);
+
+ // On Loss
+ back_off = true;
+
+ #ifdef _DEBUG
+ ++_stats.rexmit;
+ #endif
+
+ send_packet(pkt);
+ fast_resend_seq_nr = (v + 1) & ACK_NR_MASK;
+
+ // Re-send max 4 packets.
+ if (++i >= 4) break;
+ }
+
+ if (back_off)
+ maybe_decay_win(ctx->current_ms);
+
+ duplicate_ack = count;
+}
+
+// LEDBAT congestion control: grow/shrink max_window based on how far
+// the measured one-way delay is from the target delay
+void UTPSocket::apply_ccontrol(size_t bytes_acked, uint32 actual_delay, int64 min_rtt)
+{
+ // the delay can never be greater than the rtt. The min_rtt
+ // variable is the RTT in microseconds
+
+ assert(min_rtt >= 0);
+ int32 our_delay = min(our_hist.get_value(), uint32(min_rtt));
+ assert(our_delay != INT_MAX);
+ assert(our_delay >= 0);
+
+ utp_call_on_delay_sample(this->ctx, this, our_delay / 1000);
+
+ // This tests the connection under heavy load from foreground
+ // traffic. Pretend that our delays are very high to force the
+ // connection to use sub-packet size window sizes
+ //our_delay *= 4;
+
+ // target is microseconds
+ int target = target_delay;
+ if (target <= 0) target = 100000;
+
+ // this is here to compensate for very large clock drift that affects
+ // the congestion controller into giving certain endpoints an unfair
+ // share of the bandwidth. We have an estimate of the clock drift
+ // (clock_drift). The unit of this is microseconds per 5 seconds.
+ // empirically, a reasonable cut-off appears to be about 200000
+ // (which is pretty high).
The main purpose is to compensate for
+ // people trying to "cheat" uTP by making their clock run slower,
+ // and this definitely catches that without any risk of false positives
+ // if clock_drift < -200000 start applying a penalty delay proportional
+ // to how far beyond -200000 the clock drift is
+ int32 penalty = 0;
+ if (clock_drift < -200000) {
+ penalty = (-clock_drift - 200000) / 7;
+ our_delay += penalty;
+ }
+
+ double off_target = target - our_delay;
+
+ // this is the same as:
+ //
+ // (min(off_target, target) / target) * (bytes_acked / max_window) * MAX_CWND_INCREASE_BYTES_PER_RTT
+ //
+ // so, it's scaling the max increase by the fraction of the window this ack represents, and the fraction
+ // of the target delay the current delay represents.
+ // The min() around off_target protects against crazy values of our_delay, which may happen when the
+ // timestamps wraps, or by just having a malicious peer sending garbage. This caps the increase
+ // of the window size to MAX_CWND_INCREASE_BYTES_PER_RTT per rtt.
+ // as for large negative numbers, this direction is already capped at the min packet size further down
+ // the min around the bytes_acked protects against the case where the window size was recently
+ // shrunk and the number of acked bytes exceeds that. This is considered no more than one full
+ // window, in order to keep the gain within sane boundaries.
+
+ assert(bytes_acked > 0);
+ double window_factor = (double)min(bytes_acked, max_window) / (double)max(max_window, bytes_acked);
+
+ double delay_factor = off_target / target;
+ double scaled_gain = MAX_CWND_INCREASE_BYTES_PER_RTT * window_factor * delay_factor;
+
+ // since MAX_CWND_INCREASE_BYTES_PER_RTT is a cap on how much the window size (max_window)
+ // may increase per RTT, we may not increase the window size more than that proportional
+ // to the number of bytes that were acked, so that once one window has been acked (one rtt)
+ // the increase limit is not exceeded
+ // the +1.
is to allow for floating point imprecision
+ assert(scaled_gain <= 1. + MAX_CWND_INCREASE_BYTES_PER_RTT * (double)min(bytes_acked, max_window) / (double)max(max_window, bytes_acked));
+
+ if (scaled_gain > 0 && ctx->current_ms - last_maxed_out_window > 1000) {
+ // if it was more than 1 second since we tried to send a packet
+ // and stopped because we hit the max window, we're most likely rate
+ // limited (which prevents us from ever hitting the window size)
+ // if this is the case, we cannot let the max_window grow indefinitely
+ scaled_gain = 0;
+ }
+
+ size_t ledbat_cwnd = (max_window + scaled_gain < MIN_WINDOW_SIZE)?MIN_WINDOW_SIZE:max_window + scaled_gain;
+
+ if (slow_start) {
+ size_t ss_cwnd = max_window + window_factor*get_packet_size();
+ if (ss_cwnd > ssthresh) {
+ slow_start = false;
+ } else if (our_delay > target*0.9) {
+ // even if we're a little under the target delay, we conservatively
+ // discontinue the slow start phase
+ slow_start = false;
+ ssthresh = max_window;
+ } else {
+ max_window = max(ss_cwnd, ledbat_cwnd);
+ }
+ } else {
+ max_window = ledbat_cwnd;
+ }
+
+
+ // make sure that the congestion window is below max
+ // make sure that we don't shrink our window too small
+ max_window = clamp(max_window, MIN_WINDOW_SIZE, opt_sndbuf);
+
+ // used in parse_log.py
+ log(UTP_LOG_NORMAL, "actual_delay:%u our_delay:%d their_delay:%u off_target:%d max_window:%u "
+ "delay_base:%u delay_sum:%d target_delay:%d acked_bytes:%u cur_window:%u "
+ "scaled_gain:%f rtt:%u rate:%u wnduser:%u rto:%u timeout:%d get_microseconds:"I64u" "
+ "cur_window_packets:%u packet_size:%u their_delay_base:%u their_actual_delay:%u "
+ "average_delay:%d clock_drift:%d clock_drift_raw:%d delay_penalty:%d current_delay_sum:"I64u
+ "current_delay_samples:%d average_delay_base:%d last_maxed_out_window:"I64u" opt_sndbuf:%d "
+ "current_ms:"I64u"",
+ actual_delay, our_delay / 1000, their_hist.get_value() / 1000,
+ int(off_target / 1000), uint(max_window),
uint32(our_hist.delay_base),
+ int((our_delay + their_hist.get_value()) / 1000), int(target / 1000), uint(bytes_acked),
+ (uint)(cur_window - bytes_acked), (float)(scaled_gain), rtt,
+ (uint)(max_window * 1000 / (rtt_hist.delay_base?rtt_hist.delay_base:50)),
+ (uint)max_window_user, rto, (int)(rto_timeout - ctx->current_ms),
+ utp_call_get_microseconds(this->ctx, this), cur_window_packets, (uint)get_packet_size(),
+ their_hist.delay_base, their_hist.delay_base + their_hist.get_value(),
+ average_delay, clock_drift, clock_drift_raw, penalty / 1000,
+ current_delay_sum, current_delay_samples, average_delay_base,
+ uint64(last_maxed_out_window), int(opt_sndbuf), uint64(ctx->current_ms));
+}
+
+// Bucket an incoming datagram size into the context-wide receive histogram
+// (and, in debug builds, per-socket receive stats)
+static void utp_register_recv_packet(UTPSocket *conn, size_t len)
+{
+ #ifdef _DEBUG
+ ++conn->_stats.nrecv;
+ conn->_stats.nbytes_recv += len;
+ #endif
+
+ if (len <= PACKET_SIZE_MID) {
+ if (len <= PACKET_SIZE_EMPTY) {
+ conn->ctx->context_stats._nraw_recv[PACKET_SIZE_EMPTY_BUCKET]++;
+ } else if (len <= PACKET_SIZE_SMALL) {
+ conn->ctx->context_stats._nraw_recv[PACKET_SIZE_SMALL_BUCKET]++;
+ } else
+ conn->ctx->context_stats._nraw_recv[PACKET_SIZE_MID_BUCKET]++;
+ } else {
+ if (len <= PACKET_SIZE_BIG) {
+ conn->ctx->context_stats._nraw_recv[PACKET_SIZE_BIG_BUCKET]++;
+ } else
+ conn->ctx->context_stats._nraw_recv[PACKET_SIZE_HUGE_BUCKET]++;
+ }
+}
+
+// returns the max number of bytes of payload the uTP
+// connection is allowed to send
+size_t UTPSocket::get_packet_size() const
+{
+ int header_size = sizeof(PacketFormatV1);
+ size_t mtu = mtu_last ? mtu_last : mtu_ceiling;
+ return mtu - header_size;
+}
+
+// Process an incoming packet
+// syn is true if this is the first packet received.
It will cut off parsing +// as soon as the header is done +size_t utp_process_incoming(UTPSocket *conn, const byte *packet, size_t len, bool syn = false) +{ + utp_register_recv_packet(conn, len); + + conn->ctx->current_ms = utp_call_get_milliseconds(conn->ctx, conn); + + const PacketFormatV1 *pf1 = (PacketFormatV1*)packet; + const byte *packet_end = packet + len; + + uint16 pk_seq_nr = pf1->seq_nr; + uint16 pk_ack_nr = pf1->ack_nr; + uint8 pk_flags = pf1->type(); + + if (pk_flags >= ST_NUM_STATES) return 0; + + #if UTP_DEBUG_LOGGING + conn->log(UTP_LOG_DEBUG, "Got %s. seq_nr:%u ack_nr:%u state:%s timestamp:"I64u" reply_micro:%u" + , flagnames[pk_flags], pk_seq_nr, pk_ack_nr, statenames[conn->state] + , uint64(pf1->tv_usec), (uint32)(pf1->reply_micro)); + #endif + + // mark receipt time + uint64 time = utp_call_get_microseconds(conn->ctx, conn); + + // RSTs are handled earlier, since the connid matches the send id not the recv id + assert(pk_flags != ST_RESET); + + // TODO: maybe send a ST_RESET if we're in CS_RESET? + + const byte *selack_ptr = NULL; + + // Unpack UTP packet options + // Data pointer + const byte *data = (const byte*)pf1 + conn->get_header_size(); + if (conn->get_header_size() > len) { + + #if UTP_DEBUG_LOGGING + conn->log(UTP_LOG_DEBUG, "Invalid packet size (less than header size)"); + #endif + + return 0; + } + // Skip the extension headers + uint extension = pf1->ext; + if (extension != 0) { + do { + // Verify that the packet is valid. 
+ data += 2;
+
+ if ((int)(packet_end - data) < 0 || (int)(packet_end - data) < data[-1]) {
+
+ #if UTP_DEBUG_LOGGING
+ conn->log(UTP_LOG_DEBUG, "Invalid len of extensions");
+ #endif
+
+ return 0;
+ }
+
+ switch(extension) {
+ case 1: // Selective Acknowledgment
+ selack_ptr = data;
+ break;
+ case 2: // extension bits
+ if (data[-1] != 8) {
+
+ #if UTP_DEBUG_LOGGING
+ conn->log(UTP_LOG_DEBUG, "Invalid len of extension bits header");
+ #endif
+
+ return 0;
+ }
+ memcpy(conn->extensions, data, 8);
+
+ #if UTP_DEBUG_LOGGING
+ conn->log(UTP_LOG_DEBUG, "got extension bits:%02x%02x%02x%02x%02x%02x%02x%02x",
+ conn->extensions[0], conn->extensions[1], conn->extensions[2], conn->extensions[3],
+ conn->extensions[4], conn->extensions[5], conn->extensions[6], conn->extensions[7]);
+ #endif
+ }
+ extension = data[-2];
+ data += data[-1];
+ } while (extension);
+ }
+
+ if (conn->state == CS_SYN_SENT) {
+ // if this is a syn-ack, initialize our ack_nr
+ // to match the sequence number we got from
+ // the other end
+ conn->ack_nr = (pk_seq_nr - 1) & SEQ_NR_MASK;
+ }
+
+ conn->last_got_packet = conn->ctx->current_ms;
+
+ if (syn) {
+ return 0;
+ }
+
+ // seqnr is the number of packets past the expected
+ // packet this is. ack_nr is the last acked, seq_nr is the
+ // current. Subtracting 1 makes 0 mean "this is the next
+ // expected packet".
+ const uint seqnr = (pk_seq_nr - conn->ack_nr - 1) & SEQ_NR_MASK;
+
+ // Getting an invalid sequence number? 
+ if (seqnr >= REORDER_BUFFER_MAX_SIZE) { + if (seqnr >= (SEQ_NR_MASK + 1) - REORDER_BUFFER_MAX_SIZE && pk_flags != ST_STATE) { + conn->schedule_ack(); + } + + #if UTP_DEBUG_LOGGING + conn->log(UTP_LOG_DEBUG, " Got old Packet/Ack (%u/%u)=%u" + , pk_seq_nr, conn->ack_nr, seqnr); + #endif + return 0; + } + + // Process acknowledgment + // acks is the number of packets that was acked + int acks = (pk_ack_nr - (conn->seq_nr - 1 - conn->cur_window_packets)) & ACK_NR_MASK; + + // this happens when we receive an old ack nr + if (acks > conn->cur_window_packets) acks = 0; + + // if we get the same ack_nr as in the last packet + // increase the duplicate_ack counter, otherwise reset + // it to 0 + if (conn->cur_window_packets > 0) { + if (pk_ack_nr == ((conn->seq_nr - conn->cur_window_packets - 1) & ACK_NR_MASK) + && conn->cur_window_packets > 0) { + ++conn->duplicate_ack; + if (conn->duplicate_ack == DUPLICATE_ACKS_BEFORE_RESEND && conn->mtu_probe_seq) { + // It's likely that the probe was rejected due to its size, but we haven't got an + // ICMP report back yet + if (pk_ack_nr == ((conn->mtu_probe_seq - 1) & ACK_NR_MASK)) { + conn->mtu_ceiling = conn->mtu_probe_size - 1; + conn->mtu_search_update(); + conn->log(UTP_LOG_MTU, "MTU [DUPACK] floor:%d ceiling:%d current:%d" + , conn->mtu_floor, conn->mtu_ceiling, conn->mtu_last); + } else { + // A non-probe was blocked before our probe. + // Can't conclude much, send a new probe + conn->mtu_probe_seq = conn->mtu_probe_size = 0; + } + } + } else { + conn->duplicate_ack = 0; + } + + // TODO: if duplicate_ack == DUPLICATE_ACK_BEFORE_RESEND + // and fast_resend_seq_nr <= ack_nr + 1 + // resend ack_nr + 1 + // also call maybe_decay_win() + } + + // figure out how many bytes were acked + size_t acked_bytes = 0; + + // the minimum rtt of all acks + // this is the upper limit on the delay we get back + // from the other peer. Our delay cannot exceed + // the rtt of the packet. If it does, clamp it. 
+ // this is done in apply_ledbat_ccontrol() + int64 min_rtt = INT64_MAX; + + uint64 now = utp_call_get_microseconds(conn->ctx, conn); + + for (int i = 0; i < acks; ++i) { + int seq = (conn->seq_nr - conn->cur_window_packets + i) & ACK_NR_MASK; + OutgoingPacket *pkt = (OutgoingPacket*)conn->outbuf.get(seq); + if (pkt == 0 || pkt->transmissions == 0) continue; + assert((int)(pkt->payload) >= 0); + acked_bytes += pkt->payload; + if (conn->mtu_probe_seq && seq == conn->mtu_probe_seq) { + conn->mtu_floor = conn->mtu_probe_size; + conn->mtu_search_update(); + conn->log(UTP_LOG_MTU, "MTU [ACK] floor:%d ceiling:%d current:%d" + , conn->mtu_floor, conn->mtu_ceiling, conn->mtu_last); + } + + // in case our clock is not monotonic + if (pkt->time_sent < now) + min_rtt = min(min_rtt, now - pkt->time_sent); + else + min_rtt = min(min_rtt, 50000); + } + + // count bytes acked by EACK + if (selack_ptr != NULL) { + acked_bytes += conn->selective_ack_bytes((pk_ack_nr + 2) & ACK_NR_MASK, + selack_ptr, selack_ptr[-1], min_rtt); + } + + #if UTP_DEBUG_LOGGING + conn->log(UTP_LOG_DEBUG, "acks:%d acked_bytes:%u seq_nr:%d cur_window:%u cur_window_packets:%u relative_seqnr:%u max_window:%u min_rtt:%u rtt:%u", + acks, (uint)acked_bytes, conn->seq_nr, (uint)conn->cur_window, conn->cur_window_packets, + seqnr, (uint)conn->max_window, (uint)(min_rtt / 1000), conn->rtt); + #endif + + uint64 p = pf1->tv_usec; + + conn->last_measured_delay = conn->ctx->current_ms; + + // get delay in both directions + // record the delay to report back + const uint32 their_delay = (uint32)(p == 0 ? 
0 : time - p);
+ conn->reply_micro = their_delay;
+ uint32 prev_delay_base = conn->their_hist.delay_base;
+ if (their_delay != 0) conn->their_hist.add_sample(their_delay, conn->ctx->current_ms);
+
+ // if their new delay base is less than their previous one
+ // we should shift our delay base in the other direction in order
+ // to take the clock skew into account
+ if (prev_delay_base != 0 &&
+ wrapping_compare_less(conn->their_hist.delay_base, prev_delay_base, TIMESTAMP_MASK)) {
+ // never adjust more than 10 milliseconds
+ if (prev_delay_base - conn->their_hist.delay_base <= 10000) {
+ conn->our_hist.shift(prev_delay_base - conn->their_hist.delay_base);
+ }
+ }
+
+ const uint32 actual_delay = (uint32(pf1->reply_micro)==INT_MAX?0:uint32(pf1->reply_micro));
+
+ // if the actual delay is 0, it means the other end
+ // hasn't received a sample from us yet, and doesn't
+ // know what it is. We can't update our history unless
+ // we have a true measured sample
+ prev_delay_base = conn->our_hist.delay_base;
+ if (actual_delay != 0) {
+ conn->our_hist.add_sample(actual_delay, conn->ctx->current_ms);
+
+ // this is keeping an average of the delay samples
+ // we've received within the last 5 seconds. We sum
+ // all the samples and increase the count in order to
+ // calculate the average every 5 seconds. The samples
+ // are based off of the average_delay_base to deal with
+ // wrapping counters. 
+ if (conn->average_delay_base == 0) conn->average_delay_base = actual_delay; + int64 average_delay_sample = 0; + // distance walking from lhs to rhs, downwards + const uint32 dist_down = conn->average_delay_base - actual_delay; + // distance walking from lhs to rhs, upwards + const uint32 dist_up = actual_delay - conn->average_delay_base; + + if (dist_down > dist_up) { +// assert(dist_up < INT_MAX / 4); + // average_delay_base < actual_delay, we should end up + // with a positive sample + average_delay_sample = dist_up; + } else { +// assert(-int64(dist_down) < INT_MAX / 4); + // average_delay_base >= actual_delay, we should end up + // with a negative sample + average_delay_sample = -int64(dist_down); + } + conn->current_delay_sum += average_delay_sample; + ++conn->current_delay_samples; + + if (conn->ctx->current_ms > conn->average_sample_time) { + + int32 prev_average_delay = conn->average_delay; + + assert(conn->current_delay_sum / conn->current_delay_samples < INT_MAX); + assert(conn->current_delay_sum / conn->current_delay_samples > -INT_MAX); + // write the new average + conn->average_delay = conn->current_delay_sum / conn->current_delay_samples; + // each slot represents 5 seconds + conn->average_sample_time += 5000; + + conn->current_delay_sum = 0; + conn->current_delay_samples = 0; + + // this makes things very confusing when logging the average delay +//#if !g_log_utp + // normalize the average samples + // since we're only interested in the slope + // of the curve formed by the average delay samples, + // we can cancel out the actual offset to make sure + // we won't have problems with wrapping. + int min_sample = min(prev_average_delay, conn->average_delay); + int max_sample = max(prev_average_delay, conn->average_delay); + + // normalize around zero. 
Try to keep the min <= 0 and max >= 0 + int adjust = 0; + if (min_sample > 0) { + // adjust all samples (and the baseline) down by min_sample + adjust = -min_sample; + } else if (max_sample < 0) { + // adjust all samples (and the baseline) up by -max_sample + adjust = -max_sample; + } + if (adjust) { + conn->average_delay_base -= adjust; + conn->average_delay += adjust; + prev_average_delay += adjust; + } +//#endif + + // update the clock drift estimate + // the unit is microseconds per 5 seconds + // what we're doing is just calculating the average of the + // difference between each slot. Since each slot is 5 seconds + // and the timestamps unit are microseconds, we'll end up with + // the average slope across our history. If there is a consistent + // trend, it will show up in this value + + //int64 slope = 0; + int32 drift = conn->average_delay - prev_average_delay; + + // clock_drift is a rolling average + conn->clock_drift = (int64(conn->clock_drift) * 7 + drift) / 8; + conn->clock_drift_raw = drift; + } + } + + // if our new delay base is less than our previous one + // we should shift the other end's delay base in the other + // direction in order to take the clock skew into account + // This is commented out because it creates bad interactions + // with our adjustment in the other direction. We don't really + // need our estimates of the other peer to be very accurate + // anyway. The problem with shifting here is that we're more + // likely shift it back later because of a low latency. 
This + // second shift back would cause us to shift our delay base + // which then get's into a death spiral of shifting delay bases +/* if (prev_delay_base != 0 && + wrapping_compare_less(conn->our_hist.delay_base, prev_delay_base)) { + // never adjust more than 10 milliseconds + if (prev_delay_base - conn->our_hist.delay_base <= 10000) { + conn->their_hist.Shift(prev_delay_base - conn->our_hist.delay_base); + } + } +*/ + + // if the delay estimate exceeds the RTT, adjust the base_delay to + // compensate + assert(min_rtt >= 0); + if (int64(conn->our_hist.get_value()) > min_rtt) { + conn->our_hist.shift(conn->our_hist.get_value() - min_rtt); + } + + // only apply the congestion controller on acks + // if we don't have a delay measurement, there's + // no point in invoking the congestion control + if (actual_delay != 0 && acked_bytes >= 1) + conn->apply_ccontrol(acked_bytes, actual_delay, min_rtt); + + // sanity check, the other end should never ack packets + // past the point we've sent + if (acks <= conn->cur_window_packets) { + conn->max_window_user = pf1->windowsize; + + // If max user window is set to 0, then we startup a timer + // That will reset it to 1 after 15 seconds. + if (conn->max_window_user == 0) + // Reset max_window_user to 1 every 15 seconds. + conn->zerowindow_time = conn->ctx->current_ms + 15000; + + // Respond to connect message + // Switch to CONNECTED state. + if (conn->state == CS_SYN_SENT) { + conn->state = CS_CONNECTED; + + // If the user has defined the ON_CONNECT callback, use that to + // notify the user that the socket is now connected. If ON_CONNECT + // has not been defined, notify the user via ON_STATE_CHANGE. + if (conn->ctx->callbacks[UTP_ON_CONNECT]) + utp_call_on_connect(conn->ctx, conn); + else + utp_call_on_state_change(conn->ctx, conn, UTP_STATE_CONNECT); + + // We've sent a fin, and everything was ACKed (including the FIN), + // it's safe to destroy the socket. 
cur_window_packets == acks
+ // means that this packet acked all the remaining packets that
+ // were in-flight.
+ } else if (conn->state == CS_FIN_SENT && conn->cur_window_packets == acks) {
+ conn->state = CS_DESTROY;
+ }
+
+ // Update fast resend counter
+ if (wrapping_compare_less(conn->fast_resend_seq_nr
+ , (pk_ack_nr + 1) & ACK_NR_MASK, ACK_NR_MASK))
+ conn->fast_resend_seq_nr = (pk_ack_nr + 1) & ACK_NR_MASK;
+
+ #if UTP_DEBUG_LOGGING
+ conn->log(UTP_LOG_DEBUG, "fast_resend_seq_nr:%u", conn->fast_resend_seq_nr);
+ #endif
+
+ for (int i = 0; i < acks; ++i) {
+ int ack_status = conn->ack_packet(conn->seq_nr - conn->cur_window_packets);
+ // if ack_status is 0, the packet was acked.
+ // if ack_status is 1, it means that the packet had already been acked
+ // if it's 2, the packet has not been sent yet
+ // We need to break this loop in the latter case. This could potentially
+ // happen if we get an ack_nr that does not exceed what we have stuffed
+ // into the outgoing buffer, but does exceed what we have sent
+ if (ack_status == 2) {
+ #ifdef _DEBUG
+ OutgoingPacket* pkt = (OutgoingPacket*)conn->outbuf.get(conn->seq_nr - conn->cur_window_packets);
+ assert(pkt->transmissions == 0);
+ #endif
+
+ break;
+ }
+ conn->cur_window_packets--;
+
+ #if UTP_DEBUG_LOGGING
+ conn->log(UTP_LOG_DEBUG, "decementing cur_window_packets:%u", conn->cur_window_packets);
+ #endif
+
+ }
+
+ #ifdef _DEBUG
+ if (conn->cur_window_packets == 0)
+ assert(conn->cur_window == 0);
+ #endif
+
+ // packets in front of this may have been acked by a
+ // selective ack (EACK). 
Keep decreasing the window packet size + // until we hit a packet that is still waiting to be acked + // in the send queue + // this is especially likely to happen when the other end + // has the EACK send bug older versions of uTP had + while (conn->cur_window_packets > 0 && !conn->outbuf.get(conn->seq_nr - conn->cur_window_packets)) { + conn->cur_window_packets--; + + #if UTP_DEBUG_LOGGING + conn->log(UTP_LOG_DEBUG, "decementing cur_window_packets:%u", conn->cur_window_packets); + #endif + + } + + #ifdef _DEBUG + if (conn->cur_window_packets == 0) + assert(conn->cur_window == 0); + #endif + + // this invariant should always be true + assert(conn->cur_window_packets == 0 || conn->outbuf.get(conn->seq_nr - conn->cur_window_packets)); + + // flush Nagle + if (conn->cur_window_packets == 1) { + OutgoingPacket *pkt = (OutgoingPacket*)conn->outbuf.get(conn->seq_nr - 1); + // do we still have quota? + if (pkt->transmissions == 0) { + conn->send_packet(pkt); + } + } + + // Fast timeout-retry + if (conn->fast_timeout) { + + #if UTP_DEBUG_LOGGING + conn->log(UTP_LOG_DEBUG, "Fast timeout %u,%u,%u?", (uint)conn->cur_window, conn->seq_nr - conn->timeout_seq_nr, conn->timeout_seq_nr); + #endif + + // if the fast_resend_seq_nr is not pointing to the oldest outstanding packet, it suggests that we've already + // resent the packet that timed out, and we should leave the fast-timeout mode. 
+ if (((conn->seq_nr - conn->cur_window_packets) & ACK_NR_MASK) != conn->fast_resend_seq_nr) { + conn->fast_timeout = false; + } else { + // resend the oldest packet and increment fast_resend_seq_nr + // to not allow another fast resend on it again + OutgoingPacket *pkt = (OutgoingPacket*)conn->outbuf.get(conn->seq_nr - conn->cur_window_packets); + if (pkt && pkt->transmissions > 0) { + + #if UTP_DEBUG_LOGGING + conn->log(UTP_LOG_DEBUG, "Packet %u fast timeout-retry.", conn->seq_nr - conn->cur_window_packets); + #endif + + #ifdef _DEBUG + ++conn->_stats.fastrexmit; + #endif + + conn->fast_resend_seq_nr++; + conn->send_packet(pkt); + } + } + } + } + + // Process selective acknowledgent + if (selack_ptr != NULL) { + conn->selective_ack(pk_ack_nr + 2, selack_ptr, selack_ptr[-1]); + } + + // this invariant should always be true + assert(conn->cur_window_packets == 0 || conn->outbuf.get(conn->seq_nr - conn->cur_window_packets)); + + #if UTP_DEBUG_LOGGING + conn->log(UTP_LOG_DEBUG, "acks:%d acked_bytes:%u seq_nr:%u cur_window:%u cur_window_packets:%u ", + acks, (uint)acked_bytes, conn->seq_nr, (uint)conn->cur_window, conn->cur_window_packets); + #endif + + // In case the ack dropped the current window below + // the max_window size, Mark the socket as writable + if (conn->state == CS_CONNECTED_FULL && !conn->is_full()) { + conn->state = CS_CONNECTED; + #if UTP_DEBUG_LOGGING + conn->log(UTP_LOG_DEBUG, "Socket writable. max_window:%u cur_window:%u packet_size:%u", + (uint)conn->max_window, (uint)conn->cur_window, (uint)conn->get_packet_size()); + #endif + utp_call_on_state_change(conn->ctx, conn, UTP_STATE_WRITABLE); + } + + if (pk_flags == ST_STATE) { + // This is a state packet only. + return 0; + } + + // The connection is not in a state that can accept data? + if (conn->state != CS_CONNECTED && + conn->state != CS_CONNECTED_FULL && + conn->state != CS_FIN_SENT) { + return 0; + } + + // Is this a finalize packet? 
+ if (pk_flags == ST_FIN && !conn->got_fin) { + + #if UTP_DEBUG_LOGGING + conn->log(UTP_LOG_DEBUG, "Got FIN eof_pkt:%u", pk_seq_nr); + #endif + + conn->got_fin = true; + conn->eof_pkt = pk_seq_nr; + // at this point, it is possible for the + // other end to have sent packets with + // sequence numbers higher than seq_nr. + // if this is the case, our reorder_count + // is out of sync. This case is dealt with + // when we re-order and hit the eof_pkt. + // we'll just ignore any packets with + // sequence numbers past this + } + + // Getting an in-order packet? + if (seqnr == 0) { + size_t count = packet_end - data; + if (count > 0 && conn->state != CS_FIN_SENT) { + + #if UTP_DEBUG_LOGGING + conn->log(UTP_LOG_DEBUG, "Got Data len:%u (rb:%u)", (uint)count, (uint)utp_call_get_read_buffer_size(conn->ctx, conn)); + #endif + + // Post bytes to the upper layer + utp_call_on_read(conn->ctx, conn, data, count); + } + conn->ack_nr++; + + // Check if the next packet has been received too, but waiting + // in the reorder buffer. + for (;;) { + + if (conn->got_fin && conn->eof_pkt == conn->ack_nr) { + if (conn->state != CS_FIN_SENT) { + conn->state = CS_GOT_FIN; + conn->rto_timeout = conn->ctx->current_ms + min(conn->rto * 3, 60); + + + #if UTP_DEBUG_LOGGING + conn->log(UTP_LOG_DEBUG, "Posting EOF"); + #endif + + utp_call_on_state_change(conn->ctx, conn, UTP_STATE_EOF); + } + + // if the other end wants to close, ack + conn->send_ack(); + + // reorder_count is not necessarily 0 at this point. + // even though it is most of the time, the other end + // may have sent packets with higher sequence numbers + // than what later end up being eof_pkt + // since we have received all packets up to eof_pkt + // just ignore the ones after it. + conn->reorder_count = 0; + } + + // Quick get-out in case there is nothing to reorder + if (conn->reorder_count == 0) + break; + + // Check if there are additional buffers in the reorder buffers + // that need delivery. 
+ byte *p = (byte*)conn->inbuf.get(conn->ack_nr+1); + if (p == NULL) + break; + conn->inbuf.put(conn->ack_nr+1, NULL); + count = *(uint*)p; + if (count > 0 && conn->state != CS_FIN_SENT) { + // Pass the bytes to the upper layer + utp_call_on_read(conn->ctx, conn, p + sizeof(uint), count); + } + conn->ack_nr++; + + // Free the element from the reorder buffer + free(p); + assert(conn->reorder_count > 0); + conn->reorder_count--; + } + + conn->schedule_ack(); + } else { + // Getting an out of order packet. + // The packet needs to be remembered and rearranged later. + + // if we have received a FIN packet, and the EOF-sequence number + // is lower than the sequence number of the packet we just received + // something is wrong. + if (conn->got_fin && pk_seq_nr > conn->eof_pkt) { + + #if UTP_DEBUG_LOGGING + conn->log(UTP_LOG_DEBUG, "Got an invalid packet sequence number, past EOF " + "reorder_count:%u len:%u (rb:%u)", + conn->reorder_count, (uint)(packet_end - data), (uint)utp_call_get_read_buffer_size(conn->ctx, conn)); + #endif + return 0; + } + + // if the sequence number is entirely off the expected + // one, just drop it. We can't allocate buffer space in + // the inbuf entirely based on untrusted input + if (seqnr > 0x3ff) { + + #if UTP_DEBUG_LOGGING + conn->log(UTP_LOG_DEBUG, "0x%08x: Got an invalid packet sequence number, too far off " + "reorder_count:%u len:%u (rb:%u)", + conn->reorder_count, (uint)(packet_end - data), (uint)utp_call_get_read_buffer_size(conn->ctx, conn)); + #endif + return 0; + } + + // we need to grow the circle buffer before we + // check if the packet is already in here, so that + // we don't end up looking at an older packet (since + // the indices wraps around). + conn->inbuf.ensure_size(pk_seq_nr + 1, seqnr + 1); + + // Has this packet already been received? (i.e. a duplicate) + // If that is the case, just discard it. 
+ if (conn->inbuf.get(pk_seq_nr) != NULL) { + #ifdef _DEBUG + ++conn->_stats.nduprecv; + #endif + + return 0; + } + + // Allocate memory to fit the packet that needs to re-ordered + byte *mem = (byte*)malloc((packet_end - data) + sizeof(uint)); + *(uint*)mem = (uint)(packet_end - data); + memcpy(mem + sizeof(uint), data, packet_end - data); + + // Insert into reorder buffer and increment the count + // of # of packets to be reordered. + // we add one to seqnr in order to leave the last + // entry empty, that way the assert in send_ack + // is valid. we have to add one to seqnr too, in order + // to make the circular buffer grow around the correct + // point (which is conn->ack_nr + 1). + assert(conn->inbuf.get(pk_seq_nr) == NULL); + assert((pk_seq_nr & conn->inbuf.mask) != ((conn->ack_nr+1) & conn->inbuf.mask)); + conn->inbuf.put(pk_seq_nr, mem); + conn->reorder_count++; + + #if UTP_DEBUG_LOGGING + conn->log(UTP_LOG_DEBUG, "0x%08x: Got out of order data reorder_count:%u len:%u (rb:%u)", + conn->reorder_count, (uint)(packet_end - data), (uint)utp_call_get_read_buffer_size(conn->ctx, conn)); + #endif + + conn->schedule_ack(); + } + + return (size_t)(packet_end - data); +} + +inline byte UTP_Version(PacketFormatV1 const* pf) +{ + return (pf->type() < ST_NUM_STATES && pf->ext < 3 ? pf->version() : 0); +} + +UTPSocket::~UTPSocket() +{ + #if UTP_DEBUG_LOGGING + log(UTP_LOG_DEBUG, "Killing socket"); + #endif + + utp_call_on_state_change(ctx, this, UTP_STATE_DESTROYING); + + if (ctx->last_utp_socket == this) { + ctx->last_utp_socket = NULL; + } + + // Remove object from the global hash table + UTPSocketKeyData* kd = ctx->utp_sockets->Delete(UTPSocketKey(addr, conn_id_recv)); + assert(kd); + + // remove the socket from ack_sockets if it was there also + removeSocketFromAckList(this); + + // Free all memory occupied by the socket object. 
+ for (size_t i = 0; i <= inbuf.mask; i++) { + free(inbuf.elements[i]); + } + for (size_t i = 0; i <= outbuf.mask; i++) { + free(outbuf.elements[i]); + } + // TODO: The circular buffer should have a destructor + free(inbuf.elements); + free(outbuf.elements); +} + +void UTP_FreeAll(struct UTPSocketHT *utp_sockets) { + utp_hash_iterator_t it; + UTPSocketKeyData* keyData; + while ((keyData = utp_sockets->Iterate(it))) { + delete keyData->socket; + } +} + +void utp_initialize_socket( utp_socket *conn, + const struct sockaddr *addr, + socklen_t addrlen, + bool need_seed_gen, + uint32 conn_seed, + uint32 conn_id_recv, + uint32 conn_id_send) +{ + PackedSockAddr psaddr = PackedSockAddr((const SOCKADDR_STORAGE*)addr, addrlen); + + if (need_seed_gen) { + do { + conn_seed = utp_call_get_random(conn->ctx, conn); + // we identify v1 and higher by setting the first two bytes to 0x0001 + conn_seed &= 0xffff; + } while (conn->ctx->utp_sockets->Lookup(UTPSocketKey(psaddr, conn_seed))); + + conn_id_recv += conn_seed; + conn_id_send += conn_seed; + } + + conn->state = CS_IDLE; + conn->conn_seed = conn_seed; + conn->conn_id_recv = conn_id_recv; + conn->conn_id_send = conn_id_send; + conn->addr = psaddr; + conn->ctx->current_ms = utp_call_get_milliseconds(conn->ctx, NULL); + conn->last_got_packet = conn->ctx->current_ms; + conn->last_sent_packet = conn->ctx->current_ms; + conn->last_measured_delay = conn->ctx->current_ms + 0x70000000; + conn->average_sample_time = conn->ctx->current_ms + 5000; + conn->last_rwin_decay = int32(conn->ctx->current_ms) - MAX_WINDOW_DECAY; + + conn->our_hist.clear(conn->ctx->current_ms); + conn->their_hist.clear(conn->ctx->current_ms); + conn->rtt_hist.clear(conn->ctx->current_ms); + + // initialize MTU floor and ceiling + conn->mtu_reset(); + conn->mtu_last = conn->mtu_ceiling; + + conn->ctx->utp_sockets->Add(UTPSocketKey(conn->addr, conn->conn_id_recv))->socket = conn; + + // we need to fit one packet in the window when we start the connection + 
conn->max_window = conn->get_packet_size(); + + #if UTP_DEBUG_LOGGING + conn->log(UTP_LOG_DEBUG, "UTP socket initialized"); + #endif +} + +utp_socket* utp_create_socket(utp_context *ctx) +{ + assert(ctx); + if (!ctx) return NULL; + + UTPSocket *conn = new UTPSocket; // TODO: UTPSocket should have a constructor + + conn->state = CS_UNINITIALIZED; + conn->ctx = ctx; + conn->userdata = NULL; + conn->reorder_count = 0; + conn->duplicate_ack = 0; + conn->timeout_seq_nr = 0; + conn->last_rcv_win = 0; + conn->got_fin = false; + conn->fast_timeout = false; + conn->rtt = 0; + conn->retransmit_timeout = 0; + conn->rto_timeout = 0; + conn->zerowindow_time = 0; + conn->average_delay = 0; + conn->current_delay_samples = 0; + conn->cur_window = 0; + conn->eof_pkt = 0; + conn->last_maxed_out_window = 0; + conn->mtu_probe_seq = 0; + conn->mtu_probe_size = 0; + conn->current_delay_sum = 0; + conn->average_delay_base = 0; + conn->retransmit_count = 0; + conn->rto = 3000; + conn->rtt_var = 800; + conn->seq_nr = 1; + conn->ack_nr = 0; + conn->max_window_user = 255 * PACKET_SIZE; + conn->cur_window_packets = 0; + conn->fast_resend_seq_nr = conn->seq_nr; + conn->target_delay = ctx->target_delay; + conn->reply_micro = 0; + conn->opt_sndbuf = ctx->opt_sndbuf; + conn->opt_rcvbuf = ctx->opt_rcvbuf; + conn->slow_start = true; + conn->ssthresh = conn->opt_sndbuf; + conn->clock_drift = 0; + conn->clock_drift_raw = 0; + conn->outbuf.mask = 15; + conn->inbuf.mask = 15; + conn->outbuf.elements = (void**)calloc(16, sizeof(void*)); + conn->inbuf.elements = (void**)calloc(16, sizeof(void*)); + conn->ida = -1; // set the index of every new socket in ack_sockets to + // -1, which also means it is not in ack_sockets yet + + memset(conn->extensions, 0, sizeof(conn->extensions)); + + #ifdef _DEBUG + memset(&conn->_stats, 0, sizeof(utp_socket_stats)); + #endif + + return conn; +} + +int utp_context_set_option(utp_context *ctx, int opt, int val) +{ + assert(ctx); + if (!ctx) return -1; + + switch (opt) { + 
case UTP_LOG_NORMAL: + ctx->log_normal = val ? true : false; + return 0; + + case UTP_LOG_MTU: + ctx->log_mtu = val ? true : false; + return 0; + + case UTP_LOG_DEBUG: + ctx->log_debug = val ? true : false; + return 0; + + case UTP_TARGET_DELAY: + ctx->target_delay = val; + return 0; + + case UTP_SNDBUF: + assert(val >= 1); + ctx->opt_sndbuf = val; + return 0; + + case UTP_RCVBUF: + assert(val >= 1); + ctx->opt_rcvbuf = val; + return 0; + } + return -1; +} + +int utp_context_get_option(utp_context *ctx, int opt) +{ + assert(ctx); + if (!ctx) return -1; + + switch (opt) { + case UTP_LOG_NORMAL: return ctx->log_normal ? 1 : 0; + case UTP_LOG_MTU: return ctx->log_mtu ? 1 : 0; + case UTP_LOG_DEBUG: return ctx->log_debug ? 1 : 0; + case UTP_TARGET_DELAY: return ctx->target_delay; + case UTP_SNDBUF: return ctx->opt_sndbuf; + case UTP_RCVBUF: return ctx->opt_rcvbuf; + } + return -1; +} + + +int utp_setsockopt(UTPSocket* conn, int opt, int val) +{ + assert(conn); + if (!conn) return -1; + + switch (opt) { + + case UTP_SNDBUF: + assert(val >= 1); + conn->opt_sndbuf = val; + return 0; + + case UTP_RCVBUF: + assert(val >= 1); + conn->opt_rcvbuf = val; + return 0; + + case UTP_TARGET_DELAY: + conn->target_delay = val; + return 0; + } + + return -1; +} + +int utp_getsockopt(UTPSocket* conn, int opt) +{ + assert(conn); + if (!conn) return -1; + + switch (opt) { + case UTP_SNDBUF: return conn->opt_sndbuf; + case UTP_RCVBUF: return conn->opt_rcvbuf; + case UTP_TARGET_DELAY: return conn->target_delay; + } + + return -1; +} + +// Try to connect to a specified host. 
+int utp_connect(utp_socket *conn, const struct sockaddr *to, socklen_t tolen)
+{
+	assert(conn);
+	if (!conn) return -1;
+
+	assert(conn->state == CS_UNINITIALIZED);
+	if (conn->state != CS_UNINITIALIZED) {
+		conn->state = CS_DESTROY;
+		return -1;
+	}
+
+	utp_initialize_socket(conn, to, tolen, true, 0, 0, 1);
+
+	assert(conn->cur_window_packets == 0);
+	assert(conn->outbuf.get(conn->seq_nr) == NULL);
+	assert(sizeof(PacketFormatV1) == 20);
+
+	conn->state = CS_SYN_SENT;
+	conn->ctx->current_ms = utp_call_get_milliseconds(conn->ctx, conn);
+
+	// Create and send a connect message
+
+	// used in parse_log.py
+	conn->log(UTP_LOG_NORMAL, "UTP_Connect conn_seed:%u packet_size:%u (B) "
+			"target_delay:%u (ms) delay_history:%u "
+			"delay_base_history:%u (minutes)",
+			conn->conn_seed, PACKET_SIZE, conn->target_delay / 1000,
+			CUR_DELAY_SIZE, DELAY_BASE_HISTORY);
+
+	// Setup initial timeout timer.
+	conn->retransmit_timeout = 3000;
+	conn->rto_timeout = conn->ctx->current_ms + conn->retransmit_timeout;
+	conn->last_rcv_win = conn->get_rcv_window();
+
+	// if you need compatibility with 1.8.1, use this. it increases attackability though.
+	//conn->seq_nr = 1;
+	conn->seq_nr = utp_call_get_random(conn->ctx, conn);
+
+	// Create the connect packet.
+	const size_t header_size = sizeof(PacketFormatV1);
+
+	OutgoingPacket *pkt = (OutgoingPacket*)malloc(sizeof(OutgoingPacket) - 1 + header_size);
+	PacketFormatV1* p1 = (PacketFormatV1*)pkt->data;
+
+	memset(p1, 0, header_size);
+	// SYN packets are special, and have the receive ID in the connid field,
+	// instead of conn_id_send.
+ p1->set_version(1); + p1->set_type(ST_SYN); + p1->ext = 0; + p1->connid = conn->conn_id_recv; + p1->windowsize = (uint32)conn->last_rcv_win; + p1->seq_nr = conn->seq_nr; + pkt->transmissions = 0; + pkt->length = header_size; + pkt->payload = 0; + + /* + #if UTP_DEBUG_LOGGING + conn->log(UTP_LOG_DEBUG, "Sending connect %s [%u].", + addrfmt(conn->addr, addrbuf), conn_seed); + #endif + */ + + // Remember the message in the outgoing queue. + conn->outbuf.ensure_size(conn->seq_nr, conn->cur_window_packets); + conn->outbuf.put(conn->seq_nr, pkt); + conn->seq_nr++; + conn->cur_window_packets++; + + #if UTP_DEBUG_LOGGING + conn->log(UTP_LOG_DEBUG, "incrementing cur_window_packets:%u", conn->cur_window_packets); + #endif + + conn->send_packet(pkt); + return 0; +} + +// Returns 1 if the UDP payload was recognized as a UTP packet, or 0 if it was not +int utp_process_udp(utp_context *ctx, const byte *buffer, size_t len, const struct sockaddr *to, socklen_t tolen) +{ + assert(ctx); + if (!ctx) return 0; + + assert(buffer); + if (!buffer) return 0; + + assert(to); + if (!to) return 0; + + const PackedSockAddr addr((const SOCKADDR_STORAGE*)to, tolen); + + if (len < sizeof(PacketFormatV1)) { + #if UTP_DEBUG_LOGGING + ctx->log(UTP_LOG_DEBUG, NULL, "recv %s len:%u too small", addrfmt(addr, addrbuf), (uint)len); + #endif + return 0; + } + + const PacketFormatV1 *pf1 = (PacketFormatV1*)buffer; + const byte version = UTP_Version(pf1); + const uint32 id = uint32(pf1->connid); + + if (version != 1) { + #if UTP_DEBUG_LOGGING + ctx->log(UTP_LOG_DEBUG, NULL, "recv %s len:%u version:%u unsupported version", addrfmt(addr, addrbuf), (uint)len, version); + #endif + + return 0; + } + + #if UTP_DEBUG_LOGGING + ctx->log(UTP_LOG_DEBUG, NULL, "recv %s len:%u id:%u", addrfmt(addr, addrbuf), (uint)len, id); + ctx->log(UTP_LOG_DEBUG, NULL, "recv id:%u seq_nr:%u ack_nr:%u", id, (uint)pf1->seq_nr, (uint)pf1->ack_nr); + #endif + + const byte flags = pf1->type(); + + if (flags == ST_RESET) { + // id is 
either our recv id or our send id + // if it's our send id, and we initiated the connection, our recv id is id + 1 + // if it's our send id, and we did not initiate the connection, our recv id is id - 1 + // we have to check every case + + UTPSocketKeyData* keyData; + if ( (keyData = ctx->utp_sockets->Lookup(UTPSocketKey(addr, id))) || + ((keyData = ctx->utp_sockets->Lookup(UTPSocketKey(addr, id + 1))) && keyData->socket->conn_id_send == id) || + ((keyData = ctx->utp_sockets->Lookup(UTPSocketKey(addr, id - 1))) && keyData->socket->conn_id_send == id)) + { + UTPSocket* conn = keyData->socket; + + #if UTP_DEBUG_LOGGING + ctx->log(UTP_LOG_DEBUG, NULL, "recv RST for existing connection"); + #endif + + if (conn->state == CS_FIN_SENT) + conn->state = CS_DESTROY; + else + conn->state = CS_RESET; + + utp_call_on_overhead_statistics(conn->ctx, conn, false, len + conn->get_udp_overhead(), close_overhead); + const int err = (conn->state == CS_SYN_SENT) ? UTP_ECONNREFUSED : UTP_ECONNRESET; + utp_call_on_error(conn->ctx, conn, err); + } + else { + #if UTP_DEBUG_LOGGING + ctx->log(UTP_LOG_DEBUG, NULL, "recv RST for unknown connection"); + #endif + } + return 1; + } + else if (flags != ST_SYN) { + UTPSocket* conn = NULL; + + if (ctx->last_utp_socket && ctx->last_utp_socket->addr == addr && ctx->last_utp_socket->conn_id_recv == id) { + conn = ctx->last_utp_socket; + } else { + UTPSocketKeyData* keyData = ctx->utp_sockets->Lookup(UTPSocketKey(addr, id)); + if (keyData) { + conn = keyData->socket; + ctx->last_utp_socket = conn; + } + } + + if (conn) { + + #if UTP_DEBUG_LOGGING + ctx->log(UTP_LOG_DEBUG, NULL, "recv processing"); + #endif + + const size_t read = utp_process_incoming(conn, buffer, len); + utp_call_on_overhead_statistics(conn->ctx, conn, false, (len - read) + conn->get_udp_overhead(), header_overhead); + return 1; + } + } + + // We have not found a matching utp_socket, and this isn't a SYN. Reject it. 
+ const uint32 seq_nr = pf1->seq_nr; + if (flags != ST_SYN) { + ctx->current_ms = utp_call_get_milliseconds(ctx, NULL); + + for (size_t i = 0; i < ctx->rst_info.GetCount(); i++) { + if ((ctx->rst_info[i].connid == id) && + (ctx->rst_info[i].addr == addr) && + (ctx->rst_info[i].ack_nr == seq_nr)) + { + ctx->rst_info[i].timestamp = ctx->current_ms; + + #if UTP_DEBUG_LOGGING + ctx->log(UTP_LOG_DEBUG, NULL, "recv not sending RST to non-SYN (stored)"); + #endif + + return 1; + } + } + + if (ctx->rst_info.GetCount() > RST_INFO_LIMIT) { + + #if UTP_DEBUG_LOGGING + ctx->log(UTP_LOG_DEBUG, NULL, "recv not sending RST to non-SYN (limit at %u stored)", (uint)ctx->rst_info.GetCount()); + #endif + + return 1; + } + + #if UTP_DEBUG_LOGGING + ctx->log(UTP_LOG_DEBUG, NULL, "recv send RST to non-SYN (%u stored)", (uint)ctx->rst_info.GetCount()); + #endif + + RST_Info &r = ctx->rst_info.Append(); + r.addr = addr; + r.connid = id; + r.ack_nr = seq_nr; + r.timestamp = ctx->current_ms; + + UTPSocket::send_rst(ctx, addr, id, seq_nr, utp_call_get_random(ctx, NULL)); + return 1; + } + + if (ctx->callbacks[UTP_ON_ACCEPT]) { + + #if UTP_DEBUG_LOGGING + ctx->log(UTP_LOG_DEBUG, NULL, "Incoming connection from %s", addrfmt(addr, addrbuf)); + #endif + + UTPSocketKeyData* keyData = ctx->utp_sockets->Lookup(UTPSocketKey(addr, id + 1)); + if (keyData) { + + #if UTP_DEBUG_LOGGING + ctx->log(UTP_LOG_DEBUG, NULL, "rejected incoming connection, connection already exists"); + #endif + + return 1; + } + + if (ctx->utp_sockets->GetCount() > 3000) { + + #if UTP_DEBUG_LOGGING + ctx->log(UTP_LOG_DEBUG, NULL, "rejected incoming connection, too many uTP sockets %d", ctx->utp_sockets->GetCount()); + #endif + + return 1; + } + // true means yes, block connection. false means no, don't block. 
+ if (utp_call_on_firewall(ctx, to, tolen)) { + + #if UTP_DEBUG_LOGGING + ctx->log(UTP_LOG_DEBUG, NULL, "rejected incoming connection, firewall callback returned true"); + #endif + + return 1; + } + + // Create a new UTP socket to handle this new connection + UTPSocket *conn = utp_create_socket(ctx); + utp_initialize_socket(conn, to, tolen, false, id, id+1, id); + conn->ack_nr = seq_nr; + conn->seq_nr = utp_call_get_random(ctx, NULL); + conn->fast_resend_seq_nr = conn->seq_nr; + conn->state = CS_CONNECTED; + + const size_t read = utp_process_incoming(conn, buffer, len, true); + + #if UTP_DEBUG_LOGGING + ctx->log(UTP_LOG_DEBUG, NULL, "recv send connect ACK"); + #endif + + conn->send_ack(true); + + utp_call_on_accept(ctx, conn, to, tolen); + + // we report overhead after on_accept(), because the callbacks are setup now + utp_call_on_overhead_statistics(conn->ctx, conn, false, (len - read) + conn->get_udp_overhead(), header_overhead); // SYN + utp_call_on_overhead_statistics(conn->ctx, conn, true, conn->get_overhead(), ack_overhead); // SYNACK + } + else { + + #if UTP_DEBUG_LOGGING + ctx->log(UTP_LOG_DEBUG, NULL, "rejected incoming connection, UTP_ON_ACCEPT callback not set"); + #endif + + } + + return 1; +} + +// Called by utp_process_icmp_fragmentation() and utp_process_icmp_error() below +static UTPSocket* parse_icmp_payload(utp_context *ctx, const byte *buffer, size_t len, const struct sockaddr *to, socklen_t tolen) +{ + assert(ctx); + if (!ctx) return NULL; + + assert(buffer); + if (!buffer) return NULL; + + assert(to); + if (!to) return NULL; + + const PackedSockAddr addr((const SOCKADDR_STORAGE*)to, tolen); + + // ICMP packets are only required to quote the first 8 bytes of the layer4 + // payload. The UDP payload is 8 bytes, and the UTP header is another 20 + // bytes. So, in order to find the entire UTP header, we need the ICMP + // packet to quote 28 bytes. 
+	if (len < sizeof(PacketFormatV1)) {
+		#if UTP_DEBUG_LOGGING
+		ctx->log(UTP_LOG_DEBUG, NULL, "Ignoring ICMP from %s: runt length %d", addrfmt(addr, addrbuf), len);
+		#endif
+		return NULL;
+	}
+
+	const PacketFormatV1 *pf = (PacketFormatV1*)buffer;
+	const byte version = UTP_Version(pf);
+	const uint32 id = uint32(pf->connid);
+
+	if (version != 1) {
+		#if UTP_DEBUG_LOGGING
+		ctx->log(UTP_LOG_DEBUG, NULL, "Ignoring ICMP from %s: not UTP version 1", addrfmt(addr, addrbuf));
+		#endif
+		return NULL;
+	}
+
+	UTPSocketKeyData* keyData;
+
+	if ( (keyData = ctx->utp_sockets->Lookup(UTPSocketKey(addr, id))) ||
+		((keyData = ctx->utp_sockets->Lookup(UTPSocketKey(addr, id + 1))) && keyData->socket->conn_id_send == id) ||
+		((keyData = ctx->utp_sockets->Lookup(UTPSocketKey(addr, id - 1))) && keyData->socket->conn_id_send == id))
+	{
+		return keyData->socket;
+	}
+
+	#if UTP_DEBUG_LOGGING
+	ctx->log(UTP_LOG_DEBUG, NULL, "Ignoring ICMP from %s: No matching connection found for id %u", addrfmt(addr, addrbuf), id);
+	#endif
+	return NULL;
+}
+
+// Should be called when an ICMP Type 3, Code 4 packet (fragmentation needed) is received, to adjust the MTU
+//
+// Returns 1 if the UDP payload (delivered in the ICMP packet) was recognized as a UTP packet, or 0 if it was not
+//
+// @ctx: utp_context
+// @buf: Contents of the original UDP payload, which the ICMP packet quoted. *Not* the ICMP packet itself.
+// @len: buffer length
+// @to: destination address of the original UDP packet
+// @tolen: address length
+// @next_hop_mtu: 
+int utp_process_icmp_fragmentation(utp_context *ctx, const byte* buffer, size_t len, const struct sockaddr *to, socklen_t tolen, uint16 next_hop_mtu)
+{
+	UTPSocket* conn = parse_icmp_payload(ctx, buffer, len, to, tolen);
+	if (!conn) return 0;
+
+	// Constrain the next_hop_mtu to sane values.
 It might not be initialized or sent properly
+	if (next_hop_mtu >= 576 && next_hop_mtu < 0x2000) {
+		conn->mtu_ceiling = min(next_hop_mtu, conn->mtu_ceiling);
+		conn->mtu_search_update();
+		// this is something of a special case, where we don't set mtu_last
+		// to the value in between the floor and the ceiling. We can update the
+		// floor, because there might be more network segments after the one
+		// that sent this ICMP with smaller MTUs. But we want to test this
+		// MTU size first. If the next probe gets through, mtu_floor is updated
+		conn->mtu_last = conn->mtu_ceiling;
+	} else {
+		// Otherwise, binary search. At this point we don't actually know
+		// what size the packet that failed was, and apparently we can't
+		// trust the next hop mtu either. It seems reasonably conservative
+		// to just lower the ceiling. This should not happen on working networks
+		// anyway.
+		conn->mtu_ceiling = (conn->mtu_floor + conn->mtu_ceiling) / 2;
+		conn->mtu_search_update();
+	}
+
+	conn->log(UTP_LOG_MTU, "MTU [ICMP] floor:%d ceiling:%d current:%d", conn->mtu_floor, conn->mtu_ceiling, conn->mtu_last);
+	return 1;
+}
+
+// Should be called when an ICMP message is received that should tear down the connection.
+//
+// Returns 1 if the UDP payload (delivered in the ICMP packet) was recognized as a UTP packet, or 0 if it was not
+//
+// @ctx: utp_context
+// @buf: Contents of the original UDP payload, which the ICMP packet quoted. *Not* the ICMP packet itself.
+// @len: buffer length
+// @to: destination address of the original UDP packet
+// @tolen: address length
+int utp_process_icmp_error(utp_context *ctx, const byte *buffer, size_t len, const struct sockaddr *to, socklen_t tolen)
+{
+	UTPSocket* conn = parse_icmp_payload(ctx, buffer, len, to, tolen);
+	if (!conn) return 0;
+
+	const int err = (conn->state == CS_SYN_SENT) ? 
UTP_ECONNREFUSED : UTP_ECONNRESET; + const PackedSockAddr addr((const SOCKADDR_STORAGE*)to, tolen); + + switch(conn->state) { + // Don't pass on errors for idle/closed connections + case CS_IDLE: + #if UTP_DEBUG_LOGGING + ctx->log(UTP_LOG_DEBUG, NULL, "ICMP from %s in state CS_IDLE, ignoring", addrfmt(addr, addrbuf)); + #endif + return 1; + + case CS_FIN_SENT: + #if UTP_DEBUG_LOGGING + ctx->log(UTP_LOG_DEBUG, NULL, "ICMP from %s in state CS_FIN_SENT, setting state to CS_DESTROY and causing error %d", addrfmt(addr, addrbuf), err); + #endif + conn->state = CS_DESTROY; + break; + + default: + #if UTP_DEBUG_LOGGING + ctx->log(UTP_LOG_DEBUG, NULL, "ICMP from %s, setting state to CS_RESET and causing error %d", addrfmt(addr, addrbuf), err); + #endif + conn->state = CS_RESET; + break; + } + + utp_call_on_error(conn->ctx, conn, err); + return 1; +} + +// Write bytes to the UTP socket. Returns the number of bytes written. +// 0 indicates the socket is no longer writable, -1 indicates an error +ssize_t utp_writev(utp_socket *conn, struct utp_iovec *iovec_input, size_t num_iovecs) +{ + static utp_iovec iovec[UTP_IOV_MAX]; + + assert(conn); + if (!conn) return -1; + + assert(iovec_input); + if (!iovec_input) return -1; + + assert(num_iovecs); + if (!num_iovecs) return -1; + + if (num_iovecs > UTP_IOV_MAX) + num_iovecs = UTP_IOV_MAX; + + memcpy(iovec, iovec_input, sizeof(struct utp_iovec)*num_iovecs); + + size_t bytes = 0; + size_t sent = 0; + for (size_t i = 0; i < num_iovecs; i++) + bytes += iovec[i].iov_len; + + #if UTP_DEBUG_LOGGING + size_t param = bytes; + #endif + + if (conn->state != CS_CONNECTED) { + #if UTP_DEBUG_LOGGING + conn->log(UTP_LOG_DEBUG, "UTP_Write %u bytes = false (not CS_CONNECTED)", (uint)bytes); + #endif + return 0; + } + + conn->ctx->current_ms = utp_call_get_milliseconds(conn->ctx, conn); + + // don't send unless it will all fit in the window + size_t packet_size = conn->get_packet_size(); + size_t num_to_send = min(bytes, packet_size); + while 
(!conn->is_full(num_to_send)) { + // Send an outgoing packet. + // Also add it to the outgoing of packets that have been sent but not ACKed. + + bytes -= num_to_send; + sent += num_to_send; + + #if UTP_DEBUG_LOGGING + conn->log(UTP_LOG_DEBUG, "Sending packet. seq_nr:%u ack_nr:%u wnd:%u/%u/%u rcv_win:%u size:%u cur_window_packets:%u", + conn->seq_nr, conn->ack_nr, + (uint)(conn->cur_window + num_to_send), + (uint)conn->max_window, (uint)conn->max_window_user, + (uint)conn->last_rcv_win, num_to_send, + conn->cur_window_packets); + #endif + conn->write_outgoing_packet(num_to_send, ST_DATA, iovec, num_iovecs); + num_to_send = min(bytes, packet_size); + + if (num_to_send == 0) { + #if UTP_DEBUG_LOGGING + conn->log(UTP_LOG_DEBUG, "UTP_Write %u bytes = true", (uint)param); + #endif + return sent; + } + } + + bool full = conn->is_full(); + if (full) { + // mark the socket as not being writable. + conn->state = CS_CONNECTED_FULL; + } + + #if UTP_DEBUG_LOGGING + conn->log(UTP_LOG_DEBUG, "UTP_Write %u bytes = %s", (uint)bytes, full ? 
"false" : "true"); + #endif + + // returns whether or not the socket is still writable + // if the congestion window is not full, we can still write to it + //return !full; + return sent; +} + +void utp_read_drained(utp_socket *conn) +{ + assert(conn); + if (!conn) return; + + assert(conn->state != CS_UNINITIALIZED); + if (conn->state == CS_UNINITIALIZED) return; + + const size_t rcvwin = conn->get_rcv_window(); + + if (rcvwin > conn->last_rcv_win) { + // If last window was 0 send ACK immediately, otherwise should set timer + if (conn->last_rcv_win == 0) { + conn->send_ack(); + } else { + conn->ctx->current_ms = utp_call_get_milliseconds(conn->ctx, conn); + conn->schedule_ack(); + } + } +} + +// Should be called each time the UDP socket is drained +void utp_issue_deferred_acks(utp_context *ctx) +{ + assert(ctx); + if (!ctx) return; + + for (size_t i = 0; i < ctx->ack_sockets.GetCount(); i++) { + UTPSocket *conn = ctx->ack_sockets[i]; + conn->send_ack(); + i--; + } +} + +// Should be called every 500ms +void utp_check_timeouts(utp_context *ctx) +{ + assert(ctx); + if (!ctx) return; + + ctx->current_ms = utp_call_get_milliseconds(ctx, NULL); + + if (ctx->current_ms - ctx->last_check < TIMEOUT_CHECK_INTERVAL) + return; + + ctx->last_check = ctx->current_ms; + + for (size_t i = 0; i < ctx->rst_info.GetCount(); i++) { + if ((int)(ctx->current_ms - ctx->rst_info[i].timestamp) >= RST_INFO_TIMEOUT) { + ctx->rst_info.MoveUpLast(i); + i--; + } + } + if (ctx->rst_info.GetCount() != ctx->rst_info.GetAlloc()) { + ctx->rst_info.Compact(); + } + + utp_hash_iterator_t it; + UTPSocketKeyData* keyData; + while ((keyData = ctx->utp_sockets->Iterate(it))) { + UTPSocket *conn = keyData->socket; + conn->check_timeouts(); + + // Check if the object was deleted + if (conn->state == CS_DESTROY) { + #if UTP_DEBUG_LOGGING + conn->log(UTP_LOG_DEBUG, "Destroying"); + #endif + delete conn; + } + } +} + +int utp_getpeername(utp_socket *conn, struct sockaddr *addr, socklen_t *addrlen) +{ + 
assert(addr); + if (!addr) return -1; + + assert(addrlen); + if (!addrlen) return -1; + + assert(conn); + if (!conn) return -1; + + assert(conn->state != CS_UNINITIALIZED); + if (conn->state == CS_UNINITIALIZED) return -1; + + socklen_t len; + const SOCKADDR_STORAGE sa = conn->addr.get_sockaddr_storage(&len); + *addrlen = min(len, *addrlen); + memcpy(addr, &sa, *addrlen); + return 0; +} + +int utp_get_delays(UTPSocket *conn, uint32 *ours, uint32 *theirs, uint32 *age) +{ + assert(conn); + if (!conn) return -1; + + assert(conn->state != CS_UNINITIALIZED); + if (conn->state == CS_UNINITIALIZED) { + if (ours) *ours = 0; + if (theirs) *theirs = 0; + if (age) *age = 0; + return -1; + } + + if (ours) *ours = conn->our_hist.get_value(); + if (theirs) *theirs = conn->their_hist.get_value(); + if (age) *age = conn->ctx->current_ms - conn->last_measured_delay; + return 0; +} + +// Close the UTP socket. +// It is not valid for the upper layer to refer to socket after it is closed. +// Data will keep to try being delivered after the close. +void utp_close(UTPSocket *conn) +{ + assert(conn); + if (!conn) return; + + assert(conn->state != CS_UNINITIALIZED + && conn->state != CS_DESTROY_DELAY + && conn->state != CS_FIN_SENT + && conn->state != CS_DESTROY); + + #if UTP_DEBUG_LOGGING + conn->log(UTP_LOG_DEBUG, "UTP_Close in state:%s", statenames[conn->state]); + #endif + + switch(conn->state) { + case CS_CONNECTED: + case CS_CONNECTED_FULL: + conn->state = CS_FIN_SENT; + conn->write_outgoing_packet(0, ST_FIN, NULL, 0); + break; + + case CS_SYN_SENT: + conn->rto_timeout = utp_call_get_milliseconds(conn->ctx, conn) + min(conn->rto * 2, 60); + // fall through + case CS_GOT_FIN: + conn->state = CS_DESTROY_DELAY; + break; + + default: + conn->state = CS_DESTROY; + break; + } +} + +utp_context* utp_get_context(utp_socket *socket) { + assert(socket); + return socket ? 
socket->ctx : NULL; +} + +void* utp_set_userdata(utp_socket *socket, void *userdata) { + assert(socket); + if (socket) socket->userdata = userdata; + return socket ? socket->userdata : NULL; +} + +void* utp_get_userdata(utp_socket *socket) { + assert(socket); + return socket ? socket->userdata : NULL; +} + +void struct_utp_context::log(int level, utp_socket *socket, char const *fmt, ...) +{ + switch (level) { + case UTP_LOG_NORMAL: if (!log_normal) return; + case UTP_LOG_MTU: if (!log_mtu) return; + case UTP_LOG_DEBUG: if (!log_debug) return; + } + + va_list va; + char buf[4096]; + + va_start(va, fmt); + vsnprintf(buf, 4096, fmt, va); + buf[4095] = '\0'; + va_end(va); + + utp_call_log(this, socket, (const byte *)buf); +} + +utp_socket_stats* utp_get_stats(utp_socket *socket) +{ + #ifdef _DEBUG + assert(socket); + if (!socket) return NULL; + socket->_stats.mtu_guess = socket->mtu_last ? socket->mtu_last : socket->mtu_ceiling; + return &socket->_stats; + #else + return NULL; + #endif +} diff --git a/utp_internal.h b/utp_internal.h index 0086d4c..f9ef605 100644 --- a/utp_internal.h +++ b/utp_internal.h @@ -1,165 +1,139 @@ -#ifndef __UTP_H__ -#define __UTP_H__ - -#include "utypes.h" - -#ifdef WIN32 -#define _CRT_SECURE_NO_DEPRECATE -#define WIN32_LEAN_AND_MEAN -#include -#include -#include -#pragma comment(lib,"ws2_32.lib") -#else -#include -#include -#include -#include -#include -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -struct UTPSocket; - -// Used to set sockopt on a uTP socket to set the version of uTP -// to use for outgoing connections. 
This can only be called before -// the uTP socket is connected -#define SO_UTPVERSION 99 - -enum { - // socket has reveived syn-ack (notification only for outgoing connection completion) - // this implies writability - UTP_STATE_CONNECT = 1, - - // socket is able to send more data - UTP_STATE_WRITABLE = 2, - - // connection closed - UTP_STATE_EOF = 3, - - // socket is being destroyed, meaning all data has been sent if possible. - // it is not valid to refer to the socket after this state change occurs - UTP_STATE_DESTROYING = 4, -}; - -// Callbacks called by a uTP socket (register with UTP_SetCallbacks) - -// The uTP socket layer calls this when bytes have been received from the network. -typedef void UTPOnReadProc(void *userdata, const byte *bytes, size_t count); - -// The uTP socket layer calls this to fill the outgoing buffer with bytes. -// The uTP layer takes responsibility that those bytes will be delivered. -typedef void UTPOnWriteProc(void *userdata, byte *bytes, size_t count); - -// The uTP socket layer calls this to retrieve number of bytes currently in read buffer -typedef size_t UTPGetRBSize(void *userdata); - -// The uTP socket layer calls this whenever the socket becomes writable. -typedef void UTPOnStateChangeProc(void *userdata, int state); - -// The uTP socket layer calls this when an error occurs on the socket. -// These errors currently include ECONNREFUSED, ECONNRESET and ETIMEDOUT, but -// could eventually include any BSD socket error. 
-typedef void UTPOnErrorProc(void *userdata, int errcode); - -// The uTP socket layer calls this to report overhead statistics -typedef void UTPOnOverheadProc(void *userdata, bool send, size_t count, int type); - -struct UTPFunctionTable { - UTPOnReadProc *on_read; - UTPOnWriteProc *on_write; - UTPGetRBSize *get_rb_size; - UTPOnStateChangeProc *on_state; - UTPOnErrorProc *on_error; - UTPOnOverheadProc *on_overhead; -}; - - -// The uTP socket layer calls this when a new incoming uTP connection is established -// this implies writability -typedef void UTPGotIncomingConnection(void *userdata, struct UTPSocket* s); - -// The uTP socket layer calls this to send UDP packets -typedef void SendToProc(void *userdata, const byte *p, size_t len, const struct sockaddr *to, socklen_t tolen); - - -// Functions which can be called with a uTP socket - -// Create a uTP socket -struct UTPSocket *UTP_Create(SendToProc *send_to_proc, void *send_to_userdata, - const struct sockaddr *addr, socklen_t addrlen); - -// Setup the callbacks - must be done before connect or on incoming connection -void UTP_SetCallbacks(struct UTPSocket *socket, struct UTPFunctionTable *func, void *userdata); - -// Valid options include SO_SNDBUF, SO_RCVBUF and SO_UTPVERSION -bool UTP_SetSockopt(struct UTPSocket *socket, int opt, int val); - -// Try to connect to a specified host. -void UTP_Connect(struct UTPSocket *socket); - -// Process a UDP packet from the network. This will process a packet for an existing connection, -// or create a new connection and call incoming_proc. Returns true if the packet was processed -// in some way, false if the packet did not appear to be uTP. -bool UTP_IsIncomingUTP(UTPGotIncomingConnection *incoming_proc, - SendToProc *send_to_proc, void *send_to_userdata, - const byte *buffer, size_t len, const struct sockaddr *to, socklen_t tolen); - -// Process an ICMP received UDP packet. 
-bool UTP_HandleICMP(const byte* buffer, size_t len, const struct sockaddr *to, socklen_t tolen); - -// Write bytes to the uTP socket. -// Returns true if the socket is still writable. -bool UTP_Write(struct UTPSocket *socket, size_t count); - -// Notify the uTP socket of buffer drain -void UTP_RBDrained(struct UTPSocket *socket); - -// Call periodically to process timeouts and other periodic events -void UTP_CheckTimeouts(void); - -// Retrieves the peer address of the specified socket, stores this address in the -// sockaddr structure pointed to by the addr argument, and stores the length of this -// address in the object pointed to by the addrlen argument. -void UTP_GetPeerName(struct UTPSocket *socket, struct sockaddr *addr, socklen_t *addrlen); - -void UTP_GetDelays(struct UTPSocket *socket, int32 *ours, int32 *theirs, uint32 *age); - -size_t UTP_GetPacketSize(struct UTPSocket *socket); - -#ifdef _DEBUG -struct UTPStats { - uint64 _nbytes_recv; // total bytes received - uint64 _nbytes_xmit; // total bytes transmitted - uint32 _rexmit; // retransmit counter - uint32 _fastrexmit; // fast retransmit counter - uint32 _nxmit; // transmit counter - uint32 _nrecv; // receive counter (total) - uint32 _nduprecv; // duplicate receive counter -}; - -// Get stats for UTP socket -void UTP_GetStats(struct UTPSocket *socket, UTPStats *stats); -#endif - -// Close the UTP socket. -// It is not valid to issue commands for this socket after it is closed. -// This does not actually destroy the socket until outstanding data is sent, at which -// point the socket will change to the UTP_STATE_DESTROYING state. 
-void UTP_Close(struct UTPSocket *socket); - -struct UTPGlobalStats { - uint32 _nraw_recv[5]; // total packets recieved less than 300/600/1200/MTU bytes fpr all connections (global) - uint32 _nraw_send[5]; // total packets sent less than 300/600/1200/MTU bytes for all connections (global) -}; - -void UTP_GetGlobalStats(struct UTPGlobalStats *stats); - -#ifdef __cplusplus -} -#endif - -#endif //__UTP_H__ +/* + * Copyright (c) 2010-2013 BitTorrent, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#ifndef __UTP_INTERNAL_H__ +#define __UTP_INTERNAL_H__ + +#include +#include +#include +#include + +#include "utp.h" +#include "utp_callbacks.h" +#include "utp_templates.h" +#include "utp_hash.h" +#include "utp_hash.h" +#include "utp_packedsockaddr.h" + +/* These originally lived in utp_config.h */ +#define CCONTROL_TARGET (100 * 1000) // us + +enum bandwidth_type_t { + payload_bandwidth, connect_overhead, + close_overhead, ack_overhead, + header_overhead, retransmit_overhead +}; + +#ifdef WIN32 + #ifdef _MSC_VER + #include "win32_inet_ntop.h" + #endif + + // newer versions of MSVC define these in errno.h + #ifndef ECONNRESET + #define ECONNRESET WSAECONNRESET + #define EMSGSIZE WSAEMSGSIZE + #define ECONNREFUSED WSAECONNREFUSED + #define ETIMEDOUT WSAETIMEDOUT + #endif +#endif + +struct PACKED_ATTRIBUTE RST_Info { + PackedSockAddr addr; + uint32 connid; + uint16 ack_nr; + uint64 timestamp; +}; + +// It's really important that we don't have duplicate keys in the hash table. +// If we do, we'll eventually crash. if we try to remove the second instance +// of the key, we'll accidentally remove the first instead. then later, +// checkTimeouts will try to access the second one's already freed memory. 
+void UTP_FreeAll(struct UTPSocketHT *utp_sockets); + +struct UTPSocketKey { + PackedSockAddr addr; + uint32 recv_id; // "conn_seed", "conn_id" + + UTPSocketKey(const PackedSockAddr& _addr, uint32 _recv_id) { + memset(this, 0, sizeof(*this)); + addr = _addr; + recv_id = _recv_id; + } + + bool operator == (const UTPSocketKey &other) const { + return recv_id == other.recv_id && addr == other.addr; + } + + uint32 compute_hash() const { + return recv_id ^ addr.compute_hash(); + } +}; + +struct UTPSocketKeyData { + UTPSocketKey key; + UTPSocket *socket; + utp_link_t link; +}; + +#define UTP_SOCKET_BUCKETS 79 +#define UTP_SOCKET_INIT 15 + +struct UTPSocketHT : utpHashTable { + UTPSocketHT() { + const int buckets = UTP_SOCKET_BUCKETS; + const int initial = UTP_SOCKET_INIT; + this->Create(buckets, initial); + } + ~UTPSocketHT() { + UTP_FreeAll(this); + this->Free(); + } +}; + +struct struct_utp_context { + void *userdata; + utp_callback_t* callbacks[UTP_ARRAY_SIZE]; + + uint64 current_ms; + utp_context_stats context_stats; + UTPSocket *last_utp_socket; + Array ack_sockets; + Array rst_info; + UTPSocketHT *utp_sockets; + size_t target_delay; + size_t opt_sndbuf; + size_t opt_rcvbuf; + uint64 last_check; + + struct_utp_context(); + ~struct_utp_context(); + + void log(int level, utp_socket *socket, char const *fmt, ...); + + bool log_normal:1; // log normal events? + bool log_mtu:1; // log MTU related events? + bool log_debug:1; // log debugging events? (Must also compile with UTP_DEBUG_LOGGING defined) +}; + +#endif //__UTP_INTERNAL_H__ diff --git a/utp_packedsockaddr.cpp b/utp_packedsockaddr.cpp new file mode 100644 index 0000000..e73e6c6 --- /dev/null +++ b/utp_packedsockaddr.cpp @@ -0,0 +1,141 @@ +// vim:set ts=4 sw=4 ai: + +/* + * Copyright (c) 2010-2013 BitTorrent, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include +#include +#include + +#include "utp_types.h" +#include "utp_hash.h" +#include "utp_packedsockaddr.h" + +#ifdef WIN32 + #include "win32_inet_ntop.h" +#endif + +byte PackedSockAddr::get_family() const +{ + #if defined(__WINE__) || defined(__sh__) + return ((_sin6d[0] == 0) && (_sin6d[1] == 0) && (_sin6d[2] == htonl(0xffff)) != 0) ? + AF_INET : AF_INET6; + #else + return (IN6_IS_ADDR_V4MAPPED(&_in._in6addr) != 0) ? 
AF_INET : AF_INET6; + #endif // defined(__WINE__) || defined(__sh__) +} + +bool PackedSockAddr::operator==(const PackedSockAddr& rhs) const +{ + if (&rhs == this) + return true; + if (_port != rhs._port) + return false; + return memcmp(_sin6, rhs._sin6, sizeof(_sin6)) == 0; +} + +bool PackedSockAddr::operator!=(const PackedSockAddr& rhs) const +{ + return !(*this == rhs); +} + +uint32 PackedSockAddr::compute_hash() const { + return utp_hash_mem(&_in, sizeof(_in)) ^ _port; +} + +void PackedSockAddr::set(const SOCKADDR_STORAGE* sa, socklen_t len) +{ + if (sa->ss_family == AF_INET) { + assert(len >= sizeof(sockaddr_in)); + const sockaddr_in *sin = (sockaddr_in*)sa; + _sin6w[0] = 0; + _sin6w[1] = 0; + _sin6w[2] = 0; + _sin6w[3] = 0; + _sin6w[4] = 0; + _sin6w[5] = 0xffff; + _sin4 = sin->sin_addr.s_addr; + _port = ntohs(sin->sin_port); + } else { + assert(len >= sizeof(sockaddr_in6)); + const sockaddr_in6 *sin6 = (sockaddr_in6*)sa; + _in._in6addr = sin6->sin6_addr; + _port = ntohs(sin6->sin6_port); + } +} + +PackedSockAddr::PackedSockAddr(const SOCKADDR_STORAGE* sa, socklen_t len) +{ + set(sa, len); +} + +PackedSockAddr::PackedSockAddr(void) +{ + SOCKADDR_STORAGE sa; + socklen_t len = sizeof(SOCKADDR_STORAGE); + memset(&sa, 0, len); + sa.ss_family = AF_INET; + set(&sa, len); +} + +SOCKADDR_STORAGE PackedSockAddr::get_sockaddr_storage(socklen_t *len = NULL) const +{ + SOCKADDR_STORAGE sa; + const byte family = get_family(); + if (family == AF_INET) { + sockaddr_in *sin = (sockaddr_in*)&sa; + if (len) *len = sizeof(sockaddr_in); + memset(sin, 0, sizeof(sockaddr_in)); + sin->sin_family = family; + sin->sin_port = htons(_port); + sin->sin_addr.s_addr = _sin4; + } else { + sockaddr_in6 *sin6 = (sockaddr_in6*)&sa; + memset(sin6, 0, sizeof(sockaddr_in6)); + if (len) *len = sizeof(sockaddr_in6); + sin6->sin6_family = family; + sin6->sin6_addr = _in._in6addr; + sin6->sin6_port = htons(_port); + } + return sa; +} + +// #define addrfmt(x, s) x.fmt(s, sizeof(s)) +cstr 
PackedSockAddr::fmt(str s, size_t len) const +{ + memset(s, 0, len); + const byte family = get_family(); + str i; + if (family == AF_INET) { + inet_ntop(family, (uint32*)&_sin4, s, len); + i = s; + while (*++i) {} + } else { + i = s; + *i++ = '['; + inet_ntop(family, (in6_addr*)&_in._in6addr, i, len-1); + while (*++i) {} + *i++ = ']'; + } + snprintf(i, len - (i-s), ":%u", _port); + return s; +} diff --git a/utp_packedsockaddr.h b/utp_packedsockaddr.h new file mode 100644 index 0000000..76e8acc --- /dev/null +++ b/utp_packedsockaddr.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2010-2013 BitTorrent, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#ifndef __UTP_PACKEDSOCKADDR_H__ +#define __UTP_PACKEDSOCKADDR_H__ + +#include "utp_types.h" + +struct PACKED_ATTRIBUTE PackedSockAddr { + // The values are always stored here in network byte order + union { + byte _in6[16]; // IPv6 + uint16 _in6w[8]; // IPv6, word based (for convenience) + uint32 _in6d[4]; // Dword access + in6_addr _in6addr; // For convenience + } _in; + + // Host byte order + uint16 _port; + + #define _sin4 _in._in6d[3] // IPv4 is stored where it goes if mapped + + #define _sin6 _in._in6 + #define _sin6w _in._in6w + #define _sin6d _in._in6d + + byte get_family() const; + bool operator==(const PackedSockAddr& rhs) const; + bool operator!=(const PackedSockAddr& rhs) const; + void set(const SOCKADDR_STORAGE* sa, socklen_t len); + + PackedSockAddr(const SOCKADDR_STORAGE* sa, socklen_t len); + PackedSockAddr(void); + + SOCKADDR_STORAGE get_sockaddr_storage(socklen_t *len) const; + cstr fmt(str s, size_t len) const; + + uint32 compute_hash() const; +} ALIGNED_ATTRIBUTE(4); + +#endif //__UTP_PACKEDSOCKADDR_H__ diff --git a/utp_templates.h b/utp_templates.h index 9684b52..8f88f5c 100644 --- a/utp_templates.h +++ b/utp_templates.h @@ -1,186 +1,195 @@ -#ifndef __TEMPLATES_H__ -#define __TEMPLATES_H__ - -#include "utypes.h" -#include - -#if defined(POSIX) -/* Allow over-writing FORCEINLINE from makefile because gcc 3.4.4 for buffalo - doesn't seem to support __attribute__((always_inline)) in -O0 build - (strangely, it works in -Os build) */ -#ifndef FORCEINLINE -// The always_inline attribute asks gcc to inline the function even if no optimization is being requested. -// This macro should be used exclusive-or with the inline directive (use one or the other but not both) -// since Microsoft uses __forceinline to also mean inline, -// and this code is following a Microsoft compatibility model. 
-// Just setting the attribute without also specifying the inline directive apparently won't inline the function, -// as evidenced by multiply-defined symbols found at link time. -#define FORCEINLINE inline __attribute__((always_inline)) -#endif -#endif - -#ifdef __GNUC__ -// Used for gcc tool chains accepting but not supporting pragma pack -// See http://gcc.gnu.org/onlinedocs/gcc/Type-Attributes.html -#define PACKED_ATTRIBUTE __attribute__((__packed__)) -#else -#define PACKED_ATTRIBUTE -#endif - -#ifdef __GNUC__ -#define ALIGNED_ATTRIBUTE(x) __attribute__((aligned (x))) -#else -#define ALIGNED_ATTRIBUTE(x) -#endif - -// Utility templates -#undef min -#undef max - -template static inline T min(T a, T b) { if (a < b) return a; return b; } -template static inline T max(T a, T b) { if (a > b) return a; return b; } - -template static inline T min(T a, T b, T c) { return min(min(a,b),c); } -template static inline T max(T a, T b, T c) { return max(max(a,b),c); } -template static inline T clamp(T v, T mi, T ma) -{ - if (v > ma) v = ma; - if (v < mi) v = mi; - return v; -} - -#if (defined(__SVR4) && defined(__sun)) -#pragma pack(1) -#else -#pragma pack(push,1) -#endif - -namespace aux -{ - FORCEINLINE uint16 host_to_network(uint16 i) { return htons(i); } - FORCEINLINE uint32 host_to_network(uint32 i) { return htonl(i); } - FORCEINLINE int32 host_to_network(int32 i) { return htonl(i); } - FORCEINLINE uint16 network_to_host(uint16 i) { return ntohs(i); } - FORCEINLINE uint32 network_to_host(uint32 i) { return ntohl(i); } - FORCEINLINE int32 network_to_host(int32 i) { return ntohl(i); } -} - -template -struct PACKED_ATTRIBUTE big_endian -{ - T operator=(T i) { m_integer = aux::host_to_network(i); return i; } - operator T() const { return aux::network_to_host(m_integer); } -private: - T m_integer; -}; - -typedef big_endian int32_big; -typedef big_endian uint32_big; -typedef big_endian uint16_big; - -#if (defined(__SVR4) && defined(__sun)) -#pragma pack(0) -#else -#pragma 
pack(pop) -#endif - -template static inline void zeromem(T *a, size_t count = 1) { memset(a, 0, count * sizeof(T)); } - -typedef int SortCompareProc(const void *, const void *); - -template static FORCEINLINE void QuickSortT(T *base, size_t num, int (*comp)(const T *, const T *)) { qsort(base, num, sizeof(T), (SortCompareProc*)comp); } - - -// WARNING: The template parameter MUST be a POD type! -template class Array { -protected: - T *mem; - size_t alloc,count; - -public: - Array(size_t init) { Init(init); } - Array() { Init(); } - ~Array() { Free(); } - - void inline Init() { mem = NULL; alloc = count = 0; } - void inline Init(size_t init) { Init(); if (init) Resize(init); } - size_t inline GetCount() const { return count; } - size_t inline GetAlloc() const { return alloc; } - void inline SetCount(size_t c) { count = c; } - - inline T& operator[](size_t offset) { assert(offset ==0 || offset(minsize, alloc * 2)); } - - inline size_t Append(const T &t) { - if (count >= alloc) Grow(); - size_t r=count++; - mem[r] = t; - return r; - } - - T inline &Append() { - if (count >= alloc) Grow(); - return mem[count++]; - } - - void inline Compact() { - Resize(count); - } - - void inline Free() { - free(mem); - Init(); - } - - void inline Clear() { - count = 0; - } - - bool inline MoveUpLast(size_t index) { - assert(index < count); - size_t c = --count; - if (index != c) { - mem[index] = mem[c]; - return true; - } - return false; - } - - bool inline MoveUpLastExist(const T &v) { - return MoveUpLast(LookupElementExist(v)); - } - - size_t inline LookupElement(const T &v) const { - for(size_t i = 0; i != count; i++) - if (mem[i] == v) - return i; - return (size_t) -1; - } - - bool inline HasElement(const T &v) const { - return LookupElement(v) != -1; - } - - typedef int SortCompareProc(const T *a, const T *b); - - void Sort(SortCompareProc* proc, size_t start, size_t end) { - QuickSortT(&mem[start], end - start, proc); - } - - void Sort(SortCompareProc* proc, size_t start) { - 
Sort(proc, start, count); - } - - void Sort(SortCompareProc* proc) { - Sort(proc, 0, count); - } -}; - -#endif //__TEMPLATES_H__ +/* + * Copyright (c) 2010-2013 BitTorrent, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef __TEMPLATES_H__ +#define __TEMPLATES_H__ + +#include "utp_types.h" +#include + +#if defined(POSIX) +/* Allow over-writing FORCEINLINE from makefile because gcc 3.4.4 for buffalo + doesn't seem to support __attribute__((always_inline)) in -O0 build + (strangely, it works in -Os build) */ +#ifndef FORCEINLINE +// The always_inline attribute asks gcc to inline the function even if no optimization is being requested. +// This macro should be used exclusive-or with the inline directive (use one or the other but not both) +// since Microsoft uses __forceinline to also mean inline, +// and this code is following a Microsoft compatibility model. 
+// Just setting the attribute without also specifying the inline directive apparently won't inline the function, +// as evidenced by multiply-defined symbols found at link time. +#define FORCEINLINE inline __attribute__((always_inline)) +#endif +#endif + +// Utility templates +#undef min +#undef max + +template static inline T min(T a, T b) { if (a < b) return a; return b; } +template static inline T max(T a, T b) { if (a > b) return a; return b; } + +template static inline T min(T a, T b, T c) { return min(min(a,b),c); } +template static inline T max(T a, T b, T c) { return max(max(a,b),c); } +template static inline T clamp(T v, T mi, T ma) +{ + if (v > ma) v = ma; + if (v < mi) v = mi; + return v; +} + +#if (defined(__SVR4) && defined(__sun)) + #pragma pack(1) +#else + #pragma pack(push,1) +#endif + + +namespace aux +{ + FORCEINLINE uint16 host_to_network(uint16 i) { return htons(i); } + FORCEINLINE uint32 host_to_network(uint32 i) { return htonl(i); } + FORCEINLINE int32 host_to_network(int32 i) { return htonl(i); } + FORCEINLINE uint16 network_to_host(uint16 i) { return ntohs(i); } + FORCEINLINE uint32 network_to_host(uint32 i) { return ntohl(i); } + FORCEINLINE int32 network_to_host(int32 i) { return ntohl(i); } +} + +template +struct PACKED_ATTRIBUTE big_endian +{ + T operator=(T i) { m_integer = aux::host_to_network(i); return i; } + operator T() const { return aux::network_to_host(m_integer); } +private: + T m_integer; +}; + +typedef big_endian int32_big; +typedef big_endian uint32_big; +typedef big_endian uint16_big; + +#if (defined(__SVR4) && defined(__sun)) + #pragma pack(0) +#else + #pragma pack(pop) +#endif + +template static inline void zeromem(T *a, size_t count = 1) { memset(a, 0, count * sizeof(T)); } + +typedef int SortCompareProc(const void *, const void *); + +template static FORCEINLINE void QuickSortT(T *base, size_t num, int (*comp)(const T *, const T *)) { qsort(base, num, sizeof(T), (SortCompareProc*)comp); } + + +// WARNING: The template 
parameter MUST be a POD type! +template class Array { +protected: + T *mem; + size_t alloc,count; + +public: + Array(size_t init) { Init(init); } + Array() { Init(); } + ~Array() { Free(); } + + void inline Init() { mem = NULL; alloc = count = 0; } + void inline Init(size_t init) { Init(); if (init) Resize(init); } + size_t inline GetCount() const { return count; } + size_t inline GetAlloc() const { return alloc; } + void inline SetCount(size_t c) { count = c; } + + inline T& operator[](size_t offset) { assert(offset ==0 || offset(minsize, alloc * 2)); } + + inline size_t Append(const T &t) { + if (count >= alloc) Grow(); + size_t r=count++; + mem[r] = t; + return r; + } + + T inline &Append() { + if (count >= alloc) Grow(); + return mem[count++]; + } + + void inline Compact() { + Resize(count); + } + + void inline Free() { + free(mem); + Init(); + } + + void inline Clear() { + count = 0; + } + + bool inline MoveUpLast(size_t index) { + assert(index < count); + size_t c = --count; + if (index != c) { + mem[index] = mem[c]; + return true; + } + return false; + } + + bool inline MoveUpLastExist(const T &v) { + return MoveUpLast(LookupElementExist(v)); + } + + size_t inline LookupElement(const T &v) const { + for(size_t i = 0; i != count; i++) + if (mem[i] == v) + return i; + return (size_t) -1; + } + + bool inline HasElement(const T &v) const { + return LookupElement(v) != -1; + } + + typedef int SortCompareProc(const T *a, const T *b); + + void Sort(SortCompareProc* proc, size_t start, size_t end) { + QuickSortT(&mem[start], end - start, proc); + } + + void Sort(SortCompareProc* proc, size_t start) { + Sort(proc, start, count); + } + + void Sort(SortCompareProc* proc) { + Sort(proc, 0, count); + } +}; + +#endif //__TEMPLATES_H__ diff --git a/utp_types.h b/utp_types.h index 79bafcd..c47772b 100644 --- a/utp_types.h +++ b/utp_types.h @@ -1,42 +1,120 @@ -#ifndef __UTYPES_H__ -#define __UTYPES_H__ - -// standard types -typedef unsigned char byte; -typedef unsigned char 
uint8; -typedef signed char int8; -typedef unsigned short uint16; -typedef signed short int16; -typedef unsigned int uint; -typedef unsigned int uint32; -typedef signed int int32; - -#ifdef _MSC_VER -typedef unsigned __int64 uint64; -typedef signed __int64 int64; -#else -typedef unsigned long long uint64; -typedef long long int64; -#endif - -/* compile-time assert */ -#ifndef CASSERT -#define CASSERT( exp, name ) typedef int is_not_##name [ (exp ) ? 1 : -1 ]; -#endif - -CASSERT(8 == sizeof(uint64), sizeof_uint64_is_8) -CASSERT(8 == sizeof(int64), sizeof_int64_is_8) - -#ifndef INT64_MAX -#define INT64_MAX 0x7fffffffffffffffLL -#endif - -// always ANSI -typedef const char * cstr; -typedef char * str; - -#ifndef __cplusplus -typedef uint8 bool; -#endif - -#endif //__UTYPES_H__ +/* + * Copyright (c) 2010-2013 BitTorrent, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#ifndef __UTP_TYPES_H__ +#define __UTP_TYPES_H__ + +#ifdef __GNUC__ + // Used for gcc tool chains accepting but not supporting pragma pack + // See http://gcc.gnu.org/onlinedocs/gcc/Type-Attributes.html + #define PACKED_ATTRIBUTE __attribute__((__packed__)) +#else + #define PACKED_ATTRIBUTE +#endif + +#ifdef __GNUC__ + #define ALIGNED_ATTRIBUTE(x) __attribute__((aligned (x))) +#else + #define ALIGNED_ATTRIBUTE(x) +#endif + +// hash.cpp needs socket definitions, which is why this networking specific +// code is inclued in utypes.h +#ifdef WIN32 + #define _CRT_SECURE_NO_DEPRECATE + #define WIN32_LEAN_AND_MEAN + #include + #include + #include + #define IP_OPT_DONTFRAG IP_DONTFRAGMENT +#else + #include + #include + #include + + #ifdef IP_DONTFRAG + #define IP_OPT_DONTFRAG IP_DONTFRAG + #elif defined IP_DONTFRAGMENT + #define IP_OPT_DONTFRAG IP_DONTFRAGMENT + #else + //#warning "I don't know how to set DF bit on this system" + #endif +#endif + +#ifdef _MSC_VER + #include + typedef SSIZE_T ssize_t; +#endif + +#ifdef POSIX + typedef struct sockaddr_storage SOCKADDR_STORAGE; +#endif + +#ifdef WIN32 + #define I64u "%I64u" +#else + #define I64u "%Lu" +#endif + +#ifdef WIN32 + #define snprintf _snprintf +#endif + +// standard types +typedef unsigned char byte; +typedef unsigned char uint8; +typedef signed char int8; +typedef unsigned short uint16; +typedef signed short int16; +typedef unsigned int uint; +typedef unsigned int uint32; +typedef signed int int32; + +#ifdef _MSC_VER +typedef unsigned __int64 uint64; +typedef signed __int64 int64; +#else +typedef unsigned long long uint64; +typedef long long int64; +#endif + +/* compile-time assert */ +#ifndef CASSERT +#define CASSERT( exp, name ) typedef int is_not_##name [ (exp ) ? 
1 : -1 ]; +#endif + +CASSERT(8 == sizeof(uint64), sizeof_uint64_is_8) +CASSERT(8 == sizeof(int64), sizeof_int64_is_8) + +#ifndef INT64_MAX +#define INT64_MAX 0x7fffffffffffffffLL +#endif + +// always ANSI +typedef const char * cstr; +typedef char * str; + +#ifndef __cplusplus +typedef uint8 bool; +#endif + +#endif //__UTP_TYPES_H__ diff --git a/utp_utils.cpp b/utp_utils.cpp index 5515e62..f2c57ab 100644 --- a/utp_utils.cpp +++ b/utp_utils.cpp @@ -1,210 +1,254 @@ -#include "StdAfx.h" - -#include "utypes.h" -#include -#include - -#ifdef WIN32 - -#define WIN32_LEAN_AND_MEAN -#include -#include -#include - -typedef ULONGLONG (WINAPI GetTickCount64Proc)(void); -static GetTickCount64Proc *pt2GetTickCount64; -static GetTickCount64Proc *pt2RealGetTickCount; - -static uint64 startPerformanceCounter; -static uint64 startGetTickCount; -// MSVC 6 standard doesn't like division with uint64s -static double counterPerMicrosecond; - -uint64 UTGetTickCount64() -{ - if (pt2GetTickCount64) { - return pt2GetTickCount64(); - } - if (pt2RealGetTickCount) { - uint64 v = pt2RealGetTickCount(); - // fix return value from GetTickCount - return (DWORD)v | ((v >> 0x18) & 0xFFFFFFFF00000000); - } - return (uint64)GetTickCount(); -} - -void Time_Initialize() -{ - HMODULE kernel32 = GetModuleHandleA("kernel32.dll"); - pt2GetTickCount64 = (GetTickCount64Proc*)GetProcAddress(kernel32, "GetTickCount64"); - // not a typo. GetTickCount actually returns 64 bits - pt2RealGetTickCount = (GetTickCount64Proc*)GetProcAddress(kernel32, "GetTickCount"); - - uint64 frequency; - QueryPerformanceCounter((LARGE_INTEGER*)&startPerformanceCounter); - QueryPerformanceFrequency((LARGE_INTEGER*)&frequency); - counterPerMicrosecond = (double)frequency / 1000000.0f; - startGetTickCount = UTGetTickCount64(); -} - -int64 abs64(int64 x) { return x < 0 ? 
-x : x; } - -static uint64 GetMicroseconds() -{ - static bool time_init = false; - if (!time_init) { - time_init = true; - Time_Initialize(); - } - - uint64 counter; - uint64 tick; - - QueryPerformanceCounter((LARGE_INTEGER*) &counter); - tick = UTGetTickCount64(); - - // unfortunately, QueryPerformanceCounter is not guaranteed - // to be monotonic. Make it so. - int64 ret = (int64)(((int64)counter - (int64)startPerformanceCounter) / counterPerMicrosecond); - // if the QPC clock leaps more than one second off GetTickCount64() - // something is seriously fishy. Adjust QPC to stay monotonic - int64 tick_diff = tick - startGetTickCount; - if (abs64(ret / 100000 - tick_diff / 100) > 10) { - startPerformanceCounter -= (uint64)((int64)(tick_diff * 1000 - ret) * counterPerMicrosecond); - ret = (int64)((counter - startPerformanceCounter) / counterPerMicrosecond); - } - return ret; -} - -#else //!WIN32 - -#include -#include // Linux needs both time.h and sys/time.h -#include - -#include -#include -#include - -#if defined(__APPLE__) -#include - -static uint64 GetMicroseconds() -{ - // http://developer.apple.com/mac/library/qa/qa2004/qa1398.html - // http://www.macresearch.org/tutorial_performance_and_time - static mach_timebase_info_data_t sTimebaseInfo; - static uint64_t start_tick = 0; - uint64_t tick; - // Returns a counter in some fraction of a nanoseconds - tick = mach_absolute_time(); - if (sTimebaseInfo.denom == 0) { - // Get the timer ratio to convert mach_absolute_time to nanoseconds - mach_timebase_info(&sTimebaseInfo); - start_tick = tick; - } - // Calculate the elapsed time, convert it to microseconds and return it. - return ((tick - start_tick) * sTimebaseInfo.numer) / (sTimebaseInfo.denom * 1000); -} - -#else //!__APPLE__ - -/* Unfortunately, #ifdef CLOCK_MONOTONIC is not enough to make sure that - POSIX clocks work -- we could be running a recent libc with an ancient - kernel (think OpenWRT). 
-- jch */ - -static uint64_t GetMicroseconds() -{ - static int have_posix_clocks = -1; - int rc; - -#if defined(_POSIX_TIMERS) && _POSIX_TIMERS > 0 && defined(CLOCK_MONOTONIC) - if (have_posix_clocks < 0) { - struct timespec ts; - rc = clock_gettime(CLOCK_MONOTONIC, &ts); - if (rc < 0) { - have_posix_clocks = 0; - } else { - have_posix_clocks = 1; - } - } - - if (have_posix_clocks) { - struct timespec ts; - rc = clock_gettime(CLOCK_MONOTONIC, &ts); - return uint64(ts.tv_sec) * 1000000 + ts.tv_nsec / 1000; - } -#endif - { - struct timeval tv; - rc = gettimeofday(&tv, NULL); - return uint64(tv.tv_sec) * 1000000 + tv.tv_usec; - } -} -#endif //!__APPLE__ - -#endif //!WIN32 - -uint64 UTP_GetMicroseconds() -{ - static uint64 offset = 0, previous = 0; - - uint64 now = GetMicroseconds() + offset; - if (previous > now) { - /* Eek! */ - offset += previous - now; - now = previous; - } - previous = now; - return now; -} - -uint32 UTP_GetMilliseconds() -{ - return UTP_GetMicroseconds() / 1000; -} - - -#define ETHERNET_MTU 1500 -#define IPV4_HEADER_SIZE 20 -#define IPV6_HEADER_SIZE 40 -#define UDP_HEADER_SIZE 8 -#define GRE_HEADER_SIZE 24 -#define PPPOE_HEADER_SIZE 8 -#define MPPE_HEADER_SIZE 2 -// packets have been observed in the wild that were fragmented -// with a payload of 1416 for the first fragment -// There are reports of routers that have MTU sizes as small as 1392 -#define FUDGE_HEADER_SIZE 36 -#define TEREDO_MTU 1280 - -#define UDP_IPV4_OVERHEAD (IPV4_HEADER_SIZE + UDP_HEADER_SIZE) -#define UDP_IPV6_OVERHEAD (IPV6_HEADER_SIZE + UDP_HEADER_SIZE) -#define UDP_TEREDO_OVERHEAD (UDP_IPV4_OVERHEAD + UDP_IPV6_OVERHEAD) - -#define UDP_IPV4_MTU (ETHERNET_MTU - IPV4_HEADER_SIZE - UDP_HEADER_SIZE - GRE_HEADER_SIZE - PPPOE_HEADER_SIZE - MPPE_HEADER_SIZE - FUDGE_HEADER_SIZE) -#define UDP_IPV6_MTU (ETHERNET_MTU - IPV6_HEADER_SIZE - UDP_HEADER_SIZE - GRE_HEADER_SIZE - PPPOE_HEADER_SIZE - MPPE_HEADER_SIZE - FUDGE_HEADER_SIZE) -#define UDP_TEREDO_MTU (TEREDO_MTU - IPV6_HEADER_SIZE - 
UDP_HEADER_SIZE) - -uint16 UTP_GetUDPMTU(const struct sockaddr *remote, socklen_t remotelen) -{ - // Since we don't know the local address of the interface, - // be conservative and assume all IPv6 connections are Teredo. - return remote->sa_family == AF_INET6 ? UDP_TEREDO_MTU : UDP_IPV4_MTU; -} - -uint16 UTP_GetUDPOverhead(const struct sockaddr *remote, socklen_t remotelen) -{ - // Since we don't know the local address of the interface, - // be conservative and assume all IPv6 connections are Teredo. - return remote->sa_family == AF_INET6 ? UDP_TEREDO_OVERHEAD : UDP_IPV4_OVERHEAD; -} - -uint32 UTP_Random() -{ - return rand(); -} - -void UTP_DelaySample(const struct sockaddr *remote, int sample_ms) {} -size_t UTP_GetPacketSize(const struct sockaddr *remote) { return 1500; } - +/* + * Copyright (c) 2010-2013 BitTorrent, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include +#include +#include "utp.h" +#include "utp_types.h" + +#ifdef WIN32 + #define WIN32_LEAN_AND_MEAN + #include + #include + #include +#else //!WIN32 + #include + #include // Linux needs both time.h and sys/time.h +#endif + +#if defined(__APPLE__) + #include +#endif + +#include "utp_utils.h" + +#ifdef WIN32 + +typedef ULONGLONG (WINAPI GetTickCount64Proc)(void); +static GetTickCount64Proc *pt2GetTickCount64; +static GetTickCount64Proc *pt2RealGetTickCount; + +static uint64 startPerformanceCounter; +static uint64 startGetTickCount; +// MSVC 6 standard doesn't like division with uint64s +static double counterPerMicrosecond; + +static uint64 UTGetTickCount64() +{ + if (pt2GetTickCount64) { + return pt2GetTickCount64(); + } + if (pt2RealGetTickCount) { + uint64 v = pt2RealGetTickCount(); + // fix return value from GetTickCount + return (DWORD)v | ((v >> 0x18) & 0xFFFFFFFF00000000); + } + return (uint64)GetTickCount(); +} + +static void Time_Initialize() +{ + HMODULE kernel32 = GetModuleHandleA("kernel32.dll"); + pt2GetTickCount64 = (GetTickCount64Proc*)GetProcAddress(kernel32, "GetTickCount64"); + // not a typo. GetTickCount actually returns 64 bits + pt2RealGetTickCount = (GetTickCount64Proc*)GetProcAddress(kernel32, "GetTickCount"); + + uint64 frequency; + QueryPerformanceCounter((LARGE_INTEGER*)&startPerformanceCounter); + QueryPerformanceFrequency((LARGE_INTEGER*)&frequency); + counterPerMicrosecond = (double)frequency / 1000000.0f; + startGetTickCount = UTGetTickCount64(); +} + +static int64 abs64(int64 x) { return x < 0 ? -x : x; } + +static uint64 __GetMicroseconds() +{ + static bool time_init = false; + if (!time_init) { + time_init = true; + Time_Initialize(); + } + + uint64 counter; + uint64 tick; + + QueryPerformanceCounter((LARGE_INTEGER*) &counter); + tick = UTGetTickCount64(); + + // unfortunately, QueryPerformanceCounter is not guaranteed + // to be monotonic. Make it so. 
+ int64 ret = (int64)(((int64)counter - (int64)startPerformanceCounter) / counterPerMicrosecond); + // if the QPC clock leaps more than one second off GetTickCount64() + // something is seriously fishy. Adjust QPC to stay monotonic + int64 tick_diff = tick - startGetTickCount; + if (abs64(ret / 100000 - tick_diff / 100) > 10) { + startPerformanceCounter -= (uint64)((int64)(tick_diff * 1000 - ret) * counterPerMicrosecond); + ret = (int64)((counter - startPerformanceCounter) / counterPerMicrosecond); + } + return ret; +} + +static inline uint64 UTP_GetMilliseconds() +{ + return GetTickCount(); +} + +#else //!WIN32 + +static inline uint64 UTP_GetMicroseconds(void); +static inline uint64 UTP_GetMilliseconds() +{ + return UTP_GetMicroseconds() / 1000; +} + +#if defined(__APPLE__) + +static uint64 __GetMicroseconds() +{ + // http://developer.apple.com/mac/library/qa/qa2004/qa1398.html + // http://www.macresearch.org/tutorial_performance_and_time + static mach_timebase_info_data_t sTimebaseInfo; + static uint64_t start_tick = 0; + uint64_t tick; + // Returns a counter in some fraction of a nanoseconds + tick = mach_absolute_time(); + if (sTimebaseInfo.denom == 0) { + // Get the timer ratio to convert mach_absolute_time to nanoseconds + mach_timebase_info(&sTimebaseInfo); + start_tick = tick; + } + // Calculate the elapsed time, convert it to microseconds and return it. + return ((tick - start_tick) * sTimebaseInfo.numer) / (sTimebaseInfo.denom * 1000); +} + +#else // !__APPLE__ + +#if ! (defined(_POSIX_TIMERS) && _POSIX_TIMERS > 0 && defined(CLOCK_MONOTONIC)) + #warning "Using non-monotonic function gettimeofday() in UTP_GetMicroseconds()" +#endif + +/* Unfortunately, #ifdef CLOCK_MONOTONIC is not enough to make sure that + POSIX clocks work -- we could be running a recent libc with an ancient + kernel (think OpenWRT). 
-- jch */ + +static uint64_t __GetMicroseconds() +{ + struct timeval tv; + + #if defined(_POSIX_TIMERS) && _POSIX_TIMERS > 0 && defined(CLOCK_MONOTONIC) + static int have_posix_clocks = -1; + int rc; + + if (have_posix_clocks < 0) { + struct timespec ts; + rc = clock_gettime(CLOCK_MONOTONIC, &ts); + if (rc < 0) { + have_posix_clocks = 0; + } else { + have_posix_clocks = 1; + } + } + + if (have_posix_clocks) { + struct timespec ts; + rc = clock_gettime(CLOCK_MONOTONIC, &ts); + return uint64(ts.tv_sec) * 1000000 + uint64(ts.tv_nsec) / 1000; + } + #endif + + gettimeofday(&tv, NULL); + return uint64(tv.tv_sec) * 1000000 + tv.tv_usec; +} + +#endif //!__APPLE__ + +#endif //!WIN32 + +/* + * Whew. Okay. After that #ifdef maze above, we now know we have a working + * __GetMicroseconds() implementation on all platforms. + * + * Because there are a number of assertions in libutp that will cause a crash + * if monotonic time isn't monotonic, now apply some safety checks. While in + * principle we're already protecting ourselves in cases where non-monotonic + * time is likely to happen, this protects all versions. + */ + +static inline uint64 UTP_GetMicroseconds() +{ + static uint64 offset = 0, previous = 0; + + uint64 now = __GetMicroseconds() + offset; + if (previous > now) { + /* Eek! 
*/ + offset += previous - now; + now = previous; + } + previous = now; + return now; +} + +#define ETHERNET_MTU 1500 +#define IPV4_HEADER_SIZE 20 +#define IPV6_HEADER_SIZE 40 +#define UDP_HEADER_SIZE 8 +#define GRE_HEADER_SIZE 24 +#define PPPOE_HEADER_SIZE 8 +#define MPPE_HEADER_SIZE 2 +// packets have been observed in the wild that were fragmented +// with a payload of 1416 for the first fragment +// There are reports of routers that have MTU sizes as small as 1392 +#define FUDGE_HEADER_SIZE 36 +#define TEREDO_MTU 1280 + +#define UDP_IPV4_OVERHEAD (IPV4_HEADER_SIZE + UDP_HEADER_SIZE) +#define UDP_IPV6_OVERHEAD (IPV6_HEADER_SIZE + UDP_HEADER_SIZE) +#define UDP_TEREDO_OVERHEAD (UDP_IPV4_OVERHEAD + UDP_IPV6_OVERHEAD) + +#define UDP_IPV4_MTU (ETHERNET_MTU - IPV4_HEADER_SIZE - UDP_HEADER_SIZE - GRE_HEADER_SIZE - PPPOE_HEADER_SIZE - MPPE_HEADER_SIZE - FUDGE_HEADER_SIZE) +#define UDP_IPV6_MTU (ETHERNET_MTU - IPV6_HEADER_SIZE - UDP_HEADER_SIZE - GRE_HEADER_SIZE - PPPOE_HEADER_SIZE - MPPE_HEADER_SIZE - FUDGE_HEADER_SIZE) +#define UDP_TEREDO_MTU (TEREDO_MTU - IPV6_HEADER_SIZE - UDP_HEADER_SIZE) + +uint64 utp_default_get_udp_mtu(utp_callback_arguments *args) { + // Since we don't know the local address of the interface, + // be conservative and assume all IPv6 connections are Teredo. + return (args->address->sa_family == AF_INET6) ? UDP_TEREDO_MTU : UDP_IPV4_MTU; +} + +uint64 utp_default_get_udp_overhead(utp_callback_arguments *args) { + // Since we don't know the local address of the interface, + // be conservative and assume all IPv6 connections are Teredo. + return (args->address->sa_family == AF_INET6) ? 
UDP_TEREDO_OVERHEAD : UDP_IPV4_OVERHEAD; +} + +uint64 utp_default_get_random(utp_callback_arguments *args) { + return rand(); +} + +uint64 utp_default_get_milliseconds(utp_callback_arguments *args) { + return UTP_GetMilliseconds(); +} + +uint64 utp_default_get_microseconds(utp_callback_arguments *args) { + return UTP_GetMicroseconds(); +} diff --git a/utp_utils.h b/utp_utils.h index 033984f..7eb0c55 100644 --- a/utp_utils.h +++ b/utp_utils.h @@ -1,16 +1,27 @@ -// This should return the MTU to the destination -uint16 UTP_GetUDPMTU(const struct sockaddr *remote, socklen_t remotelen); -// This should return the number of bytes of UDP overhead for one packet to the -// destination, for overhead calculation only -uint16 UTP_GetUDPOverhead(const struct sockaddr *remote, socklen_t remotelen); -// This should return monotonically increasing milliseconds, start point does not matter -uint32 UTP_GetMilliseconds(); -// This should return monotonically increasing microseconds, start point does not matter -uint64 UTP_GetMicroseconds(); -// This should return a random uint32 -uint32 UTP_Random(); -// This is called every time we have a delay sample is made -void UTP_DelaySample(const struct sockaddr *remote, int sample_ms); -// Should return the max packet size to use when sending to the given address -size_t UTP_GetPacketSize(const struct sockaddr *remote); - +/* + * Copyright (c) 2010-2013 BitTorrent, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */
+
+uint64 utp_default_get_udp_mtu(utp_callback_arguments *args);
+uint64 utp_default_get_udp_overhead(utp_callback_arguments *args);
+uint64 utp_default_get_random(utp_callback_arguments *args);
+uint64 utp_default_get_milliseconds(utp_callback_arguments *args);
+uint64 utp_default_get_microseconds(utp_callback_arguments *args);
diff --git a/win32_inet_ntop.cpp b/win32_inet_ntop.cpp
index 640ae92..02ff83c 100644
--- a/win32_inet_ntop.cpp
+++ b/win32_inet_ntop.cpp
@@ -1,85 +1,107 @@
-#define WIN32_LEAN_AND_MEAN
-#include <winsock2.h>
-#include <ws2tcpip.h>
-#include <stdio.h>
-#include "win32_inet_ntop.h"
-
-#if ((!defined NTDDI_VERSION) || (NTDDI_VERSION < NTDDI_LONGHORN))
-const char *inet_ntop(int af, const void *src, char *dest, size_t length)
-{
-	if (af != AF_INET && af != AF_INET6)
-	{
-		return NULL;
-	}
-
-	SOCKADDR_STORAGE address;
-	DWORD address_length;
-
-	if (af == AF_INET)
-	{
-		address_length = sizeof(sockaddr_in);
-		sockaddr_in* ipv4_address = (sockaddr_in*)(&address);
-		ipv4_address->sin_family = AF_INET;
-		ipv4_address->sin_port = 0;
-		memcpy(&ipv4_address->sin_addr, src, sizeof(in_addr));
-	}
-	else // AF_INET6
-	{
-		address_length = sizeof(sockaddr_in6);
-		sockaddr_in6* ipv6_address = (sockaddr_in6*)(&address);
-		ipv6_address->sin6_family = AF_INET6;
-		ipv6_address->sin6_port = 0;
-		ipv6_address->sin6_flowinfo = 0;
-		// hmmm
-		ipv6_address->sin6_scope_id = 0;
-		memcpy(&ipv6_address->sin6_addr, src, sizeof(in6_addr));
-	}
-
-	DWORD string_length = (DWORD)(length);
-	int result;
-	result = WSAAddressToStringA((sockaddr*)(&address),
-								 address_length, 0, dest,
-								 &string_length);
-
-	// one common reason for this to fail is that ipv6 is not installed
-
-	return result == SOCKET_ERROR ?
NULL : dest; -} - -int inet_pton(int af, const char* src, void* dest) -{ - if (af != AF_INET && af != AF_INET6) - { - return -1; - } - - SOCKADDR_STORAGE address; - int address_length = sizeof(SOCKADDR_STORAGE); - int result = WSAStringToAddressA((char*)(src), af, 0, - (sockaddr*)(&address), - &address_length); - - if (af == AF_INET) - { - if (result != SOCKET_ERROR) - { - sockaddr_in* ipv4_address =(sockaddr_in*)(&address); - memcpy(dest, &ipv4_address->sin_addr, sizeof(in_addr)); - } - else if (strcmp(src, "255.255.255.255") == 0) - { - ((in_addr*)(dest))->s_addr = INADDR_NONE; - } - } - else // AF_INET6 - { - if (result != SOCKET_ERROR) - { - sockaddr_in6* ipv6_address = (sockaddr_in6*)(&address); - memcpy(dest, &ipv6_address->sin6_addr, sizeof(in6_addr)); - } - } - - return result == SOCKET_ERROR ? -1 : 1; -} -#endif +/* + * Copyright (c) 2010-2013 BitTorrent, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */
+
+#define WIN32_LEAN_AND_MEAN
+#include <winsock2.h>
+#include <ws2tcpip.h>
+#include <stdio.h>
+#include "win32_inet_ntop.h"
+
+#if ((!defined NTDDI_VERSION) || (NTDDI_VERSION < NTDDI_LONGHORN))
+const char *inet_ntop(int af, const void *src, char *dest, size_t length)
+{
+	if (af != AF_INET && af != AF_INET6)
+	{
+		return NULL;
+	}
+
+	SOCKADDR_STORAGE address;
+	DWORD address_length;
+
+	if (af == AF_INET)
+	{
+		address_length = sizeof(sockaddr_in);
+		sockaddr_in* ipv4_address = (sockaddr_in*)(&address);
+		ipv4_address->sin_family = AF_INET;
+		ipv4_address->sin_port = 0;
+		memcpy(&ipv4_address->sin_addr, src, sizeof(in_addr));
+	}
+	else // AF_INET6
+	{
+		address_length = sizeof(sockaddr_in6);
+		sockaddr_in6* ipv6_address = (sockaddr_in6*)(&address);
+		ipv6_address->sin6_family = AF_INET6;
+		ipv6_address->sin6_port = 0;
+		ipv6_address->sin6_flowinfo = 0;
+		// hmmm
+		ipv6_address->sin6_scope_id = 0;
+		memcpy(&ipv6_address->sin6_addr, src, sizeof(in6_addr));
+	}
+
+	DWORD string_length = (DWORD)(length);
+	int result;
+	result = WSAAddressToStringA((sockaddr*)(&address),
+								 address_length, 0, dest,
+								 &string_length);
+
+	// one common reason for this to fail is that ipv6 is not installed
+
+	return result == SOCKET_ERROR ?
NULL : dest; +} + +int inet_pton(int af, const char* src, void* dest) +{ + if (af != AF_INET && af != AF_INET6) + { + return -1; + } + + SOCKADDR_STORAGE address; + int address_length = sizeof(SOCKADDR_STORAGE); + int result = WSAStringToAddressA((char*)(src), af, 0, + (sockaddr*)(&address), + &address_length); + + if (af == AF_INET) + { + if (result != SOCKET_ERROR) + { + sockaddr_in* ipv4_address =(sockaddr_in*)(&address); + memcpy(dest, &ipv4_address->sin_addr, sizeof(in_addr)); + } + else if (strcmp(src, "255.255.255.255") == 0) + { + ((in_addr*)(dest))->s_addr = INADDR_NONE; + } + } + else // AF_INET6 + { + if (result != SOCKET_ERROR) + { + sockaddr_in6* ipv6_address = (sockaddr_in6*)(&address); + memcpy(dest, &ipv6_address->sin6_addr, sizeof(in6_addr)); + } + } + + return result == SOCKET_ERROR ? -1 : 1; +} +#endif diff --git a/win32_inet_ntop.h b/win32_inet_ntop.h index e079ae4..e386cf4 100644 --- a/win32_inet_ntop.h +++ b/win32_inet_ntop.h @@ -1,9 +1,31 @@ -#ifndef __WIN32_INET_NTOP_H__ -#define __WIN32_INET_NTOP_H__ - -#if ((!defined NTDDI_VERSION) || (NTDDI_VERSION < NTDDI_LONGHORN)) -const char *inet_ntop(int af, const void *src, char *dest, size_t length); -int inet_pton(int af, const char* src, void* dest); -#endif - -#endif //__WIN32_INET_NTOP_H__ +/* + * Copyright (c) 2010-2013 BitTorrent, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef __WIN32_INET_NTOP_H__ +#define __WIN32_INET_NTOP_H__ + +#if ((!defined NTDDI_VERSION) || (NTDDI_VERSION < NTDDI_LONGHORN)) +const char *inet_ntop(int af, const void *src, char *dest, size_t length); +int inet_pton(int af, const char* src, void* dest); +#endif + +#endif //__WIN32_INET_NTOP_H__