diff --git a/README b/README.md similarity index 66% rename from README rename to README.md index 8532f2a5..ac61e0d9 100644 --- a/README +++ b/README.md @@ -1,4 +1,4 @@ -wrk - a HTTP benchmarking tool +# wrk - a HTTP benchmarking tool wrk is a modern HTTP benchmarking tool capable of generating significant load when run on a single multi-core CPU. It combines a multithreaded @@ -6,27 +6,45 @@ wrk - a HTTP benchmarking tool An optional LuaJIT script can perform HTTP request generation, response processing, and custom reporting. Details are available in SCRIPTING and - several examples are located in scripts/ + several examples are located in [scripts/](scripts/). -Basic Usage +## Basic Usage - wrk -t12 -c400 -d30s http://127.0.0.1:8080/index.html + wrk -t12 -c400 -d30s http://127.0.0.1:8080/index.html This runs a benchmark for 30 seconds, using 12 threads, and keeping 400 HTTP connections open. Output: - Running 30s test @ http://127.0.0.1:8080/index.html - 12 threads and 400 connections - Thread Stats Avg Stdev Max +/- Stdev - Latency 635.91us 0.89ms 12.92ms 93.69% - Req/Sec 56.20k 8.07k 62.00k 86.54% - 22464657 requests in 30.00s, 17.76GB read - Requests/sec: 748868.53 - Transfer/sec: 606.33MB + Running 30s test @ http://127.0.0.1:8080/index.html + 12 threads and 400 connections + Thread Stats Avg Stdev Max +/- Stdev + Latency 635.91us 0.89ms 12.92ms 93.69% + Req/Sec 56.20k 8.07k 62.00k 86.54% + 22464657 requests in 30.00s, 17.76GB read + Requests/sec: 748868.53 + Transfer/sec: 606.33MB -Benchmarking Tips +## Command Line Options + + -c, --connections: total number of HTTP connections to keep open with + each thread handling N = connections/threads + + -d, --duration: duration of the test, e.g. 2s, 2m, 2h + + -t, --threads: total number of threads to use + + -s, --script: LuaJIT script, see SCRIPTING + + -H, --header: HTTP header to add to request, e.g. "User-Agent: wrk" + + --latency: print detailed latency statistics + + --timeout: record a timeout if a response is not received within + this amount of time. + +## Benchmarking Tips The machine running wrk must have a sufficient number of ephemeral ports available and closed sockets should be recycled quickly. To handle the @@ -38,14 +56,14 @@ Benchmarking Tips building a new HTTP request, and use of response() will necessarily reduce the amount of load that can be generated. -Acknowledgements +## Acknowledgements wrk contains code from a number of open source projects including the 'ae' event loop from redis, the nginx/joyent/node.js 'http-parser', and Mike Pall's LuaJIT. Please consult the NOTICE file for licensing details. -Cryptography Notice +## Cryptography Notice This distribution includes cryptographic software. The country in which you currently reside may have restrictions on the import, diff --git a/src/ae.c b/src/ae.c index 90be4e28..e66808a8 100644 --- a/src/ae.c +++ b/src/ae.c @@ -91,6 +91,36 @@ aeEventLoop *aeCreateEventLoop(int setsize) { return NULL; } +/* Return the current set size. */ +int aeGetSetSize(aeEventLoop *eventLoop) { + return eventLoop->setsize; +} + +/* Resize the maximum set size of the event loop. + * If the requested set size is smaller than the current set size, but + * there is already a file descriptor in use that is >= the requested + * set size minus one, AE_ERR is returned and the operation is not + * performed at all. + * + * Otherwise AE_OK is returned and the operation is successful. */ +int aeResizeSetSize(aeEventLoop *eventLoop, int setsize) { + int i; + + if (setsize == eventLoop->setsize) return AE_OK; + if (eventLoop->maxfd >= setsize) return AE_ERR; + if (aeApiResize(eventLoop,setsize) == -1) return AE_ERR; + + eventLoop->events = zrealloc(eventLoop->events,sizeof(aeFileEvent)*setsize); + eventLoop->fired = zrealloc(eventLoop->fired,sizeof(aeFiredEvent)*setsize); + eventLoop->setsize = setsize; + + /* Make sure that if we created new slots, they are initialized with + * an AE_NONE mask. */ + for (i = eventLoop->maxfd+1; i < setsize; i++) + eventLoop->events[i].mask = AE_NONE; + return AE_OK; +} + void aeDeleteEventLoop(aeEventLoop *eventLoop) { aeApiFree(eventLoop); zfree(eventLoop->events); @@ -126,8 +156,9 @@ void aeDeleteFileEvent(aeEventLoop *eventLoop, int fd, int mask) { if (fd >= eventLoop->setsize) return; aeFileEvent *fe = &eventLoop->events[fd]; - if (fe->mask == AE_NONE) return; + + aeApiDelEvent(eventLoop, fd, mask); fe->mask = fe->mask & (~mask); if (fd == eventLoop->maxfd && fe->mask == AE_NONE) { /* Update the max fd */ @@ -137,7 +168,6 @@ void aeDeleteFileEvent(aeEventLoop *eventLoop, int fd, int mask) if (eventLoop->events[j].mask != AE_NONE) break; eventLoop->maxfd = j; } - aeApiDelEvent(eventLoop, fd, mask); } int aeGetFileEvents(aeEventLoop *eventLoop, int fd) { @@ -191,21 +221,12 @@ long long aeCreateTimeEvent(aeEventLoop *eventLoop, long long milliseconds, int aeDeleteTimeEvent(aeEventLoop *eventLoop, long long id) { - aeTimeEvent *te, *prev = NULL; - - te = eventLoop->timeEventHead; + aeTimeEvent *te = eventLoop->timeEventHead; while(te) { if (te->id == id) { - if (prev == NULL) - eventLoop->timeEventHead = te->next; - else - prev->next = te->next; - if (te->finalizerProc) - te->finalizerProc(eventLoop, te->clientData); - zfree(te); + te->id = AE_DELETED_EVENT_ID; return AE_OK; } - prev = te; te = te->next; } return AE_ERR; /* NO event with the specified ID found */ @@ -240,7 +261,7 @@ static aeTimeEvent *aeSearchNearestTimer(aeEventLoop *eventLoop) /* Process time events */ static int processTimeEvents(aeEventLoop *eventLoop) { int processed = 0; - aeTimeEvent *te; + aeTimeEvent *te, *prev; long long maxId; time_t now = time(NULL); @@ -261,12 +282,32 @@ static int processTimeEvents(aeEventLoop *eventLoop) { } eventLoop->lastTime = now; + prev = NULL; te = eventLoop->timeEventHead; maxId = eventLoop->timeEventNextId-1; while(te) { long now_sec, now_ms; long long id; + /* Remove events scheduled for deletion. */ + if (te->id == AE_DELETED_EVENT_ID) { + aeTimeEvent *next = te->next; + if (prev == NULL) + eventLoop->timeEventHead = te->next; + else + prev->next = te->next; + if (te->finalizerProc) + te->finalizerProc(eventLoop, te->clientData); + zfree(te); + te = next; + continue; + } + + /* Make sure we don't process time events created by time events in + * this iteration. Note that this check is currently useless: we always + * add new timers on the head, however if we change the implementation + * detail, this check may be useful again: we keep it here for future + * defense. */ if (te->id > maxId) { te = te->next; continue; @@ -280,28 +321,14 @@ static int processTimeEvents(aeEventLoop *eventLoop) { id = te->id; retval = te->timeProc(eventLoop, id, te->clientData); processed++; - /* After an event is processed our time event list may - * no longer be the same, so we restart from head. - * Still we make sure to don't process events registered - * by event handlers itself in order to don't loop forever. - * To do so we saved the max ID we want to handle. - * - * FUTURE OPTIMIZATIONS: - * Note that this is NOT great algorithmically. Redis uses - * a single time event so it's not a problem but the right - * way to do this is to add the new elements on head, and - * to flag deleted elements in a special way for later - * deletion (putting references to the nodes to delete into - * another linked list). */ if (retval != AE_NOMORE) { aeAddMillisecondsToNow(retval,&te->when_sec,&te->when_ms); } else { - aeDeleteTimeEvent(eventLoop, id); + te->id = AE_DELETED_EVENT_ID; } - te = eventLoop->timeEventHead; - } else { - te = te->next; } + prev = te; + te = te->next; } return processed; } @@ -309,7 +336,7 @@ static int processTimeEvents(aeEventLoop *eventLoop) { /* Process every pending time event, then every pending file event * (that may be registered by time event callbacks just processed). * Without special flags the function sleeps until some file event - * fires, or when the next time event occurrs (if any). + * fires, or when the next time event occurs (if any). * * If flags is 0, the function does nothing and returns. * if flags has AE_ALL_EVENTS set, all the kind of events are processed. @@ -341,22 +368,25 @@ int aeProcessEvents(aeEventLoop *eventLoop, int flags) if (shortest) { long now_sec, now_ms; - /* Calculate the time missing for the nearest - * timer to fire. */ aeGetTime(&now_sec, &now_ms); tvp = &tv; - tvp->tv_sec = shortest->when_sec - now_sec; - if (shortest->when_ms < now_ms) { - tvp->tv_usec = ((shortest->when_ms+1000) - now_ms)*1000; - tvp->tv_sec --; + + /* How many milliseconds we need to wait for the next + * time event to fire? */ + long long ms = + (shortest->when_sec - now_sec)*1000 + + shortest->when_ms - now_ms; + + if (ms > 0) { + tvp->tv_sec = ms/1000; + tvp->tv_usec = (ms % 1000)*1000; } else { - tvp->tv_usec = (shortest->when_ms - now_ms)*1000; + tvp->tv_sec = 0; + tvp->tv_usec = 0; } - if (tvp->tv_sec < 0) tvp->tv_sec = 0; - if (tvp->tv_usec < 0) tvp->tv_usec = 0; } else { /* If we have to check for events but need to return - * ASAP because of AE_DONT_WAIT we need to se the timeout + * ASAP because of AE_DONT_WAIT we need to set the timeout * to zero */ if (flags & AE_DONT_WAIT) { tv.tv_sec = tv.tv_usec = 0; @@ -395,7 +425,7 @@ int aeProcessEvents(aeEventLoop *eventLoop, int flags) return processed; /* return the number of processed file/time events */ } -/* Wait for millseconds until the given file descriptor becomes +/* Wait for milliseconds until the given file descriptor becomes * writable/readable/exception */ int aeWait(int fd, int mask, long long milliseconds) { struct pollfd pfd; diff --git a/src/ae.h b/src/ae.h index 4d895024..827c4c9e 100644 --- a/src/ae.h +++ b/src/ae.h @@ -33,6 +33,8 @@ #ifndef __AE_H__ #define __AE_H__ +#include + #define AE_OK 0 #define AE_ERR -1 @@ -46,6 +48,7 @@ #define AE_DONT_WAIT 4 #define AE_NOMORE -1 +#define AE_DELETED_EVENT_ID -1 /* Macros */ #define AE_NOTUSED(V) ((void) V) @@ -114,5 +117,7 @@ int aeWait(int fd, int mask, long long milliseconds); void aeMain(aeEventLoop *eventLoop); char *aeGetApiName(void); void aeSetBeforeSleepProc(aeEventLoop *eventLoop, aeBeforeSleepProc *beforesleep); +int aeGetSetSize(aeEventLoop *eventLoop); +int aeResizeSetSize(aeEventLoop *eventLoop, int setsize); #endif diff --git a/src/ae_epoll.c b/src/ae_epoll.c index 4823c281..410aac70 100644 --- a/src/ae_epoll.c +++ b/src/ae_epoll.c @@ -45,7 +45,7 @@ static int aeApiCreate(aeEventLoop *eventLoop) { zfree(state); return -1; } - state->epfd = epoll_create(1024); /* 1024 is just an hint for the kernel */ + state->epfd = epoll_create(1024); /* 1024 is just a hint for the kernel */ if (state->epfd == -1) { zfree(state->events); zfree(state); @@ -55,6 +55,13 @@ static int aeApiCreate(aeEventLoop *eventLoop) { return 0; } +static int aeApiResize(aeEventLoop *eventLoop, int setsize) { + aeApiState *state = eventLoop->apidata; + + state->events = zrealloc(state->events, sizeof(struct epoll_event)*setsize); + return 0; +} + static void aeApiFree(aeEventLoop *eventLoop) { aeApiState *state = eventLoop->apidata; @@ -65,7 +72,7 @@ static void aeApiFree(aeEventLoop *eventLoop) { static int aeApiAddEvent(aeEventLoop *eventLoop, int fd, int mask) { aeApiState *state = eventLoop->apidata; - struct epoll_event ee; + struct epoll_event ee = {0}; /* avoid valgrind warning */ /* If the fd was already monitored for some event, we need a MOD * operation. Otherwise we need an ADD operation. */ int op = eventLoop->events[fd].mask == AE_NONE ? @@ -75,7 +82,6 @@ static int aeApiAddEvent(aeEventLoop *eventLoop, int fd, int mask) { mask |= eventLoop->events[fd].mask; /* Merge old events */ if (mask & AE_READABLE) ee.events |= EPOLLIN; if (mask & AE_WRITABLE) ee.events |= EPOLLOUT; - ee.data.u64 = 0; /* avoid valgrind warning */ ee.data.fd = fd; if (epoll_ctl(state->epfd,op,fd,&ee) == -1) return -1; return 0; @@ -83,13 +89,12 @@ static int aeApiAddEvent(aeEventLoop *eventLoop, int fd, int mask) { static void aeApiDelEvent(aeEventLoop *eventLoop, int fd, int delmask) { aeApiState *state = eventLoop->apidata; - struct epoll_event ee; + struct epoll_event ee = {0}; /* avoid valgrind warning */ int mask = eventLoop->events[fd].mask & (~delmask); ee.events = 0; if (mask & AE_READABLE) ee.events |= EPOLLIN; if (mask & AE_WRITABLE) ee.events |= EPOLLOUT; - ee.data.u64 = 0; /* avoid valgrind warning */ ee.data.fd = fd; if (mask != AE_NONE) { epoll_ctl(state->epfd,EPOLL_CTL_MOD,fd,&ee); diff --git a/src/ae_evport.c b/src/ae_evport.c index 0196dccf..5c317bec 100644 --- a/src/ae_evport.c +++ b/src/ae_evport.c @@ -50,15 +50,15 @@ static int evport_debug = 0; * aeApiPoll, the corresponding file descriptors become dissociated from the * port. This is necessary because poll events are level-triggered, so if the * fd didn't become dissociated, it would immediately fire another event since - * the underlying state hasn't changed yet. We must reassociate the file + * the underlying state hasn't changed yet. We must re-associate the file * descriptor, but only after we know that our caller has actually read from it. * The ae API does not tell us exactly when that happens, but we do know that * it must happen by the time aeApiPoll is called again. Our solution is to - * keep track of the last fds returned by aeApiPoll and reassociate them next + * keep track of the last fds returned by aeApiPoll and re-associate them next * time aeApiPoll is invoked. * * To summarize, in this module, each fd association is EITHER (a) represented - * only via the in-kernel assocation OR (b) represented by pending_fds and + * only via the in-kernel association OR (b) represented by pending_fds and * pending_masks. (b) is only true for the last fds we returned from aeApiPoll, * and only until we enter aeApiPoll again (at which point we restore the * in-kernel association). @@ -94,6 +94,11 @@ static int aeApiCreate(aeEventLoop *eventLoop) { return 0; } +static int aeApiResize(aeEventLoop *eventLoop, int setsize) { + /* Nothing to resize here. */ + return 0; +} + static void aeApiFree(aeEventLoop *eventLoop) { aeApiState *state = eventLoop->apidata; @@ -164,7 +169,7 @@ static int aeApiAddEvent(aeEventLoop *eventLoop, int fd, int mask) { * This fd was recently returned from aeApiPoll. It should be safe to * assume that the consumer has processed that poll event, but we play * it safer by simply updating pending_mask. The fd will be - * reassociated as usual when aeApiPoll is called again. + * re-associated as usual when aeApiPoll is called again. */ if (evport_debug) fprintf(stderr, "aeApiAddEvent: adding to pending fd %d\n", fd); @@ -228,7 +233,7 @@ static void aeApiDelEvent(aeEventLoop *eventLoop, int fd, int mask) { * ENOMEM is a potentially transient condition, but the kernel won't * generally return it unless things are really bad. EAGAIN indicates * we've reached an resource limit, for which it doesn't make sense to - * retry (counterintuitively). All other errors indicate a bug. In any + * retry (counter-intuitively). All other errors indicate a bug. In any * of these cases, the best we can do is to abort. */ abort(); /* will not return */ @@ -243,7 +248,7 @@ static int aeApiPoll(aeEventLoop *eventLoop, struct timeval *tvp) { port_event_t event[MAX_EVENT_BATCHSZ]; /* - * If we've returned fd events before, we must reassociate them with the + * If we've returned fd events before, we must re-associate them with the * port now, before calling port_get(). See the block comment at the top of * this file for an explanation of why. */ diff --git a/src/ae_kqueue.c b/src/ae_kqueue.c index 458772f7..6796f4ce 100644 --- a/src/ae_kqueue.c +++ b/src/ae_kqueue.c @@ -54,8 +54,14 @@ static int aeApiCreate(aeEventLoop *eventLoop) { return -1; } eventLoop->apidata = state; - - return 0; + return 0; +} + +static int aeApiResize(aeEventLoop *eventLoop, int setsize) { + aeApiState *state = eventLoop->apidata; + + state->events = zrealloc(state->events, sizeof(struct kevent)*setsize); + return 0; } static void aeApiFree(aeEventLoop *eventLoop) { @@ -69,7 +75,7 @@ static void aeApiFree(aeEventLoop *eventLoop) { static int aeApiAddEvent(aeEventLoop *eventLoop, int fd, int mask) { aeApiState *state = eventLoop->apidata; struct kevent ke; - + if (mask & AE_READABLE) { EV_SET(&ke, fd, EVFILT_READ, EV_ADD, 0, 0, NULL); if (kevent(state->kqfd, &ke, 1, NULL, 0, NULL) == -1) return -1; @@ -112,16 +118,16 @@ static int aeApiPoll(aeEventLoop *eventLoop, struct timeval *tvp) { if (retval > 0) { int j; - + numevents = retval; for(j = 0; j < numevents; j++) { int mask = 0; struct kevent *e = state->events+j; - + if (e->filter == EVFILT_READ) mask |= AE_READABLE; if (e->filter == EVFILT_WRITE) mask |= AE_WRITABLE; - eventLoop->fired[j].fd = e->ident; - eventLoop->fired[j].mask = mask; + eventLoop->fired[j].fd = e->ident; + eventLoop->fired[j].mask = mask; } } return numevents; diff --git a/src/ae_select.c b/src/ae_select.c index f732e8e1..c039a8ea 100644 --- a/src/ae_select.c +++ b/src/ae_select.c @@ -29,6 +29,7 @@ */ +#include #include typedef struct aeApiState { @@ -48,6 +49,12 @@ static int aeApiCreate(aeEventLoop *eventLoop) { return 0; } +static int aeApiResize(aeEventLoop *eventLoop, int setsize) { + /* Just ensure we have enough room in the fd_set type. */ + if (setsize >= FD_SETSIZE) return -1; + return 0; +} + static void aeApiFree(aeEventLoop *eventLoop) { zfree(eventLoop->apidata); } diff --git a/src/atomicvar.h b/src/atomicvar.h new file mode 100644 index 00000000..4aa8fa17 --- /dev/null +++ b/src/atomicvar.h @@ -0,0 +1,94 @@ +/* This file implements atomic counters using __atomic or __sync macros if + * available, otherwise synchronizing different threads using a mutex. + * + * The exported interaface is composed of three macros: + * + * atomicIncr(var,count,mutex) -- Increment the atomic counter + * atomicDecr(var,count,mutex) -- Decrement the atomic counter + * atomicGet(var,dstvar,mutex) -- Fetch the atomic counter value + * + * If atomic primitives are availble (tested in config.h) the mutex + * is not used. + * + * Never use return value from the macros. To update and get use instead: + * + * atomicIncr(mycounter,...); + * atomicGet(mycounter,newvalue); + * doSomethingWith(newvalue); + * + * ---------------------------------------------------------------------------- + * + * Copyright (c) 2015, Salvatore Sanfilippo + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Redis nor the names of its contributors may be used + * to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#ifndef __ATOMIC_VAR_H +#define __ATOMIC_VAR_H + +#if defined(__ATOMIC_RELAXED) && (!defined(__clang__) || !defined(__APPLE__) || __apple_build_version__ > 4210057) +/* Implementation using __atomic macros. */ + +#define atomicIncr(var,count,mutex) __atomic_add_fetch(&var,(count),__ATOMIC_RELAXED) +#define atomicDecr(var,count,mutex) __atomic_sub_fetch(&var,(count),__ATOMIC_RELAXED) +#define atomicGet(var,dstvar,mutex) do { \ + dstvar = __atomic_load_n(&var,__ATOMIC_RELAXED); \ +} while(0) + +#elif defined(HAVE_ATOMIC) +/* Implementation using __sync macros. */ + +#define atomicIncr(var,count,mutex) __sync_add_and_fetch(&var,(count)) +#define atomicDecr(var,count,mutex) __sync_sub_and_fetch(&var,(count)) +#define atomicGet(var,dstvar,mutex) do { \ + dstvar = __sync_sub_and_fetch(&var,0); \ +} while(0) + +#else +/* Implementation using pthread mutex. */ + +#define atomicIncr(var,count,mutex) do { \ + pthread_mutex_lock(&mutex); \ + var += (count); \ + pthread_mutex_unlock(&mutex); \ +} while(0) + +#define atomicDecr(var,count,mutex) do { \ + pthread_mutex_lock(&mutex); \ + var -= (count); \ + pthread_mutex_unlock(&mutex); \ +} while(0) + +#define atomicGet(var,dstvar,mutex) do { \ + pthread_mutex_lock(&mutex); \ + dstvar = var; \ + pthread_mutex_unlock(&mutex); \ +} while(0) +#endif + +#endif /* __ATOMIC_VAR_H */ diff --git a/src/http_parser.c b/src/http_parser.c index c0e7ca15..895bf0c7 100644 --- a/src/http_parser.c +++ b/src/http_parser.c @@ -64,8 +64,7 @@ do { \ return (V); \ } while (0); #define REEXECUTE() \ - --p; \ - break; + goto reexecute; \ #ifdef __GNUC__ @@ -124,7 +123,7 @@ do { \ FOR##_mark = NULL; \ } \ } while (0) - + /* Run the data callback FOR and consume the current byte */ #define CALLBACK_DATA(FOR) \ CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1) @@ -401,6 +400,8 @@ enum http_host_state , s_http_host , s_http_host_v6 , s_http_host_v6_end + , s_http_host_v6_zone_start + , s_http_host_v6_zone , s_http_host_port_start , s_http_host_port }; @@ -434,6 +435,12 @@ enum http_host_state (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_') #endif +/** + * Verify that a char is a valid visible (printable) US-ASCII + * character or %x80-FF + **/ +#define IS_HEADER_CHAR(ch) \ + (ch == CR || ch == LF || ch == 9 || ((unsigned char)ch > 31 && ch != 127)) #define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res) @@ -638,6 +645,7 @@ size_t http_parser_execute (http_parser *parser, const char *body_mark = 0; const char *status_mark = 0; enum state p_state = (enum state) parser->state; + const unsigned int lenient = parser->lenient_http_headers; /* We're in an error state. Don't bother doing anything. */ if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { @@ -697,6 +705,7 @@ size_t http_parser_execute (http_parser *parser, if (PARSING_HEADER(CURRENT_STATE())) COUNT_HEADER_SIZE(1); +reexecute: switch (CURRENT_STATE()) { case s_dead: @@ -957,21 +966,23 @@ size_t http_parser_execute (http_parser *parser, parser->method = (enum http_method) 0; parser->index = 1; switch (ch) { + case 'A': parser->method = HTTP_ACL; break; + case 'B': parser->method = HTTP_BIND; break; case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break; case 'D': parser->method = HTTP_DELETE; break; case 'G': parser->method = HTTP_GET; break; case 'H': parser->method = HTTP_HEAD; break; - case 'L': parser->method = HTTP_LOCK; break; + case 'L': parser->method = HTTP_LOCK; /* or LINK */ break; case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH, MKCALENDAR */ break; case 'N': parser->method = HTTP_NOTIFY; break; case 'O': parser->method = HTTP_OPTIONS; break; case 'P': parser->method = HTTP_POST; /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */ break; - case 'R': parser->method = HTTP_REPORT; break; + case 'R': parser->method = HTTP_REPORT; /* or REBIND */ break; case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH */ break; case 'T': parser->method = HTTP_TRACE; break; - case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE */ break; + case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE, UNBIND, UNLINK */ break; default: SET_ERRNO(HPE_INVALID_METHOD); goto error; @@ -996,69 +1007,40 @@ size_t http_parser_execute (http_parser *parser, UPDATE_STATE(s_req_spaces_before_url); } else if (ch == matcher[parser->index]) { ; /* nada */ - } else if (parser->method == HTTP_CONNECT) { - if (parser->index == 1 && ch == 'H') { - parser->method = HTTP_CHECKOUT; - } else if (parser->index == 2 && ch == 'P') { - parser->method = HTTP_COPY; - } else { - SET_ERRNO(HPE_INVALID_METHOD); - goto error; - } - } else if (parser->method == HTTP_MKCOL) { - if (parser->index == 1 && ch == 'O') { - parser->method = HTTP_MOVE; - } else if (parser->index == 1 && ch == 'E') { - parser->method = HTTP_MERGE; - } else if (parser->index == 1 && ch == '-') { - parser->method = HTTP_MSEARCH; - } else if (parser->index == 2 && ch == 'A') { - parser->method = HTTP_MKACTIVITY; - } else if (parser->index == 3 && ch == 'A') { - parser->method = HTTP_MKCALENDAR; - } else { - SET_ERRNO(HPE_INVALID_METHOD); - goto error; - } - } else if (parser->method == HTTP_SUBSCRIBE) { - if (parser->index == 1 && ch == 'E') { - parser->method = HTTP_SEARCH; - } else { - SET_ERRNO(HPE_INVALID_METHOD); - goto error; - } - } else if (parser->index == 1 && parser->method == HTTP_POST) { - if (ch == 'R') { - parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */ - } else if (ch == 'U') { - parser->method = HTTP_PUT; /* or HTTP_PURGE */ - } else if (ch == 'A') { - parser->method = HTTP_PATCH; - } else { - SET_ERRNO(HPE_INVALID_METHOD); - goto error; - } - } else if (parser->index == 2) { - if (parser->method == HTTP_PUT) { - if (ch == 'R') { - parser->method = HTTP_PURGE; - } else { - SET_ERRNO(HPE_INVALID_METHOD); - goto error; - } - } else if (parser->method == HTTP_UNLOCK) { - if (ch == 'S') { - parser->method = HTTP_UNSUBSCRIBE; - } else { + } else if (IS_ALPHA(ch)) { + + switch (parser->method << 16 | parser->index << 8 | ch) { +#define XX(meth, pos, ch, new_meth) \ + case (HTTP_##meth << 16 | pos << 8 | ch): \ + parser->method = HTTP_##new_meth; break; + + XX(POST, 1, 'U', PUT) + XX(POST, 1, 'A', PATCH) + XX(CONNECT, 1, 'H', CHECKOUT) + XX(CONNECT, 2, 'P', COPY) + XX(MKCOL, 1, 'O', MOVE) + XX(MKCOL, 1, 'E', MERGE) + XX(MKCOL, 2, 'A', MKACTIVITY) + XX(MKCOL, 3, 'A', MKCALENDAR) + XX(SUBSCRIBE, 1, 'E', SEARCH) + XX(REPORT, 2, 'B', REBIND) + XX(POST, 1, 'R', PROPFIND) + XX(PROPFIND, 4, 'P', PROPPATCH) + XX(PUT, 2, 'R', PURGE) + XX(LOCK, 1, 'I', LINK) + XX(UNLOCK, 2, 'S', UNSUBSCRIBE) + XX(UNLOCK, 2, 'B', UNBIND) + XX(UNLOCK, 3, 'I', UNLINK) +#undef XX + + default: SET_ERRNO(HPE_INVALID_METHOD); goto error; - } - } else { - SET_ERRNO(HPE_INVALID_METHOD); - goto error; } - } else if (parser->index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') { - parser->method = HTTP_PROPPATCH; + } else if (ch == '-' && + parser->index == 1 && + parser->method == HTTP_MKCOL) { + parser->method = HTTP_MSEARCH; } else { SET_ERRNO(HPE_INVALID_METHOD); goto error; @@ -1487,6 +1469,12 @@ size_t http_parser_execute (http_parser *parser, goto error; } + if (parser->flags & F_CONTENTLENGTH) { + SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH); + goto error; + } + + parser->flags |= F_CONTENTLENGTH; parser->content_length = ch - '0'; break; @@ -1536,6 +1524,11 @@ size_t http_parser_execute (http_parser *parser, REEXECUTE(); } + if (!lenient && !IS_HEADER_CHAR(ch)) { + SET_ERRNO(HPE_INVALID_HEADER_TOKEN); + goto error; + } + c = LOWER(ch); switch (h_state) { @@ -1703,7 +1696,10 @@ size_t http_parser_execute (http_parser *parser, case s_header_almost_done: { - STRICT_CHECK(ch != LF); + if (UNLIKELY(ch != LF)) { + SET_ERRNO(HPE_LF_EXPECTED); + goto error; + } UPDATE_STATE(s_header_value_lws); break; @@ -1782,9 +1778,17 @@ size_t http_parser_execute (http_parser *parser, if (parser->flags & F_TRAILING) { /* End of a chunked request */ - UPDATE_STATE(NEW_MESSAGE()); - CALLBACK_NOTIFY(message_complete); - break; + UPDATE_STATE(s_message_done); + CALLBACK_NOTIFY_NOADVANCE(chunk_complete); + REEXECUTE(); + } + + /* Cannot use chunked encoding and a content-length header together + per the HTTP specification. */ + if ((parser->flags & F_CHUNKED) && + (parser->flags & F_CONTENTLENGTH)) { + SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH); + goto error; } UPDATE_STATE(s_headers_done); @@ -1809,6 +1813,9 @@ size_t http_parser_execute (http_parser *parser, case 0: break; + case 2: + parser->upgrade = 1; + case 1: parser->flags |= F_SKIPBODY; break; @@ -1828,12 +1835,16 @@ size_t http_parser_execute (http_parser *parser, case s_headers_done: { + int hasBody; STRICT_CHECK(ch != LF); parser->nread = 0; - /* Exit, the rest of the connect is in a different protocol. */ - if (parser->upgrade) { + hasBody = parser->flags & F_CHUNKED || + (parser->content_length > 0 && parser->content_length != ULLONG_MAX); + if (parser->upgrade && (parser->method == HTTP_CONNECT || + (parser->flags & F_SKIPBODY) || !hasBody)) { + /* Exit, the rest of the message is in a different protocol. */ UPDATE_STATE(NEW_MESSAGE()); CALLBACK_NOTIFY(message_complete); RETURN((p - data) + 1); @@ -1854,8 +1865,7 @@ size_t http_parser_execute (http_parser *parser, /* Content-Length header given and non-zero */ UPDATE_STATE(s_body_identity); } else { - if (parser->type == HTTP_REQUEST || - !http_message_needs_eof(parser)) { + if (!http_message_needs_eof(parser)) { /* Assume content-length 0 - read the next */ UPDATE_STATE(NEW_MESSAGE()); CALLBACK_NOTIFY(message_complete); @@ -1915,6 +1925,10 @@ size_t http_parser_execute (http_parser *parser, case s_message_done: UPDATE_STATE(NEW_MESSAGE()); CALLBACK_NOTIFY(message_complete); + if (parser->upgrade) { + /* Exit, the rest of the message is in a different protocol. */ + RETURN((p - data) + 1); + } break; case s_chunk_size_start: @@ -1994,6 +2008,7 @@ size_t http_parser_execute (http_parser *parser, } else { UPDATE_STATE(s_chunk_data); } + CALLBACK_NOTIFY(chunk_header); break; } @@ -2033,6 +2048,7 @@ size_t http_parser_execute (http_parser *parser, STRICT_CHECK(ch != LF); parser->nread = 0; UPDATE_STATE(s_chunk_size_start); + CALLBACK_NOTIFY(chunk_complete); break; default: @@ -2136,15 +2152,21 @@ http_parser_init (http_parser *parser, enum http_parser_type t) parser->http_errno = HPE_OK; } +void +http_parser_settings_init(http_parser_settings *settings) +{ + memset(settings, 0, sizeof(*settings)); +} + const char * http_errno_name(enum http_errno err) { - assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0]))); + assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab)); return http_strerror_tab[err].name; } const char * http_errno_description(enum http_errno err) { - assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0]))); + assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab)); return http_strerror_tab[err].description; } @@ -2197,6 +2219,23 @@ http_parse_host_char(enum http_host_state s, const char ch) { return s_http_host_v6; } + if (s == s_http_host_v6 && ch == '%') { + return s_http_host_v6_zone_start; + } + break; + + case s_http_host_v6_zone: + if (ch == ']') { + return s_http_host_v6_end; + } + + /* FALLTHROUGH */ + case s_http_host_v6_zone_start: + /* RFC 6874 Zone ID consists of 1*( unreserved / pct-encoded) */ + if (IS_ALPHANUM(ch) || ch == '%' || ch == '.' || ch == '-' || ch == '_' || + ch == '~') { + return s_http_host_v6_zone; + } break; case s_http_host_port: @@ -2220,6 +2259,8 @@ http_parse_host(const char * buf, struct http_parser_url *u, int found_at) { const char *p; size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len; + assert(u->field_set & (1 << UF_HOST)); + u->field_data[UF_HOST].len = 0; s = found_at ? s_http_userinfo_start : s_http_host_start; @@ -2246,6 +2287,11 @@ http_parse_host(const char * buf, struct http_parser_url *u, int found_at) { u->field_data[UF_HOST].len++; break; + case s_http_host_v6_zone_start: + case s_http_host_v6_zone: + u->field_data[UF_HOST].len++; + break; + case s_http_host_port: if (s != s_http_host_port) { u->field_data[UF_PORT].off = p - buf; @@ -2275,6 +2321,8 @@ http_parse_host(const char * buf, struct http_parser_url *u, int found_at) { case s_http_host_start: case s_http_host_v6_start: case s_http_host_v6: + case s_http_host_v6_zone_start: + case s_http_host_v6_zone: case s_http_host_port_start: case s_http_userinfo: case s_http_userinfo_start: @@ -2286,6 +2334,11 @@ http_parse_host(const char * buf, struct http_parser_url *u, int found_at) { return 0; } +void +http_parser_url_init(struct http_parser_url *u) { + memset(u, 0, sizeof(*u)); +} + int http_parser_parse_url(const char *buf, size_t buflen, int is_connect, struct http_parser_url *u) @@ -2359,7 +2412,12 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect, /* host must be present if there is a schema */ /* parsing http:///toto will fail */ - if ((u->field_set & ((1 << UF_SCHEMA) | (1 << UF_HOST))) != 0) { + if ((u->field_set & (1 << UF_SCHEMA)) && + (u->field_set & (1 << UF_HOST)) == 0) { + return 1; + } + + if (u->field_set & (1 << UF_HOST)) { if (http_parse_host(buf, u, found_at) != 0) { return 1; } diff --git a/src/http_parser.h b/src/http_parser.h index cb592952..45c72a07 100644 --- a/src/http_parser.h +++ b/src/http_parser.h @@ -26,11 +26,12 @@ extern "C" { /* Also update SONAME in the Makefile whenever you change these. */ #define HTTP_PARSER_VERSION_MAJOR 2 -#define HTTP_PARSER_VERSION_MINOR 4 -#define HTTP_PARSER_VERSION_PATCH 2 +#define HTTP_PARSER_VERSION_MINOR 7 +#define HTTP_PARSER_VERSION_PATCH 1 #include -#if defined(_WIN32) && !defined(__MINGW32__) && (!defined(_MSC_VER) || _MSC_VER<1600) +#if defined(_WIN32) && !defined(__MINGW32__) && \ + (!defined(_MSC_VER) || _MSC_VER<1600) && !defined(__WINE__) #include #include typedef __int8 int8_t; @@ -76,6 +77,11 @@ typedef struct http_parser_settings http_parser_settings; * HEAD request which may contain 'Content-Length' or 'Transfer-Encoding: * chunked' headers that indicate the presence of a body. * + * Returning `2` from on_headers_complete will tell parser that it should not + * expect neither a body nor any futher responses on this connection. This is + * useful for handling responses to a CONNECT request which may not contain + * `Upgrade` or `Connection: upgrade` headers. + * * http_data_cb does not return data chunks. It will be called arbitrarily * many times for each string. E.G. you might get 10 callbacks for "on_url" * each providing just a few characters more data. @@ -84,6 +90,76 @@ typedef int (*http_data_cb) (http_parser*, const char *at, size_t length); typedef int (*http_cb) (http_parser*); +/* Status Codes */ +#define HTTP_STATUS_MAP(XX) \ + XX(100, CONTINUE, Continue) \ + XX(101, SWITCHING_PROTOCOLS, Switching Protocols) \ + XX(102, PROCESSING, Processing) \ + XX(200, OK, OK) \ + XX(201, CREATED, Created) \ + XX(202, ACCEPTED, Accepted) \ + XX(203, NON_AUTHORITATIVE_INFORMATION, Non-Authoritative Information) \ + XX(204, NO_CONTENT, No Content) \ + XX(205, RESET_CONTENT, Reset Content) \ + XX(206, PARTIAL_CONTENT, Partial Content) \ + XX(207, MULTI_STATUS, Multi-Status) \ + XX(208, ALREADY_REPORTED, Already Reported) \ + XX(226, IM_USED, IM Used) \ + XX(300, MULTIPLE_CHOICES, Multiple Choices) \ + XX(301, MOVED_PERMANENTLY, Moved Permanently) \ + XX(302, FOUND, Found) \ + XX(303, SEE_OTHER, See Other) \ + XX(304, NOT_MODIFIED, Not Modified) \ + XX(305, USE_PROXY, Use Proxy) \ + XX(307, TEMPORARY_REDIRECT, Temporary Redirect) \ + XX(308, PERMANENT_REDIRECT, Permanent Redirect) \ + XX(400, BAD_REQUEST, Bad Request) \ + XX(401, UNAUTHORIZED, Unauthorized) \ + XX(402, PAYMENT_REQUIRED, Payment Required) \ + XX(403, FORBIDDEN, Forbidden) \ + XX(404, NOT_FOUND, Not Found) \ + XX(405, METHOD_NOT_ALLOWED, Method Not Allowed) \ + XX(406, NOT_ACCEPTABLE, Not Acceptable) \ + XX(407, PROXY_AUTHENTICATION_REQUIRED, Proxy Authentication Required) \ + XX(408, REQUEST_TIMEOUT, Request Timeout) \ + XX(409, CONFLICT, Conflict) \ + XX(410, GONE, Gone) \ + XX(411, LENGTH_REQUIRED, Length Required) \ + XX(412, PRECONDITION_FAILED, Precondition Failed) \ + XX(413, PAYLOAD_TOO_LARGE, Payload Too Large) \ + XX(414, URI_TOO_LONG, URI Too Long) \ + XX(415, UNSUPPORTED_MEDIA_TYPE, Unsupported Media Type) \ + XX(416, RANGE_NOT_SATISFIABLE, Range Not Satisfiable) \ + XX(417, EXPECTATION_FAILED, Expectation Failed) \ + XX(421, MISDIRECTED_REQUEST, Misdirected Request) \ + XX(422, UNPROCESSABLE_ENTITY, Unprocessable Entity) \ + XX(423, LOCKED, Locked) \ + XX(424, FAILED_DEPENDENCY, Failed Dependency) \ + XX(426, UPGRADE_REQUIRED, Upgrade Required) \ + XX(428, PRECONDITION_REQUIRED, Precondition Required) \ + XX(429, TOO_MANY_REQUESTS, Too Many Requests) \ + XX(431, REQUEST_HEADER_FIELDS_TOO_LARGE, Request Header Fields Too Large) \ + XX(451, UNAVAILABLE_FOR_LEGAL_REASONS, Unavailable For Legal Reasons) \ + XX(500, INTERNAL_SERVER_ERROR, Internal Server Error) \ + XX(501, NOT_IMPLEMENTED, Not Implemented) \ + XX(502, BAD_GATEWAY, Bad Gateway) \ + XX(503, SERVICE_UNAVAILABLE, Service Unavailable) \ + XX(504, GATEWAY_TIMEOUT, Gateway Timeout) \ + XX(505, HTTP_VERSION_NOT_SUPPORTED, HTTP Version Not Supported) \ + XX(506, VARIANT_ALSO_NEGOTIATES, Variant Also Negotiates) \ + XX(507, INSUFFICIENT_STORAGE, Insufficient Storage) \ + XX(508, LOOP_DETECTED, Loop Detected) \ + XX(510, NOT_EXTENDED, Not Extended) \ + XX(511, NETWORK_AUTHENTICATION_REQUIRED, Network Authentication Required) \ + +enum http_status + { +#define XX(num, name, string) HTTP_STATUS_##name = num, + HTTP_STATUS_MAP(XX) +#undef XX + }; + + /* Request Methods */ #define HTTP_METHOD_MAP(XX) \ XX(0, DELETE, DELETE) \ @@ -95,7 +171,7 @@ typedef int (*http_cb) (http_parser*); XX(5, CONNECT, CONNECT) \ XX(6, OPTIONS, OPTIONS) \ XX(7, TRACE, TRACE) \ - /* webdav */ \ + /* WebDAV */ \ XX(8, COPY, COPY) \ XX(9, LOCK, LOCK) \ XX(10, MKCOL, MKCOL) \ @@ -104,21 +180,28 @@ typedef int (*http_cb) (http_parser*); XX(13, PROPPATCH, PROPPATCH) \ XX(14, SEARCH, SEARCH) \ XX(15, UNLOCK, UNLOCK) \ + XX(16, BIND, BIND) \ + XX(17, REBIND, REBIND) \ + XX(18, UNBIND, UNBIND) \ + XX(19, ACL, ACL) \ /* subversion */ \ - XX(16, REPORT, REPORT) \ - XX(17, MKACTIVITY, MKACTIVITY) \ - XX(18, CHECKOUT, CHECKOUT) \ - XX(19, MERGE, MERGE) \ + XX(20, REPORT, REPORT) \ + XX(21, MKACTIVITY, MKACTIVITY) \ + XX(22, CHECKOUT, CHECKOUT) \ + XX(23, MERGE, MERGE) \ /* upnp */ \ - XX(20, MSEARCH, M-SEARCH) \ - XX(21, NOTIFY, NOTIFY) \ - XX(22, SUBSCRIBE, SUBSCRIBE) \ - XX(23, UNSUBSCRIBE, UNSUBSCRIBE) \ + XX(24, MSEARCH, M-SEARCH) \ + XX(25, NOTIFY, NOTIFY) \ + XX(26, SUBSCRIBE, SUBSCRIBE) \ + XX(27, UNSUBSCRIBE, UNSUBSCRIBE) \ /* RFC-5789 */ \ - XX(24, PATCH, PATCH) \ - XX(25, PURGE, PURGE) \ + XX(28, PATCH, PATCH) \ + XX(29, PURGE, PURGE) \ /* CalDAV */ \ - XX(26, MKCALENDAR, MKCALENDAR) \ + XX(30, MKCALENDAR, MKCALENDAR) \ + /* RFC-2068, section 19.6.1.2 */ \ + XX(31, LINK, LINK) \ + XX(32, UNLINK, UNLINK) \ enum http_method { @@ -140,11 +223,12 @@ enum flags , F_TRAILING = 1 << 4 , F_UPGRADE = 1 << 5 , F_SKIPBODY = 1 << 6 + , F_CONTENTLENGTH = 1 << 7 }; /* Map for errno-related constants - * + * * The provided argument should be a macro that takes 2 arguments. */ #define HTTP_ERRNO_MAP(XX) \ @@ -160,6 +244,8 @@ enum flags XX(CB_body, "the on_body callback failed") \ XX(CB_message_complete, "the on_message_complete callback failed") \ XX(CB_status, "the on_status callback failed") \ + XX(CB_chunk_header, "the on_chunk_header callback failed") \ + XX(CB_chunk_complete, "the on_chunk_complete callback failed") \ \ /* Parsing-related errors */ \ XX(INVALID_EOF_STATE, "stream ended at an unexpected time") \ @@ -180,6 +266,8 @@ enum flags XX(INVALID_HEADER_TOKEN, "invalid character in header") \ XX(INVALID_CONTENT_LENGTH, \ "invalid character in content-length header") \ + XX(UNEXPECTED_CONTENT_LENGTH, \ + "unexpected content-length header") \ XX(INVALID_CHUNK_SIZE, \ "invalid character in chunk size header") \ XX(INVALID_CONSTANT, "invalid constant string") \ @@ -204,10 +292,11 @@ enum http_errno { struct http_parser { /** PRIVATE **/ unsigned int type : 2; /* enum http_parser_type */ - unsigned int flags : 6; /* F_* values from 'flags' enum; semi-public */ - unsigned int state : 8; /* enum state from http_parser.c */ - unsigned int header_state : 8; /* enum header_state from http_parser.c */ - unsigned int index : 8; /* index into current matcher */ + unsigned int flags : 8; /* F_* values from 'flags' enum; semi-public */ + unsigned int state : 7; /* enum state from http_parser.c */ + unsigned int header_state : 7; /* enum header_state from http_parser.c */ + unsigned int index : 7; /* index into current matcher */ + unsigned int lenient_http_headers : 1; uint32_t nread; /* # bytes read in various scenarios */ uint64_t content_length; /* # bytes in body (0 if no Content-Length header) */ @@ -240,6 +329,11 @@ struct http_parser_settings { http_cb on_headers_complete; http_data_cb on_body; http_cb on_message_complete; + /* When on_chunk_header is called, the current chunk length is stored + * in parser->content_length. + */ + http_cb on_chunk_header; + http_cb on_chunk_complete; }; @@ -288,6 +382,11 @@ unsigned long http_parser_version(void); void http_parser_init(http_parser *parser, enum http_parser_type type); +/* Initialize http_parser_settings members to 0 + */ +void http_parser_settings_init(http_parser_settings *settings); + + /* Executes the parser. Returns number of parsed bytes. Sets * `parser->http_errno` on error. */ size_t http_parser_execute(http_parser *parser, @@ -313,6 +412,9 @@ const char *http_errno_name(enum http_errno err); /* Return a string description of the given error */ const char *http_errno_description(enum http_errno err); +/* Initialize all http_parser_url members to 0 */ +void http_parser_url_init(struct http_parser_url *u); + /* Parse a URL; return nonzero on failure */ int http_parser_parse_url(const char *buf, size_t buflen, int is_connect, diff --git a/src/zmalloc.c b/src/zmalloc.c index 89f80d83..f71ce2c9 100644 --- a/src/zmalloc.c +++ b/src/zmalloc.c @@ -30,10 +30,20 @@ #include #include + +/* This function provide us access to the original libc free(). This is useful + * for instance to free results obtained by backtrace_symbols(). We need + * to define this function before including zmalloc.h that may shadow the + * free implementation if we use jemalloc or another non standard allocator. */ +void zlibc_free(void *ptr) { + free(ptr); +} + #include #include #include "config.h" #include "zmalloc.h" +#include "atomicvar.h" #ifdef HAVE_MALLOC_SIZE #define PREFIX_SIZE (0) @@ -56,15 +66,15 @@ #define calloc(count,size) je_calloc(count,size) #define realloc(ptr,size) je_realloc(ptr,size) #define free(ptr) je_free(ptr) +#define mallocx(size,flags) je_mallocx(size,flags) +#define dallocx(ptr,flags) je_dallocx(ptr,flags) #endif -#define update_zmalloc_stat_alloc(__n,__size) do { \ +#define update_zmalloc_stat_alloc(__n) do { \ size_t _n = (__n); \ if (_n&(sizeof(long)-1)) _n += sizeof(long)-(_n&(sizeof(long)-1)); \ if (zmalloc_thread_safe) { \ - pthread_mutex_lock(&used_memory_mutex); \ - used_memory += _n; \ - pthread_mutex_unlock(&used_memory_mutex); \ + atomicIncr(used_memory,__n,used_memory_mutex); \ } else { \ used_memory += _n; \ } \ @@ -74,9 +84,7 @@ size_t _n = (__n); \ if (_n&(sizeof(long)-1)) _n += sizeof(long)-(_n&(sizeof(long)-1)); \ if (zmalloc_thread_safe) { \ - pthread_mutex_lock(&used_memory_mutex); \ - used_memory -= _n; \ - pthread_mutex_unlock(&used_memory_mutex); \ + atomicDecr(used_memory,__n,used_memory_mutex); \ } else { \ used_memory -= _n; \ } \ @@ -86,37 +94,57 @@ static size_t used_memory = 0; static int zmalloc_thread_safe = 0; pthread_mutex_t used_memory_mutex = PTHREAD_MUTEX_INITIALIZER; -static void zmalloc_oom(size_t size) { +static void zmalloc_default_oom(size_t size) { fprintf(stderr, "zmalloc: Out of memory trying to allocate %zu bytes\n", size); fflush(stderr); abort(); } +static void (*zmalloc_oom_handler)(size_t) = zmalloc_default_oom; + void *zmalloc(size_t size) { void *ptr = malloc(size+PREFIX_SIZE); - if (!ptr) zmalloc_oom(size); + if (!ptr) zmalloc_oom_handler(size); #ifdef HAVE_MALLOC_SIZE - update_zmalloc_stat_alloc(zmalloc_size(ptr),size); + update_zmalloc_stat_alloc(zmalloc_size(ptr)); return ptr; #else *((size_t*)ptr) = size; - update_zmalloc_stat_alloc(size+PREFIX_SIZE,size); + update_zmalloc_stat_alloc(size+PREFIX_SIZE); return (char*)ptr+PREFIX_SIZE; #endif } +/* Allocation and free functions that bypass the thread cache + * and go straight to the allocator arena bins. + * Currently implemented only for jemalloc. Used for online defragmentation. */ +#ifdef HAVE_DEFRAG +void *zmalloc_no_tcache(size_t size) { + void *ptr = mallocx(size+PREFIX_SIZE, MALLOCX_TCACHE_NONE); + if (!ptr) zmalloc_oom_handler(size); + update_zmalloc_stat_alloc(zmalloc_size(ptr)); + return ptr; +} + +void zfree_no_tcache(void *ptr) { + if (ptr == NULL) return; + update_zmalloc_stat_free(zmalloc_size(ptr)); + dallocx(ptr, MALLOCX_TCACHE_NONE); +} +#endif + void *zcalloc(size_t size) { void *ptr = calloc(1, size+PREFIX_SIZE); - if (!ptr) zmalloc_oom(size); + if (!ptr) zmalloc_oom_handler(size); #ifdef HAVE_MALLOC_SIZE - update_zmalloc_stat_alloc(zmalloc_size(ptr),size); + update_zmalloc_stat_alloc(zmalloc_size(ptr)); return ptr; #else *((size_t*)ptr) = size; - update_zmalloc_stat_alloc(size+PREFIX_SIZE,size); + update_zmalloc_stat_alloc(size+PREFIX_SIZE); return (char*)ptr+PREFIX_SIZE; #endif } @@ -132,26 +160,26 @@ void *zrealloc(void *ptr, size_t size) { #ifdef HAVE_MALLOC_SIZE oldsize = zmalloc_size(ptr); newptr = realloc(ptr,size); - if (!newptr) zmalloc_oom(size); + if (!newptr) zmalloc_oom_handler(size); update_zmalloc_stat_free(oldsize); - update_zmalloc_stat_alloc(zmalloc_size(newptr),size); + update_zmalloc_stat_alloc(zmalloc_size(newptr)); return newptr; #else realptr = (char*)ptr-PREFIX_SIZE; oldsize = *((size_t*)realptr); newptr = realloc(realptr,size+PREFIX_SIZE); - if (!newptr) zmalloc_oom(size); + if (!newptr) zmalloc_oom_handler(size); *((size_t*)newptr) = size; update_zmalloc_stat_free(oldsize); - update_zmalloc_stat_alloc(size,size); + update_zmalloc_stat_alloc(size); return (char*)newptr+PREFIX_SIZE; #endif } /* Provide zmalloc_size() for systems where this function is not provided by - * malloc itself, given that in that case we store an header with this + * malloc itself, given that in that case we store a header with this * information as the first bytes of every allocation. */ #ifndef HAVE_MALLOC_SIZE size_t zmalloc_size(void *ptr) { @@ -193,9 +221,11 @@ char *zstrdup(const char *s) { size_t zmalloc_used_memory(void) { size_t um; - if (zmalloc_thread_safe) pthread_mutex_lock(&used_memory_mutex); - um = used_memory; - if (zmalloc_thread_safe) pthread_mutex_unlock(&used_memory_mutex); + if (zmalloc_thread_safe) { + atomicGet(used_memory,um,used_memory_mutex); + } else { + um = used_memory; + } return um; } @@ -203,6 +233,10 @@ void zmalloc_enable_thread_safeness(void) { zmalloc_thread_safe = 1; } +void zmalloc_set_oom_handler(void (*oom_handler)(size_t)) { + zmalloc_oom_handler = oom_handler; +} + /* Get the RSS information in an OS-specific way. * * WARNING: the function zmalloc_get_rss() is not designed to be fast @@ -211,9 +245,9 @@ void zmalloc_enable_thread_safeness(void) { * * For this kind of "fast RSS reporting" usages use instead the * function RedisEstimateRSS() that is a much faster (and less precise) - * version of the funciton. */ + * version of the function. */ -#if defined(HAVE_PROCFS) +#if defined(HAVE_PROC_STAT) #include #include #include @@ -282,6 +316,113 @@ size_t zmalloc_get_rss(void) { #endif /* Fragmentation = RSS / allocated-bytes */ -float zmalloc_get_fragmentation_ratio(void) { - return (float)zmalloc_get_rss()/zmalloc_used_memory(); +float zmalloc_get_fragmentation_ratio(size_t rss) { + return (float)rss/zmalloc_used_memory(); } + +/* Get the sum of the specified field (converted form kb to bytes) in + * /proc/self/smaps. The field must be specified with trailing ":" as it + * apperas in the smaps output. + * + * If a pid is specified, the information is extracted for such a pid, + * otherwise if pid is -1 the information is reported is about the + * current process. + * + * Example: zmalloc_get_smap_bytes_by_field("Rss:",-1); + */ +#if defined(HAVE_PROC_SMAPS) +size_t zmalloc_get_smap_bytes_by_field(char *field, long pid) { + char line[1024]; + size_t bytes = 0; + int flen = strlen(field); + FILE *fp; + + if (pid == -1) { + fp = fopen("/proc/self/smaps","r"); + } else { + char filename[128]; + snprintf(filename,sizeof(filename),"/proc/%ld/smaps",pid); + fp = fopen(filename,"r"); + } + + if (!fp) return 0; + while(fgets(line,sizeof(line),fp) != NULL) { + if (strncmp(line,field,flen) == 0) { + char *p = strchr(line,'k'); + if (p) { + *p = '\0'; + bytes += strtol(line+flen,NULL,10) * 1024; + } + } + } + fclose(fp); + return bytes; +} +#else +size_t zmalloc_get_smap_bytes_by_field(char *field, long pid) { + ((void) field); + ((void) pid); + return 0; +} +#endif + +size_t zmalloc_get_private_dirty(long pid) { + return zmalloc_get_smap_bytes_by_field("Private_Dirty:",pid); +} + +/* Returns the size of physical memory (RAM) in bytes. + * It looks ugly, but this is the cleanest way to achive cross platform results. + * Cleaned up from: + * + * http://nadeausoftware.com/articles/2012/09/c_c_tip_how_get_physical_memory_size_system + * + * Note that this function: + * 1) Was released under the following CC attribution license: + * http://creativecommons.org/licenses/by/3.0/deed.en_US. + * 2) Was originally implemented by David Robert Nadeau. + * 3) Was modified for Redis by Matt Stancliff. + * 4) This note exists in order to comply with the original license. + */ +size_t zmalloc_get_memory_size(void) { +#if defined(__unix__) || defined(__unix) || defined(unix) || \ + (defined(__APPLE__) && defined(__MACH__)) +#if defined(CTL_HW) && (defined(HW_MEMSIZE) || defined(HW_PHYSMEM64)) + int mib[2]; + mib[0] = CTL_HW; +#if defined(HW_MEMSIZE) + mib[1] = HW_MEMSIZE; /* OSX. --------------------- */ +#elif defined(HW_PHYSMEM64) + mib[1] = HW_PHYSMEM64; /* NetBSD, OpenBSD. --------- */ +#endif + int64_t size = 0; /* 64-bit */ + size_t len = sizeof(size); + if (sysctl( mib, 2, &size, &len, NULL, 0) == 0) + return (size_t)size; + return 0L; /* Failed? */ + +#elif defined(_SC_PHYS_PAGES) && defined(_SC_PAGESIZE) + /* FreeBSD, Linux, OpenBSD, and Solaris. -------------------- */ + return (size_t)sysconf(_SC_PHYS_PAGES) * (size_t)sysconf(_SC_PAGESIZE); + +#elif defined(CTL_HW) && (defined(HW_PHYSMEM) || defined(HW_REALMEM)) + /* DragonFly BSD, FreeBSD, NetBSD, OpenBSD, and OSX. -------- */ + int mib[2]; + mib[0] = CTL_HW; +#if defined(HW_REALMEM) + mib[1] = HW_REALMEM; /* FreeBSD. ----------------- */ +#elif defined(HW_PYSMEM) + mib[1] = HW_PHYSMEM; /* Others. ------------------ */ +#endif + unsigned int size = 0; /* 32-bit */ + size_t len = sizeof(size); + if (sysctl(mib, 2, &size, &len, NULL, 0) == 0) + return (size_t)size; + return 0L; /* Failed? */ +#endif /* sysctl and sysconf variants */ + +#else + return 0L; /* Unknown OS. */ +#endif +} + + diff --git a/src/zmalloc.h b/src/zmalloc.h index 995814c8..b6d4e1d9 100644 --- a/src/zmalloc.h +++ b/src/zmalloc.h @@ -38,7 +38,7 @@ #if defined(USE_TCMALLOC) #define ZMALLOC_LIB ("tcmalloc-" __xstr(TC_VERSION_MAJOR) "." __xstr(TC_VERSION_MINOR)) #include -#if TC_VERSION_MAJOR >= 1 && TC_VERSION_MINOR >= 6 +#if (TC_VERSION_MAJOR == 1 && TC_VERSION_MINOR >= 6) || (TC_VERSION_MAJOR > 1) #define HAVE_MALLOC_SIZE 1 #define zmalloc_size(p) tc_malloc_size(p) #else @@ -47,11 +47,10 @@ #elif defined(USE_JEMALLOC) #define ZMALLOC_LIB ("jemalloc-" __xstr(JEMALLOC_VERSION_MAJOR) "." __xstr(JEMALLOC_VERSION_MINOR) "." __xstr(JEMALLOC_VERSION_BUGFIX)) -#define JEMALLOC_MANGLE #include -#if JEMALLOC_VERSION_MAJOR >= 2 && JEMALLOC_VERSION_MINOR >= 1 +#if (JEMALLOC_VERSION_MAJOR == 2 && JEMALLOC_VERSION_MINOR >= 1) || (JEMALLOC_VERSION_MAJOR > 2) #define HAVE_MALLOC_SIZE 1 -#define zmalloc_size(p) JEMALLOC_P(malloc_usable_size)(p) +#define zmalloc_size(p) je_malloc_usable_size(p) #else #error "Newer version of jemalloc required" #endif @@ -66,6 +65,13 @@ #define ZMALLOC_LIB "libc" #endif +/* We can enable the Redis defrag capabilities only if we are using Jemalloc + * and the version used is our special version modified for Redis having + * the ability to return per-allocation fragmentation hints. */ +#if defined(USE_JEMALLOC) && defined(JEMALLOC_FRAG_HINT) +#define HAVE_DEFRAG +#endif + void *zmalloc(size_t size); void *zcalloc(size_t size); void *zrealloc(void *ptr, size_t size); @@ -73,8 +79,18 @@ void zfree(void *ptr); char *zstrdup(const char *s); size_t zmalloc_used_memory(void); void zmalloc_enable_thread_safeness(void); -float zmalloc_get_fragmentation_ratio(void); +void zmalloc_set_oom_handler(void (*oom_handler)(size_t)); +float zmalloc_get_fragmentation_ratio(size_t rss); size_t zmalloc_get_rss(void); +size_t zmalloc_get_private_dirty(long pid); +size_t zmalloc_get_smap_bytes_by_field(char *field, long pid); +size_t zmalloc_get_memory_size(void); +void zlibc_free(void *ptr); + +#ifdef HAVE_DEFRAG +void zfree_no_tcache(void *ptr); +void *zmalloc_no_tcache(size_t size); +#endif #ifndef HAVE_MALLOC_SIZE size_t zmalloc_size(void *ptr);