Enable use of large memory pages (Trond Norbye) <[email protected]>

Initial support for solaris. git-svn-id: http://code.sixapart.com/svn/memcached/trunk/server@724 b0b603af-a30f-0410-a34e-baf09ae79d0b
nu2 · Feb 25, 2008 · a6b35b4 · a6b35b4
1 parent c12ebb2
commit a6b35b4
Show file tree

Hide file tree

Showing 5 changed files with 131 additions and 8 deletions.
diff --git a/configure.ac b/configure.ac
@@ -199,6 +199,8 @@ if test "x$enable_threads" == "xyes"; then
 fi
 
 AC_CHECK_FUNCS(mlockall)
+AC_CHECK_FUNCS(getpagesizes)
+AC_CHECK_FUNCS(memcntl)
 
 AC_CONFIG_FILES(Makefile doc/Makefile)
 AC_OUTPUT
diff --git a/doc/memory_management.txt b/doc/memory_management.txt
@@ -1,3 +1,14 @@
+Date: Tue, 20 Feb 2008
+From: Trond Norbye <[email protected]>
+
+When started with -L, memcached will try to enable large memory
+pages and preallocate all memory up front. Using large memory
+pages can reduce the number of TLB misses (depending on the
+access pattern) and hence improve performance.
+
+See http://en.wikipedia.org/wiki/Translation_lookaside_buffer for
+a description of TLB.
+
 Date: Fri, 5 Sep 2003 20:31:03 +0300
 From: Anatoly Vorobey <[email protected]>
 To: [email protected]
@@ -44,7 +55,8 @@ high percentage of memory is wasted. The most efficient way to reduce
 the waste is to use a list of size classes that closely matches (if 
 that's at all possible) common sizes of objects that the clients
 of this particular installation of memcached are likely to store.
-For example, if your installation is going to store hundreds of                                                                  thousands of objects of the size exactly 120 bytes, you'd be much better
+For example, if your installation is going to store hundreds of
+thousands of objects of the size exactly 120 bytes, you'd be much better
 off changing, in the "naive" list of sizes outlined above, the class
 of 128 bytes to something a bit higher (because the overhead of 
 storing an item, while not large, will push those 120-bytes objects over 

diff --git a/memcached.c b/memcached.c
@@ -2693,7 +2693,17 @@ static void usage(void) {
            "-b            run a managed instanced (mnemonic: buckets)\n"
            "-P <file>     save PID in <file>, only used with -d option\n"
            "-f <factor>   chunk size growth factor, default 1.25\n"
-           "-n <bytes>    minimum space allocated for key+value+flags, default 48\n");
+           "-n <bytes>    minimum space allocated for key+value+flags, default 48\n"
+
+#if defined(HAVE_GETPAGESIZES) && defined(HAVE_MEMCNTL)
+           "-L            Try to use large memory pages (if available). Increasing\n"
+           "              the memory page size could reduce the number of TLB misses\n"
+           "              and improve the performance. In order to get large pages\n"
+           "              from the OS, memcached will allocate the total item-cache\n"
+           "              in one large chunk.\n"
+#endif
+           );
+
 #ifdef USE_THREADS
     printf("-t <num>      number of threads to use, default 4\n");
 #endif
@@ -2804,11 +2814,53 @@ static void sig_handler(const int sig) {
     exit(EXIT_SUCCESS);
 }
 
+#if defined(HAVE_GETPAGESIZES) && defined(HAVE_MEMCNTL)
+/*
+ * On systems that support multiple page sizes, request the largest one to cut
+ * TLB misses. Returns 0 on success, -1 (after a warning on stderr) on failure.
+ */
+int enable_large_pages(void) {
+    int ret = -1; /* stays -1 unless memcntl() below succeeds */
+    size_t sizes[32];
+    int avail = getpagesizes(sizes, 32); /* -1 is treated as failure */
+    if (avail != -1) {
+        size_t max = sizes[0]; /* largest page size seen so far */
+        struct memcntl_mha arg = {0};
+        int ii;
+
+        for (ii = 1; ii < avail; ++ii) {
+            if (max < sizes[ii]) {
+                max = sizes[ii];
+            }
+        }
+
+        arg.mha_flags   = 0;
+        arg.mha_pagesize = max;
+        arg.mha_cmd = MHA_MAPSIZE_BSSBRK; /* NOTE(review): presumably heap (BSS/brk) mappings -- confirm in memcntl(2) */
+
+        if (memcntl(0, 0, MC_HAT_ADVISE, (caddr_t)&arg, 0, 0) == -1) {
+            fprintf(stderr, "Failed to set large pages: %s\n",
+                    strerror(errno));
+            fprintf(stderr, "Will use default page size\n");
+        } else {
+            ret = 0;
+        }
+    } else {
+        fprintf(stderr, "Failed to get supported pagesizes: %s\n",
+                strerror(errno));
+        fprintf(stderr, "Will use default page size\n");
+    }
+
+    return ret;
+}
+#endif
+
 int main (int argc, char **argv) {
     int c;
     int x;
     bool lock_memory = false;
     bool daemonize = false;
+    bool preallocate = false;
     int maxcore = 0;
     char *username = NULL;
     char *pid_file = NULL;
@@ -2833,7 +2885,7 @@ int main (int argc, char **argv) {
     setbuf(stderr, NULL);
 
     /* process arguments */
-    while ((c = getopt(argc, argv, "a:bp:s:U:m:Mc:khirvdl:u:P:f:s:n:t:D:")) != -1) {
+    while ((c = getopt(argc, argv, "a:bp:s:U:m:Mc:khirvdl:u:P:f:s:n:t:D:L")) != -1) {
         switch (c) {
         case 'a':
             /* access for unix domain socket, as octal mask (like chmod)*/
@@ -2917,6 +2969,13 @@ int main (int argc, char **argv) {
             settings.prefix_delimiter = optarg[0];
             settings.detail_enabled = 1;
             break;
+#if defined(HAVE_GETPAGESIZES) && defined(HAVE_MEMCNTL)
+        case 'L' :
+            if (enable_large_pages() == 0) {
+                preallocate = true;
+            }
+            break;
+#endif
         default:
             fprintf(stderr, "Illegal argument \"%c\"\n", c);
             return 1;
@@ -3056,7 +3115,7 @@ int main (int argc, char **argv) {
     conn_init();
     /* Hacky suffix buffers. */
     suffix_init();
-    slabs_init(settings.maxbytes, settings.factor);
+    slabs_init(settings.maxbytes, settings.factor, preallocate);
 
     /* managed instance? alloc and zero a bucket array */
     if (settings.managed) {

diff --git a/slabs.c b/slabs.c
@@ -54,10 +54,15 @@ static size_t mem_limit = 0;
 static size_t mem_malloced = 0;
 static int power_largest;
 
+static void *mem_base = NULL;
+static void *mem_current = NULL;
+static size_t mem_avail = 0;
+
 /*
  * Forward Declarations
  */
 static int do_slabs_newslab(const unsigned int id);
+static void *memory_allocate(size_t size);
 
 #ifndef DONT_PREALLOC_SLABS
 /* Preallocate as many slab pages as possible (called from slabs_init)
@@ -92,7 +97,7 @@ unsigned int slabs_clsid(const size_t size) {
  * Determines the chunk sizes and initializes the slab class descriptors
  * accordingly.
  */
-void slabs_init(const size_t limit, const double factor) {
+void slabs_init(const size_t limit, const double factor, const bool prealloc) {
     int i = POWER_SMALLEST - 1;
     unsigned int size = sizeof(item) + settings.chunk_size;
 
@@ -101,6 +106,19 @@ void slabs_init(const size_t limit, const double factor) {
         size = 128;
 
     mem_limit = limit;
+
+    if (prealloc) {
+        /* Allocate everything in a big chunk with malloc */
+        mem_base = malloc(mem_limit);
+        if (mem_base != NULL) {
+            mem_current = mem_base;
+            mem_avail = mem_limit;
+        } else {
+            fprintf(stderr, "Warning: Failed to allocate requested memory in"
+                    " one large chunk.\nWill allocate in smaller chunks\n");
+        }
+    }
+
     memset(slabclass, 0, sizeof(slabclass));
 
     while (++i < POWER_LARGEST && size <= POWER_BLOCK / 2) {
@@ -187,7 +205,7 @@ static int do_slabs_newslab(const unsigned int id) {
 
     if (grow_slab_list(id) == 0) return 0;
 
-    ptr = malloc((size_t)len);
+    ptr = memory_allocate((size_t)len);
     if (ptr == 0) return 0;
 
     memset(ptr, 0, (size_t)len);
@@ -374,3 +392,32 @@ int do_slabs_reassign(unsigned char srcid, unsigned char dstid) {
     return 1;
 }
 #endif
+
+/* Returns `size` bytes: from malloc() when nothing was preallocated
+ * (mem_base == NULL), otherwise carved off the preallocated chunk.
+ * Returns NULL if malloc() fails or the chunk has < `size` bytes left. */
+static void *memory_allocate(size_t size) {
+    void *ret;
+    if (mem_base == NULL) {
+        /* We are not using a preallocated large memory chunk */
+        ret = malloc(size);
+    } else {
+        ret = mem_current;
+        if (size > mem_avail) {
+            return NULL;
+        }
+        /* mem_current pointer _must_ be aligned!!! */
+        if (size % CHUNK_ALIGN_BYTES) {
+            size += CHUNK_ALIGN_BYTES - (size % CHUNK_ALIGN_BYTES);
+        }
+        /* void * arithmetic is a GNU extension; advance via char * */
+        mem_current = (char *)mem_current + size;
+        if (size < mem_avail) {
+            mem_avail -= size;
+        } else {
+            mem_avail = 0;
+        }
+    }
+
+    return ret;
+}
diff --git a/slabs.h b/slabs.h
@@ -2,8 +2,11 @@
 
 /** Init the subsystem. 1st argument is the limit on no. of bytes to allocate,
     0 if no limit. 2nd argument is the growth factor; each slab will use a chunk
-    size equal to the previous slab's chunk size times this factor. */
-void slabs_init(const size_t limit, const double factor);
+    size equal to the previous slab's chunk size times this factor.
+    3rd argument specifies if the slab allocator should allocate all memory
+    up front (if true), or allocate memory in chunks as it is needed (if false)
+*/
+void slabs_init(const size_t limit, const double factor, const bool prealloc);
 
 
 /**