Skip to content

Commit bd87b01

Browse files
author
Jason Evans
committed
Optimize Valgrind integration.
Forcefully disable tcache if running inside Valgrind, and remove Valgrind calls in tcache-specific code. Restructure Valgrind-related code to move most Valgrind calls out of the fast path functions. Take advantage of static knowledge to elide some branches in JEMALLOC_VALGRIND_REALLOC().
1 parent ecd3e59 commit bd87b01

12 files changed

+231
-136
lines changed

Makefile.in

+4
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ cfgoutputs_in := @cfgoutputs_in@
4848
cfgoutputs_out := @cfgoutputs_out@
4949
enable_autogen := @enable_autogen@
5050
enable_code_coverage := @enable_code_coverage@
51+
enable_valgrind := @enable_valgrind@
5152
enable_zone_allocator := @enable_zone_allocator@
5253
DSO_LDFLAGS = @DSO_LDFLAGS@
5354
SOREV = @SOREV@
@@ -82,6 +83,9 @@ C_SRCS := $(srcroot)src/jemalloc.c $(srcroot)src/arena.c \
8283
$(srcroot)src/mb.c $(srcroot)src/mutex.c $(srcroot)src/prof.c \
8384
$(srcroot)src/quarantine.c $(srcroot)src/rtree.c $(srcroot)src/stats.c \
8485
$(srcroot)src/tcache.c $(srcroot)src/util.c $(srcroot)src/tsd.c
86+
ifeq ($(enable_valgrind), 1)
87+
C_SRCS += $(srcroot)src/valgrind.c
88+
endif
8589
ifeq ($(enable_zone_allocator), 1)
8690
C_SRCS += $(srcroot)src/zone.c
8791
endif

doc/jemalloc.xml.in

+2-1
Original file line numberDiff line numberDiff line change
@@ -979,7 +979,8 @@ malloc_conf = "xmalloc:true";]]></programlisting>
979979
linkend="opt.lg_tcache_max"><mallctl>opt.lg_tcache_max</mallctl></link>
980980
option for related tuning information. This option is enabled by
981981
default unless running inside <ulink
982-
url="http://valgrind.org/">Valgrind</ulink>.</para></listitem>
982+
url="http://valgrind.org/">Valgrind</ulink>, in which case it is
983+
forcefully disabled.</para></listitem>
983984
</varlistentry>
984985

985986
<varlistentry id="opt.lg_tcache_max">

include/jemalloc/internal/jemalloc_internal.h.in

+4-80
Original file line numberDiff line numberDiff line change
@@ -60,11 +60,6 @@ typedef intptr_t ssize_t;
6060
#include <sys/ktrace.h>
6161
#endif
6262

63-
#ifdef JEMALLOC_VALGRIND
64-
#include <valgrind/valgrind.h>
65-
#include <valgrind/memcheck.h>
66-
#endif
67-
6863
#define JEMALLOC_NO_DEMANGLE
6964
#ifdef JEMALLOC_JET
7065
# define JEMALLOC_N(n) jet_##n
@@ -362,81 +357,7 @@ static const bool config_ivsalloc =
362357
# define VARIABLE_ARRAY(type, name, count) type name[count]
363358
#endif
364359

365-
#ifdef JEMALLOC_VALGRIND
366-
/*
367-
* The JEMALLOC_VALGRIND_*() macros must be macros rather than functions
368-
* so that when Valgrind reports errors, there are no extra stack frames
369-
* in the backtraces.
370-
*
371-
* The size that is reported to valgrind must be consistent through a chain of
372-
* malloc..realloc..realloc calls. Request size isn't recorded anywhere in
373-
* jemalloc, so it is critical that all callers of these macros provide usize
374-
* rather than request size. As a result, buffer overflow detection is
375-
* technically weakened for the standard API, though it is generally accepted
376-
* practice to consider any extra bytes reported by malloc_usable_size() as
377-
* usable space.
378-
*/
379-
#define JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) do { \
380-
if (config_valgrind && in_valgrind && cond) \
381-
VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, p2rz(ptr), zero); \
382-
} while (0)
383-
#define JEMALLOC_VALGRIND_REALLOC(ptr, usize, old_ptr, old_usize, \
384-
old_rzsize, zero) do { \
385-
if (config_valgrind && in_valgrind) { \
386-
size_t rzsize = p2rz(ptr); \
387-
\
388-
if (ptr == old_ptr) { \
389-
VALGRIND_RESIZEINPLACE_BLOCK(ptr, old_usize, \
390-
usize, rzsize); \
391-
if (zero && old_usize < usize) { \
392-
VALGRIND_MAKE_MEM_DEFINED( \
393-
(void *)((uintptr_t)ptr + \
394-
old_usize), usize - old_usize); \
395-
} \
396-
} else { \
397-
if (old_ptr != NULL) { \
398-
VALGRIND_FREELIKE_BLOCK(old_ptr, \
399-
old_rzsize); \
400-
} \
401-
if (ptr != NULL) { \
402-
size_t copy_size = (old_usize < usize) \
403-
? old_usize : usize; \
404-
size_t tail_size = usize - copy_size; \
405-
VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, \
406-
rzsize, false); \
407-
if (copy_size > 0) { \
408-
VALGRIND_MAKE_MEM_DEFINED(ptr, \
409-
copy_size); \
410-
} \
411-
if (zero && tail_size > 0) { \
412-
VALGRIND_MAKE_MEM_DEFINED( \
413-
(void *)((uintptr_t)ptr + \
414-
copy_size), tail_size); \
415-
} \
416-
} \
417-
} \
418-
} \
419-
} while (0)
420-
#define JEMALLOC_VALGRIND_FREE(ptr, rzsize) do { \
421-
if (config_valgrind && in_valgrind) \
422-
VALGRIND_FREELIKE_BLOCK(ptr, rzsize); \
423-
} while (0)
424-
#else
425-
#define RUNNING_ON_VALGRIND ((unsigned)0)
426-
#define VALGRIND_MALLOCLIKE_BLOCK(addr, sizeB, rzB, is_zeroed) \
427-
do {} while (0)
428-
#define VALGRIND_RESIZEINPLACE_BLOCK(addr, oldSizeB, newSizeB, rzB) \
429-
do {} while (0)
430-
#define VALGRIND_FREELIKE_BLOCK(addr, rzB) do {} while (0)
431-
#define VALGRIND_MAKE_MEM_NOACCESS(_qzz_addr, _qzz_len) do {} while (0)
432-
#define VALGRIND_MAKE_MEM_UNDEFINED(_qzz_addr, _qzz_len) do {} while (0)
433-
#define VALGRIND_MAKE_MEM_DEFINED(_qzz_addr, _qzz_len) do {} while (0)
434-
#define JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) do {} while (0)
435-
#define JEMALLOC_VALGRIND_REALLOC(ptr, usize, old_ptr, old_usize, \
436-
old_rzsize, zero) do {} while (0)
437-
#define JEMALLOC_VALGRIND_FREE(ptr, rzsize) do {} while (0)
438-
#endif
439-
360+
#include "jemalloc/internal/valgrind.h"
440361
#include "jemalloc/internal/util.h"
441362
#include "jemalloc/internal/atomic.h"
442363
#include "jemalloc/internal/prng.h"
@@ -463,6 +384,7 @@ static const bool config_ivsalloc =
463384
/******************************************************************************/
464385
#define JEMALLOC_H_STRUCTS
465386

387+
#include "jemalloc/internal/valgrind.h"
466388
#include "jemalloc/internal/util.h"
467389
#include "jemalloc/internal/atomic.h"
468390
#include "jemalloc/internal/prng.h"
@@ -534,6 +456,7 @@ void jemalloc_prefork(void);
534456
void jemalloc_postfork_parent(void);
535457
void jemalloc_postfork_child(void);
536458

459+
#include "jemalloc/internal/valgrind.h"
537460
#include "jemalloc/internal/util.h"
538461
#include "jemalloc/internal/atomic.h"
539462
#include "jemalloc/internal/prng.h"
@@ -560,6 +483,7 @@ void jemalloc_postfork_child(void);
560483
/******************************************************************************/
561484
#define JEMALLOC_H_INLINES
562485

486+
#include "jemalloc/internal/valgrind.h"
563487
#include "jemalloc/internal/util.h"
564488
#include "jemalloc/internal/atomic.h"
565489
#include "jemalloc/internal/prng.h"

include/jemalloc/internal/private_symbols.txt

+4
Original file line numberDiff line numberDiff line change
@@ -409,3 +409,7 @@ thread_allocated_tsd_set
409409
tsd_init_check_recursion
410410
tsd_init_finish
411411
u2rz
412+
valgrind_freelike_block
413+
valgrind_make_mem_defined
414+
valgrind_make_mem_noaccess
415+
valgrind_make_mem_undefined

include/jemalloc/internal/tcache.h

+1-6
Original file line numberDiff line numberDiff line change
@@ -314,13 +314,11 @@ tcache_alloc_small(tcache_t *tcache, size_t size, bool zero)
314314
} else if (opt_zero)
315315
memset(ret, 0, size);
316316
}
317-
VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
318317
} else {
319318
if (config_fill && opt_junk) {
320319
arena_alloc_junk_small(ret, &arena_bin_info[binind],
321320
true);
322321
}
323-
VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
324322
memset(ret, 0, size);
325323
}
326324

@@ -369,11 +367,8 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero)
369367
else if (opt_zero)
370368
memset(ret, 0, size);
371369
}
372-
VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
373-
} else {
374-
VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
370+
} else
375371
memset(ret, 0, size);
376-
}
377372

378373
if (config_stats)
379374
tbin->tstats.nrequests++;

include/jemalloc/internal/valgrind.h

+112
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
/******************************************************************************/
2+
#ifdef JEMALLOC_H_TYPES
3+
4+
#ifdef JEMALLOC_VALGRIND
5+
#include <valgrind/valgrind.h>
6+
7+
/*
8+
* The size that is reported to Valgrind must be consistent through a chain of
9+
* malloc..realloc..realloc calls. Request size isn't recorded anywhere in
10+
* jemalloc, so it is critical that all callers of these macros provide usize
11+
* rather than request size. As a result, buffer overflow detection is
12+
* technically weakened for the standard API, though it is generally accepted
13+
* practice to consider any extra bytes reported by malloc_usable_size() as
14+
* usable space.
15+
*/
16+
#define JEMALLOC_VALGRIND_MAKE_MEM_NOACCESS(ptr, usize) do { \
17+
if (in_valgrind) \
18+
valgrind_make_mem_noaccess(ptr, usize); \
19+
} while (0)
20+
#define JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ptr, usize) do { \
21+
if (in_valgrind) \
22+
valgrind_make_mem_undefined(ptr, usize); \
23+
} while (0)
24+
#define JEMALLOC_VALGRIND_MAKE_MEM_DEFINED(ptr, usize) do { \
25+
if (in_valgrind) \
26+
valgrind_make_mem_defined(ptr, usize); \
27+
} while (0)
28+
/*
29+
* The VALGRIND_MALLOCLIKE_BLOCK() and VALGRIND_RESIZEINPLACE_BLOCK() macro
30+
* calls must be embedded in macros rather than in functions so that when
31+
* Valgrind reports errors, there are no extra stack frames in the backtraces.
32+
*/
33+
#define JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) do { \
34+
if (in_valgrind && cond) \
35+
VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, p2rz(ptr), zero); \
36+
} while (0)
37+
#define JEMALLOC_VALGRIND_REALLOC(maybe_moved, ptr, usize, \
38+
ptr_maybe_null, old_ptr, old_usize, old_rzsize, old_ptr_maybe_null, \
39+
zero) do { \
40+
if (in_valgrind) { \
41+
size_t rzsize = p2rz(ptr); \
42+
\
43+
if (!maybe_moved || ptr == old_ptr) { \
44+
VALGRIND_RESIZEINPLACE_BLOCK(ptr, old_usize, \
45+
usize, rzsize); \
46+
if (zero && old_usize < usize) { \
47+
valgrind_make_mem_defined( \
48+
(void *)((uintptr_t)ptr + \
49+
old_usize), usize - old_usize); \
50+
} \
51+
} else { \
52+
if (!old_ptr_maybe_null || old_ptr != NULL) { \
53+
valgrind_freelike_block(old_ptr, \
54+
old_rzsize); \
55+
} \
56+
if (!ptr_maybe_null || ptr != NULL) { \
57+
size_t copy_size = (old_usize < usize) \
58+
? old_usize : usize; \
59+
size_t tail_size = usize - copy_size; \
60+
VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, \
61+
rzsize, false); \
62+
if (copy_size > 0) { \
63+
valgrind_make_mem_defined(ptr, \
64+
copy_size); \
65+
} \
66+
if (zero && tail_size > 0) { \
67+
valgrind_make_mem_defined( \
68+
(void *)((uintptr_t)ptr + \
69+
copy_size), tail_size); \
70+
} \
71+
} \
72+
} \
73+
} \
74+
} while (0)
75+
#define JEMALLOC_VALGRIND_FREE(ptr, rzsize) do { \
76+
if (in_valgrind) \
77+
valgrind_freelike_block(ptr, rzsize); \
78+
} while (0)
79+
#else
80+
#define RUNNING_ON_VALGRIND ((unsigned)0)
81+
#define JEMALLOC_VALGRIND_MAKE_MEM_NOACCESS(ptr, usize) do {} while (0)
82+
#define JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ptr, usize) do {} while (0)
83+
#define JEMALLOC_VALGRIND_MAKE_MEM_DEFINED(ptr, usize) do {} while (0)
84+
#define JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) do {} while (0)
85+
#define JEMALLOC_VALGRIND_REALLOC(maybe_moved, ptr, usize, \
86+
ptr_maybe_null, old_ptr, old_usize, old_rzsize, old_ptr_maybe_null, \
87+
zero) do {} while (0)
88+
#define JEMALLOC_VALGRIND_FREE(ptr, rzsize) do {} while (0)
89+
#endif
90+
91+
#endif /* JEMALLOC_H_TYPES */
92+
/******************************************************************************/
93+
#ifdef JEMALLOC_H_STRUCTS
94+
95+
#endif /* JEMALLOC_H_STRUCTS */
96+
/******************************************************************************/
97+
#ifdef JEMALLOC_H_EXTERNS
98+
99+
#ifdef JEMALLOC_VALGRIND
100+
void valgrind_make_mem_noaccess(void *ptr, size_t usize);
101+
void valgrind_make_mem_undefined(void *ptr, size_t usize);
102+
void valgrind_make_mem_defined(void *ptr, size_t usize);
103+
void valgrind_freelike_block(void *ptr, size_t usize);
104+
#endif
105+
106+
#endif /* JEMALLOC_H_EXTERNS */
107+
/******************************************************************************/
108+
#ifdef JEMALLOC_H_INLINES
109+
110+
#endif /* JEMALLOC_H_INLINES */
111+
/******************************************************************************/
112+

src/arena.c

+14-14
Original file line numberDiff line numberDiff line change
@@ -337,8 +337,8 @@ static inline void
337337
arena_run_zero(arena_chunk_t *chunk, size_t run_ind, size_t npages)
338338
{
339339

340-
VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk + (run_ind <<
341-
LG_PAGE)), (npages << LG_PAGE));
340+
JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk +
341+
(run_ind << LG_PAGE)), (npages << LG_PAGE));
342342
memset((void *)((uintptr_t)chunk + (run_ind << LG_PAGE)), 0,
343343
(npages << LG_PAGE));
344344
}
@@ -347,8 +347,8 @@ static inline void
347347
arena_run_page_mark_zeroed(arena_chunk_t *chunk, size_t run_ind)
348348
{
349349

350-
VALGRIND_MAKE_MEM_DEFINED((void *)((uintptr_t)chunk + (run_ind <<
351-
LG_PAGE)), PAGE);
350+
JEMALLOC_VALGRIND_MAKE_MEM_DEFINED((void *)((uintptr_t)chunk + (run_ind
351+
<< LG_PAGE)), PAGE);
352352
}
353353

354354
static inline void
@@ -457,7 +457,7 @@ arena_run_split_large_helper(arena_t *arena, arena_run_t *run, size_t size,
457457
arena_run_zero(chunk, run_ind, need_pages);
458458
}
459459
} else {
460-
VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk +
460+
JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk +
461461
(run_ind << LG_PAGE)), (need_pages << LG_PAGE));
462462
}
463463

@@ -525,7 +525,7 @@ arena_run_split_small(arena_t *arena, arena_run_t *run, size_t size,
525525
if (config_debug && flag_dirty == 0 && arena_mapbits_unzeroed_get(chunk,
526526
run_ind+need_pages-1) == 0)
527527
arena_run_page_validate_zeroed(chunk, run_ind+need_pages-1);
528-
VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk +
528+
JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk +
529529
(run_ind << LG_PAGE)), (need_pages << LG_PAGE));
530530
}
531531

@@ -592,14 +592,14 @@ arena_chunk_init_hard(arena_t *arena)
592592
* the chunk is not zeroed.
593593
*/
594594
if (zero == false) {
595-
VALGRIND_MAKE_MEM_UNDEFINED((void *)arena_mapp_get(chunk,
596-
map_bias+1), (size_t)((uintptr_t) arena_mapp_get(chunk,
597-
chunk_npages-1) - (uintptr_t)arena_mapp_get(chunk,
598-
map_bias+1)));
595+
JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(
596+
(void *)arena_mapp_get(chunk, map_bias+1),
597+
(size_t)((uintptr_t) arena_mapp_get(chunk, chunk_npages-1) -
598+
(uintptr_t)arena_mapp_get(chunk, map_bias+1)));
599599
for (i = map_bias+1; i < chunk_npages-1; i++)
600600
arena_mapbits_unzeroed_set(chunk, i, unzeroed);
601601
} else {
602-
VALGRIND_MAKE_MEM_DEFINED((void *)arena_mapp_get(chunk,
602+
JEMALLOC_VALGRIND_MAKE_MEM_DEFINED((void *)arena_mapp_get(chunk,
603603
map_bias+1), (size_t)((uintptr_t) arena_mapp_get(chunk,
604604
chunk_npages-1) - (uintptr_t)arena_mapp_get(chunk,
605605
map_bias+1)));
@@ -1645,13 +1645,13 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero)
16451645
} else if (opt_zero)
16461646
memset(ret, 0, size);
16471647
}
1648-
VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
1648+
JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
16491649
} else {
16501650
if (config_fill && opt_junk) {
16511651
arena_alloc_junk_small(ret, &arena_bin_info[binind],
16521652
true);
16531653
}
1654-
VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
1654+
JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
16551655
memset(ret, 0, size);
16561656
}
16571657

@@ -2226,7 +2226,7 @@ arena_ralloc(arena_t *arena, void *ptr, size_t oldsize, size_t size,
22262226
* expectation that the extra bytes will be reliably preserved.
22272227
*/
22282228
copysize = (size < oldsize) ? size : oldsize;
2229-
VALGRIND_MAKE_MEM_UNDEFINED(ret, copysize);
2229+
JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, copysize);
22302230
memcpy(ret, ptr, copysize);
22312231
iqalloct(ptr, try_tcache_dalloc);
22322232
return (ret);

0 commit comments

Comments
 (0)