From a9951b1b6a326532163e0fe4ee1a26e972258a1e Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 6 Oct 2015 16:18:30 +0200 Subject: [PATCH] Jemalloc updated to 4.0.3. --- deps/jemalloc/.autom4te.cfg | 3 + deps/jemalloc/.gitattributes | 1 + deps/jemalloc/.gitignore | 7 +- deps/jemalloc/COPYING | 4 +- deps/jemalloc/ChangeLog | 250 +- deps/jemalloc/INSTALL | 148 +- deps/jemalloc/Makefile.in | 112 +- deps/jemalloc/VERSION | 2 +- deps/jemalloc/bin/jemalloc-config.in | 79 + deps/jemalloc/bin/{pprof => jeprof.in} | 530 +-- deps/jemalloc/config.guess | 192 +- deps/jemalloc/config.sub | 22 +- deps/jemalloc/configure | 1408 ++++++-- deps/jemalloc/configure.ac | 592 +++- deps/jemalloc/doc/jemalloc.3 | 891 +++-- deps/jemalloc/doc/jemalloc.html | 917 +++-- deps/jemalloc/doc/jemalloc.xml.in | 1237 ++++--- .../include/jemalloc/internal/arena.h | 1038 +++--- .../include/jemalloc/internal/atomic.h | 539 ++- .../jemalloc/include/jemalloc/internal/base.h | 4 +- .../include/jemalloc/internal/bitmap.h | 58 +- .../include/jemalloc/internal/chunk.h | 62 +- .../include/jemalloc/internal/chunk_dss.h | 3 +- .../include/jemalloc/internal/chunk_mmap.h | 7 +- deps/jemalloc/include/jemalloc/internal/ckh.h | 8 +- deps/jemalloc/include/jemalloc/internal/ctl.h | 14 +- .../include/jemalloc/internal/extent.h | 219 +- .../jemalloc/include/jemalloc/internal/hash.h | 13 +- .../jemalloc/include/jemalloc/internal/huge.h | 36 +- .../jemalloc/internal/jemalloc_internal.h.in | 928 +++--- .../internal/jemalloc_internal_decls.h | 64 + .../internal/jemalloc_internal_defs.h.in | 91 +- .../internal/jemalloc_internal_macros.h | 6 + .../include/jemalloc/internal/mutex.h | 14 +- .../include/jemalloc/internal/pages.h | 26 + .../jemalloc/internal/private_symbols.txt | 336 +- .../jemalloc/include/jemalloc/internal/prng.h | 14 +- .../jemalloc/include/jemalloc/internal/prof.h | 750 ++--- deps/jemalloc/include/jemalloc/internal/ql.h | 4 +- deps/jemalloc/include/jemalloc/internal/qr.h | 6 +- 
.../include/jemalloc/internal/quarantine.h | 21 +- deps/jemalloc/include/jemalloc/internal/rb.h | 24 +- .../include/jemalloc/internal/rtree.h | 364 +- .../include/jemalloc/internal/size_classes.sh | 290 +- .../include/jemalloc/internal/stats.h | 64 +- .../include/jemalloc/internal/tcache.h | 305 +- deps/jemalloc/include/jemalloc/internal/tsd.h | 545 ++- .../jemalloc/include/jemalloc/internal/util.h | 174 +- .../include/jemalloc/internal/valgrind.h | 112 + deps/jemalloc/include/jemalloc/jemalloc.sh | 4 +- .../include/jemalloc/jemalloc_defs.h.in | 17 +- .../include/jemalloc/jemalloc_macros.h.in | 95 +- .../include/jemalloc/jemalloc_protos.h.in | 86 +- .../include/jemalloc/jemalloc_typedefs.h.in | 57 + .../include/msvc_compat/{ => C99}/stdbool.h | 4 + .../include/msvc_compat/{ => C99}/stdint.h | 0 deps/jemalloc/include/msvc_compat/inttypes.h | 313 -- deps/jemalloc/include/msvc_compat/strings.h | 10 +- .../include/msvc_compat/windows_extra.h | 26 + deps/jemalloc/jemalloc.pc.in | 12 + deps/jemalloc/src/arena.c | 2957 +++++++++++------ deps/jemalloc/src/base.c | 184 +- deps/jemalloc/src/bitmap.c | 18 +- deps/jemalloc/src/chunk.c | 812 +++-- deps/jemalloc/src/chunk_dss.c | 54 +- deps/jemalloc/src/chunk_mmap.c | 154 +- deps/jemalloc/src/ckh.c | 61 +- deps/jemalloc/src/ctl.c | 1085 ++++-- deps/jemalloc/src/extent.c | 38 +- deps/jemalloc/src/huge.c | 598 ++-- deps/jemalloc/src/jemalloc.c | 2104 +++++++----- deps/jemalloc/src/mutex.c | 10 +- deps/jemalloc/src/pages.c | 173 + deps/jemalloc/src/prof.c | 1879 ++++++++--- deps/jemalloc/src/quarantine.c | 132 +- deps/jemalloc/src/rtree.c | 150 +- deps/jemalloc/src/stats.c | 487 +-- deps/jemalloc/src/tcache.c | 320 +- deps/jemalloc/src/tsd.c | 62 +- deps/jemalloc/src/util.c | 30 +- deps/jemalloc/src/valgrind.c | 34 + deps/jemalloc/src/zone.c | 34 +- deps/jemalloc/test/include/test/btalloc.h | 31 + .../test/include/test/jemalloc_test.h.in | 12 +- .../test/include/test/jemalloc_test_defs.h.in | 6 +- 
deps/jemalloc/test/include/test/math.h | 2 +- deps/jemalloc/test/include/test/mq.h | 19 +- deps/jemalloc/test/include/test/test.h | 390 +-- deps/jemalloc/test/include/test/thd.h | 2 +- deps/jemalloc/test/include/test/timer.h | 26 + .../jemalloc/test/integration/MALLOCX_ARENA.c | 21 +- deps/jemalloc/test/integration/allocm.c | 107 - deps/jemalloc/test/integration/chunk.c | 276 ++ deps/jemalloc/test/integration/mallocx.c | 133 +- deps/jemalloc/test/integration/mremap.c | 45 - deps/jemalloc/test/integration/overflow.c | 49 + deps/jemalloc/test/integration/rallocm.c | 111 - deps/jemalloc/test/integration/rallocx.c | 15 +- deps/jemalloc/test/integration/sdallocx.c | 57 + deps/jemalloc/test/integration/xallocx.c | 414 ++- deps/jemalloc/test/src/SFMT.c | 22 +- deps/jemalloc/test/src/btalloc.c | 8 + deps/jemalloc/test/src/btalloc_0.c | 3 + deps/jemalloc/test/src/btalloc_1.c | 3 + deps/jemalloc/test/src/mq.c | 29 + deps/jemalloc/test/src/mtx.c | 4 + deps/jemalloc/test/src/test.c | 23 +- deps/jemalloc/test/src/thd.c | 6 +- deps/jemalloc/test/src/timer.c | 85 + deps/jemalloc/test/stress/microbench.c | 181 + deps/jemalloc/test/unit/SFMT.c | 2008 +++++------ deps/jemalloc/test/unit/atomic.c | 122 + deps/jemalloc/test/unit/bitmap.c | 24 +- deps/jemalloc/test/unit/ckh.c | 68 +- deps/jemalloc/test/unit/hash.c | 4 +- deps/jemalloc/test/unit/junk.c | 96 +- deps/jemalloc/test/unit/junk_alloc.c | 3 + deps/jemalloc/test/unit/junk_free.c | 3 + deps/jemalloc/test/unit/lg_chunk.c | 26 + deps/jemalloc/test/unit/mallctl.c | 260 +- deps/jemalloc/test/unit/math.c | 6 + deps/jemalloc/test/unit/mq.c | 3 +- deps/jemalloc/test/unit/prof_accum.c | 9 +- deps/jemalloc/test/unit/prof_accum.h | 35 - deps/jemalloc/test/unit/prof_accum_a.c | 3 - deps/jemalloc/test/unit/prof_accum_b.c | 3 - deps/jemalloc/test/unit/prof_active.c | 136 + deps/jemalloc/test/unit/prof_gdump.c | 29 +- deps/jemalloc/test/unit/prof_reset.c | 302 ++ deps/jemalloc/test/unit/prof_thread_name.c | 129 + deps/jemalloc/test/unit/rb.c 
| 7 +- deps/jemalloc/test/unit/rtree.c | 93 +- deps/jemalloc/test/unit/size_classes.c | 89 + deps/jemalloc/test/unit/stats.c | 171 +- deps/jemalloc/test/unit/tsd.c | 48 +- deps/jemalloc/test/unit/util.c | 86 +- deps/jemalloc/test/unit/zero.c | 4 +- 137 files changed, 20120 insertions(+), 10261 deletions(-) create mode 100644 deps/jemalloc/.autom4te.cfg create mode 100644 deps/jemalloc/.gitattributes create mode 100644 deps/jemalloc/bin/jemalloc-config.in rename deps/jemalloc/bin/{pprof => jeprof.in} (95%) mode change 100755 => 100644 create mode 100644 deps/jemalloc/include/jemalloc/internal/jemalloc_internal_decls.h create mode 100644 deps/jemalloc/include/jemalloc/internal/pages.h create mode 100644 deps/jemalloc/include/jemalloc/internal/valgrind.h create mode 100644 deps/jemalloc/include/jemalloc/jemalloc_typedefs.h.in rename deps/jemalloc/include/msvc_compat/{ => C99}/stdbool.h (73%) rename deps/jemalloc/include/msvc_compat/{ => C99}/stdint.h (100%) delete mode 100644 deps/jemalloc/include/msvc_compat/inttypes.h create mode 100644 deps/jemalloc/include/msvc_compat/windows_extra.h create mode 100644 deps/jemalloc/jemalloc.pc.in create mode 100644 deps/jemalloc/src/pages.c create mode 100644 deps/jemalloc/src/valgrind.c create mode 100644 deps/jemalloc/test/include/test/btalloc.h create mode 100644 deps/jemalloc/test/include/test/timer.h delete mode 100644 deps/jemalloc/test/integration/allocm.c create mode 100644 deps/jemalloc/test/integration/chunk.c delete mode 100644 deps/jemalloc/test/integration/mremap.c create mode 100644 deps/jemalloc/test/integration/overflow.c delete mode 100644 deps/jemalloc/test/integration/rallocm.c create mode 100644 deps/jemalloc/test/integration/sdallocx.c create mode 100644 deps/jemalloc/test/src/btalloc.c create mode 100644 deps/jemalloc/test/src/btalloc_0.c create mode 100644 deps/jemalloc/test/src/btalloc_1.c create mode 100644 deps/jemalloc/test/src/mq.c create mode 100644 deps/jemalloc/test/src/timer.c create mode 100644 
deps/jemalloc/test/stress/microbench.c create mode 100644 deps/jemalloc/test/unit/atomic.c create mode 100644 deps/jemalloc/test/unit/junk_alloc.c create mode 100644 deps/jemalloc/test/unit/junk_free.c create mode 100644 deps/jemalloc/test/unit/lg_chunk.c delete mode 100644 deps/jemalloc/test/unit/prof_accum.h delete mode 100644 deps/jemalloc/test/unit/prof_accum_a.c delete mode 100644 deps/jemalloc/test/unit/prof_accum_b.c create mode 100644 deps/jemalloc/test/unit/prof_active.c create mode 100644 deps/jemalloc/test/unit/prof_reset.c create mode 100644 deps/jemalloc/test/unit/prof_thread_name.c create mode 100644 deps/jemalloc/test/unit/size_classes.c diff --git a/deps/jemalloc/.autom4te.cfg b/deps/jemalloc/.autom4te.cfg new file mode 100644 index 000000000..fe2424db5 --- /dev/null +++ b/deps/jemalloc/.autom4te.cfg @@ -0,0 +1,3 @@ +begin-language: "Autoconf-without-aclocal-m4" +args: --no-cache +end-language: "Autoconf-without-aclocal-m4" diff --git a/deps/jemalloc/.gitattributes b/deps/jemalloc/.gitattributes new file mode 100644 index 000000000..6313b56c5 --- /dev/null +++ b/deps/jemalloc/.gitattributes @@ -0,0 +1 @@ +* text=auto eol=lf diff --git a/deps/jemalloc/.gitignore b/deps/jemalloc/.gitignore index 4c408ec2c..d0e393619 100644 --- a/deps/jemalloc/.gitignore +++ b/deps/jemalloc/.gitignore @@ -1,8 +1,8 @@ /*.gcov.* -/autom4te.cache/ - +/bin/jemalloc-config /bin/jemalloc.sh +/bin/jeprof /config.stamp /config.log @@ -15,6 +15,8 @@ /doc/jemalloc.html /doc/jemalloc.3 +/jemalloc.pc + /lib/ /Makefile @@ -35,6 +37,7 @@ /include/jemalloc/jemalloc_protos.h /include/jemalloc/jemalloc_protos_jet.h /include/jemalloc/jemalloc_rename.h +/include/jemalloc/jemalloc_typedefs.h /src/*.[od] /src/*.gcda diff --git a/deps/jemalloc/COPYING b/deps/jemalloc/COPYING index bdda0feb9..611968cda 100644 --- a/deps/jemalloc/COPYING +++ b/deps/jemalloc/COPYING @@ -1,10 +1,10 @@ Unless otherwise specified, files in the jemalloc source distribution are subject to the following license: 
-------------------------------------------------------------------------------- -Copyright (C) 2002-2014 Jason Evans . +Copyright (C) 2002-2015 Jason Evans . All rights reserved. Copyright (C) 2007-2012 Mozilla Foundation. All rights reserved. -Copyright (C) 2009-2014 Facebook, Inc. All rights reserved. +Copyright (C) 2009-2015 Facebook, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: diff --git a/deps/jemalloc/ChangeLog b/deps/jemalloc/ChangeLog index d56ee999e..e3b0a5190 100644 --- a/deps/jemalloc/ChangeLog +++ b/deps/jemalloc/ChangeLog @@ -1,10 +1,250 @@ Following are change highlights associated with official releases. Important -bug fixes are all mentioned, but internal enhancements are omitted here for -brevity (even though they are more fun to write about). Much more detail can be -found in the git revision history: +bug fixes are all mentioned, but some internal enhancements are omitted here for +brevity. Much more detail can be found in the git revision history: https://github.com/jemalloc/jemalloc +* 4.0.3 (September 24, 2015) + + This bugfix release continues the trend of xallocx() and heap profiling fixes. + + Bug fixes: + - Fix xallocx(..., MALLOCX_ZERO) to zero all trailing bytes of large + allocations when --enable-cache-oblivious configure option is enabled. + - Fix xallocx(..., MALLOCX_ZERO) to zero trailing bytes of huge allocations + when resizing from/to a size class that is not a multiple of the chunk size. + - Fix prof_tctx_dump_iter() to filter out nodes that were created after heap + profile dumping started. + - Work around a potentially bad thread-specific data initialization + interaction with NPTL (glibc's pthreads implementation). + +* 4.0.2 (September 21, 2015) + + This bugfix release addresses a few bugs specific to heap profiling. 
+ + Bug fixes: + - Fix ixallocx_prof_sample() to never modify nor create sampled small + allocations. xallocx() is in general incapable of moving small allocations, + so this fix removes buggy code without loss of generality. + - Fix irallocx_prof_sample() to always allocate large regions, even when + alignment is non-zero. + - Fix prof_alloc_rollback() to read tdata from thread-specific data rather + than dereferencing a potentially invalid tctx. + +* 4.0.1 (September 15, 2015) + + This is a bugfix release that is somewhat high risk due to the amount of + refactoring required to address deep xallocx() problems. As a side effect of + these fixes, xallocx() now tries harder to partially fulfill requests for + optional extra space. Note that a couple of minor heap profiling + optimizations are included, but these are better thought of as performance + fixes that were integral to discovering most of the other bugs. + + Optimizations: + - Avoid a chunk metadata read in arena_prof_tctx_set(), since it is in the + fast path when heap profiling is enabled. Additionally, split a special + case out into arena_prof_tctx_reset(), which also avoids chunk metadata + reads. + - Optimize irallocx_prof() to optimistically update the sampler state. The + prior implementation appears to have been a holdover from when + rallocx()/xallocx() functionality was combined as rallocm(). + + Bug fixes: + - Fix TLS configuration such that it is enabled by default for platforms on + which it works correctly. + - Fix arenas_cache_cleanup() and arena_get_hard() to handle + allocation/deallocation within the application's thread-specific data + cleanup functions even after arenas_cache is torn down. + - Fix xallocx() bugs related to size+extra exceeding HUGE_MAXCLASS. + - Fix chunk purge hook calls for in-place huge shrinking reallocation to + specify the old chunk size rather than the new chunk size. 
This bug caused + no correctness issues for the default chunk purge function, but was + visible to custom functions set via the "arena..chunk_hooks" mallctl. + - Fix heap profiling bugs: + + Fix heap profiling to distinguish among otherwise identical sample sites + with interposed resets (triggered via the "prof.reset" mallctl). This bug + could cause data structure corruption that would most likely result in a + segfault. + + Fix irealloc_prof() to prof_alloc_rollback() on OOM. + + Make one call to prof_active_get_unlocked() per allocation event, and use + the result throughout the relevant functions that handle an allocation + event. Also add a missing check in prof_realloc(). These fixes protect + allocation events against concurrent prof_active changes. + + Fix ixallocx_prof() to pass usize_max and zero to ixallocx_prof_sample() + in the correct order. + + Fix prof_realloc() to call prof_free_sampled_object() after calling + prof_malloc_sample_object(). Prior to this fix, if tctx and old_tctx were + the same, the tctx could have been prematurely destroyed. + - Fix portability bugs: + + Don't bitshift by negative amounts when encoding/decoding run sizes in + chunk header maps. This affected systems with page sizes greater than 8 + KiB. + + Rename index_t to szind_t to avoid an existing type on Solaris. + + Add JEMALLOC_CXX_THROW to the memalign() function prototype, in order to + match glibc and avoid compilation errors when including both + jemalloc/jemalloc.h and malloc.h in C++ code. + + Don't assume that /bin/sh is appropriate when running size_classes.sh + during configuration. + + Consider __sparcv9 a synonym for __sparc64__ when defining LG_QUANTUM. + + Link tests to librt if it contains clock_gettime(2). + +* 4.0.0 (August 17, 2015) + + This version contains many speed and space optimizations, both minor and + major. The major themes are generalization, unification, and simplification. 
+ Although many of these optimizations cause no visible behavior change, their + cumulative effect is substantial. + + New features: + - Normalize size class spacing to be consistent across the complete size + range. By default there are four size classes per size doubling, but this + is now configurable via the --with-lg-size-class-group option. Also add the + --with-lg-page, --with-lg-page-sizes, --with-lg-quantum, and + --with-lg-tiny-min options, which can be used to tweak page and size class + settings. Impacts: + + Worst case performance for incrementally growing/shrinking reallocation + is improved because there are far fewer size classes, and therefore + copying happens less often. + + Internal fragmentation is limited to 20% for all but the smallest size + classes (those less than four times the quantum). (1B + 4 KiB) + and (1B + 4 MiB) previously suffered nearly 50% internal fragmentation. + + Chunk fragmentation tends to be lower because there are fewer distinct run + sizes to pack. + - Add support for explicit tcaches. The "tcache.create", "tcache.flush", and + "tcache.destroy" mallctls control tcache lifetime and flushing, and the + MALLOCX_TCACHE(tc) and MALLOCX_TCACHE_NONE flags to the *allocx() API + control which tcache is used for each operation. + - Implement per thread heap profiling, as well as the ability to + enable/disable heap profiling on a per thread basis. Add the "prof.reset", + "prof.lg_sample", "thread.prof.name", "thread.prof.active", + "opt.prof_thread_active_init", "prof.thread_active_init", and + "thread.prof.active" mallctls. + - Add support for per arena application-specified chunk allocators, configured + via the "arena..chunk_hooks" mallctl. + - Refactor huge allocation to be managed by arenas, so that arenas now + function as general purpose independent allocators. This is important in + the context of user-specified chunk allocators, aside from the scalability + benefits. 
Related new statistics: + + The "stats.arenas.<i>.huge.allocated", "stats.arenas.<i>.huge.nmalloc", + "stats.arenas.<i>.huge.ndalloc", and "stats.arenas.<i>.huge.nrequests" + mallctls provide high level per arena huge allocation statistics. + + The "arenas.nhchunks", "arenas.hchunk.<i>.size", + "stats.arenas.<i>.hchunks.<j>.nmalloc", + "stats.arenas.<i>.hchunks.<j>.ndalloc", + "stats.arenas.<i>.hchunks.<j>.nrequests", and + "stats.arenas.<i>.hchunks.<j>.curhchunks" mallctls provide per size class + statistics. + - Add the 'util' column to malloc_stats_print() output, which reports the + proportion of available regions that are currently in use for each small + size class. + - Add "alloc" and "free" modes for junk filling (see the "opt.junk" + mallctl), so that it is possible to separately enable junk filling for + allocation versus deallocation. + - Add the jemalloc-config script, which provides information about how + jemalloc was configured, and how to integrate it into application builds. + - Add metadata statistics, which are accessible via the "stats.metadata", + "stats.arenas.<i>.metadata.mapped", and + "stats.arenas.<i>.metadata.allocated" mallctls. + - Add the "stats.resident" mallctl, which reports the upper limit of + physically resident memory mapped by the allocator. + - Add per arena control over unused dirty page purging, via the + "arenas.lg_dirty_mult", "arena.<i>.lg_dirty_mult", and + "stats.arenas.<i>.lg_dirty_mult" mallctls. + - Add the "prof.gdump" mallctl, which makes it possible to toggle the gdump + feature on/off during program execution. + - Add sdallocx(), which implements sized deallocation. The primary + optimization over dallocx() is the removal of a metadata read, which often + suffers an L1 cache miss. + - Add missing header includes in jemalloc/jemalloc.h, so that applications + only have to #include <jemalloc/jemalloc.h>. 
+ - Add support for additional platforms: + + Bitrig + + Cygwin + + DragonFlyBSD + + iOS + + OpenBSD + + OpenRISC/or1k + + Optimizations: + - Maintain dirty runs in per arena LRUs rather than in per arena trees of + dirty-run-containing chunks. In practice this change significantly reduces + dirty page purging volume. + - Integrate whole chunks into the unused dirty page purging machinery. This + reduces the cost of repeated huge allocation/deallocation, because it + effectively introduces a cache of chunks. + - Split the arena chunk map into two separate arrays, in order to increase + cache locality for the frequently accessed bits. + - Move small run metadata out of runs, into arena chunk headers. This reduces + run fragmentation, smaller runs reduce external fragmentation for small size + classes, and packed (less uniformly aligned) metadata layout improves CPU + cache set distribution. + - Randomly distribute large allocation base pointer alignment relative to page + boundaries in order to more uniformly utilize CPU cache sets. This can be + disabled via the --disable-cache-oblivious configure option, and queried via + the "config.cache_oblivious" mallctl. + - Micro-optimize the fast paths for the public API functions. + - Refactor thread-specific data to reside in a single structure. This assures + that only a single TLS read is necessary per call into the public API. + - Implement in-place huge allocation growing and shrinking. + - Refactor rtree (radix tree for chunk lookups) to be lock-free, and make + additional optimizations that reduce maximum lookup depth to one or two + levels. This resolves what was a concurrency bottleneck for per arena huge + allocation, because a global data structure is critical for determining + which arenas own which huge allocations. + + Incompatible changes: + - Replace --enable-cc-silence with --disable-cc-silence to suppress spurious + warnings by default. 
+ - Assure that the constness of malloc_usable_size()'s return type matches that + of the system implementation. + - Change the heap profile dump format to support per thread heap profiling, + rename pprof to jeprof, and enhance it with the --thread= option. As a + result, the bundled jeprof must now be used rather than the upstream + (gperftools) pprof. + - Disable "opt.prof_final" by default, in order to avoid atexit(3), which can + internally deadlock on some platforms. + - Change the "arenas.nlruns" mallctl type from size_t to unsigned. + - Replace the "stats.arenas..bins..allocated" mallctl with + "stats.arenas..bins..curregs". + - Ignore MALLOC_CONF in set{uid,gid,cap} binaries. + - Ignore MALLOCX_ARENA(a) in dallocx(), in favor of using the + MALLOCX_TCACHE(tc) and MALLOCX_TCACHE_NONE flags to control tcache usage. + + Removed features: + - Remove the *allocm() API, which is superseded by the *allocx() API. + - Remove the --enable-dss options, and make dss non-optional on all platforms + which support sbrk(2). + - Remove the "arenas.purge" mallctl, which was obsoleted by the + "arena..purge" mallctl in 3.1.0. + - Remove the unnecessary "opt.valgrind" mallctl; jemalloc automatically + detects whether it is running inside Valgrind. + - Remove the "stats.huge.allocated", "stats.huge.nmalloc", and + "stats.huge.ndalloc" mallctls. + - Remove the --enable-mremap option. + - Remove the "stats.chunks.current", "stats.chunks.total", and + "stats.chunks.high" mallctls. + + Bug fixes: + - Fix the cactive statistic to decrease (rather than increase) when active + memory decreases. This regression was first released in 3.5.0. + - Fix OOM handling in memalign() and valloc(). A variant of this bug existed + in all releases since 2.0.0, which introduced these functions. + - Fix an OOM-related regression in arena_tcache_fill_small(), which could + cause cache corruption on OOM. This regression was present in all releases + from 2.2.0 through 3.6.0. 
+ - Fix size class overflow handling for malloc(), posix_memalign(), memalign(), + calloc(), and realloc() when profiling is enabled. + - Fix the "arena..dss" mallctl to return an error if "primary" or + "secondary" precedence is specified, but sbrk(2) is not supported. + - Fix fallback lg_floor() implementations to handle extremely large inputs. + - Ensure the default purgeable zone is after the default zone on OS X. + - Fix latent bugs in atomic_*(). + - Fix the "arena..dss" mallctl to handle read-only calls. + - Fix tls_model configuration to enable the initial-exec model when possible. + - Mark malloc_conf as a weak symbol so that the application can override it. + - Correctly detect glibc's adaptive pthread mutexes. + - Fix the --without-export configure option. + * 3.6.0 (March 31, 2014) This version contains a critical bug fix for a regression present in 3.5.0 and @@ -21,7 +261,7 @@ found in the git revision history: backtracing to be reliable. - Use dss allocation precedence for huge allocations as well as small/large allocations. - - Fix test assertion failure message formatting. This bug did not manifect on + - Fix test assertion failure message formatting. This bug did not manifest on x86_64 systems because of implementation subtleties in va_list. - Fix inconsequential test failures for hash and SFMT code. @@ -516,7 +756,7 @@ found in the git revision history: - Make it possible for the application to manually flush a thread's cache, via the "tcache.flush" mallctl. - Base maximum dirty page count on proportion of active memory. - - Compute various addtional run-time statistics, including per size class + - Compute various additional run-time statistics, including per size class statistics for large objects. - Expose malloc_stats_print(), which can be called repeatedly by the application. 
diff --git a/deps/jemalloc/INSTALL b/deps/jemalloc/INSTALL index 841704d2a..8d3968745 100644 --- a/deps/jemalloc/INSTALL +++ b/deps/jemalloc/INSTALL @@ -1,10 +1,23 @@ -Building and installing jemalloc can be as simple as typing the following while -in the root directory of the source tree: +Building and installing a packaged release of jemalloc can be as simple as +typing the following while in the root directory of the source tree: ./configure make make install +If building from unpackaged developer sources, the simplest command sequence +that might work is: + + ./autogen.sh + make dist + make + make install + +Note that documentation is not built by the default target because doing so +would create a dependency on xsltproc in packaged releases, hence the +requirement to either run 'make dist' or avoid installing docs via the various +install_* targets documented below. + === Advanced configuration ===================================================== The 'configure' script supports numerous options that allow control of which @@ -56,7 +69,7 @@ any of the following arguments (not a definitive list) to 'configure': replace the "malloc", "calloc", etc. symbols. --without-export - Don't export public APIs. This can be useful when building jemalloc as a + Don't export public APIs. This can be useful when building jemalloc as a static library, or to avoid exporting public APIs when using the zone allocator on OSX. @@ -71,10 +84,10 @@ any of the following arguments (not a definitive list) to 'configure': versions of jemalloc can coexist in the same installation directory. For example, libjemalloc.so.0 becomes libjemalloc.so.0. ---enable-cc-silence - Enable code that silences non-useful compiler warnings. This is helpful - when trying to tell serious warnings from those due to compiler - limitations, but it potentially incurs a performance penalty. +--disable-cc-silence + Disable code that silences non-useful compiler warnings. 
This is mainly + useful during development when auditing the set of warnings that are being + silenced. --enable-debug Enable assertions and validation code. This incurs a substantial @@ -94,15 +107,15 @@ any of the following arguments (not a definitive list) to 'configure': there are interactions between the various coverage targets, so it is usually advisable to run 'make clean' between repeated code coverage runs. ---enable-ivsalloc - Enable validation code, which verifies that pointers reside within - jemalloc-owned chunks before dereferencing them. This incurs a substantial - performance hit. - --disable-stats Disable statistics gathering functionality. See the "opt.stats_print" option documentation for usage details. +--enable-ivsalloc + Enable validation code, which verifies that pointers reside within + jemalloc-owned chunks before dereferencing them. This incurs a minor + performance hit. + --enable-prof Enable heap profiling and leak detection functionality. See the "opt.prof" option documentation for usage details. When enabled, there are several @@ -132,12 +145,6 @@ any of the following arguments (not a definitive list) to 'configure': released in bulk, thus reducing the total number of mutex operations. See the "opt.tcache" option for usage details. ---enable-mremap - Enable huge realloc() via mremap(2). mremap() is disabled by default - because the flavor used is specific to Linux, which has a quirk in its - virtual memory allocation algorithm that causes semi-permanent VM map holes - under normal jemalloc operation. - --disable-munmap Disable virtual memory deallocation via munmap(2); instead keep track of the virtual memory for later use. munmap() is disabled by default (i.e. @@ -145,10 +152,6 @@ any of the following arguments (not a definitive list) to 'configure': memory allocation algorithm that causes semi-permanent VM map holes under normal jemalloc operation. 
---enable-dss - Enable support for page allocation/deallocation via sbrk(2), in addition to - mmap(2). - --disable-fill Disable support for junk/zero filling of memory, quarantine, and redzones. See the "opt.junk", "opt.zero", "opt.quarantine", and "opt.redzone" option @@ -157,11 +160,8 @@ any of the following arguments (not a definitive list) to 'configure': --disable-valgrind Disable support for Valgrind. ---disable-experimental - Disable support for the experimental API (*allocm()). - --disable-zone-allocator - Disable zone allocator for Darwin. This means jemalloc won't be hooked as + Disable zone allocator for Darwin. This means jemalloc won't be hooked as the default allocator on OSX/iOS. --enable-utrace @@ -185,10 +185,106 @@ any of the following arguments (not a definitive list) to 'configure': thread-local variables via the __thread keyword. If TLS is available, jemalloc uses it for several purposes. +--disable-cache-oblivious + Disable cache-oblivious large allocation alignment for large allocation + requests with no alignment constraints. If this feature is disabled, all + large allocations are page-aligned as an implementation artifact, which can + severely harm CPU cache utilization. However, the cache-oblivious layout + comes at the cost of one extra page per large allocation, which in the + most extreme case increases physical memory usage for the 16 KiB size class + to 20 KiB. + --with-xslroot= Specify where to find DocBook XSL stylesheets when building the documentation. +--with-lg-page= + Specify the base 2 log of the system page size. This option is only useful + when cross compiling, since the configure script automatically determines + the host's page size by default. + +--with-lg-page-sizes= + Specify the comma-separated base 2 logs of the page sizes to support. This + option may be useful when cross-compiling in combination with + --with-lg-page, but its primary use case is for integration with FreeBSD's + libc, wherein jemalloc is embedded. 
+ +--with-lg-size-class-group= + Specify the base 2 log of how many size classes to use for each doubling in + size. By default jemalloc uses =2, which results in + e.g. the following size classes: + + [...], 64, + 80, 96, 112, 128, + 160, [...] + + =3 results in e.g. the following size classes: + + [...], 64, + 72, 80, 88, 96, 104, 112, 120, 128, + 144, [...] + + The minimal =0 causes jemalloc to only provide size + classes that are powers of 2: + + [...], + 64, + 128, + 256, + [...] + + An implementation detail currently limits the total number of small size + classes to 255, and a compilation error will result if the + you specify cannot be supported. The limit is + roughly =4, depending on page size. + +--with-lg-quantum= + Specify the base 2 log of the minimum allocation alignment. jemalloc needs + to know the minimum alignment that meets the following C standard + requirement (quoted from the April 12, 2011 draft of the C11 standard): + + The pointer returned if the allocation succeeds is suitably aligned so + that it may be assigned to a pointer to any type of object with a + fundamental alignment requirement and then used to access such an object + or an array of such objects in the space allocated [...] + + This setting is architecture-specific, and although jemalloc includes known + safe values for the most commonly used modern architectures, there is a + wrinkle related to GNU libc (glibc) that may impact your choice of + . On most modern architectures, this mandates 16-byte alignment + (=4), but the glibc developers chose not to meet this + requirement for performance reasons. An old discussion can be found at + https://sourceware.org/bugzilla/show_bug.cgi?id=206 . 
Unlike glibc, + jemalloc does follow the C standard by default (caveat: jemalloc + technically cheats if --with-lg-tiny-min is smaller than + --with-lg-quantum), but the fact that Linux systems already work around + this allocator noncompliance means that it is generally safe in practice to + let jemalloc's minimum alignment follow glibc's lead. If you specify + --with-lg-quantum=3 during configuration, jemalloc will provide additional + size classes that are not 16-byte-aligned (24, 40, and 56, assuming + --with-lg-size-class-group=2). + +--with-lg-tiny-min=<lg-tiny-min> + Specify the base 2 log of the minimum tiny size class to support. Tiny + size classes are powers of 2 less than the quantum, and are only + incorporated if <lg-tiny-min> is less than <lg-quantum> (see + --with-lg-quantum). Tiny size classes technically violate the C standard + requirement for minimum alignment, and crashes could conceivably result if + the compiler were to generate instructions that made alignment assumptions, + both because illegal instruction traps could result, and because accesses + could straddle page boundaries and cause segmentation faults due to + accessing unmapped addresses. + + The default of <lg-tiny-min>=3 works well in practice even on architectures + that technically require 16-byte alignment, probably for the same reason + --with-lg-quantum=3 works. Smaller tiny size classes can, and will, cause + crashes (see https://bugzilla.mozilla.org/show_bug.cgi?id=691003 for an + example). + + This option is rarely useful, and is mainly provided as documentation of a + subtle implementation detail. If you do use this option, specify a + value in [3, ..., <lg-quantum>].
+ The following environment variables (not a definitive list) impact configure's behavior: diff --git a/deps/jemalloc/Makefile.in b/deps/jemalloc/Makefile.in index d6b7d6ea3..1ac6f2926 100644 --- a/deps/jemalloc/Makefile.in +++ b/deps/jemalloc/Makefile.in @@ -28,6 +28,7 @@ CFLAGS := @CFLAGS@ LDFLAGS := @LDFLAGS@ EXTRA_LDFLAGS := @EXTRA_LDFLAGS@ LIBS := @LIBS@ +TESTLIBS := @TESTLIBS@ RPATH_EXTRA := @RPATH_EXTRA@ SO := @so@ IMPORTLIB := @importlib@ @@ -42,14 +43,16 @@ XSLTPROC := @XSLTPROC@ AUTOCONF := @AUTOCONF@ _RPATH = @RPATH@ RPATH = $(if $(1),$(call _RPATH,$(1))) -cfghdrs_in := @cfghdrs_in@ +cfghdrs_in := $(addprefix $(srcroot),@cfghdrs_in@) cfghdrs_out := @cfghdrs_out@ -cfgoutputs_in := @cfgoutputs_in@ +cfgoutputs_in := $(addprefix $(srcroot),@cfgoutputs_in@) cfgoutputs_out := @cfgoutputs_out@ enable_autogen := @enable_autogen@ enable_code_coverage := @enable_code_coverage@ -enable_experimental := @enable_experimental@ +enable_prof := @enable_prof@ +enable_valgrind := @enable_valgrind@ enable_zone_allocator := @enable_zone_allocator@ +MALLOC_CONF := @JEMALLOC_CPREFIX@MALLOC_CONF DSO_LDFLAGS = @DSO_LDFLAGS@ SOREV = @SOREV@ PIC_CFLAGS = @PIC_CFLAGS@ @@ -73,16 +76,20 @@ endif LIBJEMALLOC := $(LIBPREFIX)jemalloc$(install_suffix) # Lists of files. 
-BINS := $(srcroot)bin/pprof $(objroot)bin/jemalloc.sh +BINS := $(objroot)bin/jemalloc-config $(objroot)bin/jemalloc.sh $(objroot)bin/jeprof C_HDRS := $(objroot)include/jemalloc/jemalloc$(install_suffix).h C_SRCS := $(srcroot)src/jemalloc.c $(srcroot)src/arena.c \ $(srcroot)src/atomic.c $(srcroot)src/base.c $(srcroot)src/bitmap.c \ $(srcroot)src/chunk.c $(srcroot)src/chunk_dss.c \ $(srcroot)src/chunk_mmap.c $(srcroot)src/ckh.c $(srcroot)src/ctl.c \ $(srcroot)src/extent.c $(srcroot)src/hash.c $(srcroot)src/huge.c \ - $(srcroot)src/mb.c $(srcroot)src/mutex.c $(srcroot)src/prof.c \ - $(srcroot)src/quarantine.c $(srcroot)src/rtree.c $(srcroot)src/stats.c \ - $(srcroot)src/tcache.c $(srcroot)src/util.c $(srcroot)src/tsd.c + $(srcroot)src/mb.c $(srcroot)src/mutex.c $(srcroot)src/pages.c \ + $(srcroot)src/prof.c $(srcroot)src/quarantine.c $(srcroot)src/rtree.c \ + $(srcroot)src/stats.c $(srcroot)src/tcache.c $(srcroot)src/util.c \ + $(srcroot)src/tsd.c +ifeq ($(enable_valgrind), 1) +C_SRCS += $(srcroot)src/valgrind.c +endif ifeq ($(enable_zone_allocator), 1) C_SRCS += $(srcroot)src/zone.c endif @@ -98,53 +105,60 @@ DSOS := $(objroot)lib/$(LIBJEMALLOC).$(SOREV) ifneq ($(SOREV),$(SO)) DSOS += $(objroot)lib/$(LIBJEMALLOC).$(SO) endif +PC := $(objroot)jemalloc.pc MAN3 := $(objroot)doc/jemalloc$(install_suffix).3 DOCS_XML := $(objroot)doc/jemalloc$(install_suffix).xml -DOCS_HTML := $(DOCS_XML:$(objroot)%.xml=$(srcroot)%.html) -DOCS_MAN3 := $(DOCS_XML:$(objroot)%.xml=$(srcroot)%.3) +DOCS_HTML := $(DOCS_XML:$(objroot)%.xml=$(objroot)%.html) +DOCS_MAN3 := $(DOCS_XML:$(objroot)%.xml=$(objroot)%.3) DOCS := $(DOCS_HTML) $(DOCS_MAN3) -C_TESTLIB_SRCS := $(srcroot)test/src/math.c $(srcroot)test/src/mtx.c \ +C_TESTLIB_SRCS := $(srcroot)test/src/btalloc.c $(srcroot)test/src/btalloc_0.c \ + $(srcroot)test/src/btalloc_1.c $(srcroot)test/src/math.c \ + $(srcroot)test/src/mtx.c $(srcroot)test/src/mq.c \ $(srcroot)test/src/SFMT.c $(srcroot)test/src/test.c \ - $(srcroot)test/src/thd.c + 
$(srcroot)test/src/thd.c $(srcroot)test/src/timer.c C_UTIL_INTEGRATION_SRCS := $(srcroot)src/util.c -TESTS_UNIT := $(srcroot)test/unit/bitmap.c \ +TESTS_UNIT := $(srcroot)test/unit/atomic.c \ + $(srcroot)test/unit/bitmap.c \ $(srcroot)test/unit/ckh.c \ $(srcroot)test/unit/hash.c \ $(srcroot)test/unit/junk.c \ + $(srcroot)test/unit/junk_alloc.c \ + $(srcroot)test/unit/junk_free.c \ + $(srcroot)test/unit/lg_chunk.c \ $(srcroot)test/unit/mallctl.c \ $(srcroot)test/unit/math.c \ $(srcroot)test/unit/mq.c \ $(srcroot)test/unit/mtx.c \ $(srcroot)test/unit/prof_accum.c \ + $(srcroot)test/unit/prof_active.c \ $(srcroot)test/unit/prof_gdump.c \ $(srcroot)test/unit/prof_idump.c \ + $(srcroot)test/unit/prof_reset.c \ + $(srcroot)test/unit/prof_thread_name.c \ $(srcroot)test/unit/ql.c \ $(srcroot)test/unit/qr.c \ $(srcroot)test/unit/quarantine.c \ $(srcroot)test/unit/rb.c \ $(srcroot)test/unit/rtree.c \ $(srcroot)test/unit/SFMT.c \ + $(srcroot)test/unit/size_classes.c \ $(srcroot)test/unit/stats.c \ $(srcroot)test/unit/tsd.c \ $(srcroot)test/unit/util.c \ $(srcroot)test/unit/zero.c -TESTS_UNIT_AUX := $(srcroot)test/unit/prof_accum_a.c \ - $(srcroot)test/unit/prof_accum_b.c TESTS_INTEGRATION := $(srcroot)test/integration/aligned_alloc.c \ $(srcroot)test/integration/allocated.c \ + $(srcroot)test/integration/sdallocx.c \ $(srcroot)test/integration/mallocx.c \ - $(srcroot)test/integration/mremap.c \ + $(srcroot)test/integration/MALLOCX_ARENA.c \ + $(srcroot)test/integration/overflow.c \ $(srcroot)test/integration/posix_memalign.c \ $(srcroot)test/integration/rallocx.c \ $(srcroot)test/integration/thread_arena.c \ $(srcroot)test/integration/thread_tcache_enabled.c \ - $(srcroot)test/integration/xallocx.c -ifeq ($(enable_experimental), 1) -TESTS_INTEGRATION += $(srcroot)test/integration/allocm.c \ - $(srcroot)test/integration/MALLOCX_ARENA.c \ - $(srcroot)test/integration/rallocm.c -endif -TESTS_STRESS := + $(srcroot)test/integration/xallocx.c \ + $(srcroot)test/integration/chunk.c 
+TESTS_STRESS := $(srcroot)test/stress/microbench.c TESTS := $(TESTS_UNIT) $(TESTS_INTEGRATION) $(TESTS_STRESS) C_OBJS := $(C_SRCS:$(srcroot)%.c=$(objroot)%.$(O)) @@ -157,10 +171,9 @@ C_TESTLIB_STRESS_OBJS := $(C_TESTLIB_SRCS:$(srcroot)%.c=$(objroot)%.stress.$(O)) C_TESTLIB_OBJS := $(C_TESTLIB_UNIT_OBJS) $(C_TESTLIB_INTEGRATION_OBJS) $(C_UTIL_INTEGRATION_OBJS) $(C_TESTLIB_STRESS_OBJS) TESTS_UNIT_OBJS := $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%.$(O)) -TESTS_UNIT_AUX_OBJS := $(TESTS_UNIT_AUX:$(srcroot)%.c=$(objroot)%.$(O)) TESTS_INTEGRATION_OBJS := $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%.$(O)) TESTS_STRESS_OBJS := $(TESTS_STRESS:$(srcroot)%.c=$(objroot)%.$(O)) -TESTS_OBJS := $(TESTS_UNIT_OBJS) $(TESTS_UNIT_AUX_OBJS) $(TESTS_INTEGRATION_OBJS) $(TESTS_STRESS_OBJS) +TESTS_OBJS := $(TESTS_UNIT_OBJS) $(TESTS_INTEGRATION_OBJS) $(TESTS_STRESS_OBJS) .PHONY: all dist build_doc_html build_doc_man build_doc .PHONY: install_bin install_include install_lib @@ -174,10 +187,10 @@ all: build_lib dist: build_doc -$(srcroot)doc/%.html : $(objroot)doc/%.xml $(srcroot)doc/stylesheet.xsl $(objroot)doc/html.xsl +$(objroot)doc/%.html : $(objroot)doc/%.xml $(srcroot)doc/stylesheet.xsl $(objroot)doc/html.xsl $(XSLTPROC) -o $@ $(objroot)doc/html.xsl $< -$(srcroot)doc/%.3 : $(objroot)doc/%.xml $(srcroot)doc/stylesheet.xsl $(objroot)doc/manpages.xsl +$(objroot)doc/%.3 : $(objroot)doc/%.xml $(srcroot)doc/stylesheet.xsl $(objroot)doc/manpages.xsl $(XSLTPROC) -o $@ $(objroot)doc/manpages.xsl $< build_doc_html: $(DOCS_HTML) @@ -209,18 +222,12 @@ $(C_TESTLIB_STRESS_OBJS): $(objroot)test/src/%.stress.$(O): $(srcroot)test/src/% $(C_TESTLIB_STRESS_OBJS): CPPFLAGS += -DJEMALLOC_STRESS_TEST -DJEMALLOC_STRESS_TESTLIB $(C_TESTLIB_OBJS): CPPFLAGS += -I$(srcroot)test/include -I$(objroot)test/include $(TESTS_UNIT_OBJS): CPPFLAGS += -DJEMALLOC_UNIT_TEST -$(TESTS_UNIT_AUX_OBJS): CPPFLAGS += -DJEMALLOC_UNIT_TEST -define make-unit-link-dep -$(1): TESTS_UNIT_LINK_OBJS += $(2) -$(1): $(2) -endef -$(foreach 
test, $(TESTS_UNIT:$(srcroot)test/unit/%.c=$(objroot)test/unit/%$(EXE)), $(eval $(call make-unit-link-dep,$(test),$(filter $(test:%=%_a.$(O)) $(test:%=%_b.$(O)),$(TESTS_UNIT_AUX_OBJS))))) $(TESTS_INTEGRATION_OBJS): CPPFLAGS += -DJEMALLOC_INTEGRATION_TEST $(TESTS_STRESS_OBJS): CPPFLAGS += -DJEMALLOC_STRESS_TEST $(TESTS_OBJS): $(objroot)test/%.$(O): $(srcroot)test/%.c $(TESTS_OBJS): CPPFLAGS += -I$(srcroot)test/include -I$(objroot)test/include ifneq ($(IMPORTLIB),$(SO)) -$(C_OBJS): CPPFLAGS += -DDLLEXPORT +$(C_OBJS) $(C_JET_OBJS): CPPFLAGS += -DDLLEXPORT endif ifndef CC_MM @@ -229,7 +236,7 @@ HEADER_DIRS = $(srcroot)include/jemalloc/internal \ $(objroot)include/jemalloc $(objroot)include/jemalloc/internal HEADERS = $(wildcard $(foreach dir,$(HEADER_DIRS),$(dir)/*.h)) $(C_OBJS) $(C_PIC_OBJS) $(C_JET_OBJS) $(C_TESTLIB_OBJS) $(TESTS_OBJS): $(HEADERS) -$(TESTS_OBJS): $(objroot)test/unit/jemalloc_test.h +$(TESTS_OBJS): $(objroot)test/include/test/jemalloc_test.h endif $(C_OBJS) $(C_PIC_OBJS) $(C_JET_OBJS) $(C_TESTLIB_OBJS) $(TESTS_OBJS): %.$(O): @@ -259,15 +266,15 @@ $(STATIC_LIBS): $(objroot)test/unit/%$(EXE): $(objroot)test/unit/%.$(O) $(TESTS_UNIT_LINK_OBJS) $(C_JET_OBJS) $(C_TESTLIB_UNIT_OBJS) @mkdir -p $(@D) - $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LDFLAGS) $(filter-out -lm,$(LIBS)) -lm $(EXTRA_LDFLAGS) + $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LDFLAGS) $(filter-out -lm,$(LIBS)) -lm $(TESTLIBS) $(EXTRA_LDFLAGS) $(objroot)test/integration/%$(EXE): $(objroot)test/integration/%.$(O) $(C_TESTLIB_INTEGRATION_OBJS) $(C_UTIL_INTEGRATION_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) @mkdir -p $(@D) - $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(filter -lpthread,$(LIBS))) -lm $(EXTRA_LDFLAGS) + $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out 
-lm,$(filter -lpthread,$(LIBS))) -lm $(TESTLIBS) $(EXTRA_LDFLAGS) $(objroot)test/stress/%$(EXE): $(objroot)test/stress/%.$(O) $(C_JET_OBJS) $(C_TESTLIB_STRESS_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) @mkdir -p $(@D) - $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(LIBS)) -lm $(EXTRA_LDFLAGS) + $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(LIBS)) -lm $(TESTLIBS) $(EXTRA_LDFLAGS) build_lib_shared: $(DSOS) build_lib_static: $(STATIC_LIBS) @@ -301,7 +308,14 @@ install_lib_static: $(STATIC_LIBS) install -m 755 $$l $(LIBDIR); \ done -install_lib: install_lib_shared install_lib_static +install_lib_pc: $(PC) + install -d $(LIBDIR)/pkgconfig + @for l in $(PC); do \ + echo "install -m 644 $$l $(LIBDIR)/pkgconfig"; \ + install -m 644 $$l $(LIBDIR)/pkgconfig; \ +done + +install_lib: install_lib_shared install_lib_static install_lib_pc install_doc_html: install -d $(DATADIR)/doc/jemalloc$(install_suffix) @@ -330,18 +344,23 @@ check_unit_dir: @mkdir -p $(objroot)test/unit check_integration_dir: @mkdir -p $(objroot)test/integration -check_stress_dir: +stress_dir: @mkdir -p $(objroot)test/stress -check_dir: check_unit_dir check_integration_dir check_stress_dir +check_dir: check_unit_dir check_integration_dir check_unit: tests_unit check_unit_dir $(SHELL) $(objroot)test/test.sh $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%) +check_integration_prof: tests_integration check_integration_dir +ifeq ($(enable_prof), 1) + $(MALLOC_CONF)="prof:true" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) + $(MALLOC_CONF)="prof:true,prof_active:false" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) +endif check_integration: tests_integration check_integration_dir $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) -check_stress: 
tests_stress check_stress_dir +stress: tests_stress stress_dir $(SHELL) $(objroot)test/test.sh $(TESTS_STRESS:$(srcroot)%.c=$(objroot)%) -check: tests check_dir - $(SHELL) $(objroot)test/test.sh $(TESTS:$(srcroot)%.c=$(objroot)%) +check: tests check_dir check_integration_prof + $(SHELL) $(objroot)test/test.sh $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%) $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) ifeq ($(enable_code_coverage), 1) coverage_unit: check_unit @@ -355,7 +374,7 @@ coverage_integration: check_integration $(SHELL) $(srcroot)coverage.sh $(srcroot)test/src integration $(C_TESTLIB_INTEGRATION_OBJS) $(SHELL) $(srcroot)coverage.sh $(srcroot)test/integration integration $(TESTS_INTEGRATION_OBJS) -coverage_stress: check_stress +coverage_stress: stress $(SHELL) $(srcroot)coverage.sh $(srcroot)src pic $(C_PIC_OBJS) $(SHELL) $(srcroot)coverage.sh $(srcroot)src jet $(C_JET_OBJS) $(SHELL) $(srcroot)coverage.sh $(srcroot)test/src stress $(C_TESTLIB_STRESS_OBJS) @@ -400,8 +419,9 @@ clean: rm -f $(objroot)*.gcov.* distclean: clean - rm -rf $(objroot)autom4te.cache + rm -f $(objroot)bin/jemalloc-config rm -f $(objroot)bin/jemalloc.sh + rm -f $(objroot)bin/jeprof rm -f $(objroot)config.log rm -f $(objroot)config.status rm -f $(objroot)config.stamp @@ -410,7 +430,7 @@ distclean: clean relclean: distclean rm -f $(objroot)configure - rm -f $(srcroot)VERSION + rm -f $(objroot)VERSION rm -f $(DOCS_HTML) rm -f $(DOCS_MAN3) diff --git a/deps/jemalloc/VERSION b/deps/jemalloc/VERSION index dace31ba7..f1f9f1c61 100644 --- a/deps/jemalloc/VERSION +++ b/deps/jemalloc/VERSION @@ -1 +1 @@ -3.6.0-0-g46c0af68bd248b04df75e4f92d5fb804c3d75340 +4.0.3-0-ge9192eacf8935e29fc62fddc2701f7942b1cc02c diff --git a/deps/jemalloc/bin/jemalloc-config.in b/deps/jemalloc/bin/jemalloc-config.in new file mode 100644 index 000000000..b016c8d33 --- /dev/null +++ b/deps/jemalloc/bin/jemalloc-config.in @@ -0,0 +1,79 @@ +#!/bin/sh + +usage() { + cat < +Options: + --help | -h : Print usage. 
+ --version : Print jemalloc version. + --revision : Print shared library revision number. + --config : Print configure options used to build jemalloc. + --prefix : Print installation directory prefix. + --bindir : Print binary installation directory. + --datadir : Print data installation directory. + --includedir : Print include installation directory. + --libdir : Print library installation directory. + --mandir : Print manual page installation directory. + --cc : Print compiler used to build jemalloc. + --cflags : Print compiler flags used to build jemalloc. + --cppflags : Print preprocessor flags used to build jemalloc. + --ldflags : Print library flags used to build jemalloc. + --libs : Print libraries jemalloc was linked against. +EOF +} + +prefix="@prefix@" +exec_prefix="@exec_prefix@" + +case "$1" in +--help | -h) + usage + exit 0 + ;; +--version) + echo "@jemalloc_version@" + ;; +--revision) + echo "@rev@" + ;; +--config) + echo "@CONFIG@" + ;; +--prefix) + echo "@PREFIX@" + ;; +--bindir) + echo "@BINDIR@" + ;; +--datadir) + echo "@DATADIR@" + ;; +--includedir) + echo "@INCLUDEDIR@" + ;; +--libdir) + echo "@LIBDIR@" + ;; +--mandir) + echo "@MANDIR@" + ;; +--cc) + echo "@CC@" + ;; +--cflags) + echo "@CFLAGS@" + ;; +--cppflags) + echo "@CPPFLAGS@" + ;; +--ldflags) + echo "@LDFLAGS@ @EXTRA_LDFLAGS@" + ;; +--libs) + echo "@LIBS@" + ;; +*) + usage + exit 1 +esac diff --git a/deps/jemalloc/bin/pprof b/deps/jemalloc/bin/jeprof.in old mode 100755 new mode 100644 similarity index 95% rename from deps/jemalloc/bin/pprof rename to deps/jemalloc/bin/jeprof.in index a309943c1..e7178078a --- a/deps/jemalloc/bin/pprof +++ b/deps/jemalloc/bin/jeprof.in @@ -2,11 +2,11 @@ # Copyright (c) 1998-2007, Google Inc. # All rights reserved. 
-# +# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: -# +# # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above @@ -16,7 +16,7 @@ # * Neither the name of Google Inc. nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. -# +# # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR @@ -40,28 +40,28 @@ # # Examples: # -# % tools/pprof "program" "profile" +# % tools/jeprof "program" "profile" # Enters "interactive" mode # -# % tools/pprof --text "program" "profile" +# % tools/jeprof --text "program" "profile" # Generates one line per procedure # -# % tools/pprof --gv "program" "profile" +# % tools/jeprof --gv "program" "profile" # Generates annotated call-graph and displays via "gv" # -# % tools/pprof --gv --focus=Mutex "program" "profile" +# % tools/jeprof --gv --focus=Mutex "program" "profile" # Restrict to code paths that involve an entry that matches "Mutex" # -# % tools/pprof --gv --focus=Mutex --ignore=string "program" "profile" +# % tools/jeprof --gv --focus=Mutex --ignore=string "program" "profile" # Restrict to code paths that involve an entry that matches "Mutex" # and does not match "string" # -# % tools/pprof --list=IBF_CheckDocid "program" "profile" +# % tools/jeprof --list=IBF_CheckDocid "program" "profile" # Generates disassembly listing of all routines with at least one # sample that match the --list= pattern. The listing is # annotated with the flat and cumulative sample counts at each line. 
# -# % tools/pprof --disasm=IBF_CheckDocid "program" "profile" +# % tools/jeprof --disasm=IBF_CheckDocid "program" "profile" # Generates disassembly listing of all routines with at least one # sample that match the --disasm= pattern. The listing is # annotated with the flat and cumulative sample counts at each PC value. @@ -72,10 +72,11 @@ use strict; use warnings; use Getopt::Long; +my $JEPROF_VERSION = "@jemalloc_version@"; my $PPROF_VERSION = "2.0"; # These are the object tools we use which can come from a -# user-specified location using --tools, from the PPROF_TOOLS +# user-specified location using --tools, from the JEPROF_TOOLS # environment variable, or from the environment. my %obj_tool_map = ( "objdump" => "objdump", @@ -144,13 +145,13 @@ my $sep_address = undef; sub usage_string { return < +jeprof [options] is a space separated list of profile names. -pprof [options] +jeprof [options] is a list of profile files where each file contains the necessary symbol mappings as well as profile data (likely generated with --raw). -pprof [options] +jeprof [options] is a remote form. Symbols are obtained from host:port$SYMBOL_PAGE Each name can be: @@ -161,9 +162,9 @@ pprof [options] $GROWTH_PAGE, $CONTENTION_PAGE, /pprof/wall, $CENSUSPROFILE_PAGE, or /pprof/filteredprofile. For instance: - pprof http://myserver.com:80$HEAP_PAGE + jeprof http://myserver.com:80$HEAP_PAGE If / is omitted, the service defaults to $PROFILE_PAGE (cpu profiling). -pprof --symbols +jeprof --symbols Maps addresses to symbol names. 
In this mode, stdin should be a list of library mappings, in the same format as is found in the heap- and cpu-profile files (this loosely matches that of /proc/self/maps @@ -202,7 +203,7 @@ Output type: --pdf Generate PDF to stdout --svg Generate SVG to stdout --gif Generate GIF to stdout - --raw Generate symbolized pprof data (useful with remote fetch) + --raw Generate symbolized jeprof data (useful with remote fetch) Heap-Profile Options: --inuse_space Display in-use (mega)bytes [default] @@ -223,6 +224,7 @@ Call-graph Options: --edgefraction= Hide edges below *total [default=.001] --maxdegree= Max incoming/outgoing edges per node [default=8] --focus= Focus on nodes matching + --thread= Show profile for thread --ignore= Ignore nodes matching --scale= Set GV scaling [default=0] --heapcheck Make nodes with non-0 object counts @@ -235,34 +237,34 @@ Miscellaneous: --version Version information Environment Variables: - PPROF_TMPDIR Profiles directory. Defaults to \$HOME/pprof - PPROF_TOOLS Prefix for object tools pathnames + JEPROF_TMPDIR Profiles directory. 
Defaults to \$HOME/jeprof + JEPROF_TOOLS Prefix for object tools pathnames Examples: -pprof /bin/ls ls.prof +jeprof /bin/ls ls.prof Enters "interactive" mode -pprof --text /bin/ls ls.prof +jeprof --text /bin/ls ls.prof Outputs one line per procedure -pprof --web /bin/ls ls.prof +jeprof --web /bin/ls ls.prof Displays annotated call-graph in web browser -pprof --gv /bin/ls ls.prof +jeprof --gv /bin/ls ls.prof Displays annotated call-graph via 'gv' -pprof --gv --focus=Mutex /bin/ls ls.prof +jeprof --gv --focus=Mutex /bin/ls ls.prof Restricts to code paths including a .*Mutex.* entry -pprof --gv --focus=Mutex --ignore=string /bin/ls ls.prof +jeprof --gv --focus=Mutex --ignore=string /bin/ls ls.prof Code paths including Mutex but not string -pprof --list=getdir /bin/ls ls.prof +jeprof --list=getdir /bin/ls ls.prof (Per-line) annotated source listing for getdir() -pprof --disasm=getdir /bin/ls ls.prof +jeprof --disasm=getdir /bin/ls ls.prof (Per-PC) annotated disassembly for getdir() -pprof http://localhost:1234/ +jeprof http://localhost:1234/ Enters "interactive" mode -pprof --text localhost:1234 +jeprof --text localhost:1234 Outputs one line per procedure for localhost:1234 -pprof --raw localhost:1234 > ./local.raw -pprof --text ./local.raw +jeprof --raw localhost:1234 > ./local.raw +jeprof --text ./local.raw Fetches a remote profile for later analysis and then analyzes it in text mode. EOF @@ -270,7 +272,8 @@ EOF sub version_string { return < \$main::opt_edgefraction, "maxdegree=i" => \$main::opt_maxdegree, "focus=s" => \$main::opt_focus, + "thread=s" => \$main::opt_thread, "ignore=s" => \$main::opt_ignore, "scale=i" => \$main::opt_scale, "heapcheck" => \$main::opt_heapcheck, @@ -562,6 +567,86 @@ sub Init() { } } +sub FilterAndPrint { + my ($profile, $symbols, $libs, $thread) = @_; + + # Get total data in profile + my $total = TotalProfile($profile); + + # Remove uniniteresting stack items + $profile = RemoveUninterestingFrames($symbols, $profile); + + # Focus? 
+ if ($main::opt_focus ne '') { + $profile = FocusProfile($symbols, $profile, $main::opt_focus); + } + + # Ignore? + if ($main::opt_ignore ne '') { + $profile = IgnoreProfile($symbols, $profile, $main::opt_ignore); + } + + my $calls = ExtractCalls($symbols, $profile); + + # Reduce profiles to required output granularity, and also clean + # each stack trace so a given entry exists at most once. + my $reduced = ReduceProfile($symbols, $profile); + + # Get derived profiles + my $flat = FlatProfile($reduced); + my $cumulative = CumulativeProfile($reduced); + + # Print + if (!$main::opt_interactive) { + if ($main::opt_disasm) { + PrintDisassembly($libs, $flat, $cumulative, $main::opt_disasm); + } elsif ($main::opt_list) { + PrintListing($total, $libs, $flat, $cumulative, $main::opt_list, 0); + } elsif ($main::opt_text) { + # Make sure the output is empty when have nothing to report + # (only matters when --heapcheck is given but we must be + # compatible with old branches that did not pass --heapcheck always): + if ($total != 0) { + printf("Total%s: %s %s\n", + (defined($thread) ? " (t$thread)" : ""), + Unparse($total), Units()); + } + PrintText($symbols, $flat, $cumulative, -1); + } elsif ($main::opt_raw) { + PrintSymbolizedProfile($symbols, $profile, $main::prog); + } elsif ($main::opt_callgrind) { + PrintCallgrind($calls); + } else { + if (PrintDot($main::prog, $symbols, $profile, $flat, $cumulative, $total)) { + if ($main::opt_gv) { + RunGV(TempName($main::next_tmpfile, "ps"), ""); + } elsif ($main::opt_evince) { + RunEvince(TempName($main::next_tmpfile, "pdf"), ""); + } elsif ($main::opt_web) { + my $tmp = TempName($main::next_tmpfile, "svg"); + RunWeb($tmp); + # The command we run might hand the file name off + # to an already running browser instance and then exit. + # Normally, we'd remove $tmp on exit (right now), + # but fork a child to remove $tmp a little later, so that the + # browser has time to load it first. 
+ delete $main::tempnames{$tmp}; + if (fork() == 0) { + sleep 5; + unlink($tmp); + exit(0); + } + } + } else { + cleanup(); + exit(1); + } + } + } else { + InteractiveMode($profile, $symbols, $libs, $total); + } +} + sub Main() { Init(); $main::collected_profile = undef; @@ -605,9 +690,6 @@ sub Main() { $symbol_map = MergeSymbols($symbol_map, $base->{symbols}); } - # Get total data in profile - my $total = TotalProfile($profile); - # Collect symbols my $symbols; if ($main::use_symbolized_profile) { @@ -622,75 +704,17 @@ sub Main() { $symbols = ExtractSymbols($libs, $pcs); } - # Remove uniniteresting stack items - $profile = RemoveUninterestingFrames($symbols, $profile); - - # Focus? - if ($main::opt_focus ne '') { - $profile = FocusProfile($symbols, $profile, $main::opt_focus); + if (!defined($main::opt_thread)) { + FilterAndPrint($profile, $symbols, $libs); } - - # Ignore? - if ($main::opt_ignore ne '') { - $profile = IgnoreProfile($symbols, $profile, $main::opt_ignore); - } - - my $calls = ExtractCalls($symbols, $profile); - - # Reduce profiles to required output granularity, and also clean - # each stack trace so a given entry exists at most once. 
- my $reduced = ReduceProfile($symbols, $profile); - - # Get derived profiles - my $flat = FlatProfile($reduced); - my $cumulative = CumulativeProfile($reduced); - - # Print - if (!$main::opt_interactive) { - if ($main::opt_disasm) { - PrintDisassembly($libs, $flat, $cumulative, $main::opt_disasm); - } elsif ($main::opt_list) { - PrintListing($total, $libs, $flat, $cumulative, $main::opt_list, 0); - } elsif ($main::opt_text) { - # Make sure the output is empty when have nothing to report - # (only matters when --heapcheck is given but we must be - # compatible with old branches that did not pass --heapcheck always): - if ($total != 0) { - printf("Total: %s %s\n", Unparse($total), Units()); - } - PrintText($symbols, $flat, $cumulative, -1); - } elsif ($main::opt_raw) { - PrintSymbolizedProfile($symbols, $profile, $main::prog); - } elsif ($main::opt_callgrind) { - PrintCallgrind($calls); - } else { - if (PrintDot($main::prog, $symbols, $profile, $flat, $cumulative, $total)) { - if ($main::opt_gv) { - RunGV(TempName($main::next_tmpfile, "ps"), ""); - } elsif ($main::opt_evince) { - RunEvince(TempName($main::next_tmpfile, "pdf"), ""); - } elsif ($main::opt_web) { - my $tmp = TempName($main::next_tmpfile, "svg"); - RunWeb($tmp); - # The command we run might hand the file name off - # to an already running browser instance and then exit. - # Normally, we'd remove $tmp on exit (right now), - # but fork a child to remove $tmp a little later, so that the - # browser has time to load it first. 
- delete $main::tempnames{$tmp}; - if (fork() == 0) { - sleep 5; - unlink($tmp); - exit(0); - } - } - } else { - cleanup(); - exit(1); + if (defined($data->{threads})) { + foreach my $thread (sort { $a <=> $b } keys(%{$data->{threads}})) { + if (defined($main::opt_thread) && + ($main::opt_thread eq '*' || $main::opt_thread == $thread)) { + my $thread_profile = $data->{threads}{$thread}; + FilterAndPrint($thread_profile, $symbols, $libs, $thread); } } - } else { - InteractiveMode($profile, $symbols, $libs, $total); } cleanup(); @@ -780,14 +804,14 @@ sub InteractiveMode { $| = 1; # Make output unbuffered for interactive mode my ($orig_profile, $symbols, $libs, $total) = @_; - print STDERR "Welcome to pprof! For help, type 'help'.\n"; + print STDERR "Welcome to jeprof! For help, type 'help'.\n"; # Use ReadLine if it's installed and input comes from a console. if ( -t STDIN && !ReadlineMightFail() && defined(eval {require Term::ReadLine}) ) { - my $term = new Term::ReadLine 'pprof'; - while ( defined ($_ = $term->readline('(pprof) '))) { + my $term = new Term::ReadLine 'jeprof'; + while ( defined ($_ = $term->readline('(jeprof) '))) { $term->addhistory($_) if /\S/; if (!InteractiveCommand($orig_profile, $symbols, $libs, $total, $_)) { last; # exit when we get an interactive command to quit @@ -795,7 +819,7 @@ sub InteractiveMode { } } else { # don't have readline while (1) { - print STDERR "(pprof) "; + print STDERR "(jeprof) "; $_ = ; last if ! defined $_ ; s/\r//g; # turn windows-looking lines into unix-looking lines @@ -988,7 +1012,7 @@ sub ProcessProfile { sub InteractiveHelpMessage { print STDERR <