Jemalloc updated to 3.0.0.

Full changelog here:

http://www.canonware.com/cgi-bin/gitweb.cgi?p=jemalloc.git;a=blob_plain;f=ChangeLog;hb=master

Notable improvements from the point of view of Redis:

1) Bug fixes.
2) Support for Valgrind.
3) Support for OS X Lion and FreeBSD.
antirez 2012-05-15 15:27:12 +02:00
parent 99733fec8b
commit 39f8289c41
157 changed files with 42924 additions and 9103 deletions

23 deps/jemalloc.orig/.gitignore vendored Normal file
@@ -0,0 +1,23 @@
/autom4te.cache/
/config.stamp
/config.log
/config.status
/configure
/doc/html.xsl
/doc/manpages.xsl
/doc/jemalloc.xml
/doc/jemalloc.html
/doc/jemalloc.3
/lib/
/Makefile
/include/jemalloc/internal/jemalloc_internal\.h
/include/jemalloc/jemalloc\.h
/include/jemalloc/jemalloc_defs\.h
/test/jemalloc_test\.h
/src/*.[od]
/test/*.[od]
/test/*.out
/test/[a-z]*
!test/*.c
!test/*.exp
/VERSION

51 deps/jemalloc.orig/COPYING vendored Normal file
@@ -0,0 +1,51 @@
Unless otherwise specified, files in the jemalloc source distribution are
subject to the following licenses:
--------------------------------------------------------------------------------
Copyright (C) 2002-2010 Jason Evans <jasone@canonware.com>.
All rights reserved.
Copyright (C) 2007-2010 Mozilla Foundation. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice(s),
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice(s),
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY EXPRESS
OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--------------------------------------------------------------------------------
Copyright (C) 2009-2010 Facebook, Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice, this
list of conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.
* Neither the name of Facebook, Inc. nor the names of its contributors may be
used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--------------------------------------------------------------------------------

250 deps/jemalloc.orig/ChangeLog vendored Normal file
@@ -0,0 +1,250 @@
Following are change highlights associated with official releases. Important
bug fixes are all mentioned, but internal enhancements are omitted here for
brevity (even though they are more fun to write about). Much more detail can be
found in the git revision history:
http://www.canonware.com/cgi-bin/gitweb.cgi?p=jemalloc.git
git://canonware.com/jemalloc.git
* 2.2.5 (November 14, 2011)
Bug fixes:
- Fix huge_ralloc() race when using mremap(2). This is a serious bug that
could cause memory corruption and/or crashes.
- Fix huge_ralloc() to maintain chunk statistics.
- Fix malloc_stats_print(..., "a") output.
* 2.2.4 (November 5, 2011)
Bug fixes:
- Initialize arenas_tsd before using it. This bug existed for 2.2.[0-3], as
well as for --disable-tls builds in earlier releases.
- Do not assume a 4 KiB page size in test/rallocm.c.
* 2.2.3 (August 31, 2011)
This version fixes numerous bugs related to heap profiling.
Bug fixes:
- Fix a prof-related race condition. This bug could cause memory corruption,
but only occurred in non-default configurations (prof_accum:false).
- Fix off-by-one backtracing issues (make sure that prof_alloc_prep() is
excluded from backtraces).
- Fix a prof-related bug in realloc() (only triggered by OOM errors).
- Fix prof-related bugs in allocm() and rallocm().
- Fix prof_tdata_cleanup() for --disable-tls builds.
- Fix a relative include path, to fix objdir builds.
* 2.2.2 (July 30, 2011)
Bug fixes:
- Fix a build error for --disable-tcache.
- Fix assertions in arena_purge() (for real this time).
- Add the --with-private-namespace option. This is a workaround for symbol
conflicts that can inadvertently arise when using static libraries.
* 2.2.1 (March 30, 2011)
Bug fixes:
- Implement atomic operations for x86/x64. This fixes compilation failures
for versions of gcc that are still in wide use.
- Fix an assertion in arena_purge().
* 2.2.0 (March 22, 2011)
This version incorporates several improvements to algorithms and data
structures that tend to reduce fragmentation and increase speed.
New features:
- Add the "stats.cactive" mallctl.
- Update pprof (from google-perftools 1.7).
- Improve backtracing-related configuration logic, and add the
--disable-prof-libgcc option.
Bug fixes:
- Change default symbol visibility from "internal", to "hidden", which
decreases the overhead of library-internal function calls.
- Fix symbol visibility so that it is also set on OS X.
- Fix a build dependency regression caused by the introduction of the .pic.o
suffix for PIC object files.
- Add missing checks for mutex initialization failures.
- Don't use libgcc-based backtracing except on x64, where it is known to work.
- Fix deadlocks on OS X that were due to memory allocation in
pthread_mutex_lock().
- Heap profiling-specific fixes:
+ Fix memory corruption due to integer overflow in small region index
computation, when using a small enough sample interval that profiling
context pointers are stored in small run headers.
+ Fix a bootstrap ordering bug that only occurred with TLS disabled.
+ Fix a rallocm() rsize bug.
+ Fix error detection bugs for aligned memory allocation.
* 2.1.3 (March 14, 2011)
Bug fixes:
- Fix a cpp logic regression (due to the "thread.{de,}allocatedp" mallctl fix
for OS X in 2.1.2).
- Fix a "thread.arena" mallctl bug.
- Fix a thread cache stats merging bug.
* 2.1.2 (March 2, 2011)
Bug fixes:
- Fix "thread.{de,}allocatedp" mallctl for OS X.
- Add missing jemalloc.a to build system.
* 2.1.1 (January 31, 2011)
Bug fixes:
- Fix aligned huge reallocation (affected allocm()).
- Fix the ALLOCM_LG_ALIGN macro definition.
- Fix a heap dumping deadlock.
- Fix a "thread.arena" mallctl bug.
* 2.1.0 (December 3, 2010)
This version incorporates some optimizations that can't quite be considered
bug fixes.
New features:
- Use Linux's mremap(2) for huge object reallocation when possible.
- Avoid locking in mallctl*() when possible.
- Add the "thread.[de]allocatedp" mallctl's.
- Convert the manual page source from roff to DocBook, and generate both roff
and HTML manuals.
Bug fixes:
- Fix a crash due to incorrect bootstrap ordering. This only impacted
--enable-debug --enable-dss configurations.
- Fix a minor statistics bug for mallctl("swap.avail", ...).
* 2.0.1 (October 29, 2010)
Bug fixes:
- Fix a race condition in heap profiling that could cause undefined behavior
if "opt.prof_accum" were disabled.
- Add missing mutex unlocks for some OOM error paths in the heap profiling
code.
- Fix a compilation error for non-C99 builds.
* 2.0.0 (October 24, 2010)
This version focuses on the experimental *allocm() API, and on improved
run-time configuration/introspection. Nonetheless, numerous performance
improvements are also included.
New features:
- Implement the experimental {,r,s,d}allocm() API, which provides a superset
of the functionality available via malloc(), calloc(), posix_memalign(),
realloc(), malloc_usable_size(), and free(). These functions can be used to
allocate/reallocate aligned zeroed memory, ask for optional extra memory
during reallocation, prevent object movement during reallocation, etc.
- Replace JEMALLOC_OPTIONS/JEMALLOC_PROF_PREFIX with MALLOC_CONF, which is
more human-readable, and more flexible. For example:
JEMALLOC_OPTIONS=AJP
is now:
MALLOC_CONF=abort:true,fill:true,stats_print:true
- Port to Apple OS X. Sponsored by Mozilla.
- Make it possible for the application to control thread-->arena mappings via
the "thread.arena" mallctl.
- Add compile-time support for all TLS-related functionality via pthreads TSD.
This is mainly of interest for OS X, which does not support TLS, but has a
TSD implementation with similar performance.
- Override memalign() and valloc() if they are provided by the system.
- Add the "arenas.purge" mallctl, which can be used to synchronously purge all
dirty unused pages.
- Make cumulative heap profiling data optional, so that it is possible to
limit the amount of memory consumed by heap profiling data structures.
- Add per thread allocation counters that can be accessed via the
"thread.allocated" and "thread.deallocated" mallctls.
Incompatible changes:
- Remove JEMALLOC_OPTIONS and malloc_options (see MALLOC_CONF above).
- Increase default backtrace depth from 4 to 128 for heap profiling.
- Disable interval-based profile dumps by default.
Bug fixes:
- Remove bad assertions in fork handler functions. These assertions could
cause aborts for some combinations of configure settings.
- Fix strerror_r() usage to deal with non-standard semantics in GNU libc.
- Fix leak context reporting. This bug tended to cause the number of contexts
to be underreported (though the reported number of objects and bytes were
correct).
- Fix a realloc() bug for large in-place growing reallocation. This bug could
cause memory corruption, but it was hard to trigger.
- Fix an allocation bug for small allocations that could be triggered if
multiple threads raced to create a new run of backing pages.
- Enhance the heap profiler to trigger samples based on usable size, rather
than request size.
- Fix a heap profiling bug due to sometimes losing track of requested object
size for sampled objects.
* 1.0.3 (August 12, 2010)
Bug fixes:
- Fix the libunwind-based implementation of stack backtracing (used for heap
profiling). This bug could cause zero-length backtraces to be reported.
- Add a missing mutex unlock in library initialization code. If multiple
threads raced to initialize malloc, some of them could end up permanently
blocked.
* 1.0.2 (May 11, 2010)
Bug fixes:
- Fix junk filling of large objects, which could cause memory corruption.
- Add MAP_NORESERVE support for chunk mapping, because otherwise virtual
memory limits could cause swap file configuration to fail. Contributed by
Jordan DeLong.
* 1.0.1 (April 14, 2010)
Bug fixes:
- Fix compilation when --enable-fill is specified.
- Fix threads-related profiling bugs that affected accuracy and caused memory
to be leaked during thread exit.
- Fix dirty page purging race conditions that could cause crashes.
- Fix crash in tcache flushing code during thread destruction.
* 1.0.0 (April 11, 2010)
This release focuses on speed and run-time introspection. Numerous
algorithmic improvements make this release substantially faster than its
predecessors.
New features:
- Implement autoconf-based configuration system.
- Add mallctl*(), for the purposes of introspection and run-time
configuration.
- Make it possible for the application to manually flush a thread's cache, via
the "tcache.flush" mallctl.
- Base maximum dirty page count on proportion of active memory.
- Compute various additional run-time statistics, including per size class
statistics for large objects.
- Expose malloc_stats_print(), which can be called repeatedly by the
application.
- Simplify the malloc_message() signature to only take one string argument,
and incorporate an opaque data pointer argument for use by the application
in combination with malloc_stats_print().
- Add support for allocation backed by one or more swap files, and allow the
application to disable over-commit if swap files are in use.
- Implement allocation profiling and leak checking.
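A minimal sketch of the mallctl*() and malloc_stats_print() interfaces introduced here (illustrative only; it assumes a --enable-stats --enable-tcache build without --with-jemalloc-prefix, linked against -ljemalloc):
#include <stdio.h>
#include <stdlib.h>
#include <jemalloc/jemalloc.h>

int
main(void)
{
    size_t allocated, len;

    free(malloc(64));           /* Force allocator initialization. */

    /* Read a statistic: total bytes currently allocated by the application. */
    len = sizeof(allocated);
    if (mallctl("stats.allocated", &allocated, &len, NULL, 0) == 0)
        printf("stats.allocated: %zu\n", allocated);

    /* Write-only control: flush this thread's cache back to the arenas. */
    mallctl("tcache.flush", NULL, NULL, NULL, 0);

    /* Print human-readable allocator statistics. */
    malloc_stats_print(NULL, NULL, NULL);
    return (0);
}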
Removed features:
- Remove the dynamic arena rebalancing code, since thread-specific caching
reduces its utility.
Bug fixes:
- Modify chunk allocation to work when address space layout randomization
(ASLR) is in use.
- Fix thread cleanup bugs related to TLS destruction.
- Handle 0-size allocation requests in posix_memalign().
- Fix a chunk leak. The leaked chunks were never touched, so this impacted
virtual memory usage, but not physical memory usage.
* linux_2008082[78]a (August 27/28, 2008)
These snapshot releases are the simple result of incorporating Linux-specific
support into the FreeBSD malloc sources.
--------------------------------------------------------------------------------
vim:filetype=text:textwidth=80

257 deps/jemalloc.orig/INSTALL vendored Normal file
@@ -0,0 +1,257 @@
Building and installing jemalloc can be as simple as typing the following while
in the root directory of the source tree:
./configure
make
make install
=== Advanced configuration =====================================================
The 'configure' script supports numerous options that allow control of which
functionality is enabled, where jemalloc is installed, etc. Optionally, pass
any of the following arguments (not a definitive list) to 'configure':
--help
Print a definitive list of options.
--prefix=<install-root-dir>
Set the base directory in which to install. For example:
./configure --prefix=/usr/local
will cause files to be installed into /usr/local/include, /usr/local/lib,
and /usr/local/man.
--with-rpath=<colon-separated-rpath>
Embed one or more library paths, so that libjemalloc can find the libraries
it is linked to. This works only on ELF-based systems.
--with-jemalloc-prefix=<prefix>
Prefix all public APIs with <prefix>. For example, if <prefix> is
"prefix_", API changes like the following occur:
malloc() --> prefix_malloc()
malloc_conf --> prefix_malloc_conf
/etc/malloc.conf --> /etc/prefix_malloc.conf
MALLOC_CONF --> PREFIX_MALLOC_CONF
This makes it possible to use jemalloc at the same time as the system
allocator, or even to use multiple copies of jemalloc simultaneously.
By default, the prefix is "", except on OS X, where it is "je_". On OS X,
jemalloc overlays the default malloc zone, but makes no attempt to actually
replace the "malloc", "calloc", etc. symbols.
--with-private-namespace=<prefix>
Prefix all library-private APIs with <prefix>. For shared libraries,
symbol visibility mechanisms prevent these symbols from being exported, but
for static libraries, naming collisions are a real possibility. By
default, the prefix is "" (empty string).
--with-install-suffix=<suffix>
Append <suffix> to the base name of all installed files, such that multiple
versions of jemalloc can coexist in the same installation directory. For
example, libjemalloc.so.0 becomes libjemalloc<suffix>.so.0.
--enable-cc-silence
Enable code that silences non-useful compiler warnings. This is helpful
when trying to tell serious warnings from those due to compiler
limitations, but it potentially incurs a performance penalty.
--enable-debug
Enable assertions and validation code. This incurs a substantial
performance hit, but is very useful during application development.
--enable-stats
Enable statistics gathering functionality. See the "opt.stats_print"
option documentation for usage details.
--enable-prof
Enable heap profiling and leak detection functionality. See the "opt.prof"
option documentation for usage details. When enabled, there are several
approaches to backtracing, and the configure script chooses the first one
in the following list that appears to function correctly:
+ libunwind (requires --enable-prof-libunwind)
+ libgcc (unless --disable-prof-libgcc)
+ gcc intrinsics (unless --disable-prof-gcc)
--enable-prof-libunwind
Use the libunwind library (http://www.nongnu.org/libunwind/) for stack
backtracing.
--disable-prof-libgcc
Disable the use of libgcc's backtracing functionality.
--disable-prof-gcc
Disable the use of gcc intrinsics for backtracing.
--with-static-libunwind=<libunwind.a>
Statically link against the specified libunwind.a rather than dynamically
linking with -lunwind.
--disable-tiny
Disable tiny (sub-quantum-sized) object support. Technically it is not
legal for a malloc implementation to allocate objects with less than
quantum alignment (8 or 16 bytes, depending on architecture), but in
practice it never causes any problems if, for example, 4-byte allocations
are 4-byte-aligned.
--disable-tcache
Disable thread-specific caches for small objects. Objects are cached and
released in bulk, thus reducing the total number of mutex operations. See
the "opt.tcache" option for usage details.
--enable-swap
Enable mmap()ed swap file support. When this feature is built in, it is
possible to specify one or more files that act as backing store. This
effectively allows for per application swap files.
--enable-dss
Enable support for page allocation/deallocation via sbrk(2), in addition to
mmap(2).
--enable-fill
Enable support for junk/zero filling of memory. See the "opt.junk"/
"opt.zero" option documentation for usage details.
--enable-xmalloc
Enable support for optional immediate termination due to out-of-memory
errors, as is commonly implemented by an "xmalloc" wrapper function for malloc.
See the "opt.xmalloc" option documentation for usage details.
--enable-sysv
Enable support for System V semantics, wherein malloc(0) returns NULL
rather than a minimal allocation. See the "opt.sysv" option documentation
for usage details.
--enable-dynamic-page-shift
Under most conditions, the system page size never changes (usually 4KiB or
8KiB, depending on architecture and configuration), and unless this option
is enabled, jemalloc assumes that page size can safely be determined during
configuration and hard-coded. Enabling dynamic page size determination has
a measurable impact on performance, since the compiler is forced to load
the page size from memory rather than embedding immediate values.
--disable-lazy-lock
Disable code that wraps pthread_create() to detect when an application
switches from single-threaded to multi-threaded mode, so that it can avoid
mutex locking/unlocking operations while in single-threaded mode. In
practice, this feature usually has little impact on performance unless
thread-specific caching is disabled.
--disable-tls
Disable thread-local storage (TLS), which allows for fast access to
thread-local variables via the __thread keyword. If TLS is available,
jemalloc uses it for several purposes.
--with-xslroot=<path>
Specify where to find DocBook XSL stylesheets when building the
documentation.
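A small sketch of the coexistence case enabled by --with-jemalloc-prefix above (illustrative only; it assumes a Linux/ELF build configured with --with-jemalloc-prefix=je_ and linked with -ljemalloc, so the installed header declares the je_-prefixed names):
#include <stdlib.h>
#include <jemalloc/jemalloc.h>  /* Declares je_malloc(), je_free(), ... */

int
main(void)
{
    void *sys = malloc(100);    /* Served by the system allocator. */
    void *jem = je_malloc(100); /* Served by jemalloc via the prefix. */

    je_free(jem);
    free(sys);
    return (0);
}
Since the public symbols no longer collide with libc's, both allocators can be linked into the same program; on OS X the behavior differs because jemalloc overlays the default malloc zone, as noted above.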
The following environment variables (not a definitive list) impact configure's
behavior:
CFLAGS="?"
Pass these flags to the compiler. You probably shouldn't define this unless
you know what you are doing. (Use EXTRA_CFLAGS instead.)
EXTRA_CFLAGS="?"
Append these flags to CFLAGS. This makes it possible to add flags such as
-Werror, while allowing the configure script to determine what other flags
are appropriate for the specified configuration.
The configure script specifically checks whether an optimization flag (-O*)
is specified in EXTRA_CFLAGS, and refrains from specifying an optimization
level if it finds that one has already been specified.
CPPFLAGS="?"
Pass these flags to the C preprocessor. Note that CFLAGS is not passed to
'cpp' when 'configure' is looking for include files, so you must use
CPPFLAGS instead if you need to help 'configure' find header files.
LD_LIBRARY_PATH="?"
'ld' uses this colon-separated list to find libraries.
LDFLAGS="?"
Pass these flags when linking.
PATH="?"
'configure' uses this to find programs.
=== Advanced compilation =======================================================
To install only parts of jemalloc, use the following targets:
install_bin
install_include
install_lib
install_doc
To clean up build results to varying degrees, use the following make targets:
clean
distclean
relclean
=== Advanced installation ======================================================
Optionally, define make variables when invoking make, including (not
exclusively):
INCLUDEDIR="?"
Use this as the installation prefix for header files.
LIBDIR="?"
Use this as the installation prefix for libraries.
MANDIR="?"
Use this as the installation prefix for man pages.
DESTDIR="?"
Prepend DESTDIR to INCLUDEDIR, LIBDIR, DATADIR, and MANDIR. This is useful
when installing to a different path than was specified via --prefix.
CC="?"
Use this to invoke the C compiler.
CFLAGS="?"
Pass these flags to the compiler.
CPPFLAGS="?"
Pass these flags to the C preprocessor.
LDFLAGS="?"
Pass these flags when linking.
PATH="?"
Use this to search for programs used during configuration and building.
=== Development ================================================================
If you intend to make non-trivial changes to jemalloc, use the 'autogen.sh'
script rather than 'configure'. This re-generates 'configure', enables
configuration dependency rules, and enables re-generation of automatically
generated source files.
The build system supports using an object directory separate from the source
tree. For example, you can create an 'obj' directory, and from within that
directory, issue configuration and build commands:
autoconf
mkdir obj
cd obj
../configure --enable-autogen
make
=== Documentation ==============================================================
The manual page is generated in both html and roff formats. Any web browser
can be used to view the html manual. The roff manual page can be formatted
prior to installation via any of the following commands:
nroff -man -t doc/jemalloc.3
groff -man -t -Tps doc/jemalloc.3 | ps2pdf - doc/jemalloc.3.pdf
(cd doc; groff -man -man-ext -t -Thtml jemalloc.3 > jemalloc.3.html)

259 deps/jemalloc.orig/Makefile.in vendored Normal file
@@ -0,0 +1,259 @@
# Clear out all vpaths, then set just one (default vpath) for the main build
# directory.
vpath
vpath % .
# Clear the default suffixes, so that built-in rules are not used.
.SUFFIXES :
SHELL := /bin/sh
CC := @CC@
# Configuration parameters.
DESTDIR =
BINDIR := $(DESTDIR)@BINDIR@
INCLUDEDIR := $(DESTDIR)@INCLUDEDIR@
LIBDIR := $(DESTDIR)@LIBDIR@
DATADIR := $(DESTDIR)@DATADIR@
MANDIR := $(DESTDIR)@MANDIR@
# Build parameters.
CPPFLAGS := @CPPFLAGS@ -I@srcroot@include -I@objroot@include
CFLAGS := @CFLAGS@
ifeq (macho, @abi@)
CFLAGS += -dynamic
endif
LDFLAGS := @LDFLAGS@
LIBS := @LIBS@
RPATH_EXTRA := @RPATH_EXTRA@
ifeq (macho, @abi@)
SO := dylib
WL_SONAME := dylib_install_name
else
SO := so
WL_SONAME := soname
endif
REV := 1
ifeq (macho, @abi@)
TEST_LIBRARY_PATH := DYLD_FALLBACK_LIBRARY_PATH=@objroot@lib
else
TEST_LIBRARY_PATH :=
endif
# Lists of files.
BINS := @srcroot@bin/pprof
CHDRS := @objroot@include/jemalloc/jemalloc@install_suffix@.h \
@objroot@include/jemalloc/jemalloc_defs@install_suffix@.h
CSRCS := @srcroot@src/jemalloc.c @srcroot@src/arena.c @srcroot@src/atomic.c \
@srcroot@src/base.c @srcroot@src/bitmap.c @srcroot@src/chunk.c \
@srcroot@src/chunk_dss.c @srcroot@src/chunk_mmap.c \
@srcroot@src/chunk_swap.c @srcroot@src/ckh.c @srcroot@src/ctl.c \
@srcroot@src/extent.c @srcroot@src/hash.c @srcroot@src/huge.c \
@srcroot@src/mb.c @srcroot@src/mutex.c @srcroot@src/prof.c \
@srcroot@src/rtree.c @srcroot@src/stats.c @srcroot@src/tcache.c
ifeq (macho, @abi@)
CSRCS += @srcroot@src/zone.c
endif
STATIC_LIBS := @objroot@lib/libjemalloc@install_suffix@.a
DSOS := @objroot@lib/libjemalloc@install_suffix@.$(SO).$(REV) \
@objroot@lib/libjemalloc@install_suffix@.$(SO) \
@objroot@lib/libjemalloc@install_suffix@_pic.a
MAN3 := @objroot@doc/jemalloc@install_suffix@.3
DOCS_XML := @objroot@doc/jemalloc@install_suffix@.xml
DOCS_HTML := $(DOCS_XML:@objroot@%.xml=@srcroot@%.html)
DOCS_MAN3 := $(DOCS_XML:@objroot@%.xml=@srcroot@%.3)
DOCS := $(DOCS_HTML) $(DOCS_MAN3)
CTESTS := @srcroot@test/allocated.c @srcroot@test/allocm.c \
@srcroot@test/bitmap.c @srcroot@test/mremap.c \
@srcroot@test/posix_memalign.c @srcroot@test/rallocm.c \
@srcroot@test/thread_arena.c
.PHONY: all dist doc_html doc_man doc
.PHONY: install_bin install_include install_lib
.PHONY: install_html install_man install_doc install
.PHONY: tests check clean distclean relclean
.SECONDARY : $(CTESTS:@srcroot@%.c=@objroot@%.o)
# Default target.
all: $(DSOS) $(STATIC_LIBS)
dist: doc
@srcroot@doc/%.html : @objroot@doc/%.xml @srcroot@doc/stylesheet.xsl @objroot@doc/html.xsl
@XSLTPROC@ -o $@ @objroot@doc/html.xsl $<
@srcroot@doc/%.3 : @objroot@doc/%.xml @srcroot@doc/stylesheet.xsl @objroot@doc/manpages.xsl
@XSLTPROC@ -o $@ @objroot@doc/manpages.xsl $<
doc_html: $(DOCS_HTML)
doc_man: $(DOCS_MAN3)
doc: $(DOCS)
#
# Include generated dependency files.
#
-include $(CSRCS:@srcroot@%.c=@objroot@%.d)
-include $(CSRCS:@srcroot@%.c=@objroot@%.pic.d)
-include $(CTESTS:@srcroot@%.c=@objroot@%.d)
@objroot@src/%.o: @srcroot@src/%.c
@mkdir -p $(@D)
$(CC) $(CFLAGS) -c $(CPPFLAGS) -o $@ $<
@$(SHELL) -ec "$(CC) -MM $(CPPFLAGS) $< | sed \"s/\($(subst /,\/,$(notdir $(basename $@)))\)\.o\([ :]*\)/$(subst /,\/,$(strip $(dir $@)))\1.o \2/g\" > $(@:%.o=%.d)"
@objroot@src/%.pic.o: @srcroot@src/%.c
@mkdir -p $(@D)
$(CC) $(CFLAGS) -fPIC -DPIC -c $(CPPFLAGS) -o $@ $<
@$(SHELL) -ec "$(CC) -MM $(CPPFLAGS) $< | sed \"s/\($(subst /,\/,$(notdir $(basename $(basename $@))))\)\.o\([ :]*\)/$(subst /,\/,$(strip $(dir $@)))\1.pic.o \2/g\" > $(@:%.o=%.d)"
%.$(SO) : %.$(SO).$(REV)
@mkdir -p $(@D)
ln -sf $(<F) $@
@objroot@lib/libjemalloc@install_suffix@.$(SO).$(REV) : $(CSRCS:@srcroot@%.c=@objroot@%.pic.o)
@mkdir -p $(@D)
$(CC) -shared -Wl,-$(WL_SONAME),$(@F) $(RPATH_EXTRA:%=@RPATH@%) -o $@ $+ $(LDFLAGS) $(LIBS)
@objroot@lib/libjemalloc@install_suffix@_pic.a : $(CSRCS:@srcroot@%.c=@objroot@%.pic.o)
@mkdir -p $(@D)
ar crus $@ $+
@objroot@lib/libjemalloc@install_suffix@.a : $(CSRCS:@srcroot@%.c=@objroot@%.o)
@mkdir -p $(@D)
ar crus $@ $+
@objroot@test/%.o: @srcroot@test/%.c
@mkdir -p $(@D)
$(CC) $(CFLAGS) -c $(CPPFLAGS) -I@objroot@test -o $@ $<
@$(SHELL) -ec "$(CC) -MM $(CPPFLAGS) -I@objroot@test $< | sed \"s/\($(subst /,\/,$(notdir $(basename $@)))\)\.o\([ :]*\)/$(subst /,\/,$(strip $(dir $@)))\1.o \2/g\" > $(@:%.o=%.d)"
# Automatic dependency generation misses #include "*.c".
@objroot@test/bitmap.o : @objroot@src/bitmap.o
@objroot@test/%: @objroot@test/%.o \
@objroot@lib/libjemalloc@install_suffix@.$(SO)
@mkdir -p $(@D)
ifneq (@RPATH@, )
$(CC) -o $@ $< @RPATH@@objroot@lib -L@objroot@lib -ljemalloc@install_suffix@ -lpthread
else
$(CC) -o $@ $< -L@objroot@lib -ljemalloc@install_suffix@ -lpthread
endif
install_bin:
install -d $(BINDIR)
@for b in $(BINS); do \
echo "install -m 755 $$b $(BINDIR)"; \
install -m 755 $$b $(BINDIR); \
done
install_include:
install -d $(INCLUDEDIR)/jemalloc
@for h in $(CHDRS); do \
echo "install -m 644 $$h $(INCLUDEDIR)/jemalloc"; \
install -m 644 $$h $(INCLUDEDIR)/jemalloc; \
done
install_lib: $(DSOS) $(STATIC_LIBS)
install -d $(LIBDIR)
install -m 755 @objroot@lib/libjemalloc@install_suffix@.$(SO).$(REV) $(LIBDIR)
ln -sf libjemalloc@install_suffix@.$(SO).$(REV) $(LIBDIR)/libjemalloc@install_suffix@.$(SO)
install -m 755 @objroot@lib/libjemalloc@install_suffix@_pic.a $(LIBDIR)
install -m 755 @objroot@lib/libjemalloc@install_suffix@.a $(LIBDIR)
install_html:
install -d $(DATADIR)/doc/jemalloc@install_suffix@
@for d in $(DOCS_HTML); do \
echo "install -m 644 $$d $(DATADIR)/doc/jemalloc@install_suffix@"; \
install -m 644 $$d $(DATADIR)/doc/jemalloc@install_suffix@; \
done
install_man:
install -d $(MANDIR)/man3
@for d in $(DOCS_MAN3); do \
echo "install -m 644 $$d $(MANDIR)/man3"; \
install -m 644 $$d $(MANDIR)/man3; \
done
install_doc: install_html install_man
install: install_bin install_include install_lib install_doc
tests: $(CTESTS:@srcroot@%.c=@objroot@%)
check: tests
@mkdir -p @objroot@test
@$(SHELL) -c 'total=0; \
failures=0; \
echo "========================================="; \
for t in $(CTESTS:@srcroot@%.c=@objroot@%); do \
total=`expr $$total + 1`; \
/bin/echo -n "$${t} ... "; \
$(TEST_LIBRARY_PATH) $${t} @abs_srcroot@ @abs_objroot@ \
> @objroot@$${t}.out 2>&1; \
if test -e "@srcroot@$${t}.exp"; then \
diff -u @srcroot@$${t}.exp \
@objroot@$${t}.out >/dev/null 2>&1; \
fail=$$?; \
if test "$${fail}" -eq "1" ; then \
failures=`expr $${failures} + 1`; \
echo "*** FAIL ***"; \
else \
echo "pass"; \
fi; \
else \
echo "*** FAIL *** (.exp file is missing)"; \
failures=`expr $${failures} + 1`; \
fi; \
done; \
echo "========================================="; \
echo "Failures: $${failures}/$${total}"'
clean:
rm -f $(CSRCS:@srcroot@%.c=@objroot@%.o)
rm -f $(CSRCS:@srcroot@%.c=@objroot@%.pic.o)
rm -f $(CSRCS:@srcroot@%.c=@objroot@%.d)
rm -f $(CSRCS:@srcroot@%.c=@objroot@%.pic.d)
rm -f $(CTESTS:@srcroot@%.c=@objroot@%)
rm -f $(CTESTS:@srcroot@%.c=@objroot@%.o)
rm -f $(CTESTS:@srcroot@%.c=@objroot@%.d)
rm -f $(CTESTS:@srcroot@%.c=@objroot@%.out)
rm -f $(DSOS) $(STATIC_LIBS)
distclean: clean
rm -rf @objroot@autom4te.cache
rm -f @objroot@config.log
rm -f @objroot@config.status
rm -f @objroot@config.stamp
rm -f @cfghdrs_out@
rm -f @cfgoutputs_out@
relclean: distclean
rm -f @objroot@configure
rm -f @srcroot@VERSION
rm -f $(DOCS_HTML)
rm -f $(DOCS_MAN3)
#===============================================================================
# Re-configuration rules.
ifeq (@enable_autogen@, 1)
@srcroot@configure : @srcroot@configure.ac
cd ./@srcroot@ && @AUTOCONF@
@objroot@config.status : @srcroot@configure
./@objroot@config.status --recheck
@srcroot@config.stamp.in : @srcroot@configure.ac
echo stamp > @srcroot@config.stamp.in
@objroot@config.stamp : @cfgoutputs_in@ @cfghdrs_in@ @srcroot@configure
./@objroot@config.status
@touch $@
# There must be some action in order for make to re-read Makefile when it is
# out of date.
@cfgoutputs_out@ @cfghdrs_out@ : @objroot@config.stamp
@true
endif

16 deps/jemalloc.orig/README vendored Normal file
@@ -0,0 +1,16 @@
jemalloc is a general-purpose scalable concurrent malloc(3) implementation.
This distribution is a stand-alone "portable" implementation that currently
targets Linux and Apple OS X. jemalloc is included as the default allocator in
the FreeBSD and NetBSD operating systems, and it is used by the Mozilla Firefox
web browser on Microsoft Windows-related platforms. Depending on your needs,
one of the other divergent versions may suit you better than this distribution.
The COPYING file contains copyright and licensing information.
The INSTALL file contains information on how to configure, build, and install
jemalloc.
The ChangeLog file contains a brief summary of changes for each release.
URL: http://www.canonware.com/jemalloc/

17 deps/jemalloc.orig/autogen.sh vendored Executable file
@@ -0,0 +1,17 @@
#!/bin/sh
for i in autoconf; do
echo "$i"
$i
if [ $? -ne 0 ]; then
echo "Error $? in $i"
exit 1
fi
done
echo "./configure --enable-autogen $@"
./configure --enable-autogen $@
if [ $? -ne 0 ]; then
echo "Error $? in ./configure"
exit 1
fi

4893 deps/jemalloc.orig/bin/pprof vendored Executable file
File diff suppressed because it is too large.

1456 deps/jemalloc.orig/config.guess vendored Executable file
File diff suppressed because it is too large.

0 deps/jemalloc.orig/config.stamp.in vendored Normal file

1549 deps/jemalloc.orig/config.sub vendored Executable file
File diff suppressed because it is too large.

938 deps/jemalloc.orig/configure.ac vendored Normal file
@@ -0,0 +1,938 @@
dnl Process this file with autoconf to produce a configure script.
AC_INIT([Makefile.in])
dnl ============================================================================
dnl Custom macro definitions.
dnl JE_CFLAGS_APPEND(cflag)
AC_DEFUN([JE_CFLAGS_APPEND],
[
AC_MSG_CHECKING([whether compiler supports $1])
TCFLAGS="${CFLAGS}"
if test "x${CFLAGS}" = "x" ; then
CFLAGS="$1"
else
CFLAGS="${CFLAGS} $1"
fi
AC_RUN_IFELSE([AC_LANG_PROGRAM(
[[
]], [[
return 0;
]])],
AC_MSG_RESULT([yes]),
AC_MSG_RESULT([no])
[CFLAGS="${TCFLAGS}"]
)
])
dnl JE_COMPILABLE(label, hcode, mcode, rvar)
AC_DEFUN([JE_COMPILABLE],
[
AC_MSG_CHECKING([whether $1 is compilable])
AC_RUN_IFELSE([AC_LANG_PROGRAM(
[$2], [$3])],
AC_MSG_RESULT([yes])
[$4="yes"],
AC_MSG_RESULT([no])
[$4="no"]
)
])
dnl ============================================================================
srcroot=$srcdir
if test "x${srcroot}" = "x." ; then
srcroot=""
else
srcroot="${srcroot}/"
fi
AC_SUBST([srcroot])
abs_srcroot="`cd \"${srcdir}\"; pwd`/"
AC_SUBST([abs_srcroot])
objroot=""
AC_SUBST([objroot])
abs_objroot="`pwd`/"
AC_SUBST([abs_objroot])
dnl Munge install path variables.
if test "x$prefix" = "xNONE" ; then
prefix="/usr/local"
fi
if test "x$exec_prefix" = "xNONE" ; then
exec_prefix=$prefix
fi
PREFIX=$prefix
AC_SUBST([PREFIX])
BINDIR=`eval echo $bindir`
BINDIR=`eval echo $BINDIR`
AC_SUBST([BINDIR])
INCLUDEDIR=`eval echo $includedir`
INCLUDEDIR=`eval echo $INCLUDEDIR`
AC_SUBST([INCLUDEDIR])
LIBDIR=`eval echo $libdir`
LIBDIR=`eval echo $LIBDIR`
AC_SUBST([LIBDIR])
DATADIR=`eval echo $datadir`
DATADIR=`eval echo $DATADIR`
AC_SUBST([DATADIR])
MANDIR=`eval echo $mandir`
MANDIR=`eval echo $MANDIR`
AC_SUBST([MANDIR])
dnl Support for building documentation.
AC_PATH_PROG([XSLTPROC], [xsltproc], , [$PATH])
AC_ARG_WITH([xslroot],
[AS_HELP_STRING([--with-xslroot=<path>], [XSL stylesheet root path])],
if test "x$with_xslroot" = "xno" ; then
XSLROOT="/usr/share/xml/docbook/stylesheet/docbook-xsl"
else
XSLROOT="${with_xslroot}"
fi,
XSLROOT="/usr/share/xml/docbook/stylesheet/docbook-xsl"
)
AC_SUBST([XSLROOT])
dnl If CFLAGS isn't defined, set CFLAGS to something reasonable. Otherwise,
dnl just prevent autoconf from molesting CFLAGS.
CFLAGS=$CFLAGS
AC_PROG_CC
if test "x$CFLAGS" = "x" ; then
no_CFLAGS="yes"
if test "x$GCC" = "xyes" ; then
JE_CFLAGS_APPEND([-std=gnu99])
JE_CFLAGS_APPEND([-Wall])
JE_CFLAGS_APPEND([-pipe])
JE_CFLAGS_APPEND([-g3])
fi
fi
dnl Append EXTRA_CFLAGS to CFLAGS, if defined.
if test "x$EXTRA_CFLAGS" != "x" ; then
JE_CFLAGS_APPEND([$EXTRA_CFLAGS])
fi
AC_PROG_CPP
AC_CHECK_SIZEOF([void *])
if test "x${ac_cv_sizeof_void_p}" = "x8" ; then
LG_SIZEOF_PTR=3
elif test "x${ac_cv_sizeof_void_p}" = "x4" ; then
LG_SIZEOF_PTR=2
else
AC_MSG_ERROR([Unsupported pointer size: ${ac_cv_sizeof_void_p}])
fi
AC_DEFINE_UNQUOTED([LG_SIZEOF_PTR], [$LG_SIZEOF_PTR])
AC_CHECK_SIZEOF([int])
if test "x${ac_cv_sizeof_int}" = "x8" ; then
LG_SIZEOF_INT=3
elif test "x${ac_cv_sizeof_int}" = "x4" ; then
LG_SIZEOF_INT=2
else
AC_MSG_ERROR([Unsupported int size: ${ac_cv_sizeof_int}])
fi
AC_DEFINE_UNQUOTED([LG_SIZEOF_INT], [$LG_SIZEOF_INT])
AC_CHECK_SIZEOF([long])
if test "x${ac_cv_sizeof_long}" = "x8" ; then
LG_SIZEOF_LONG=3
elif test "x${ac_cv_sizeof_long}" = "x4" ; then
LG_SIZEOF_LONG=2
else
AC_MSG_ERROR([Unsupported long size: ${ac_cv_sizeof_long}])
fi
AC_DEFINE_UNQUOTED([LG_SIZEOF_LONG], [$LG_SIZEOF_LONG])
AC_CANONICAL_HOST
dnl CPU-specific settings.
CPU_SPINWAIT=""
case "${host_cpu}" in
i[[345]]86)
;;
i686)
JE_COMPILABLE([__asm__], [], [[__asm__ volatile("pause"); return 0;]],
[asm])
if test "x${asm}" = "xyes" ; then
CPU_SPINWAIT='__asm__ volatile("pause")'
fi
;;
x86_64)
JE_COMPILABLE([__asm__ syntax], [],
[[__asm__ volatile("pause"); return 0;]], [asm])
if test "x${asm}" = "xyes" ; then
CPU_SPINWAIT='__asm__ volatile("pause")'
fi
;;
*)
;;
esac
AC_DEFINE_UNQUOTED([CPU_SPINWAIT], [$CPU_SPINWAIT])
dnl Platform-specific settings. abi and RPATH can probably be determined
dnl programmatically, but doing so is error-prone, which makes it generally
dnl not worth the trouble.
dnl
dnl Define cpp macros in CPPFLAGS, rather than doing AC_DEFINE(macro), since the
dnl definitions need to be seen before any headers are included, which is a pain
dnl to make happen otherwise.
case "${host}" in
*-*-darwin*)
CFLAGS="$CFLAGS -fno-common -no-cpp-precomp"
abi="macho"
AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE])
RPATH=""
;;
*-*-freebsd*)
CFLAGS="$CFLAGS"
abi="elf"
AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE])
RPATH="-Wl,-rpath,"
;;
*-*-linux*)
CFLAGS="$CFLAGS"
CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE"
abi="elf"
AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED])
RPATH="-Wl,-rpath,"
;;
*-*-netbsd*)
AC_MSG_CHECKING([ABI])
AC_COMPILE_IFELSE([AC_LANG_PROGRAM(
[[#ifdef __ELF__
/* ELF */
#else
#error aout
#endif
]])],
[CFLAGS="$CFLAGS"; abi="elf"],
[abi="aout"])
AC_MSG_RESULT([$abi])
AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE])
RPATH="-Wl,-rpath,"
;;
*-*-solaris2*)
CFLAGS="$CFLAGS"
abi="elf"
RPATH="-Wl,-R,"
dnl Solaris needs this for sigwait().
CPPFLAGS="$CPPFLAGS -D_POSIX_PTHREAD_SEMANTICS"
LIBS="$LIBS -lposix4 -lsocket -lnsl"
;;
*)
AC_MSG_RESULT([Unsupported operating system: ${host}])
abi="elf"
RPATH="-Wl,-rpath,"
;;
esac
AC_SUBST([abi])
AC_SUBST([RPATH])
JE_COMPILABLE([__attribute__ syntax],
[static __attribute__((unused)) void foo(void){}],
[],
[attribute])
if test "x${attribute}" = "xyes" ; then
AC_DEFINE([JEMALLOC_HAVE_ATTR], [ ])
if test "x${GCC}" = "xyes" -a "x${abi}" = "xelf"; then
JE_CFLAGS_APPEND([-fvisibility=hidden])
fi
fi
JE_COMPILABLE([mremap(...MREMAP_FIXED...)], [
#define _GNU_SOURCE
#include <sys/mman.h>
], [
void *p = mremap((void *)0, 0, 0, MREMAP_MAYMOVE|MREMAP_FIXED, (void *)0);
], [mremap_fixed])
if test "x${mremap_fixed}" = "xyes" ; then
AC_DEFINE([JEMALLOC_MREMAP_FIXED])
fi
dnl Support optional additions to rpath.
AC_ARG_WITH([rpath],
[AS_HELP_STRING([--with-rpath=<rpath>], [Colon-separated rpath (ELF systems only)])],
if test "x$with_rpath" = "xno" ; then
RPATH_EXTRA=
else
RPATH_EXTRA="`echo $with_rpath | tr \":\" \" \"`"
fi,
RPATH_EXTRA=
)
AC_SUBST([RPATH_EXTRA])
dnl Disable rules that do automatic regeneration of configure output by default.
AC_ARG_ENABLE([autogen],
[AS_HELP_STRING([--enable-autogen], [Automatically regenerate configure output])],
if test "x$enable_autogen" = "xno" ; then
enable_autogen="0"
else
enable_autogen="1"
fi
,
enable_autogen="0"
)
AC_SUBST([enable_autogen])
AC_PROG_INSTALL
AC_PROG_RANLIB
AC_PATH_PROG([AR], [ar], , [$PATH])
AC_PATH_PROG([LD], [ld], , [$PATH])
AC_PATH_PROG([AUTOCONF], [autoconf], , [$PATH])
dnl Do not prefix public APIs by default.
AC_ARG_WITH([jemalloc_prefix],
[AS_HELP_STRING([--with-jemalloc-prefix=<prefix>], [Prefix to prepend to all public APIs])],
[JEMALLOC_PREFIX="$with_jemalloc_prefix"],
[if test "x$abi" != "xmacho" ; then
JEMALLOC_PREFIX=""
else
JEMALLOC_PREFIX="je_"
fi]
)
if test "x$JEMALLOC_PREFIX" != "x" ; then
JEMALLOC_CPREFIX=`echo ${JEMALLOC_PREFIX} | tr "a-z" "A-Z"`
AC_DEFINE_UNQUOTED([JEMALLOC_PREFIX], ["$JEMALLOC_PREFIX"])
AC_DEFINE_UNQUOTED([JEMALLOC_CPREFIX], ["$JEMALLOC_CPREFIX"])
AC_DEFINE_UNQUOTED([JEMALLOC_P(string_that_no_one_should_want_to_use_as_a_jemalloc_API_prefix)], [${JEMALLOC_PREFIX}##string_that_no_one_should_want_to_use_as_a_jemalloc_API_prefix])
fi
dnl Do not mangle library-private APIs by default.
AC_ARG_WITH([private_namespace],
[AS_HELP_STRING([--with-private-namespace=<prefix>], [Prefix to prepend to all library-private APIs])],
[JEMALLOC_PRIVATE_NAMESPACE="$with_private_namespace"],
[JEMALLOC_PRIVATE_NAMESPACE=""]
)
AC_DEFINE_UNQUOTED([JEMALLOC_PRIVATE_NAMESPACE], ["$JEMALLOC_PRIVATE_NAMESPACE"])
if test "x$JEMALLOC_PRIVATE_NAMESPACE" != "x" ; then
AC_DEFINE_UNQUOTED([JEMALLOC_N(string_that_no_one_should_want_to_use_as_a_jemalloc_private_namespace_prefix)], [${JEMALLOC_PRIVATE_NAMESPACE}##string_that_no_one_should_want_to_use_as_a_jemalloc_private_namespace_prefix])
else
AC_DEFINE_UNQUOTED([JEMALLOC_N(string_that_no_one_should_want_to_use_as_a_jemalloc_private_namespace_prefix)], [string_that_no_one_should_want_to_use_as_a_jemalloc_private_namespace_prefix])
fi
dnl Do not add suffix to installed files by default.
AC_ARG_WITH([install_suffix],
[AS_HELP_STRING([--with-install-suffix=<suffix>], [Suffix to append to all installed files])],
[INSTALL_SUFFIX="$with_install_suffix"],
[INSTALL_SUFFIX=]
)
install_suffix="$INSTALL_SUFFIX"
AC_SUBST([install_suffix])
cfgoutputs_in="${srcroot}Makefile.in"
cfgoutputs_in="${cfgoutputs_in} ${srcroot}doc/html.xsl.in"
cfgoutputs_in="${cfgoutputs_in} ${srcroot}doc/manpages.xsl.in"
cfgoutputs_in="${cfgoutputs_in} ${srcroot}doc/jemalloc.xml.in"
cfgoutputs_in="${cfgoutputs_in} ${srcroot}include/jemalloc/jemalloc.h.in"
cfgoutputs_in="${cfgoutputs_in} ${srcroot}include/jemalloc/internal/jemalloc_internal.h.in"
cfgoutputs_in="${cfgoutputs_in} ${srcroot}test/jemalloc_test.h.in"
cfgoutputs_out="Makefile"
cfgoutputs_out="${cfgoutputs_out} doc/html.xsl"
cfgoutputs_out="${cfgoutputs_out} doc/manpages.xsl"
cfgoutputs_out="${cfgoutputs_out} doc/jemalloc${install_suffix}.xml"
cfgoutputs_out="${cfgoutputs_out} include/jemalloc/jemalloc${install_suffix}.h"
cfgoutputs_out="${cfgoutputs_out} include/jemalloc/internal/jemalloc_internal.h"
cfgoutputs_out="${cfgoutputs_out} test/jemalloc_test.h"
cfgoutputs_tup="Makefile"
cfgoutputs_tup="${cfgoutputs_tup} doc/html.xsl:doc/html.xsl.in"
cfgoutputs_tup="${cfgoutputs_tup} doc/manpages.xsl:doc/manpages.xsl.in"
cfgoutputs_tup="${cfgoutputs_tup} doc/jemalloc${install_suffix}.xml:doc/jemalloc.xml.in"
cfgoutputs_tup="${cfgoutputs_tup} include/jemalloc/jemalloc${install_suffix}.h:include/jemalloc/jemalloc.h.in"
cfgoutputs_tup="${cfgoutputs_tup} include/jemalloc/internal/jemalloc_internal.h"
cfgoutputs_tup="${cfgoutputs_tup} test/jemalloc_test.h:test/jemalloc_test.h.in"
cfghdrs_in="${srcroot}include/jemalloc/jemalloc_defs.h.in"
cfghdrs_out="include/jemalloc/jemalloc_defs${install_suffix}.h"
cfghdrs_tup="include/jemalloc/jemalloc_defs${install_suffix}.h:include/jemalloc/jemalloc_defs.h.in"
dnl Do not silence irrelevant compiler warnings by default, since enabling this
dnl option incurs a performance penalty.
AC_ARG_ENABLE([cc-silence],
[AS_HELP_STRING([--enable-cc-silence],
[Silence irrelevant compiler warnings])],
[if test "x$enable_cc_silence" = "xno" ; then
enable_cc_silence="0"
else
enable_cc_silence="1"
fi
],
[enable_cc_silence="0"]
)
if test "x$enable_cc_silence" = "x1" ; then
AC_DEFINE([JEMALLOC_CC_SILENCE])
fi
dnl Do not compile with debugging by default.
AC_ARG_ENABLE([debug],
[AS_HELP_STRING([--enable-debug], [Build debugging code])],
[if test "x$enable_debug" = "xno" ; then
enable_debug="0"
else
enable_debug="1"
fi
],
[enable_debug="0"]
)
if test "x$enable_debug" = "x1" ; then
AC_DEFINE([JEMALLOC_DEBUG], [ ])
AC_DEFINE([JEMALLOC_IVSALLOC], [ ])
fi
AC_SUBST([enable_debug])
dnl Only optimize if not debugging.
if test "x$enable_debug" = "x0" -a "x$no_CFLAGS" = "xyes" ; then
dnl Make sure that an optimization flag was not specified in EXTRA_CFLAGS.
optimize="no"
echo "$EXTRA_CFLAGS" | grep "\-O" >/dev/null || optimize="yes"
if test "x${optimize}" = "xyes" ; then
if test "x$GCC" = "xyes" ; then
JE_CFLAGS_APPEND([-O3])
JE_CFLAGS_APPEND([-funroll-loops])
else
JE_CFLAGS_APPEND([-O])
fi
fi
fi
dnl Do not enable statistics calculation by default.
AC_ARG_ENABLE([stats],
[AS_HELP_STRING([--enable-stats], [Enable statistics calculation/reporting])],
[if test "x$enable_stats" = "xno" ; then
enable_stats="0"
else
enable_stats="1"
fi
],
[enable_stats="0"]
)
if test "x$enable_stats" = "x1" ; then
AC_DEFINE([JEMALLOC_STATS], [ ])
fi
AC_SUBST([enable_stats])
dnl Do not enable profiling by default.
AC_ARG_ENABLE([prof],
[AS_HELP_STRING([--enable-prof], [Enable allocation profiling])],
[if test "x$enable_prof" = "xno" ; then
enable_prof="0"
else
enable_prof="1"
fi
],
[enable_prof="0"]
)
if test "x$enable_prof" = "x1" ; then
backtrace_method=""
else
backtrace_method="N/A"
fi
AC_ARG_ENABLE([prof-libunwind],
[AS_HELP_STRING([--enable-prof-libunwind], [Use libunwind for backtracing])],
[if test "x$enable_prof_libunwind" = "xno" ; then
enable_prof_libunwind="0"
else
enable_prof_libunwind="1"
fi
],
[enable_prof_libunwind="0"]
)
AC_ARG_WITH([static_libunwind],
[AS_HELP_STRING([--with-static-libunwind=<libunwind.a>],
[Path to static libunwind library; use rather than dynamically linking])],
if test "x$with_static_libunwind" = "xno" ; then
LUNWIND="-lunwind"
else
if test ! -f "$with_static_libunwind" ; then
AC_MSG_ERROR([Static libunwind not found: $with_static_libunwind])
fi
LUNWIND="$with_static_libunwind"
fi,
LUNWIND="-lunwind"
)
if test "x$backtrace_method" = "x" -a "x$enable_prof_libunwind" = "x1" ; then
AC_CHECK_HEADERS([libunwind.h], , [enable_prof_libunwind="0"])
if test "x$LUNWIND" = "x-lunwind" ; then
AC_CHECK_LIB([unwind], [backtrace], [LIBS="$LIBS $LUNWIND"],
[enable_prof_libunwind="0"])
else
LIBS="$LIBS $LUNWIND"
fi
if test "x${enable_prof_libunwind}" = "x1" ; then
backtrace_method="libunwind"
AC_DEFINE([JEMALLOC_PROF_LIBUNWIND], [ ])
fi
fi
AC_ARG_ENABLE([prof-libgcc],
[AS_HELP_STRING([--disable-prof-libgcc],
[Do not use libgcc for backtracing])],
[if test "x$enable_prof_libgcc" = "xno" ; then
enable_prof_libgcc="0"
else
enable_prof_libgcc="1"
fi
],
[enable_prof_libgcc="1"]
)
if test "x$backtrace_method" = "x" -a "x$enable_prof_libgcc" = "x1" \
-a "x$GCC" = "xyes" ; then
AC_CHECK_HEADERS([unwind.h], , [enable_prof_libgcc="0"])
AC_CHECK_LIB([gcc], [_Unwind_Backtrace], [LIBS="$LIBS -lgcc"], [enable_prof_libgcc="0"])
dnl The following is conservative, in that it only has entries for CPUs on
dnl which jemalloc has been tested.
AC_MSG_CHECKING([libgcc-based backtracing reliability on ${host_cpu}])
case "${host_cpu}" in
i[[3456]]86)
AC_MSG_RESULT([unreliable])
enable_prof_libgcc="0";
;;
x86_64)
AC_MSG_RESULT([reliable])
;;
*)
AC_MSG_RESULT([unreliable])
enable_prof_libgcc="0";
;;
esac
if test "x${enable_prof_libgcc}" = "x1" ; then
backtrace_method="libgcc"
AC_DEFINE([JEMALLOC_PROF_LIBGCC], [ ])
fi
else
enable_prof_libgcc="0"
fi
AC_ARG_ENABLE([prof-gcc],
[AS_HELP_STRING([--disable-prof-gcc],
[Do not use gcc intrinsics for backtracing])],
[if test "x$enable_prof_gcc" = "xno" ; then
enable_prof_gcc="0"
else
enable_prof_gcc="1"
fi
],
[enable_prof_gcc="1"]
)
if test "x$backtrace_method" = "x" -a "x$enable_prof_gcc" = "x1" \
-a "x$GCC" = "xyes" ; then
backtrace_method="gcc intrinsics"
AC_DEFINE([JEMALLOC_PROF_GCC], [ ])
else
enable_prof_gcc="0"
fi
if test "x$backtrace_method" = "x" ; then
backtrace_method="none (disabling profiling)"
enable_prof="0"
fi
AC_MSG_CHECKING([configured backtracing method])
AC_MSG_RESULT([$backtrace_method])
if test "x$enable_prof" = "x1" ; then
LIBS="$LIBS -lm"
AC_DEFINE([JEMALLOC_PROF], [ ])
fi
AC_SUBST([enable_prof])
dnl Enable tiny allocations by default.
AC_ARG_ENABLE([tiny],
[AS_HELP_STRING([--disable-tiny], [Disable tiny (sub-quantum) allocations])],
[if test "x$enable_tiny" = "xno" ; then
enable_tiny="0"
else
enable_tiny="1"
fi
],
[enable_tiny="1"]
)
if test "x$enable_tiny" = "x1" ; then
AC_DEFINE([JEMALLOC_TINY], [ ])
fi
AC_SUBST([enable_tiny])
dnl Enable thread-specific caching by default.
AC_ARG_ENABLE([tcache],
[AS_HELP_STRING([--disable-tcache], [Disable per thread caches])],
[if test "x$enable_tcache" = "xno" ; then
enable_tcache="0"
else
enable_tcache="1"
fi
],
[enable_tcache="1"]
)
if test "x$enable_tcache" = "x1" ; then
AC_DEFINE([JEMALLOC_TCACHE], [ ])
fi
AC_SUBST([enable_tcache])
dnl Do not enable mmap()ped swap files by default.
AC_ARG_ENABLE([swap],
[AS_HELP_STRING([--enable-swap], [Enable mmap()ped swap files])],
[if test "x$enable_swap" = "xno" ; then
enable_swap="0"
else
enable_swap="1"
fi
],
[enable_swap="0"]
)
if test "x$enable_swap" = "x1" ; then
AC_DEFINE([JEMALLOC_SWAP], [ ])
fi
AC_SUBST([enable_swap])
dnl Do not enable allocation from DSS by default.
AC_ARG_ENABLE([dss],
[AS_HELP_STRING([--enable-dss], [Enable allocation from DSS])],
[if test "x$enable_dss" = "xno" ; then
enable_dss="0"
else
enable_dss="1"
fi
],
[enable_dss="0"]
)
if test "x$enable_dss" = "x1" ; then
AC_DEFINE([JEMALLOC_DSS], [ ])
fi
AC_SUBST([enable_dss])
dnl Do not support the junk/zero filling option by default.
AC_ARG_ENABLE([fill],
[AS_HELP_STRING([--enable-fill], [Support junk/zero filling option])],
[if test "x$enable_fill" = "xno" ; then
enable_fill="0"
else
enable_fill="1"
fi
],
[enable_fill="0"]
)
if test "x$enable_fill" = "x1" ; then
AC_DEFINE([JEMALLOC_FILL], [ ])
fi
AC_SUBST([enable_fill])
dnl Do not support the xmalloc option by default.
AC_ARG_ENABLE([xmalloc],
[AS_HELP_STRING([--enable-xmalloc], [Support xmalloc option])],
[if test "x$enable_xmalloc" = "xno" ; then
enable_xmalloc="0"
else
enable_xmalloc="1"
fi
],
[enable_xmalloc="0"]
)
if test "x$enable_xmalloc" = "x1" ; then
AC_DEFINE([JEMALLOC_XMALLOC], [ ])
fi
AC_SUBST([enable_xmalloc])
dnl Do not support the SYSV option by default.
AC_ARG_ENABLE([sysv],
[AS_HELP_STRING([--enable-sysv], [Support SYSV semantics option])],
[if test "x$enable_sysv" = "xno" ; then
enable_sysv="0"
else
enable_sysv="1"
fi
],
[enable_sysv="0"]
)
if test "x$enable_sysv" = "x1" ; then
AC_DEFINE([JEMALLOC_SYSV], [ ])
fi
AC_SUBST([enable_sysv])
dnl Do not determine page shift at run time by default.
AC_ARG_ENABLE([dynamic_page_shift],
[AS_HELP_STRING([--enable-dynamic-page-shift],
[Determine page size at run time (don't trust configure result)])],
[if test "x$enable_dynamic_page_shift" = "xno" ; then
enable_dynamic_page_shift="0"
else
enable_dynamic_page_shift="1"
fi
],
[enable_dynamic_page_shift="0"]
)
if test "x$enable_dynamic_page_shift" = "x1" ; then
AC_DEFINE([DYNAMIC_PAGE_SHIFT], [ ])
fi
AC_SUBST([enable_dynamic_page_shift])
AC_MSG_CHECKING([STATIC_PAGE_SHIFT])
AC_RUN_IFELSE([AC_LANG_PROGRAM(
[[#include <stdio.h>
#include <unistd.h>
#include <strings.h>
]], [[
long result;
FILE *f;
result = sysconf(_SC_PAGESIZE);
if (result == -1) {
return 1;
}
f = fopen("conftest.out", "w");
if (f == NULL) {
return 1;
}
fprintf(f, "%u\n", ffs((int)result) - 1);
fclose(f);
return 0;
]])],
[STATIC_PAGE_SHIFT=`cat conftest.out`]
AC_MSG_RESULT([$STATIC_PAGE_SHIFT])
AC_DEFINE_UNQUOTED([STATIC_PAGE_SHIFT], [$STATIC_PAGE_SHIFT]),
AC_MSG_RESULT([error]))
dnl ============================================================================
dnl jemalloc configuration.
dnl
dnl Set VERSION if source directory has an embedded git repository.
if test -d "${srcroot}.git" ; then
git describe --long --abbrev=40 > ${srcroot}VERSION
fi
jemalloc_version=`cat ${srcroot}VERSION`
jemalloc_version_major=`echo ${jemalloc_version} | tr ".g-" " " | awk '{print [$]1}'`
jemalloc_version_minor=`echo ${jemalloc_version} | tr ".g-" " " | awk '{print [$]2}'`
jemalloc_version_bugfix=`echo ${jemalloc_version} | tr ".g-" " " | awk '{print [$]3}'`
jemalloc_version_nrev=`echo ${jemalloc_version} | tr ".g-" " " | awk '{print [$]4}'`
jemalloc_version_gid=`echo ${jemalloc_version} | tr ".g-" " " | awk '{print [$]5}'`
AC_SUBST([jemalloc_version])
AC_SUBST([jemalloc_version_major])
AC_SUBST([jemalloc_version_minor])
AC_SUBST([jemalloc_version_bugfix])
AC_SUBST([jemalloc_version_nrev])
AC_SUBST([jemalloc_version_gid])
dnl ============================================================================
dnl Configure pthreads.
AC_CHECK_HEADERS([pthread.h], , [AC_MSG_ERROR([pthread.h is missing])])
AC_CHECK_LIB([pthread], [pthread_create], [LIBS="$LIBS -lpthread"],
[AC_MSG_ERROR([libpthread is missing])])
CPPFLAGS="$CPPFLAGS -D_REENTRANT"
dnl Enable lazy locking by default.
AC_ARG_ENABLE([lazy_lock],
[AS_HELP_STRING([--disable-lazy-lock],
[Disable lazy locking (always lock, even when single-threaded)])],
[if test "x$enable_lazy_lock" = "xno" ; then
enable_lazy_lock="0"
else
enable_lazy_lock="1"
fi
],
[enable_lazy_lock="1"]
)
if test "x$enable_lazy_lock" = "x1" ; then
AC_CHECK_HEADERS([dlfcn.h], , [AC_MSG_ERROR([dlfcn.h is missing])])
AC_CHECK_LIB([dl], [dlopen], [LIBS="$LIBS -ldl"],
[AC_MSG_ERROR([libdl is missing])])
AC_DEFINE([JEMALLOC_LAZY_LOCK], [ ])
fi
AC_SUBST([enable_lazy_lock])
AC_ARG_ENABLE([tls],
[AS_HELP_STRING([--disable-tls], [Disable thread-local storage (__thread keyword)])],
if test "x$enable_tls" = "xno" ; then
enable_tls="0"
else
enable_tls="1"
fi
,
enable_tls="1"
)
if test "x${enable_tls}" = "x1" ; then
AC_MSG_CHECKING([for TLS])
AC_RUN_IFELSE([AC_LANG_PROGRAM(
[[
__thread int x;
]], [[
x = 42;
return 0;
]])],
AC_MSG_RESULT([yes]),
AC_MSG_RESULT([no])
enable_tls="0")
fi
AC_SUBST([enable_tls])
if test "x${enable_tls}" = "x0" ; then
AC_DEFINE_UNQUOTED([NO_TLS], [ ])
fi
dnl ============================================================================
dnl Check for ffsl(3), and fail if not found. This function exists on all
dnl platforms that jemalloc currently has a chance of functioning on without
dnl modification.
AC_CHECK_FUNC([ffsl], [],
[AC_MSG_ERROR([Cannot build without ffsl(3)])])
dnl ============================================================================
dnl Check for atomic(3) operations as provided on Darwin.
JE_COMPILABLE([Darwin OSAtomic*()], [
#include <libkern/OSAtomic.h>
#include <inttypes.h>
], [
{
int32_t x32 = 0;
volatile int32_t *x32p = &x32;
OSAtomicAdd32(1, x32p);
}
{
int64_t x64 = 0;
volatile int64_t *x64p = &x64;
OSAtomicAdd64(1, x64p);
}
], [osatomic])
if test "x${osatomic}" = "xyes" ; then
AC_DEFINE([JEMALLOC_OSATOMIC])
fi
dnl ============================================================================
dnl Check for spinlock(3) operations as provided on Darwin.
JE_COMPILABLE([Darwin OSSpin*()], [
#include <libkern/OSAtomic.h>
#include <inttypes.h>
], [
OSSpinLock lock = 0;
OSSpinLockLock(&lock);
OSSpinLockUnlock(&lock);
], [osspin])
if test "x${osspin}" = "xyes" ; then
AC_DEFINE([JEMALLOC_OSSPIN])
fi
dnl ============================================================================
dnl Check for allocator-related functions that should be wrapped.
AC_CHECK_FUNC([memalign],
[AC_DEFINE([JEMALLOC_OVERRIDE_MEMALIGN])])
AC_CHECK_FUNC([valloc],
[AC_DEFINE([JEMALLOC_OVERRIDE_VALLOC])])
dnl ============================================================================
dnl Darwin-related configuration.
if test "x${abi}" = "xmacho" ; then
AC_DEFINE([JEMALLOC_IVSALLOC])
AC_DEFINE([JEMALLOC_ZONE])
dnl The szone version jumped from 3 to 6 between the OS X 10.5.x and 10.6
dnl releases. malloc_zone_t and malloc_introspection_t have new fields in
dnl 10.6, which is the only source-level indication of the change.
AC_MSG_CHECKING([malloc zone version])
AC_TRY_COMPILE([#include <stdlib.h>
#include <malloc/malloc.h>], [
static malloc_zone_t zone;
static struct malloc_introspection_t zone_introspect;
zone.size = NULL;
zone.malloc = NULL;
zone.calloc = NULL;
zone.valloc = NULL;
zone.free = NULL;
zone.realloc = NULL;
zone.destroy = NULL;
zone.zone_name = "jemalloc_zone";
zone.batch_malloc = NULL;
zone.batch_free = NULL;
zone.introspect = &zone_introspect;
zone.version = 6;
zone.memalign = NULL;
zone.free_definite_size = NULL;
zone_introspect.enumerator = NULL;
zone_introspect.good_size = NULL;
zone_introspect.check = NULL;
zone_introspect.print = NULL;
zone_introspect.log = NULL;
zone_introspect.force_lock = NULL;
zone_introspect.force_unlock = NULL;
zone_introspect.statistics = NULL;
zone_introspect.zone_locked = NULL;
], [AC_DEFINE_UNQUOTED([JEMALLOC_ZONE_VERSION], [6])
AC_MSG_RESULT([6])],
[AC_DEFINE_UNQUOTED([JEMALLOC_ZONE_VERSION], [3])
AC_MSG_RESULT([3])])
fi
dnl ============================================================================
dnl Check for typedefs, structures, and compiler characteristics.
AC_HEADER_STDBOOL
dnl Process .in files.
AC_SUBST([cfghdrs_in])
AC_SUBST([cfghdrs_out])
AC_CONFIG_HEADERS([$cfghdrs_tup])
dnl ============================================================================
dnl Generate outputs.
AC_CONFIG_FILES([$cfgoutputs_tup config.stamp])
AC_SUBST([cfgoutputs_in])
AC_SUBST([cfgoutputs_out])
AC_OUTPUT
dnl ============================================================================
dnl Print out the results of configuration.
AC_MSG_RESULT([===============================================================================])
AC_MSG_RESULT([jemalloc version : $jemalloc_version])
AC_MSG_RESULT([])
AC_MSG_RESULT([CC : ${CC}])
AC_MSG_RESULT([CPPFLAGS : ${CPPFLAGS}])
AC_MSG_RESULT([CFLAGS : ${CFLAGS}])
AC_MSG_RESULT([LDFLAGS : ${LDFLAGS}])
AC_MSG_RESULT([LIBS : ${LIBS}])
AC_MSG_RESULT([RPATH_EXTRA : ${RPATH_EXTRA}])
AC_MSG_RESULT([])
AC_MSG_RESULT([XSLTPROC : ${XSLTPROC}])
AC_MSG_RESULT([XSLROOT : ${XSLROOT}])
AC_MSG_RESULT([])
AC_MSG_RESULT([PREFIX : ${PREFIX}])
AC_MSG_RESULT([BINDIR : ${BINDIR}])
AC_MSG_RESULT([INCLUDEDIR : ${INCLUDEDIR}])
AC_MSG_RESULT([LIBDIR : ${LIBDIR}])
AC_MSG_RESULT([DATADIR : ${DATADIR}])
AC_MSG_RESULT([MANDIR : ${MANDIR}])
AC_MSG_RESULT([])
AC_MSG_RESULT([srcroot : ${srcroot}])
AC_MSG_RESULT([abs_srcroot : ${abs_srcroot}])
AC_MSG_RESULT([objroot : ${objroot}])
AC_MSG_RESULT([abs_objroot : ${abs_objroot}])
AC_MSG_RESULT([])
AC_MSG_RESULT([JEMALLOC_PREFIX : ${JEMALLOC_PREFIX}])
AC_MSG_RESULT([JEMALLOC_PRIVATE_NAMESPACE])
AC_MSG_RESULT([ : ${JEMALLOC_PRIVATE_NAMESPACE}])
AC_MSG_RESULT([install_suffix : ${install_suffix}])
AC_MSG_RESULT([autogen : ${enable_autogen}])
AC_MSG_RESULT([cc-silence : ${enable_cc_silence}])
AC_MSG_RESULT([debug : ${enable_debug}])
AC_MSG_RESULT([stats : ${enable_stats}])
AC_MSG_RESULT([prof : ${enable_prof}])
AC_MSG_RESULT([prof-libunwind : ${enable_prof_libunwind}])
AC_MSG_RESULT([prof-libgcc : ${enable_prof_libgcc}])
AC_MSG_RESULT([prof-gcc : ${enable_prof_gcc}])
AC_MSG_RESULT([tiny : ${enable_tiny}])
AC_MSG_RESULT([tcache : ${enable_tcache}])
AC_MSG_RESULT([fill : ${enable_fill}])
AC_MSG_RESULT([xmalloc : ${enable_xmalloc}])
AC_MSG_RESULT([sysv : ${enable_sysv}])
AC_MSG_RESULT([swap : ${enable_swap}])
AC_MSG_RESULT([dss : ${enable_dss}])
AC_MSG_RESULT([dynamic_page_shift : ${enable_dynamic_page_shift}])
AC_MSG_RESULT([lazy_lock : ${enable_lazy_lock}])
AC_MSG_RESULT([tls : ${enable_tls}])
AC_MSG_RESULT([===============================================================================])

4
deps/jemalloc.orig/doc/html.xsl.in vendored Normal file
View File

@ -0,0 +1,4 @@
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
<xsl:import href="@XSLROOT@/html/docbook.xsl"/>
<xsl:import href="@abs_srcroot@doc/stylesheet.xsl"/>
</xsl:stylesheet>

2280
deps/jemalloc.orig/doc/jemalloc.xml.in vendored Normal file

File diff suppressed because it is too large

4
deps/jemalloc.orig/doc/manpages.xsl.in vendored Normal file
View File

@ -0,0 +1,4 @@
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
<xsl:import href="@XSLROOT@/manpages/docbook.xsl"/>
<xsl:import href="@abs_srcroot@doc/stylesheet.xsl"/>
</xsl:stylesheet>

7
deps/jemalloc.orig/doc/stylesheet.xsl vendored Normal file
View File

@ -0,0 +1,7 @@
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
<xsl:param name="funcsynopsis.style">ansi</xsl:param>
<xsl:param name="function.parens" select="1"/>
<xsl:template match="mallctl">
"<xsl:call-template name="inline.monoseq"/>"
</xsl:template>
</xsl:stylesheet>

View File

@ -0,0 +1,743 @@
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES
/*
* Subpages are an artificially designated partitioning of pages. Their only
* purpose is to support subpage-spaced size classes.
*
* There must be at least 4 subpages per page, due to the way size classes are
* handled.
*/
#define LG_SUBPAGE 8
#define SUBPAGE ((size_t)(1U << LG_SUBPAGE))
#define SUBPAGE_MASK (SUBPAGE - 1)
/* Return the smallest subpage multiple that is >= s. */
#define SUBPAGE_CEILING(s) \
(((s) + SUBPAGE_MASK) & ~SUBPAGE_MASK)
#ifdef JEMALLOC_TINY
/* Smallest size class to support. */
# define LG_TINY_MIN LG_SIZEOF_PTR
# define TINY_MIN (1U << LG_TINY_MIN)
#endif
/*
* Maximum size class that is a multiple of the quantum, but not (necessarily)
* a power of 2. Above this size, allocations are rounded up to the nearest
* power of 2.
*/
#define LG_QSPACE_MAX_DEFAULT 7
/*
* Maximum size class that is a multiple of the cacheline, but not (necessarily)
* a power of 2. Above this size, allocations are rounded up to the nearest
* power of 2.
*/
#define LG_CSPACE_MAX_DEFAULT 9
/*
* RUN_MAX_OVRHD indicates maximum desired run header overhead. Runs are sized
* as small as possible such that this setting is still honored, without
* violating other constraints. The goal is to make runs as small as possible
* without exceeding a per run external fragmentation threshold.
*
* We use binary fixed point math for overhead computations, where the binary
* point is implicitly RUN_BFP bits to the left.
*
* Note that it is possible to set RUN_MAX_OVRHD low enough that it cannot be
* honored for some/all object sizes, since when heap profiling is enabled
* there is one pointer of header overhead per object (plus a constant). This
* constraint is relaxed (ignored) for runs that are so small that the
* per-region overhead is greater than:
*
* (RUN_MAX_OVRHD / (reg_size << (3+RUN_BFP)))
*/
#define RUN_BFP 12
/* \/ Implicit binary fixed point. */
#define RUN_MAX_OVRHD 0x0000003dU
#define RUN_MAX_OVRHD_RELAX 0x00001800U
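/*
 * Worked example (illustrative, using the defaults above): with RUN_BFP == 12
 * the implicit denominator is 2^12 == 4096, so RUN_MAX_OVRHD == 0x3d == 61
 * encodes 61/4096 ~= 1.5% maximum header overhead per run, and
 * RUN_MAX_OVRHD_RELAX == 0x1800 == 6144 encodes 6144/4096 == 150% in the same
 * fixed-point format.
 */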
/* Maximum number of regions in one run. */
#define LG_RUN_MAXREGS 11
#define RUN_MAXREGS (1U << LG_RUN_MAXREGS)
/*
* The minimum ratio of active:dirty pages per arena is computed as:
*
* (nactive >> opt_lg_dirty_mult) >= ndirty
*
* So, supposing that opt_lg_dirty_mult is 5, there can be no less than 32
* times as many active pages as dirty pages.
*/
#define LG_DIRTY_MULT_DEFAULT 5
typedef struct arena_chunk_map_s arena_chunk_map_t;
typedef struct arena_chunk_s arena_chunk_t;
typedef struct arena_run_s arena_run_t;
typedef struct arena_bin_info_s arena_bin_info_t;
typedef struct arena_bin_s arena_bin_t;
typedef struct arena_s arena_t;
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
/* Each element of the chunk map corresponds to one page within the chunk. */
struct arena_chunk_map_s {
union {
/*
* Linkage for run trees. There are two disjoint uses:
*
* 1) arena_t's runs_avail_{clean,dirty} trees.
* 2) arena_run_t conceptually uses this linkage for in-use
* non-full runs, rather than directly embedding linkage.
*/
rb_node(arena_chunk_map_t) rb_link;
/*
* List of runs currently in purgatory. arena_chunk_purge()
* temporarily allocates runs that contain dirty pages while
* purging, so that other threads cannot use the runs while the
* purging thread is operating without the arena lock held.
*/
ql_elm(arena_chunk_map_t) ql_link;
} u;
#ifdef JEMALLOC_PROF
/* Profile counters, used for large object runs. */
prof_ctx_t *prof_ctx;
#endif
/*
* Run address (or size) and various flags are stored together. The bit
* layout looks like (assuming 32-bit system):
*
* ???????? ???????? ????---- ----dula
*
* ? : Unallocated: Run address for first/last pages, unset for internal
* pages.
* Small: Run page offset.
* Large: Run size for first page, unset for trailing pages.
* - : Unused.
* d : dirty?
* u : unzeroed?
* l : large?
* a : allocated?
*
* Following are example bit patterns for the three types of runs.
*
* p : run page offset
* s : run size
* c : (binind+1) for size class (used only if prof_promote is true)
* x : don't care
* - : 0
* + : 1
* [DULA] : bit set
* [dula] : bit unset
*
* Unallocated (clean):
* ssssssss ssssssss ssss---- ----du-a
* xxxxxxxx xxxxxxxx xxxx---- -----Uxx
* ssssssss ssssssss ssss---- ----dU-a
*
* Unallocated (dirty):
* ssssssss ssssssss ssss---- ----D--a
* xxxxxxxx xxxxxxxx xxxx---- ----xxxx
* ssssssss ssssssss ssss---- ----D--a
*
* Small:
* pppppppp pppppppp pppp---- ----d--A
* pppppppp pppppppp pppp---- -------A
* pppppppp pppppppp pppp---- ----d--A
*
* Large:
* ssssssss ssssssss ssss---- ----D-LA
* xxxxxxxx xxxxxxxx xxxx---- ----xxxx
* -------- -------- -------- ----D-LA
*
* Large (sampled, size <= PAGE_SIZE):
* ssssssss ssssssss sssscccc ccccD-LA
*
* Large (not sampled, size == PAGE_SIZE):
* ssssssss ssssssss ssss---- ----D-LA
*/
size_t bits;
#ifdef JEMALLOC_PROF
#define CHUNK_MAP_CLASS_SHIFT 4
#define CHUNK_MAP_CLASS_MASK ((size_t)0xff0U)
#endif
#define CHUNK_MAP_FLAGS_MASK ((size_t)0xfU)
#define CHUNK_MAP_DIRTY ((size_t)0x8U)
#define CHUNK_MAP_UNZEROED ((size_t)0x4U)
#define CHUNK_MAP_LARGE ((size_t)0x2U)
#define CHUNK_MAP_ALLOCATED ((size_t)0x1U)
#define CHUNK_MAP_KEY CHUNK_MAP_ALLOCATED
};
typedef rb_tree(arena_chunk_map_t) arena_avail_tree_t;
typedef rb_tree(arena_chunk_map_t) arena_run_tree_t;
/* Arena chunk header. */
struct arena_chunk_s {
/* Arena that owns the chunk. */
arena_t *arena;
/* Linkage for the arena's chunks_dirty list. */
ql_elm(arena_chunk_t) link_dirty;
/*
* True if the chunk is currently in the chunks_dirty list, due to
* having at some point contained one or more dirty pages. Removal
* from chunks_dirty is lazy, so (dirtied && ndirty == 0) is possible.
*/
bool dirtied;
/* Number of dirty pages. */
size_t ndirty;
/*
* Map of pages within chunk that keeps track of free/large/small. The
* first map_bias entries are omitted, since the chunk header does not
* need to be tracked in the map. This omission saves a header page
* for common chunk sizes (e.g. 4 MiB).
*/
arena_chunk_map_t map[1]; /* Dynamically sized. */
};
typedef rb_tree(arena_chunk_t) arena_chunk_tree_t;
struct arena_run_s {
#ifdef JEMALLOC_DEBUG
uint32_t magic;
# define ARENA_RUN_MAGIC 0x384adf93
#endif
/* Bin this run is associated with. */
arena_bin_t *bin;
/* Index of next region that has never been allocated, or nregs. */
uint32_t nextind;
/* Number of free regions in run. */
unsigned nfree;
};
/*
* Read-only information associated with each element of arena_t's bins array
* is stored separately, partly to reduce memory usage (only one copy, rather
* than one per arena), but mainly to avoid false cacheline sharing.
*/
struct arena_bin_info_s {
/* Size of regions in a run for this bin's size class. */
size_t reg_size;
/* Total size of a run for this bin's size class. */
size_t run_size;
/* Total number of regions in a run for this bin's size class. */
uint32_t nregs;
/*
* Offset of first bitmap_t element in a run header for this bin's size
* class.
*/
uint32_t bitmap_offset;
/*
* Metadata used to manipulate bitmaps for runs associated with this
* bin.
*/
bitmap_info_t bitmap_info;
#ifdef JEMALLOC_PROF
/*
* Offset of first (prof_ctx_t *) in a run header for this bin's size
* class, or 0 if (opt_prof == false).
*/
uint32_t ctx0_offset;
#endif
/* Offset of first region in a run for this bin's size class. */
uint32_t reg0_offset;
};
struct arena_bin_s {
/*
* All operations on runcur, runs, and stats require that lock be
* locked. Run allocation/deallocation are protected by the arena lock,
* which may be acquired while holding one or more bin locks, but not
* vice versa.
*/
malloc_mutex_t lock;
/*
* Current run being used to service allocations of this bin's size
* class.
*/
arena_run_t *runcur;
/*
* Tree of non-full runs. This tree is used when looking for an
* existing run when runcur is no longer usable. We choose the
* non-full run that is lowest in memory; this policy tends to keep
* objects packed well, and it can also help reduce the number of
* almost-empty chunks.
*/
arena_run_tree_t runs;
#ifdef JEMALLOC_STATS
/* Bin statistics. */
malloc_bin_stats_t stats;
#endif
};
struct arena_s {
#ifdef JEMALLOC_DEBUG
uint32_t magic;
# define ARENA_MAGIC 0x947d3d24
#endif
/* This arena's index within the arenas array. */
unsigned ind;
/*
* Number of threads currently assigned to this arena. This field is
* protected by arenas_lock.
*/
unsigned nthreads;
/*
* There are three classes of arena operations from a locking
* perspective:
* 1) Thread assignment (modifies nthreads) is protected by
* arenas_lock.
* 2) Bin-related operations are protected by bin locks.
* 3) Chunk- and run-related operations are protected by this mutex.
*/
malloc_mutex_t lock;
#ifdef JEMALLOC_STATS
arena_stats_t stats;
# ifdef JEMALLOC_TCACHE
/*
* List of tcaches for extant threads associated with this arena.
* Stats from these are merged incrementally, and at exit.
*/
ql_head(tcache_t) tcache_ql;
# endif
#endif
#ifdef JEMALLOC_PROF
uint64_t prof_accumbytes;
#endif
/* List of dirty-page-containing chunks this arena manages. */
ql_head(arena_chunk_t) chunks_dirty;
/*
* In order to avoid rapid chunk allocation/deallocation when an arena
* oscillates right on the cusp of needing a new chunk, cache the most
* recently freed chunk. The spare is left in the arena's chunk trees
* until it is deleted.
*
* There is one spare chunk per arena, rather than one spare total, in
* order to avoid interactions between multiple threads that could make
* a single spare inadequate.
*/
arena_chunk_t *spare;
/* Number of pages in active runs. */
size_t nactive;
/*
* Current count of pages within unused runs that are potentially
* dirty, and for which madvise(... MADV_DONTNEED) has not been called.
* By tracking this, we can institute a limit on how much dirty unused
* memory is mapped for each arena.
*/
size_t ndirty;
/*
* Approximate number of pages being purged. It is possible for
* multiple threads to purge dirty pages concurrently, and they use
* npurgatory to indicate the total number of pages all threads are
* attempting to purge.
*/
size_t npurgatory;
/*
* Size/address-ordered trees of this arena's available runs. The trees
* are used for first-best-fit run allocation. The dirty tree contains
* runs with dirty pages (i.e. very likely to have been touched and
* therefore have associated physical pages), whereas the clean tree
* contains runs with pages that either have no associated physical
* pages, or have pages that the kernel may recycle at any time due to
* previous madvise(2) calls. The dirty tree is used in preference to
* the clean tree for allocations, because using dirty pages reduces
* the amount of dirty purging necessary to keep the active:dirty page
* ratio below the purge threshold.
*/
arena_avail_tree_t runs_avail_clean;
arena_avail_tree_t runs_avail_dirty;
/*
* bins is used to store trees of free regions of the following sizes,
* assuming a 64-bit system with 16-byte quantum, 4 KiB page size, and
* default MALLOC_CONF.
*
* bins[i] | size |
* --------+--------+
* 0 | 8 |
* --------+--------+
* 1 | 16 |
* 2 | 32 |
* 3 | 48 |
* : :
* 6 | 96 |
* 7 | 112 |
* 8 | 128 |
* --------+--------+
* 9 | 192 |
* 10 | 256 |
* 11 | 320 |
* 12 | 384 |
* 13 | 448 |
* 14 | 512 |
* --------+--------+
* 15 | 768 |
* 16 | 1024 |
* 17 | 1280 |
* : :
* 25 | 3328 |
* 26 | 3584 |
* 27 | 3840 |
* --------+--------+
*/
arena_bin_t bins[1]; /* Dynamically sized. */
};
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
extern size_t opt_lg_qspace_max;
extern size_t opt_lg_cspace_max;
extern ssize_t opt_lg_dirty_mult;
/*
* small_size2bin is a compact lookup table that rounds request sizes up to
* size classes. In order to reduce cache footprint, the table is compressed,
* and all accesses are via the SMALL_SIZE2BIN macro.
*/
extern uint8_t const *small_size2bin;
#define SMALL_SIZE2BIN(s) (small_size2bin[(s-1) >> LG_TINY_MIN])
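/*
 * Example (illustrative, assuming LG_TINY_MIN == 3 as on LP64): every byte of
 * small_size2bin covers an 8-byte range of request sizes, so a request of 17
 * bytes indexes small_size2bin[(17-1) >> 3] == small_size2bin[2], the same
 * entry used for all requests of 17..24 bytes.
 */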
extern arena_bin_info_t *arena_bin_info;
/* Various bin-related settings. */
#ifdef JEMALLOC_TINY /* Number of (2^n)-spaced tiny bins. */
# define ntbins ((unsigned)(LG_QUANTUM - LG_TINY_MIN))
#else
# define ntbins 0
#endif
extern unsigned nqbins; /* Number of quantum-spaced bins. */
extern unsigned ncbins; /* Number of cacheline-spaced bins. */
extern unsigned nsbins; /* Number of subpage-spaced bins. */
extern unsigned nbins;
#ifdef JEMALLOC_TINY
# define tspace_max ((size_t)(QUANTUM >> 1))
#endif
#define qspace_min QUANTUM
extern size_t qspace_max;
extern size_t cspace_min;
extern size_t cspace_max;
extern size_t sspace_min;
extern size_t sspace_max;
#define small_maxclass sspace_max
#define nlclasses (chunk_npages - map_bias)
void arena_purge_all(arena_t *arena);
#ifdef JEMALLOC_PROF
void arena_prof_accum(arena_t *arena, uint64_t accumbytes);
#endif
#ifdef JEMALLOC_TCACHE
void arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin,
size_t binind
# ifdef JEMALLOC_PROF
, uint64_t prof_accumbytes
# endif
);
#endif
void *arena_malloc_small(arena_t *arena, size_t size, bool zero);
void *arena_malloc_large(arena_t *arena, size_t size, bool zero);
void *arena_malloc(size_t size, bool zero);
void *arena_palloc(arena_t *arena, size_t size, size_t alloc_size,
size_t alignment, bool zero);
size_t arena_salloc(const void *ptr);
#ifdef JEMALLOC_PROF
void arena_prof_promoted(const void *ptr, size_t size);
size_t arena_salloc_demote(const void *ptr);
#endif
void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr,
arena_chunk_map_t *mapelm);
void arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr);
#ifdef JEMALLOC_STATS
void arena_stats_merge(arena_t *arena, size_t *nactive, size_t *ndirty,
arena_stats_t *astats, malloc_bin_stats_t *bstats,
malloc_large_stats_t *lstats);
#endif
void *arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size,
size_t extra, bool zero);
void *arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra,
size_t alignment, bool zero);
bool arena_new(arena_t *arena, unsigned ind);
bool arena_boot(void);
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
#ifndef JEMALLOC_ENABLE_INLINE
size_t arena_bin_index(arena_t *arena, arena_bin_t *bin);
unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info,
const void *ptr);
# ifdef JEMALLOC_PROF
prof_ctx_t *arena_prof_ctx_get(const void *ptr);
void arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
# endif
void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_))
JEMALLOC_INLINE size_t
arena_bin_index(arena_t *arena, arena_bin_t *bin)
{
size_t binind = bin - arena->bins;
assert(binind < nbins);
return (binind);
}
JEMALLOC_INLINE unsigned
arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr)
{
unsigned shift, diff, regind;
size_t size;
dassert(run->magic == ARENA_RUN_MAGIC);
/*
* Freeing a pointer lower than region zero can cause assertion
* failure.
*/
assert((uintptr_t)ptr >= (uintptr_t)run +
(uintptr_t)bin_info->reg0_offset);
/*
* Avoid doing division with a variable divisor if possible. Using
* actual division here can reduce allocator throughput by over 20%!
*/
diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run -
bin_info->reg0_offset);
/* Rescale (factor powers of 2 out of the numerator and denominator). */
size = bin_info->reg_size;
shift = ffs(size) - 1;
diff >>= shift;
size >>= shift;
if (size == 1) {
/* The divisor was a power of 2. */
regind = diff;
} else {
/*
* To divide by a number D that is not a power of two we
* multiply by (2^21 / D) and then right shift by 21 positions.
*
* X / D
*
* becomes
*
* (X * size_invs[D - 3]) >> SIZE_INV_SHIFT
*
* We can omit the first three elements, because we never
* divide by 0, and 1 and 2 are both powers of two, which are
* handled above.
*/
#define SIZE_INV_SHIFT ((sizeof(unsigned) << 3) - LG_RUN_MAXREGS)
#define SIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s)) + 1)
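/*
 * Illustration (assuming 32-bit unsigned and LG_RUN_MAXREGS == 11, as defined
 * above): SIZE_INV_SHIFT == 32 - 11 == 21, so SIZE_INV(3) == (2^21 / 3) + 1 ==
 * 699051.  Dividing diff == 9 by size == 3 then becomes
 * (9 * 699051) >> 21 == 3, with no division instruction.
 */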
static const unsigned size_invs[] = {
SIZE_INV(3),
SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7),
SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11),
SIZE_INV(12), SIZE_INV(13), SIZE_INV(14), SIZE_INV(15),
SIZE_INV(16), SIZE_INV(17), SIZE_INV(18), SIZE_INV(19),
SIZE_INV(20), SIZE_INV(21), SIZE_INV(22), SIZE_INV(23),
SIZE_INV(24), SIZE_INV(25), SIZE_INV(26), SIZE_INV(27),
SIZE_INV(28), SIZE_INV(29), SIZE_INV(30), SIZE_INV(31)
};
if (size <= ((sizeof(size_invs) / sizeof(unsigned)) + 2))
regind = (diff * size_invs[size - 3]) >> SIZE_INV_SHIFT;
else
regind = diff / size;
#undef SIZE_INV
#undef SIZE_INV_SHIFT
}
assert(diff == regind * size);
assert(regind < bin_info->nregs);
return (regind);
}
#ifdef JEMALLOC_PROF
JEMALLOC_INLINE prof_ctx_t *
arena_prof_ctx_get(const void *ptr)
{
prof_ctx_t *ret;
arena_chunk_t *chunk;
size_t pageind, mapbits;
assert(ptr != NULL);
assert(CHUNK_ADDR2BASE(ptr) != ptr);
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
mapbits = chunk->map[pageind-map_bias].bits;
assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
if ((mapbits & CHUNK_MAP_LARGE) == 0) {
if (prof_promote)
ret = (prof_ctx_t *)(uintptr_t)1U;
else {
arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
(uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) <<
PAGE_SHIFT));
size_t binind = arena_bin_index(chunk->arena, run->bin);
arena_bin_info_t *bin_info = &arena_bin_info[binind];
unsigned regind;
dassert(run->magic == ARENA_RUN_MAGIC);
regind = arena_run_regind(run, bin_info, ptr);
ret = *(prof_ctx_t **)((uintptr_t)run +
bin_info->ctx0_offset + (regind *
sizeof(prof_ctx_t *)));
}
} else
ret = chunk->map[pageind-map_bias].prof_ctx;
return (ret);
}
JEMALLOC_INLINE void
arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
{
arena_chunk_t *chunk;
size_t pageind, mapbits;
assert(ptr != NULL);
assert(CHUNK_ADDR2BASE(ptr) != ptr);
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
mapbits = chunk->map[pageind-map_bias].bits;
assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
if ((mapbits & CHUNK_MAP_LARGE) == 0) {
if (prof_promote == false) {
arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
(uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) <<
PAGE_SHIFT));
arena_bin_t *bin = run->bin;
size_t binind;
arena_bin_info_t *bin_info;
unsigned regind;
dassert(run->magic == ARENA_RUN_MAGIC);
binind = arena_bin_index(chunk->arena, bin);
bin_info = &arena_bin_info[binind];
regind = arena_run_regind(run, bin_info, ptr);
*((prof_ctx_t **)((uintptr_t)run + bin_info->ctx0_offset
+ (regind * sizeof(prof_ctx_t *)))) = ctx;
} else
assert((uintptr_t)ctx == (uintptr_t)1U);
} else
chunk->map[pageind-map_bias].prof_ctx = ctx;
}
#endif
JEMALLOC_INLINE void
arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
{
size_t pageind;
arena_chunk_map_t *mapelm;
assert(arena != NULL);
dassert(arena->magic == ARENA_MAGIC);
assert(chunk->arena == arena);
assert(ptr != NULL);
assert(CHUNK_ADDR2BASE(ptr) != ptr);
pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
mapelm = &chunk->map[pageind-map_bias];
assert((mapelm->bits & CHUNK_MAP_ALLOCATED) != 0);
if ((mapelm->bits & CHUNK_MAP_LARGE) == 0) {
/* Small allocation. */
#ifdef JEMALLOC_TCACHE
tcache_t *tcache;
if ((tcache = tcache_get()) != NULL)
tcache_dalloc_small(tcache, ptr);
else {
#endif
arena_run_t *run;
arena_bin_t *bin;
run = (arena_run_t *)((uintptr_t)chunk +
(uintptr_t)((pageind - (mapelm->bits >>
PAGE_SHIFT)) << PAGE_SHIFT));
dassert(run->magic == ARENA_RUN_MAGIC);
bin = run->bin;
#ifdef JEMALLOC_DEBUG
{
size_t binind = arena_bin_index(arena, bin);
arena_bin_info_t *bin_info =
&arena_bin_info[binind];
assert(((uintptr_t)ptr - ((uintptr_t)run +
(uintptr_t)bin_info->reg0_offset)) %
bin_info->reg_size == 0);
}
#endif
malloc_mutex_lock(&bin->lock);
arena_dalloc_bin(arena, chunk, ptr, mapelm);
malloc_mutex_unlock(&bin->lock);
#ifdef JEMALLOC_TCACHE
}
#endif
} else {
#ifdef JEMALLOC_TCACHE
size_t size = mapelm->bits & ~PAGE_MASK;
assert(((uintptr_t)ptr & PAGE_MASK) == 0);
if (size <= tcache_maxclass) {
tcache_t *tcache;
if ((tcache = tcache_get()) != NULL)
tcache_dalloc_large(tcache, ptr, size);
else {
malloc_mutex_lock(&arena->lock);
arena_dalloc_large(arena, chunk, ptr);
malloc_mutex_unlock(&arena->lock);
}
} else {
malloc_mutex_lock(&arena->lock);
arena_dalloc_large(arena, chunk, ptr);
malloc_mutex_unlock(&arena->lock);
}
#else
assert(((uintptr_t)ptr & PAGE_MASK) == 0);
malloc_mutex_lock(&arena->lock);
arena_dalloc_large(arena, chunk, ptr);
malloc_mutex_unlock(&arena->lock);
#endif
}
}
#endif
#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/

View File

@ -0,0 +1,169 @@
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
#define atomic_read_uint64(p) atomic_add_uint64(p, 0)
#define atomic_read_uint32(p) atomic_add_uint32(p, 0)
#if (LG_SIZEOF_PTR == 3)
# define atomic_read_z(p) \
(size_t)atomic_add_uint64((uint64_t *)p, (uint64_t)0)
# define atomic_add_z(p, x) \
(size_t)atomic_add_uint64((uint64_t *)p, (uint64_t)x)
# define atomic_sub_z(p, x) \
(size_t)atomic_sub_uint64((uint64_t *)p, (uint64_t)x)
#elif (LG_SIZEOF_PTR == 2)
# define atomic_read_z(p) \
(size_t)atomic_add_uint32((uint32_t *)p, (uint32_t)0)
# define atomic_add_z(p, x) \
(size_t)atomic_add_uint32((uint32_t *)p, (uint32_t)x)
# define atomic_sub_z(p, x) \
(size_t)atomic_sub_uint32((uint32_t *)p, (uint32_t)x)
#endif
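/*
 * Usage sketch (illustrative; `nallocated' is a hypothetical size_t counter):
 *
 *   atomic_add_z(&nallocated, size);              // on allocation
 *   atomic_sub_z(&nallocated, size);              // on deallocation
 *   size_t cur = atomic_read_z(&nallocated);      // read current value
 */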
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
#ifndef JEMALLOC_ENABLE_INLINE
uint64_t atomic_add_uint64(uint64_t *p, uint64_t x);
uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x);
uint32_t atomic_add_uint32(uint32_t *p, uint32_t x);
uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ATOMIC_C_))
/******************************************************************************/
/* 64-bit operations. */
#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8
JEMALLOC_INLINE uint64_t
atomic_add_uint64(uint64_t *p, uint64_t x)
{
return (__sync_add_and_fetch(p, x));
}
JEMALLOC_INLINE uint64_t
atomic_sub_uint64(uint64_t *p, uint64_t x)
{
return (__sync_sub_and_fetch(p, x));
}
#elif (defined(JEMALLOC_OSATOMIC))
JEMALLOC_INLINE uint64_t
atomic_add_uint64(uint64_t *p, uint64_t x)
{
return (OSAtomicAdd64((int64_t)x, (int64_t *)p));
}
JEMALLOC_INLINE uint64_t
atomic_sub_uint64(uint64_t *p, uint64_t x)
{
return (OSAtomicAdd64(-((int64_t)x), (int64_t *)p));
}
#elif (defined(__amd64__) || defined(__x86_64__))
JEMALLOC_INLINE uint64_t
atomic_add_uint64(uint64_t *p, uint64_t x)
{
asm volatile (
"lock; xaddq %0, %1;"
: "+r" (x), "=m" (*p) /* Outputs. */
: "m" (*p) /* Inputs. */
);
return (x);
}
JEMALLOC_INLINE uint64_t
atomic_sub_uint64(uint64_t *p, uint64_t x)
{
x = (uint64_t)(-(int64_t)x);
asm volatile (
"lock; xaddq %0, %1;"
: "+r" (x), "=m" (*p) /* Outputs. */
: "m" (*p) /* Inputs. */
);
return (x);
}
#else
# if (LG_SIZEOF_PTR == 3)
# error "Missing implementation for 64-bit atomic operations"
# endif
#endif
/******************************************************************************/
/* 32-bit operations. */
#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4
JEMALLOC_INLINE uint32_t
atomic_add_uint32(uint32_t *p, uint32_t x)
{
return (__sync_add_and_fetch(p, x));
}
JEMALLOC_INLINE uint32_t
atomic_sub_uint32(uint32_t *p, uint32_t x)
{
return (__sync_sub_and_fetch(p, x));
}
#elif (defined(JEMALLOC_OSATOMIC))
JEMALLOC_INLINE uint32_t
atomic_add_uint32(uint32_t *p, uint32_t x)
{
return (OSAtomicAdd32((int32_t)x, (int32_t *)p));
}
JEMALLOC_INLINE uint32_t
atomic_sub_uint32(uint32_t *p, uint32_t x)
{
return (OSAtomicAdd32(-((int32_t)x), (int32_t *)p));
}
#elif (defined(__i386__) || defined(__amd64__) || defined(__x86_64__))
JEMALLOC_INLINE uint32_t
atomic_add_uint32(uint32_t *p, uint32_t x)
{
asm volatile (
"lock; xaddl %0, %1;"
: "+r" (x), "=m" (*p) /* Outputs. */
: "m" (*p) /* Inputs. */
);
return (x);
}
JEMALLOC_INLINE uint32_t
atomic_sub_uint32(uint32_t *p, uint32_t x)
{
x = (uint32_t)(-(int32_t)x);
asm volatile (
"lock; xaddl %0, %1;"
: "+r" (x), "=m" (*p) /* Outputs. */
: "m" (*p) /* Inputs. */
);
return (x);
}
#else
# error "Missing implementation for 32-bit atomic operations"
#endif
#endif
#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/

View File

@ -0,0 +1,24 @@
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
extern malloc_mutex_t base_mtx;
void *base_alloc(size_t size);
extent_node_t *base_node_alloc(void);
void base_node_dealloc(extent_node_t *node);
bool base_boot(void);
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/

View File

@ -0,0 +1,184 @@
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES
/* Maximum bitmap bit count is 2^LG_BITMAP_MAXBITS. */
#define LG_BITMAP_MAXBITS LG_RUN_MAXREGS
typedef struct bitmap_level_s bitmap_level_t;
typedef struct bitmap_info_s bitmap_info_t;
typedef unsigned long bitmap_t;
#define LG_SIZEOF_BITMAP LG_SIZEOF_LONG
/* Number of bits per group. */
#define LG_BITMAP_GROUP_NBITS (LG_SIZEOF_BITMAP + 3)
#define BITMAP_GROUP_NBITS (ZU(1) << LG_BITMAP_GROUP_NBITS)
#define BITMAP_GROUP_NBITS_MASK (BITMAP_GROUP_NBITS-1)
/* Maximum number of levels possible. */
#define BITMAP_MAX_LEVELS \
(LG_BITMAP_MAXBITS / LG_SIZEOF_BITMAP) \
+ !!(LG_BITMAP_MAXBITS % LG_SIZEOF_BITMAP)
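/*
 * Example (illustrative, assuming LG_SIZEOF_BITMAP == 3, i.e. 64-bit groups):
 * a bitmap of 2048 bits (the RUN_MAXREGS maximum) needs 2048/64 == 32 groups
 * at the bottom level plus one summary group above it (only 32 of its 64 bits
 * used), so nlevels == 2.
 */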
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
struct bitmap_level_s {
/* Offset of this level's groups within the array of groups. */
size_t group_offset;
};
struct bitmap_info_s {
/* Logical number of bits in bitmap (stored at bottom level). */
size_t nbits;
/* Number of levels necessary for nbits. */
unsigned nlevels;
/*
* Only the first (nlevels+1) elements are used, and levels are ordered
* bottom to top (e.g. the bottom level is stored in levels[0]).
*/
bitmap_level_t levels[BITMAP_MAX_LEVELS+1];
};
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
void bitmap_info_init(bitmap_info_t *binfo, size_t nbits);
size_t bitmap_info_ngroups(const bitmap_info_t *binfo);
size_t bitmap_size(size_t nbits);
void bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo);
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
#ifndef JEMALLOC_ENABLE_INLINE
bool bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo);
bool bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit);
void bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit);
size_t bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo);
void bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_BITMAP_C_))
JEMALLOC_INLINE bool
bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo)
{
unsigned rgoff = binfo->levels[binfo->nlevels].group_offset - 1;
bitmap_t rg = bitmap[rgoff];
/* The bitmap is full iff the root group is 0. */
return (rg == 0);
}
JEMALLOC_INLINE bool
bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit)
{
size_t goff;
bitmap_t g;
assert(bit < binfo->nbits);
goff = bit >> LG_BITMAP_GROUP_NBITS;
g = bitmap[goff];
return (!(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))));
}
JEMALLOC_INLINE void
bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit)
{
size_t goff;
bitmap_t *gp;
bitmap_t g;
assert(bit < binfo->nbits);
assert(bitmap_get(bitmap, binfo, bit) == false);
goff = bit >> LG_BITMAP_GROUP_NBITS;
gp = &bitmap[goff];
g = *gp;
assert(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK)));
g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK);
*gp = g;
assert(bitmap_get(bitmap, binfo, bit));
/* Propagate group state transitions up the tree. */
if (g == 0) {
unsigned i;
for (i = 1; i < binfo->nlevels; i++) {
bit = goff;
goff = bit >> LG_BITMAP_GROUP_NBITS;
gp = &bitmap[binfo->levels[i].group_offset + goff];
g = *gp;
assert(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK)));
g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK);
*gp = g;
if (g != 0)
break;
}
}
}
/* sfu: set first unset. */
JEMALLOC_INLINE size_t
bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo)
{
size_t bit;
bitmap_t g;
unsigned i;
assert(bitmap_full(bitmap, binfo) == false);
i = binfo->nlevels - 1;
g = bitmap[binfo->levels[i].group_offset];
bit = ffsl(g) - 1;
while (i > 0) {
i--;
g = bitmap[binfo->levels[i].group_offset + bit];
bit = (bit << LG_BITMAP_GROUP_NBITS) + (ffsl(g) - 1);
}
bitmap_set(bitmap, binfo, bit);
return (bit);
}
JEMALLOC_INLINE void
bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit)
{
size_t goff;
bitmap_t *gp;
bitmap_t g;
bool propagate;
assert(bit < binfo->nbits);
assert(bitmap_get(bitmap, binfo, bit));
goff = bit >> LG_BITMAP_GROUP_NBITS;
gp = &bitmap[goff];
g = *gp;
propagate = (g == 0);
assert((g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))) == 0);
g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK);
*gp = g;
assert(bitmap_get(bitmap, binfo, bit) == false);
/* Propagate group state transitions up the tree. */
if (propagate) {
unsigned i;
for (i = 1; i < binfo->nlevels; i++) {
bit = goff;
goff = bit >> LG_BITMAP_GROUP_NBITS;
gp = &bitmap[binfo->levels[i].group_offset + goff];
g = *gp;
propagate = (g == 0);
assert((g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK)))
== 0);
g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK);
*gp = g;
if (propagate == false)
break;
}
}
}
#endif
#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/

View File

@ -0,0 +1,65 @@
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES
/*
* Size and alignment of memory chunks that are allocated by the OS's virtual
* memory system.
*/
#define LG_CHUNK_DEFAULT 22
/* Return the chunk address for allocation address a. */
#define CHUNK_ADDR2BASE(a) \
((void *)((uintptr_t)(a) & ~chunksize_mask))
/* Return the chunk offset of address a. */
#define CHUNK_ADDR2OFFSET(a) \
((size_t)((uintptr_t)(a) & chunksize_mask))
/* Return the smallest chunk multiple that is >= s. */
#define CHUNK_CEILING(s) \
(((s) + chunksize_mask) & ~chunksize_mask)
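/*
 * Example (illustrative, using the default 4 MiB chunks): chunksize_mask ==
 * 0x3fffff, so for a == 0x500123, CHUNK_ADDR2BASE(a) == 0x400000,
 * CHUNK_ADDR2OFFSET(a) == 0x100123, and CHUNK_CEILING(0x400001) == 0x800000.
 */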
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
extern size_t opt_lg_chunk;
#ifdef JEMALLOC_SWAP
extern bool opt_overcommit;
#endif
#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
/* Protects stats_chunks; currently not used for any other purpose. */
extern malloc_mutex_t chunks_mtx;
/* Chunk statistics. */
extern chunk_stats_t stats_chunks;
#endif
#ifdef JEMALLOC_IVSALLOC
extern rtree_t *chunks_rtree;
#endif
extern size_t chunksize;
extern size_t chunksize_mask; /* (chunksize - 1). */
extern size_t chunk_npages;
extern size_t map_bias; /* Number of arena chunk header pages. */
extern size_t arena_maxclass; /* Max size class for arenas. */
void *chunk_alloc(size_t size, bool base, bool *zero);
void chunk_dealloc(void *chunk, size_t size, bool unmap);
bool chunk_boot(void);
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/
#include "jemalloc/internal/chunk_swap.h"
#include "jemalloc/internal/chunk_dss.h"
#include "jemalloc/internal/chunk_mmap.h"

View File

@ -0,0 +1,30 @@
#ifdef JEMALLOC_DSS
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
/*
* Protects sbrk() calls. This avoids malloc races among threads, though it
* does not protect against races with threads that call sbrk() directly.
*/
extern malloc_mutex_t dss_mtx;
void *chunk_alloc_dss(size_t size, bool *zero);
bool chunk_in_dss(void *chunk);
bool chunk_dealloc_dss(void *chunk, size_t size);
bool chunk_dss_boot(void);
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/
#endif /* JEMALLOC_DSS */

View File

@ -0,0 +1,23 @@
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
void *chunk_alloc_mmap(size_t size);
void *chunk_alloc_mmap_noreserve(size_t size);
void chunk_dealloc_mmap(void *chunk, size_t size);
bool chunk_mmap_boot(void);
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/

View File

@ -0,0 +1,95 @@
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES
typedef struct ckh_s ckh_t;
typedef struct ckhc_s ckhc_t;
/* Typedefs to allow easy function pointer passing. */
typedef void ckh_hash_t (const void *, unsigned, size_t *, size_t *);
typedef bool ckh_keycomp_t (const void *, const void *);
/* Maintain counters used to get an idea of performance. */
/* #define CKH_COUNT */
/* Print counter values in ckh_delete() (requires CKH_COUNT). */
/* #define CKH_VERBOSE */
/*
* There are 2^LG_CKH_BUCKET_CELLS cells in each hash table bucket. Try to fit
* one bucket per L1 cache line.
*/
#define LG_CKH_BUCKET_CELLS (LG_CACHELINE - LG_SIZEOF_PTR - 1)
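/*
 * Example (illustrative): with LG_CACHELINE == 6 and LG_SIZEOF_PTR == 3
 * (64-byte lines, 8-byte pointers), LG_CKH_BUCKET_CELLS == 2, i.e. 4 cells per
 * bucket; at two pointers (16 bytes) per ckhc_t cell, a bucket fills exactly
 * one 64-byte cache line.
 */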
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
/* Hash table cell. */
struct ckhc_s {
const void *key;
const void *data;
};
struct ckh_s {
#ifdef JEMALLOC_DEBUG
#define CKH_MAGIC 0x3af2489d
uint32_t magic;
#endif
#ifdef CKH_COUNT
/* Counters used to get an idea of performance. */
uint64_t ngrows;
uint64_t nshrinks;
uint64_t nshrinkfails;
uint64_t ninserts;
uint64_t nrelocs;
#endif
/* Used for pseudo-random number generation. */
#define CKH_A 1103515241
#define CKH_C 12347
uint32_t prn_state;
/* Total number of items. */
size_t count;
/*
* Minimum and current number of hash table buckets. There are
* 2^LG_CKH_BUCKET_CELLS cells per bucket.
*/
unsigned lg_minbuckets;
unsigned lg_curbuckets;
/* Hash and comparison functions. */
ckh_hash_t *hash;
ckh_keycomp_t *keycomp;
/* Hash table with 2^lg_curbuckets buckets. */
ckhc_t *tab;
};
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
bool ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash,
ckh_keycomp_t *keycomp);
void ckh_delete(ckh_t *ckh);
size_t ckh_count(ckh_t *ckh);
bool ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data);
bool ckh_insert(ckh_t *ckh, const void *key, const void *data);
bool ckh_remove(ckh_t *ckh, const void *searchkey, void **key,
void **data);
bool ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data);
void ckh_string_hash(const void *key, unsigned minbits, size_t *hash1,
size_t *hash2);
bool ckh_string_keycomp(const void *k1, const void *k2);
void ckh_pointer_hash(const void *key, unsigned minbits, size_t *hash1,
size_t *hash2);
bool ckh_pointer_keycomp(const void *k1, const void *k2);
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/

View File

@ -0,0 +1,118 @@
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES
typedef struct ctl_node_s ctl_node_t;
typedef struct ctl_arena_stats_s ctl_arena_stats_t;
typedef struct ctl_stats_s ctl_stats_t;
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
struct ctl_node_s {
bool named;
union {
struct {
const char *name;
/* If (nchildren == 0), this is a terminal node. */
unsigned nchildren;
const ctl_node_t *children;
} named;
struct {
const ctl_node_t *(*index)(const size_t *, size_t,
size_t);
} indexed;
} u;
int (*ctl)(const size_t *, size_t, void *, size_t *, void *,
size_t);
};
struct ctl_arena_stats_s {
bool initialized;
unsigned nthreads;
size_t pactive;
size_t pdirty;
#ifdef JEMALLOC_STATS
arena_stats_t astats;
/* Aggregate stats for small size classes, based on bin stats. */
size_t allocated_small;
uint64_t nmalloc_small;
uint64_t ndalloc_small;
uint64_t nrequests_small;
malloc_bin_stats_t *bstats; /* nbins elements. */
malloc_large_stats_t *lstats; /* nlclasses elements. */
#endif
};
struct ctl_stats_s {
#ifdef JEMALLOC_STATS
size_t allocated;
size_t active;
size_t mapped;
struct {
size_t current; /* stats_chunks.curchunks */
uint64_t total; /* stats_chunks.nchunks */
size_t high; /* stats_chunks.highchunks */
} chunks;
struct {
size_t allocated; /* huge_allocated */
uint64_t nmalloc; /* huge_nmalloc */
uint64_t ndalloc; /* huge_ndalloc */
} huge;
#endif
ctl_arena_stats_t *arenas; /* (narenas + 1) elements. */
#ifdef JEMALLOC_SWAP
size_t swap_avail;
#endif
};
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
int ctl_byname(const char *name, void *oldp, size_t *oldlenp, void *newp,
size_t newlen);
int ctl_nametomib(const char *name, size_t *mibp, size_t *miblenp);
int ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
void *newp, size_t newlen);
bool ctl_boot(void);
#define xmallctl(name, oldp, oldlenp, newp, newlen) do { \
if (JEMALLOC_P(mallctl)(name, oldp, oldlenp, newp, newlen) \
!= 0) { \
malloc_write("<jemalloc>: Failure in xmallctl(\""); \
malloc_write(name); \
malloc_write("\", ...)\n"); \
abort(); \
} \
} while (0)
#define xmallctlnametomib(name, mibp, miblenp) do { \
if (JEMALLOC_P(mallctlnametomib)(name, mibp, miblenp) != 0) { \
malloc_write( \
"<jemalloc>: Failure in xmallctlnametomib(\""); \
malloc_write(name); \
malloc_write("\", ...)\n"); \
abort(); \
} \
} while (0)
#define xmallctlbymib(mib, miblen, oldp, oldlenp, newp, newlen) do { \
if (JEMALLOC_P(mallctlbymib)(mib, miblen, oldp, oldlenp, newp, \
newlen) != 0) { \
malloc_write( \
"<jemalloc>: Failure in xmallctlbymib()\n"); \
abort(); \
} \
} while (0)
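/*
 * Usage sketch (illustrative, assuming statistics are enabled):
 *
 *   size_t allocated, sz = sizeof(allocated);
 *   xmallctl("stats.allocated", &allocated, &sz, NULL, 0);
 *
 * Any failure is reported via malloc_write() and aborts the process.
 */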
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/

View File

@ -0,0 +1,49 @@
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES
typedef struct extent_node_s extent_node_t;
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
/* Tree of extents. */
struct extent_node_s {
#if (defined(JEMALLOC_SWAP) || defined(JEMALLOC_DSS))
/* Linkage for the size/address-ordered tree. */
rb_node(extent_node_t) link_szad;
#endif
/* Linkage for the address-ordered tree. */
rb_node(extent_node_t) link_ad;
#ifdef JEMALLOC_PROF
/* Profile counters, used for huge objects. */
prof_ctx_t *prof_ctx;
#endif
/* Pointer to the extent that this tree node is responsible for. */
void *addr;
/* Total region size. */
size_t size;
};
typedef rb_tree(extent_node_t) extent_tree_t;
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
#if (defined(JEMALLOC_SWAP) || defined(JEMALLOC_DSS))
rb_proto(, extent_tree_szad_, extent_tree_t, extent_node_t)
#endif
rb_proto(, extent_tree_ad_, extent_tree_t, extent_node_t)
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/

View File

@ -0,0 +1,70 @@
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
#ifndef JEMALLOC_ENABLE_INLINE
uint64_t hash(const void *key, size_t len, uint64_t seed);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_HASH_C_))
/*
* The following hash function is based on MurmurHash64A(), placed into the
* public domain by Austin Appleby. See http://murmurhash.googlepages.com/ for
* details.
*/
JEMALLOC_INLINE uint64_t
hash(const void *key, size_t len, uint64_t seed)
{
const uint64_t m = 0xc6a4a7935bd1e995LLU;
const int r = 47;
uint64_t h = seed ^ (len * m);
const uint64_t *data = (const uint64_t *)key;
const uint64_t *end = data + (len/8);
const unsigned char *data2;
assert(((uintptr_t)key & 0x7) == 0);
while(data != end) {
uint64_t k = *data++;
k *= m;
k ^= k >> r;
k *= m;
h ^= k;
h *= m;
}
data2 = (const unsigned char *)data;
switch(len & 7) {
case 7: h ^= ((uint64_t)(data2[6])) << 48;
case 6: h ^= ((uint64_t)(data2[5])) << 40;
case 5: h ^= ((uint64_t)(data2[4])) << 32;
case 4: h ^= ((uint64_t)(data2[3])) << 24;
case 3: h ^= ((uint64_t)(data2[2])) << 16;
case 2: h ^= ((uint64_t)(data2[1])) << 8;
case 1: h ^= ((uint64_t)(data2[0]));
h *= m;
}
h ^= h >> r;
h *= m;
h ^= h >> r;
return (h);
}
#endif
#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/

View File

@ -0,0 +1,41 @@
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
#ifdef JEMALLOC_STATS
/* Huge allocation statistics. */
extern uint64_t huge_nmalloc;
extern uint64_t huge_ndalloc;
extern size_t huge_allocated;
#endif
/* Protects chunk-related data structures. */
extern malloc_mutex_t huge_mtx;
void *huge_malloc(size_t size, bool zero);
void *huge_palloc(size_t size, size_t alignment, bool zero);
void *huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size,
size_t extra);
void *huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra,
size_t alignment, bool zero);
void huge_dalloc(void *ptr, bool unmap);
size_t huge_salloc(const void *ptr);
#ifdef JEMALLOC_PROF
prof_ctx_t *huge_prof_ctx_get(const void *ptr);
void huge_prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
#endif
bool huge_boot(void);
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/

View File

@ -0,0 +1,788 @@
#include <sys/mman.h>
#include <sys/param.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/sysctl.h>
#include <sys/uio.h>
#include <errno.h>
#include <limits.h>
#ifndef SIZE_T_MAX
# define SIZE_T_MAX SIZE_MAX
#endif
#include <pthread.h>
#include <sched.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <stddef.h>
#ifndef offsetof
# define offsetof(type, member) ((size_t)&(((type *)NULL)->member))
#endif
#include <inttypes.h>
#include <string.h>
#include <strings.h>
#include <ctype.h>
#include <unistd.h>
#include <fcntl.h>
#include <pthread.h>
#include <math.h>
#define JEMALLOC_MANGLE
#include "../jemalloc@install_suffix@.h"
#include "jemalloc/internal/private_namespace.h"
#if (defined(JEMALLOC_OSATOMIC) || defined(JEMALLOC_OSSPIN))
#include <libkern/OSAtomic.h>
#endif
#ifdef JEMALLOC_ZONE
#include <mach/mach_error.h>
#include <mach/mach_init.h>
#include <mach/vm_map.h>
#include <malloc/malloc.h>
#endif
#ifdef JEMALLOC_LAZY_LOCK
#include <dlfcn.h>
#endif
#define RB_COMPACT
#include "jemalloc/internal/rb.h"
#include "jemalloc/internal/qr.h"
#include "jemalloc/internal/ql.h"
extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s);
/*
* Define a custom assert() in order to reduce the chances of deadlock during
* assertion failure.
*/
#ifndef assert
# ifdef JEMALLOC_DEBUG
# define assert(e) do { \
if (!(e)) { \
char line_buf[UMAX2S_BUFSIZE]; \
malloc_write("<jemalloc>: "); \
malloc_write(__FILE__); \
malloc_write(":"); \
malloc_write(u2s(__LINE__, 10, line_buf)); \
malloc_write(": Failed assertion: "); \
malloc_write("\""); \
malloc_write(#e); \
malloc_write("\"\n"); \
abort(); \
} \
} while (0)
# else
# define assert(e)
# endif
#endif
#ifdef JEMALLOC_DEBUG
# define dassert(e) assert(e)
#else
# define dassert(e)
#endif
/*
* jemalloc can conceptually be broken into components (arena, tcache, etc.),
* but there are circular dependencies that cannot be broken without
* substantial performance degradation. In order to reduce the effect on
* visual code flow, read the header files in multiple passes, with one of the
* following cpp variables defined during each pass:
*
* JEMALLOC_H_TYPES : Preprocessor-defined constants and pseudo-opaque data
* types.
* JEMALLOC_H_STRUCTS : Data structures.
* JEMALLOC_H_EXTERNS : Extern data declarations and function prototypes.
* JEMALLOC_H_INLINES : Inline functions.
*/
/******************************************************************************/
#define JEMALLOC_H_TYPES
#define ALLOCM_LG_ALIGN_MASK ((int)0x3f)
#define ZU(z) ((size_t)z)
#ifndef __DECONST
# define __DECONST(type, var) ((type)(uintptr_t)(const void *)(var))
#endif
#ifdef JEMALLOC_DEBUG
/* Disable inlining to make debugging easier. */
# define JEMALLOC_INLINE
# define inline
#else
# define JEMALLOC_ENABLE_INLINE
# define JEMALLOC_INLINE static inline
#endif
/* Size of stack-allocated buffer passed to buferror(). */
#define BUFERROR_BUF 64
/* Minimum alignment of allocations is 2^LG_QUANTUM bytes. */
#ifdef __i386__
# define LG_QUANTUM 4
#endif
#ifdef __ia64__
# define LG_QUANTUM 4
#endif
#ifdef __alpha__
# define LG_QUANTUM 4
#endif
#ifdef __sparc64__
# define LG_QUANTUM 4
#endif
#if (defined(__amd64__) || defined(__x86_64__))
# define LG_QUANTUM 4
#endif
#ifdef __arm__
# define LG_QUANTUM 3
#endif
#ifdef __mips__
# define LG_QUANTUM 3
#endif
#ifdef __powerpc__
# define LG_QUANTUM 4
#endif
#ifdef __s390x__
# define LG_QUANTUM 4
#endif
#define QUANTUM ((size_t)(1U << LG_QUANTUM))
#define QUANTUM_MASK (QUANTUM - 1)
/* Return the smallest quantum multiple that is >= a. */
#define QUANTUM_CEILING(a) \
(((a) + QUANTUM_MASK) & ~QUANTUM_MASK)
#define LONG ((size_t)(1U << LG_SIZEOF_LONG))
#define LONG_MASK (LONG - 1)
/* Return the smallest long multiple that is >= a. */
#define LONG_CEILING(a) \
(((a) + LONG_MASK) & ~LONG_MASK)
#define SIZEOF_PTR (1U << LG_SIZEOF_PTR)
#define PTR_MASK (SIZEOF_PTR - 1)
/* Return the smallest (void *) multiple that is >= a. */
#define PTR_CEILING(a) \
(((a) + PTR_MASK) & ~PTR_MASK)
/*
* Maximum size of L1 cache line. This is used to avoid cache line aliasing.
* In addition, this controls the spacing of cacheline-spaced size classes.
*/
#define LG_CACHELINE 6
#define CACHELINE ((size_t)(1U << LG_CACHELINE))
#define CACHELINE_MASK (CACHELINE - 1)
/* Return the smallest cacheline multiple that is >= s. */
#define CACHELINE_CEILING(s) \
(((s) + CACHELINE_MASK) & ~CACHELINE_MASK)
/*
* Page size. STATIC_PAGE_SHIFT is determined by the configure script. If
* DYNAMIC_PAGE_SHIFT is enabled, only use the STATIC_PAGE_* macros where
* compile-time values are required for the purposes of defining data
* structures.
*/
#define STATIC_PAGE_SIZE ((size_t)(1U << STATIC_PAGE_SHIFT))
#define STATIC_PAGE_MASK ((size_t)(STATIC_PAGE_SIZE - 1))
#ifdef PAGE_SHIFT
# undef PAGE_SHIFT
#endif
#ifdef PAGE_SIZE
# undef PAGE_SIZE
#endif
#ifdef PAGE_MASK
# undef PAGE_MASK
#endif
#ifdef DYNAMIC_PAGE_SHIFT
# define PAGE_SHIFT lg_pagesize
# define PAGE_SIZE pagesize
# define PAGE_MASK pagesize_mask
#else
# define PAGE_SHIFT STATIC_PAGE_SHIFT
# define PAGE_SIZE STATIC_PAGE_SIZE
# define PAGE_MASK STATIC_PAGE_MASK
#endif
/* Return the smallest pagesize multiple that is >= s. */
#define PAGE_CEILING(s) \
(((s) + PAGE_MASK) & ~PAGE_MASK)
#include "jemalloc/internal/atomic.h"
#include "jemalloc/internal/prn.h"
#include "jemalloc/internal/ckh.h"
#include "jemalloc/internal/stats.h"
#include "jemalloc/internal/ctl.h"
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/mb.h"
#include "jemalloc/internal/extent.h"
#include "jemalloc/internal/arena.h"
#include "jemalloc/internal/bitmap.h"
#include "jemalloc/internal/base.h"
#include "jemalloc/internal/chunk.h"
#include "jemalloc/internal/huge.h"
#include "jemalloc/internal/rtree.h"
#include "jemalloc/internal/tcache.h"
#include "jemalloc/internal/hash.h"
#ifdef JEMALLOC_ZONE
#include "jemalloc/internal/zone.h"
#endif
#include "jemalloc/internal/prof.h"
#undef JEMALLOC_H_TYPES
/******************************************************************************/
#define JEMALLOC_H_STRUCTS
#include "jemalloc/internal/atomic.h"
#include "jemalloc/internal/prn.h"
#include "jemalloc/internal/ckh.h"
#include "jemalloc/internal/stats.h"
#include "jemalloc/internal/ctl.h"
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/mb.h"
#include "jemalloc/internal/bitmap.h"
#include "jemalloc/internal/extent.h"
#include "jemalloc/internal/arena.h"
#include "jemalloc/internal/base.h"
#include "jemalloc/internal/chunk.h"
#include "jemalloc/internal/huge.h"
#include "jemalloc/internal/rtree.h"
#include "jemalloc/internal/tcache.h"
#include "jemalloc/internal/hash.h"
#ifdef JEMALLOC_ZONE
#include "jemalloc/internal/zone.h"
#endif
#include "jemalloc/internal/prof.h"
#ifdef JEMALLOC_STATS
typedef struct {
uint64_t allocated;
uint64_t deallocated;
} thread_allocated_t;
#endif
#undef JEMALLOC_H_STRUCTS
/******************************************************************************/
#define JEMALLOC_H_EXTERNS
extern bool opt_abort;
#ifdef JEMALLOC_FILL
extern bool opt_junk;
#endif
#ifdef JEMALLOC_SYSV
extern bool opt_sysv;
#endif
#ifdef JEMALLOC_XMALLOC
extern bool opt_xmalloc;
#endif
#ifdef JEMALLOC_FILL
extern bool opt_zero;
#endif
extern size_t opt_narenas;
#ifdef DYNAMIC_PAGE_SHIFT
extern size_t pagesize;
extern size_t pagesize_mask;
extern size_t lg_pagesize;
#endif
/* Number of CPUs. */
extern unsigned ncpus;
extern malloc_mutex_t arenas_lock; /* Protects arenas initialization. */
extern pthread_key_t arenas_tsd;
#ifndef NO_TLS
/*
* Map of pthread_self() --> arenas[???], used for selecting an arena to use
* for allocations.
*/
extern __thread arena_t *arenas_tls JEMALLOC_ATTR(tls_model("initial-exec"));
# define ARENA_GET() arenas_tls
# define ARENA_SET(v) do { \
arenas_tls = (v); \
pthread_setspecific(arenas_tsd, (void *)(v)); \
} while (0)
#else
# define ARENA_GET() ((arena_t *)pthread_getspecific(arenas_tsd))
# define ARENA_SET(v) do { \
pthread_setspecific(arenas_tsd, (void *)(v)); \
} while (0)
#endif
/*
* Arenas that are used to service external requests. Not all elements of the
* arenas array are necessarily used; arenas are created lazily as needed.
*/
extern arena_t **arenas;
extern unsigned narenas;
#ifdef JEMALLOC_STATS
# ifndef NO_TLS
extern __thread thread_allocated_t thread_allocated_tls;
# define ALLOCATED_GET() (thread_allocated_tls.allocated)
# define ALLOCATEDP_GET() (&thread_allocated_tls.allocated)
# define DEALLOCATED_GET() (thread_allocated_tls.deallocated)
# define DEALLOCATEDP_GET() (&thread_allocated_tls.deallocated)
# define ALLOCATED_ADD(a, d) do { \
thread_allocated_tls.allocated += a; \
thread_allocated_tls.deallocated += d; \
} while (0)
# else
extern pthread_key_t thread_allocated_tsd;
thread_allocated_t *thread_allocated_get_hard(void);
# define ALLOCATED_GET() (thread_allocated_get()->allocated)
# define ALLOCATEDP_GET() (&thread_allocated_get()->allocated)
# define DEALLOCATED_GET() (thread_allocated_get()->deallocated)
# define DEALLOCATEDP_GET() (&thread_allocated_get()->deallocated)
# define ALLOCATED_ADD(a, d) do { \
thread_allocated_t *thread_allocated = thread_allocated_get(); \
thread_allocated->allocated += (a); \
thread_allocated->deallocated += (d); \
} while (0)
# endif
#endif
arena_t *arenas_extend(unsigned ind);
arena_t *choose_arena_hard(void);
int buferror(int errnum, char *buf, size_t buflen);
void jemalloc_prefork(void);
void jemalloc_postfork(void);
#include "jemalloc/internal/atomic.h"
#include "jemalloc/internal/prn.h"
#include "jemalloc/internal/ckh.h"
#include "jemalloc/internal/stats.h"
#include "jemalloc/internal/ctl.h"
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/mb.h"
#include "jemalloc/internal/bitmap.h"
#include "jemalloc/internal/extent.h"
#include "jemalloc/internal/arena.h"
#include "jemalloc/internal/base.h"
#include "jemalloc/internal/chunk.h"
#include "jemalloc/internal/huge.h"
#include "jemalloc/internal/rtree.h"
#include "jemalloc/internal/tcache.h"
#include "jemalloc/internal/hash.h"
#ifdef JEMALLOC_ZONE
#include "jemalloc/internal/zone.h"
#endif
#include "jemalloc/internal/prof.h"
#undef JEMALLOC_H_EXTERNS
/******************************************************************************/
#define JEMALLOC_H_INLINES
#include "jemalloc/internal/atomic.h"
#include "jemalloc/internal/prn.h"
#include "jemalloc/internal/ckh.h"
#include "jemalloc/internal/stats.h"
#include "jemalloc/internal/ctl.h"
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/mb.h"
#include "jemalloc/internal/extent.h"
#include "jemalloc/internal/base.h"
#include "jemalloc/internal/chunk.h"
#include "jemalloc/internal/huge.h"
#ifndef JEMALLOC_ENABLE_INLINE
size_t pow2_ceil(size_t x);
size_t s2u(size_t size);
size_t sa2u(size_t size, size_t alignment, size_t *run_size_p);
void malloc_write(const char *s);
arena_t *choose_arena(void);
# if (defined(JEMALLOC_STATS) && defined(NO_TLS))
thread_allocated_t *thread_allocated_get(void);
# endif
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_))
/* Compute the smallest power of 2 that is >= x. */
JEMALLOC_INLINE size_t
pow2_ceil(size_t x)
{
x--;
x |= x >> 1;
x |= x >> 2;
x |= x >> 4;
x |= x >> 8;
x |= x >> 16;
#if (LG_SIZEOF_PTR == 3)
x |= x >> 32;
#endif
x++;
return (x);
}
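/*
 * A worked illustration of the bit-smearing trick above (hypothetical values,
 * for illustration only): decrementing and OR-ing in successive shifts sets
 * every bit below the highest set bit, so the final increment lands on the
 * next power of two.
 */
#if 0
assert(pow2_ceil(100) == 128); /* 99 == 0x63 -> 0x7f -> +1 == 0x80. */
assert(pow2_ceil(128) == 128); /* Powers of two map to themselves. */
assert(pow2_ceil(1) == 1);
#endif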
/*
* Compute usable size that would result from allocating an object with the
* specified size.
*/
JEMALLOC_INLINE size_t
s2u(size_t size)
{
if (size <= small_maxclass)
return (arena_bin_info[SMALL_SIZE2BIN(size)].reg_size);
if (size <= arena_maxclass)
return (PAGE_CEILING(size));
return (CHUNK_CEILING(size));
}
/*
* Compute usable size that would result from allocating an object with the
* specified size and alignment.
*/
JEMALLOC_INLINE size_t
sa2u(size_t size, size_t alignment, size_t *run_size_p)
{
size_t usize;
/*
* Round size up to the nearest multiple of alignment.
*
* This done, we can take advantage of the fact that for each small
* size class, every object is aligned at the smallest power of two
* that is non-zero in the base two representation of the size. For
* example:
*
* Size | Base 2 | Minimum alignment
* -----+----------+------------------
* 96 | 1100000 | 32
* 144 | 10100000 | 32
* 192 | 11000000 | 64
*
* Depending on runtime settings, it is possible that arena_malloc()
* will further round up to a power of two, but that never causes
* correctness issues.
*/
usize = (size + (alignment - 1)) & (-alignment);
/*
* (usize < size) protects against the combination of maximal
* alignment and size greater than maximal alignment.
*/
if (usize < size) {
/* size_t overflow. */
return (0);
}
if (usize <= arena_maxclass && alignment <= PAGE_SIZE) {
if (usize <= small_maxclass)
return (arena_bin_info[SMALL_SIZE2BIN(usize)].reg_size);
return (PAGE_CEILING(usize));
} else {
size_t run_size;
/*
* We can't achieve subpage alignment, so round up alignment
* permanently; it makes later calculations simpler.
*/
alignment = PAGE_CEILING(alignment);
usize = PAGE_CEILING(size);
/*
* (usize < size) protects against very large sizes within
* PAGE_SIZE of SIZE_T_MAX.
*
* (usize + alignment < usize) protects against the
* combination of maximal alignment and usize large enough
* to cause overflow. This is similar to the first overflow
* check above, but it needs to be repeated due to the new
* usize value, which may now be *equal* to maximal
* alignment, whereas before we only detected overflow if the
* original size was *greater* than maximal alignment.
*/
if (usize < size || usize + alignment < usize) {
/* size_t overflow. */
return (0);
}
/*
* Calculate the size of the over-size run that arena_palloc()
* would need to allocate in order to guarantee the alignment.
*/
if (usize >= alignment)
run_size = usize + alignment - PAGE_SIZE;
else {
/*
* It is possible that (alignment << 1) will cause
* overflow, but it doesn't matter because we also
* subtract PAGE_SIZE, which in the case of overflow
* leaves us with a very large run_size. That causes
* the first conditional below to fail, which means
* that the bogus run_size value never gets used for
* anything important.
*/
run_size = (alignment << 1) - PAGE_SIZE;
}
if (run_size_p != NULL)
*run_size_p = run_size;
if (run_size <= arena_maxclass)
return (PAGE_CEILING(usize));
return (CHUNK_CEILING(usize));
}
}
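/*
 * Worked example of the rounding performed by sa2u() (hypothetical values,
 * for illustration only): with size == 100 and alignment == 64, the first
 * step computes (100 + 63) & -64 == 128, i.e. the request is rounded up to a
 * multiple of the alignment before the size-class lookup.
 */
#if 0
assert((((size_t)100 + (size_t)63) & (size_t)-64) == (size_t)128);
#endif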
/*
* Wrapper around malloc_message() that avoids the need for
* JEMALLOC_P(malloc_message)(...) throughout the code.
*/
JEMALLOC_INLINE void
malloc_write(const char *s)
{
JEMALLOC_P(malloc_message)(NULL, s);
}
/*
* Choose an arena based on a per-thread value (fast-path code, calls slow-path
* code if necessary).
*/
JEMALLOC_INLINE arena_t *
choose_arena(void)
{
arena_t *ret;
ret = ARENA_GET();
if (ret == NULL) {
ret = choose_arena_hard();
assert(ret != NULL);
}
return (ret);
}
#if (defined(JEMALLOC_STATS) && defined(NO_TLS))
JEMALLOC_INLINE thread_allocated_t *
thread_allocated_get(void)
{
thread_allocated_t *thread_allocated = (thread_allocated_t *)
pthread_getspecific(thread_allocated_tsd);
if (thread_allocated == NULL)
return (thread_allocated_get_hard());
return (thread_allocated);
}
#endif
#endif
#include "jemalloc/internal/bitmap.h"
#include "jemalloc/internal/rtree.h"
#include "jemalloc/internal/tcache.h"
#include "jemalloc/internal/arena.h"
#include "jemalloc/internal/hash.h"
#ifdef JEMALLOC_ZONE
#include "jemalloc/internal/zone.h"
#endif
#ifndef JEMALLOC_ENABLE_INLINE
void *imalloc(size_t size);
void *icalloc(size_t size);
void *ipalloc(size_t usize, size_t alignment, bool zero);
size_t isalloc(const void *ptr);
# ifdef JEMALLOC_IVSALLOC
size_t ivsalloc(const void *ptr);
# endif
void idalloc(void *ptr);
void *iralloc(void *ptr, size_t size, size_t extra, size_t alignment,
bool zero, bool no_move);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_))
JEMALLOC_INLINE void *
imalloc(size_t size)
{
assert(size != 0);
if (size <= arena_maxclass)
return (arena_malloc(size, false));
else
return (huge_malloc(size, false));
}
JEMALLOC_INLINE void *
icalloc(size_t size)
{
if (size <= arena_maxclass)
return (arena_malloc(size, true));
else
return (huge_malloc(size, true));
}
JEMALLOC_INLINE void *
ipalloc(size_t usize, size_t alignment, bool zero)
{
void *ret;
assert(usize != 0);
assert(usize == sa2u(usize, alignment, NULL));
if (usize <= arena_maxclass && alignment <= PAGE_SIZE)
ret = arena_malloc(usize, zero);
else {
size_t run_size
#ifdef JEMALLOC_CC_SILENCE
= 0
#endif
;
/*
* Ideally we would only ever call sa2u() once per aligned
* allocation request, and the caller of this function has
* already done so once. However, it's rather burdensome to
* require every caller to pass in run_size, especially given
* that it's only relevant to large allocations. Therefore,
* just call it again here in order to get run_size.
*/
sa2u(usize, alignment, &run_size);
if (run_size <= arena_maxclass) {
ret = arena_palloc(choose_arena(), usize, run_size,
alignment, zero);
} else if (alignment <= chunksize)
ret = huge_malloc(usize, zero);
else
ret = huge_palloc(usize, alignment, zero);
}
assert(((uintptr_t)ret & (alignment - 1)) == 0);
return (ret);
}
JEMALLOC_INLINE size_t
isalloc(const void *ptr)
{
size_t ret;
arena_chunk_t *chunk;
assert(ptr != NULL);
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
if (chunk != ptr) {
/* Region. */
dassert(chunk->arena->magic == ARENA_MAGIC);
#ifdef JEMALLOC_PROF
ret = arena_salloc_demote(ptr);
#else
ret = arena_salloc(ptr);
#endif
} else
ret = huge_salloc(ptr);
return (ret);
}
#ifdef JEMALLOC_IVSALLOC
JEMALLOC_INLINE size_t
ivsalloc(const void *ptr)
{
/* Return 0 if ptr is not within a chunk managed by jemalloc. */
if (rtree_get(chunks_rtree, (uintptr_t)CHUNK_ADDR2BASE(ptr)) == NULL)
return (0);
return (isalloc(ptr));
}
#endif
JEMALLOC_INLINE void
idalloc(void *ptr)
{
arena_chunk_t *chunk;
assert(ptr != NULL);
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
if (chunk != ptr)
arena_dalloc(chunk->arena, chunk, ptr);
else
huge_dalloc(ptr, true);
}
JEMALLOC_INLINE void *
iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero,
bool no_move)
{
void *ret;
size_t oldsize;
assert(ptr != NULL);
assert(size != 0);
oldsize = isalloc(ptr);
if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1))
!= 0) {
size_t usize, copysize;
/*
* Existing object alignment is inadequate; allocate new space
* and copy.
*/
if (no_move)
return (NULL);
usize = sa2u(size + extra, alignment, NULL);
if (usize == 0)
return (NULL);
ret = ipalloc(usize, alignment, zero);
if (ret == NULL) {
if (extra == 0)
return (NULL);
/* Try again, without extra this time. */
usize = sa2u(size, alignment, NULL);
if (usize == 0)
return (NULL);
ret = ipalloc(usize, alignment, zero);
if (ret == NULL)
return (NULL);
}
/*
* Copy at most size bytes (not size+extra), since the caller
* has no expectation that the extra bytes will be reliably
* preserved.
*/
copysize = (size < oldsize) ? size : oldsize;
memcpy(ret, ptr, copysize);
idalloc(ptr);
return (ret);
}
if (no_move) {
if (size <= arena_maxclass) {
return (arena_ralloc_no_move(ptr, oldsize, size,
extra, zero));
} else {
return (huge_ralloc_no_move(ptr, oldsize, size,
extra));
}
} else {
if (size + extra <= arena_maxclass) {
return (arena_ralloc(ptr, oldsize, size, extra,
alignment, zero));
} else {
return (huge_ralloc(ptr, oldsize, size, extra,
alignment, zero));
}
}
}
#endif
#include "jemalloc/internal/prof.h"
#undef JEMALLOC_H_INLINES
/******************************************************************************/


@ -0,0 +1,108 @@
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
#ifndef JEMALLOC_ENABLE_INLINE
void mb_write(void);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MB_C_))
#ifdef __i386__
/*
* According to the Intel Architecture Software Developer's Manual, current
* processors execute instructions in order from the perspective of other
* processors in a multiprocessor system, but 1) Intel reserves the right to
* change that, and 2) the compiler's optimizer could re-order instructions if
* there weren't some form of barrier. Therefore, even if running on an
* architecture that does not need memory barriers (everything through at least
* i686), an "optimizer barrier" is necessary.
*/
JEMALLOC_INLINE void
mb_write(void)
{
# if 0
/* This is a true memory barrier. */
asm volatile ("pusha;"
"xor %%eax,%%eax;"
"cpuid;"
"popa;"
: /* Outputs. */
: /* Inputs. */
: "memory" /* Clobbers. */
);
#else
/*
* This is hopefully enough to keep the compiler from reordering
* instructions around this one.
*/
asm volatile ("nop;"
: /* Outputs. */
: /* Inputs. */
: "memory" /* Clobbers. */
);
#endif
}
#elif (defined(__amd64__) || defined(__x86_64__))
JEMALLOC_INLINE void
mb_write(void)
{
asm volatile ("sfence"
: /* Outputs. */
: /* Inputs. */
: "memory" /* Clobbers. */
);
}
#elif defined(__powerpc__)
JEMALLOC_INLINE void
mb_write(void)
{
asm volatile ("eieio"
: /* Outputs. */
: /* Inputs. */
: "memory" /* Clobbers. */
);
}
#elif defined(__sparc64__)
JEMALLOC_INLINE void
mb_write(void)
{
asm volatile ("membar #StoreStore"
: /* Outputs. */
: /* Inputs. */
: "memory" /* Clobbers. */
);
}
#else
/*
* This is much slower than a simple memory barrier, but the semantics of mutex
* unlock make this work.
*/
JEMALLOC_INLINE void
mb_write(void)
{
malloc_mutex_t mtx;
malloc_mutex_init(&mtx);
malloc_mutex_lock(&mtx);
malloc_mutex_unlock(&mtx);
}
#endif
#endif
#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/


@ -0,0 +1,86 @@
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES
#ifdef JEMALLOC_OSSPIN
typedef OSSpinLock malloc_mutex_t;
#else
typedef pthread_mutex_t malloc_mutex_t;
#endif
#ifdef PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP
# define MALLOC_MUTEX_INITIALIZER PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP
#else
# define MALLOC_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER
#endif
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
#ifdef JEMALLOC_LAZY_LOCK
extern bool isthreaded;
#else
# define isthreaded true
#endif
bool malloc_mutex_init(malloc_mutex_t *mutex);
void malloc_mutex_destroy(malloc_mutex_t *mutex);
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
#ifndef JEMALLOC_ENABLE_INLINE
void malloc_mutex_lock(malloc_mutex_t *mutex);
bool malloc_mutex_trylock(malloc_mutex_t *mutex);
void malloc_mutex_unlock(malloc_mutex_t *mutex);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MUTEX_C_))
JEMALLOC_INLINE void
malloc_mutex_lock(malloc_mutex_t *mutex)
{
if (isthreaded) {
#ifdef JEMALLOC_OSSPIN
OSSpinLockLock(mutex);
#else
pthread_mutex_lock(mutex);
#endif
}
}
JEMALLOC_INLINE bool
malloc_mutex_trylock(malloc_mutex_t *mutex)
{
if (isthreaded) {
#ifdef JEMALLOC_OSSPIN
return (OSSpinLockTry(mutex) == false);
#else
return (pthread_mutex_trylock(mutex) != 0);
#endif
} else
return (false);
}
JEMALLOC_INLINE void
malloc_mutex_unlock(malloc_mutex_t *mutex)
{
if (isthreaded) {
#ifdef JEMALLOC_OSSPIN
OSSpinLockUnlock(mutex);
#else
pthread_mutex_unlock(mutex);
#endif
}
}
#endif
#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/

View File

@ -0,0 +1,195 @@
#define arena_bin_index JEMALLOC_N(arena_bin_index)
#define arena_boot JEMALLOC_N(arena_boot)
#define arena_dalloc JEMALLOC_N(arena_dalloc)
#define arena_dalloc_bin JEMALLOC_N(arena_dalloc_bin)
#define arena_dalloc_large JEMALLOC_N(arena_dalloc_large)
#define arena_malloc JEMALLOC_N(arena_malloc)
#define arena_malloc_large JEMALLOC_N(arena_malloc_large)
#define arena_malloc_small JEMALLOC_N(arena_malloc_small)
#define arena_new JEMALLOC_N(arena_new)
#define arena_palloc JEMALLOC_N(arena_palloc)
#define arena_prof_accum JEMALLOC_N(arena_prof_accum)
#define arena_prof_ctx_get JEMALLOC_N(arena_prof_ctx_get)
#define arena_prof_ctx_set JEMALLOC_N(arena_prof_ctx_set)
#define arena_prof_promoted JEMALLOC_N(arena_prof_promoted)
#define arena_purge_all JEMALLOC_N(arena_purge_all)
#define arena_ralloc JEMALLOC_N(arena_ralloc)
#define arena_ralloc_no_move JEMALLOC_N(arena_ralloc_no_move)
#define arena_run_regind JEMALLOC_N(arena_run_regind)
#define arena_salloc JEMALLOC_N(arena_salloc)
#define arena_salloc_demote JEMALLOC_N(arena_salloc_demote)
#define arena_stats_merge JEMALLOC_N(arena_stats_merge)
#define arena_tcache_fill_small JEMALLOC_N(arena_tcache_fill_small)
#define arenas_bin_i_index JEMALLOC_N(arenas_bin_i_index)
#define arenas_extend JEMALLOC_N(arenas_extend)
#define arenas_lrun_i_index JEMALLOC_N(arenas_lrun_i_index)
#define atomic_add_uint32 JEMALLOC_N(atomic_add_uint32)
#define atomic_add_uint64 JEMALLOC_N(atomic_add_uint64)
#define atomic_sub_uint32 JEMALLOC_N(atomic_sub_uint32)
#define atomic_sub_uint64 JEMALLOC_N(atomic_sub_uint64)
#define base_alloc JEMALLOC_N(base_alloc)
#define base_boot JEMALLOC_N(base_boot)
#define base_node_alloc JEMALLOC_N(base_node_alloc)
#define base_node_dealloc JEMALLOC_N(base_node_dealloc)
#define bitmap_full JEMALLOC_N(bitmap_full)
#define bitmap_get JEMALLOC_N(bitmap_get)
#define bitmap_info_init JEMALLOC_N(bitmap_info_init)
#define bitmap_info_ngroups JEMALLOC_N(bitmap_info_ngroups)
#define bitmap_init JEMALLOC_N(bitmap_init)
#define bitmap_set JEMALLOC_N(bitmap_set)
#define bitmap_sfu JEMALLOC_N(bitmap_sfu)
#define bitmap_size JEMALLOC_N(bitmap_size)
#define bitmap_unset JEMALLOC_N(bitmap_unset)
#define bt_init JEMALLOC_N(bt_init)
#define buferror JEMALLOC_N(buferror)
#define choose_arena JEMALLOC_N(choose_arena)
#define choose_arena_hard JEMALLOC_N(choose_arena_hard)
#define chunk_alloc JEMALLOC_N(chunk_alloc)
#define chunk_alloc_dss JEMALLOC_N(chunk_alloc_dss)
#define chunk_alloc_mmap JEMALLOC_N(chunk_alloc_mmap)
#define chunk_alloc_mmap_noreserve JEMALLOC_N(chunk_alloc_mmap_noreserve)
#define chunk_alloc_swap JEMALLOC_N(chunk_alloc_swap)
#define chunk_boot JEMALLOC_N(chunk_boot)
#define chunk_dealloc JEMALLOC_N(chunk_dealloc)
#define chunk_dealloc_dss JEMALLOC_N(chunk_dealloc_dss)
#define chunk_dealloc_mmap JEMALLOC_N(chunk_dealloc_mmap)
#define chunk_dealloc_swap JEMALLOC_N(chunk_dealloc_swap)
#define chunk_dss_boot JEMALLOC_N(chunk_dss_boot)
#define chunk_in_dss JEMALLOC_N(chunk_in_dss)
#define chunk_in_swap JEMALLOC_N(chunk_in_swap)
#define chunk_mmap_boot JEMALLOC_N(chunk_mmap_boot)
#define chunk_swap_boot JEMALLOC_N(chunk_swap_boot)
#define chunk_swap_enable JEMALLOC_N(chunk_swap_enable)
#define ckh_bucket_search JEMALLOC_N(ckh_bucket_search)
#define ckh_count JEMALLOC_N(ckh_count)
#define ckh_delete JEMALLOC_N(ckh_delete)
#define ckh_evict_reloc_insert JEMALLOC_N(ckh_evict_reloc_insert)
#define ckh_insert JEMALLOC_N(ckh_insert)
#define ckh_isearch JEMALLOC_N(ckh_isearch)
#define ckh_iter JEMALLOC_N(ckh_iter)
#define ckh_new JEMALLOC_N(ckh_new)
#define ckh_pointer_hash JEMALLOC_N(ckh_pointer_hash)
#define ckh_pointer_keycomp JEMALLOC_N(ckh_pointer_keycomp)
#define ckh_rebuild JEMALLOC_N(ckh_rebuild)
#define ckh_remove JEMALLOC_N(ckh_remove)
#define ckh_search JEMALLOC_N(ckh_search)
#define ckh_string_hash JEMALLOC_N(ckh_string_hash)
#define ckh_string_keycomp JEMALLOC_N(ckh_string_keycomp)
#define ckh_try_bucket_insert JEMALLOC_N(ckh_try_bucket_insert)
#define ckh_try_insert JEMALLOC_N(ckh_try_insert)
#define create_zone JEMALLOC_N(create_zone)
#define ctl_boot JEMALLOC_N(ctl_boot)
#define ctl_bymib JEMALLOC_N(ctl_bymib)
#define ctl_byname JEMALLOC_N(ctl_byname)
#define ctl_nametomib JEMALLOC_N(ctl_nametomib)
#define extent_tree_ad_first JEMALLOC_N(extent_tree_ad_first)
#define extent_tree_ad_insert JEMALLOC_N(extent_tree_ad_insert)
#define extent_tree_ad_iter JEMALLOC_N(extent_tree_ad_iter)
#define extent_tree_ad_iter_recurse JEMALLOC_N(extent_tree_ad_iter_recurse)
#define extent_tree_ad_iter_start JEMALLOC_N(extent_tree_ad_iter_start)
#define extent_tree_ad_last JEMALLOC_N(extent_tree_ad_last)
#define extent_tree_ad_new JEMALLOC_N(extent_tree_ad_new)
#define extent_tree_ad_next JEMALLOC_N(extent_tree_ad_next)
#define extent_tree_ad_nsearch JEMALLOC_N(extent_tree_ad_nsearch)
#define extent_tree_ad_prev JEMALLOC_N(extent_tree_ad_prev)
#define extent_tree_ad_psearch JEMALLOC_N(extent_tree_ad_psearch)
#define extent_tree_ad_remove JEMALLOC_N(extent_tree_ad_remove)
#define extent_tree_ad_reverse_iter JEMALLOC_N(extent_tree_ad_reverse_iter)
#define extent_tree_ad_reverse_iter_recurse JEMALLOC_N(extent_tree_ad_reverse_iter_recurse)
#define extent_tree_ad_reverse_iter_start JEMALLOC_N(extent_tree_ad_reverse_iter_start)
#define extent_tree_ad_search JEMALLOC_N(extent_tree_ad_search)
#define extent_tree_szad_first JEMALLOC_N(extent_tree_szad_first)
#define extent_tree_szad_insert JEMALLOC_N(extent_tree_szad_insert)
#define extent_tree_szad_iter JEMALLOC_N(extent_tree_szad_iter)
#define extent_tree_szad_iter_recurse JEMALLOC_N(extent_tree_szad_iter_recurse)
#define extent_tree_szad_iter_start JEMALLOC_N(extent_tree_szad_iter_start)
#define extent_tree_szad_last JEMALLOC_N(extent_tree_szad_last)
#define extent_tree_szad_new JEMALLOC_N(extent_tree_szad_new)
#define extent_tree_szad_next JEMALLOC_N(extent_tree_szad_next)
#define extent_tree_szad_nsearch JEMALLOC_N(extent_tree_szad_nsearch)
#define extent_tree_szad_prev JEMALLOC_N(extent_tree_szad_prev)
#define extent_tree_szad_psearch JEMALLOC_N(extent_tree_szad_psearch)
#define extent_tree_szad_remove JEMALLOC_N(extent_tree_szad_remove)
#define extent_tree_szad_reverse_iter JEMALLOC_N(extent_tree_szad_reverse_iter)
#define extent_tree_szad_reverse_iter_recurse JEMALLOC_N(extent_tree_szad_reverse_iter_recurse)
#define extent_tree_szad_reverse_iter_start JEMALLOC_N(extent_tree_szad_reverse_iter_start)
#define extent_tree_szad_search JEMALLOC_N(extent_tree_szad_search)
#define hash JEMALLOC_N(hash)
#define huge_boot JEMALLOC_N(huge_boot)
#define huge_dalloc JEMALLOC_N(huge_dalloc)
#define huge_malloc JEMALLOC_N(huge_malloc)
#define huge_palloc JEMALLOC_N(huge_palloc)
#define huge_prof_ctx_get JEMALLOC_N(huge_prof_ctx_get)
#define huge_prof_ctx_set JEMALLOC_N(huge_prof_ctx_set)
#define huge_ralloc JEMALLOC_N(huge_ralloc)
#define huge_ralloc_no_move JEMALLOC_N(huge_ralloc_no_move)
#define huge_salloc JEMALLOC_N(huge_salloc)
#define iallocm JEMALLOC_N(iallocm)
#define icalloc JEMALLOC_N(icalloc)
#define idalloc JEMALLOC_N(idalloc)
#define imalloc JEMALLOC_N(imalloc)
#define ipalloc JEMALLOC_N(ipalloc)
#define iralloc JEMALLOC_N(iralloc)
#define isalloc JEMALLOC_N(isalloc)
#define ivsalloc JEMALLOC_N(ivsalloc)
#define jemalloc_darwin_init JEMALLOC_N(jemalloc_darwin_init)
#define jemalloc_postfork JEMALLOC_N(jemalloc_postfork)
#define jemalloc_prefork JEMALLOC_N(jemalloc_prefork)
#define malloc_cprintf JEMALLOC_N(malloc_cprintf)
#define malloc_mutex_destroy JEMALLOC_N(malloc_mutex_destroy)
#define malloc_mutex_init JEMALLOC_N(malloc_mutex_init)
#define malloc_mutex_lock JEMALLOC_N(malloc_mutex_lock)
#define malloc_mutex_trylock JEMALLOC_N(malloc_mutex_trylock)
#define malloc_mutex_unlock JEMALLOC_N(malloc_mutex_unlock)
#define malloc_printf JEMALLOC_N(malloc_printf)
#define malloc_write JEMALLOC_N(malloc_write)
#define mb_write JEMALLOC_N(mb_write)
#define pow2_ceil JEMALLOC_N(pow2_ceil)
#define prof_backtrace JEMALLOC_N(prof_backtrace)
#define prof_boot0 JEMALLOC_N(prof_boot0)
#define prof_boot1 JEMALLOC_N(prof_boot1)
#define prof_boot2 JEMALLOC_N(prof_boot2)
#define prof_ctx_get JEMALLOC_N(prof_ctx_get)
#define prof_ctx_set JEMALLOC_N(prof_ctx_set)
#define prof_free JEMALLOC_N(prof_free)
#define prof_gdump JEMALLOC_N(prof_gdump)
#define prof_idump JEMALLOC_N(prof_idump)
#define prof_lookup JEMALLOC_N(prof_lookup)
#define prof_malloc JEMALLOC_N(prof_malloc)
#define prof_mdump JEMALLOC_N(prof_mdump)
#define prof_realloc JEMALLOC_N(prof_realloc)
#define prof_sample_accum_update JEMALLOC_N(prof_sample_accum_update)
#define prof_sample_threshold_update JEMALLOC_N(prof_sample_threshold_update)
#define prof_tdata_init JEMALLOC_N(prof_tdata_init)
#define pthread_create JEMALLOC_N(pthread_create)
#define rtree_get JEMALLOC_N(rtree_get)
#define rtree_get_locked JEMALLOC_N(rtree_get_locked)
#define rtree_new JEMALLOC_N(rtree_new)
#define rtree_set JEMALLOC_N(rtree_set)
#define s2u JEMALLOC_N(s2u)
#define sa2u JEMALLOC_N(sa2u)
#define stats_arenas_i_bins_j_index JEMALLOC_N(stats_arenas_i_bins_j_index)
#define stats_arenas_i_index JEMALLOC_N(stats_arenas_i_index)
#define stats_arenas_i_lruns_j_index JEMALLOC_N(stats_arenas_i_lruns_j_index)
#define stats_cactive_add JEMALLOC_N(stats_cactive_add)
#define stats_cactive_get JEMALLOC_N(stats_cactive_get)
#define stats_cactive_sub JEMALLOC_N(stats_cactive_sub)
#define stats_print JEMALLOC_N(stats_print)
#define szone2ozone JEMALLOC_N(szone2ozone)
#define tcache_alloc_easy JEMALLOC_N(tcache_alloc_easy)
#define tcache_alloc_large JEMALLOC_N(tcache_alloc_large)
#define tcache_alloc_small JEMALLOC_N(tcache_alloc_small)
#define tcache_alloc_small_hard JEMALLOC_N(tcache_alloc_small_hard)
#define tcache_bin_flush_large JEMALLOC_N(tcache_bin_flush_large)
#define tcache_bin_flush_small JEMALLOC_N(tcache_bin_flush_small)
#define tcache_boot JEMALLOC_N(tcache_boot)
#define tcache_create JEMALLOC_N(tcache_create)
#define tcache_dalloc_large JEMALLOC_N(tcache_dalloc_large)
#define tcache_dalloc_small JEMALLOC_N(tcache_dalloc_small)
#define tcache_destroy JEMALLOC_N(tcache_destroy)
#define tcache_event JEMALLOC_N(tcache_event)
#define tcache_get JEMALLOC_N(tcache_get)
#define tcache_stats_merge JEMALLOC_N(tcache_stats_merge)
#define thread_allocated_get JEMALLOC_N(thread_allocated_get)
#define thread_allocated_get_hard JEMALLOC_N(thread_allocated_get_hard)
#define u2s JEMALLOC_N(u2s)


@ -0,0 +1,547 @@
#ifdef JEMALLOC_PROF
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES
typedef struct prof_bt_s prof_bt_t;
typedef struct prof_cnt_s prof_cnt_t;
typedef struct prof_thr_cnt_s prof_thr_cnt_t;
typedef struct prof_ctx_s prof_ctx_t;
typedef struct prof_tdata_s prof_tdata_t;
/* Option defaults. */
#define PROF_PREFIX_DEFAULT "jeprof"
#define LG_PROF_BT_MAX_DEFAULT 7
#define LG_PROF_SAMPLE_DEFAULT 0
#define LG_PROF_INTERVAL_DEFAULT -1
#define LG_PROF_TCMAX_DEFAULT -1
/*
* Hard limit on stack backtrace depth. Note that the version of
* prof_backtrace() that is based on __builtin_return_address() necessarily has
* a hard-coded number of backtrace frame handlers.
*/
#if (defined(JEMALLOC_PROF_LIBGCC) || defined(JEMALLOC_PROF_LIBUNWIND))
# define LG_PROF_BT_MAX ((ZU(1) << (LG_SIZEOF_PTR+3)) - 1)
#else
# define LG_PROF_BT_MAX 7 /* >= LG_PROF_BT_MAX_DEFAULT */
#endif
#define PROF_BT_MAX (1U << LG_PROF_BT_MAX)
/* Initial hash table size. */
#define PROF_CKH_MINITEMS 64
/* Size of memory buffer to use when writing dump files. */
#define PROF_DUMP_BUF_SIZE 65536
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
struct prof_bt_s {
/* Backtrace, stored as len program counters. */
void **vec;
unsigned len;
};
#ifdef JEMALLOC_PROF_LIBGCC
/* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */
typedef struct {
prof_bt_t *bt;
unsigned nignore;
unsigned max;
} prof_unwind_data_t;
#endif
struct prof_cnt_s {
/*
* Profiling counters. An allocation/deallocation pair can operate on
* different prof_thr_cnt_t objects that are linked into the same
* prof_ctx_t cnts_ql, so it is possible for the cur* counters to go
* negative. In principle it is possible for the *bytes counters to
* overflow/underflow, but a general solution would require something
* like 128-bit counters; this implementation doesn't bother to solve
* that problem.
*/
int64_t curobjs;
int64_t curbytes;
uint64_t accumobjs;
uint64_t accumbytes;
};
struct prof_thr_cnt_s {
/* Linkage into prof_ctx_t's cnts_ql. */
ql_elm(prof_thr_cnt_t) cnts_link;
/* Linkage into thread's LRU. */
ql_elm(prof_thr_cnt_t) lru_link;
/*
* Associated context. If a thread frees an object that it did not
* allocate, it is possible that the context is not cached in the
* thread's hash table, in which case it must be able to look up the
* context, insert a new prof_thr_cnt_t into the thread's hash table,
* and link it into the prof_ctx_t's cnts_ql.
*/
prof_ctx_t *ctx;
/*
* Threads use memory barriers to update the counters. Since there is
* only ever one writer, the only challenge is for the reader to get a
* consistent read of the counters.
*
* The writer uses this series of operations:
*
* 1) Increment epoch to an odd number.
* 2) Update counters.
* 3) Increment epoch to an even number.
*
* The reader must assure 1) that the epoch is even while it reads the
* counters, and 2) that the epoch doesn't change between the time it
* starts and finishes reading the counters.
*/
unsigned epoch;
/* Profiling counters. */
prof_cnt_t cnts;
};
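/*
 * Reader sketch for the epoch protocol described above (hypothetical helper,
 * for illustration only; a real reader would also need the same memory
 * barriers the writer issues via mb_write()): retry until the epoch is even
 * and unchanged across the copy, which yields a consistent snapshot.
 */
#if 0
JEMALLOC_INLINE prof_cnt_t
prof_thr_cnt_read(const prof_thr_cnt_t *cnt)
{
prof_cnt_t snap;
unsigned epoch0;

do {
epoch0 = cnt->epoch;
snap = cnt->cnts;
} while ((epoch0 & 1) != 0 || epoch0 != cnt->epoch);
return (snap);
}
#endif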
struct prof_ctx_s {
/* Associated backtrace. */
prof_bt_t *bt;
/* Protects cnt_merged and cnts_ql. */
malloc_mutex_t lock;
/* Temporary storage for summation during dump. */
prof_cnt_t cnt_summed;
/* When threads exit, they merge their stats into cnt_merged. */
prof_cnt_t cnt_merged;
/*
* List of profile counters, one for each thread that has allocated in
* this context.
*/
ql_head(prof_thr_cnt_t) cnts_ql;
};
struct prof_tdata_s {
/*
* Hash of (prof_bt_t *)-->(prof_thr_cnt_t *). Each thread keeps a
* cache of backtraces, with associated thread-specific prof_thr_cnt_t
* objects. Other threads may read the prof_thr_cnt_t contents, but no
* others will ever write them.
*
* Upon thread exit, the thread must merge all the prof_thr_cnt_t
* counter data into the associated prof_ctx_t objects, and unlink/free
* the prof_thr_cnt_t objects.
*/
ckh_t bt2cnt;
/* LRU for contents of bt2cnt. */
ql_head(prof_thr_cnt_t) lru_ql;
/* Backtrace vector, used for calls to prof_backtrace(). */
void **vec;
/* Sampling state. */
uint64_t prn_state;
uint64_t threshold;
uint64_t accum;
};
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
extern bool opt_prof;
/*
* Even if opt_prof is true, sampling can be temporarily disabled by setting
* opt_prof_active to false. No locking is used when updating opt_prof_active,
* so there are no guarantees regarding how long it will take for all threads
* to notice state changes.
*/
extern bool opt_prof_active;
extern size_t opt_lg_prof_bt_max; /* Maximum backtrace depth. */
extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */
extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */
extern bool opt_prof_gdump; /* High-water memory dumping. */
extern bool opt_prof_leak; /* Dump leak summary at exit. */
extern bool opt_prof_accum; /* Report cumulative bytes. */
extern ssize_t opt_lg_prof_tcmax; /* lg(max per thread backtrace cache) */
extern char opt_prof_prefix[PATH_MAX + 1];
/*
* Profile dump interval, measured in bytes allocated. Each arena triggers a
* profile dump when it reaches this threshold. The effect is that the
* interval between profile dumps averages prof_interval, though the actual
* interval between dumps will tend to be sporadic, and the interval will be a
* maximum of approximately (prof_interval * narenas).
*/
extern uint64_t prof_interval;
/*
* If true, promote small sampled objects to large objects, since small run
* headers do not have embedded profile context pointers.
*/
extern bool prof_promote;
/* (1U << opt_lg_prof_bt_max). */
extern unsigned prof_bt_max;
/* Thread-specific backtrace cache, used to reduce bt2ctx contention. */
#ifndef NO_TLS
extern __thread prof_tdata_t *prof_tdata_tls
JEMALLOC_ATTR(tls_model("initial-exec"));
# define PROF_TCACHE_GET() prof_tdata_tls
# define PROF_TCACHE_SET(v) do { \
prof_tdata_tls = (v); \
pthread_setspecific(prof_tdata_tsd, (void *)(v)); \
} while (0)
#else
# define PROF_TCACHE_GET() \
((prof_tdata_t *)pthread_getspecific(prof_tdata_tsd))
# define PROF_TCACHE_SET(v) do { \
pthread_setspecific(prof_tdata_tsd, (void *)(v)); \
} while (0)
#endif
/*
* Same contents as prof_tdata_tls, but initialized such that the TSD destructor is
* called when a thread exits, so that prof_tdata_tls contents can be merged,
* unlinked, and deallocated.
*/
extern pthread_key_t prof_tdata_tsd;
void bt_init(prof_bt_t *bt, void **vec);
void prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max);
prof_thr_cnt_t *prof_lookup(prof_bt_t *bt);
void prof_idump(void);
bool prof_mdump(const char *filename);
void prof_gdump(void);
prof_tdata_t *prof_tdata_init(void);
void prof_boot0(void);
void prof_boot1(void);
bool prof_boot2(void);
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
#define PROF_ALLOC_PREP(nignore, size, ret) do { \
prof_tdata_t *prof_tdata; \
prof_bt_t bt; \
\
assert(size == s2u(size)); \
\
prof_tdata = PROF_TCACHE_GET(); \
if (prof_tdata == NULL) { \
prof_tdata = prof_tdata_init(); \
if (prof_tdata == NULL) { \
ret = NULL; \
break; \
} \
} \
\
if (opt_prof_active == false) { \
/* Sampling is currently inactive, so avoid sampling. */\
ret = (prof_thr_cnt_t *)(uintptr_t)1U; \
} else if (opt_lg_prof_sample == 0) { \
/* Don't bother with sampling logic, since sampling */\
/* interval is 1. */\
bt_init(&bt, prof_tdata->vec); \
prof_backtrace(&bt, nignore, prof_bt_max); \
ret = prof_lookup(&bt); \
} else { \
if (prof_tdata->threshold == 0) { \
/* Initialize. Seed the prng differently for */\
/* each thread. */\
prof_tdata->prn_state = \
(uint64_t)(uintptr_t)&size; \
prof_sample_threshold_update(prof_tdata); \
} \
\
/* Determine whether to capture a backtrace based on */\
/* whether size is enough for prof_accum to reach */\
/* prof_tdata->threshold. However, delay updating */\
/* these variables until prof_{m,re}alloc(), because */\
/* we don't know for sure that the allocation will */\
/* succeed. */\
/* */\
/* Use subtraction rather than addition to avoid */\
/* potential integer overflow. */\
if (size >= prof_tdata->threshold - \
prof_tdata->accum) { \
bt_init(&bt, prof_tdata->vec); \
prof_backtrace(&bt, nignore, prof_bt_max); \
ret = prof_lookup(&bt); \
} else \
ret = (prof_thr_cnt_t *)(uintptr_t)1U; \
} \
} while (0)
#ifndef JEMALLOC_ENABLE_INLINE
void prof_sample_threshold_update(prof_tdata_t *prof_tdata);
prof_ctx_t *prof_ctx_get(const void *ptr);
void prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
bool prof_sample_accum_update(size_t size);
void prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt);
void prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
size_t old_size, prof_ctx_t *old_ctx);
void prof_free(const void *ptr, size_t size);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_))
JEMALLOC_INLINE void
prof_sample_threshold_update(prof_tdata_t *prof_tdata)
{
uint64_t r;
double u;
/*
* Compute sample threshold as a geometrically distributed random
* variable with mean (2^opt_lg_prof_sample).
*
* __ __
* | log(u) | 1
* prof_tdata->threshold = | -------- |, where p = -------------------
* | log(1-p) | opt_lg_prof_sample
* 2
*
* For more information on the math, see:
*
* Non-Uniform Random Variate Generation
* Luc Devroye
* Springer-Verlag, New York, 1986
* pp 500
* (http://cg.scs.carleton.ca/~luc/rnbookindex.html)
*/
prn64(r, 53, prof_tdata->prn_state,
(uint64_t)6364136223846793005LLU, (uint64_t)1442695040888963407LLU);
u = (double)r * (1.0/9007199254740992.0L);
prof_tdata->threshold = (uint64_t)(log(u) /
log(1.0 - (1.0 / (double)((uint64_t)1U << opt_lg_prof_sample))))
+ (uint64_t)1U;
}
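/*
 * Numeric illustration (hypothetical configuration): with
 * opt_lg_prof_sample == 19, p == 1/2^19, so the thresholds drawn above are
 * geometrically distributed with mean 2^19 bytes; on average one backtrace
 * is captured per 512 KiB allocated.
 */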
JEMALLOC_INLINE prof_ctx_t *
prof_ctx_get(const void *ptr)
{
prof_ctx_t *ret;
arena_chunk_t *chunk;
assert(ptr != NULL);
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
if (chunk != ptr) {
/* Region. */
dassert(chunk->arena->magic == ARENA_MAGIC);
ret = arena_prof_ctx_get(ptr);
} else
ret = huge_prof_ctx_get(ptr);
return (ret);
}
JEMALLOC_INLINE void
prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
{
arena_chunk_t *chunk;
assert(ptr != NULL);
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
if (chunk != ptr) {
/* Region. */
dassert(chunk->arena->magic == ARENA_MAGIC);
arena_prof_ctx_set(ptr, ctx);
} else
huge_prof_ctx_set(ptr, ctx);
}
JEMALLOC_INLINE bool
prof_sample_accum_update(size_t size)
{
prof_tdata_t *prof_tdata;
/* Sampling logic is unnecessary if the interval is 1. */
assert(opt_lg_prof_sample != 0);
prof_tdata = PROF_TCACHE_GET();
assert(prof_tdata != NULL);
/* Take care to avoid integer overflow. */
if (size >= prof_tdata->threshold - prof_tdata->accum) {
prof_tdata->accum -= (prof_tdata->threshold - size);
/* Compute new sample threshold. */
prof_sample_threshold_update(prof_tdata);
while (prof_tdata->accum >= prof_tdata->threshold) {
prof_tdata->accum -= prof_tdata->threshold;
prof_sample_threshold_update(prof_tdata);
}
return (false);
} else {
prof_tdata->accum += size;
return (true);
}
}
JEMALLOC_INLINE void
prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt)
{
assert(ptr != NULL);
assert(size == isalloc(ptr));
if (opt_lg_prof_sample != 0) {
if (prof_sample_accum_update(size)) {
/*
* Don't sample. For malloc()-like allocation, it is
* always possible to tell in advance how large an
* object's usable size will be, so there should never
* be a difference between the size passed to
* PROF_ALLOC_PREP() and prof_malloc().
*/
assert((uintptr_t)cnt == (uintptr_t)1U);
}
}
if ((uintptr_t)cnt > (uintptr_t)1U) {
prof_ctx_set(ptr, cnt->ctx);
cnt->epoch++;
/*********/
mb_write();
/*********/
cnt->cnts.curobjs++;
cnt->cnts.curbytes += size;
if (opt_prof_accum) {
cnt->cnts.accumobjs++;
cnt->cnts.accumbytes += size;
}
/*********/
mb_write();
/*********/
cnt->epoch++;
/*********/
mb_write();
/*********/
} else
prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U);
}
JEMALLOC_INLINE void
prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
size_t old_size, prof_ctx_t *old_ctx)
{
prof_thr_cnt_t *told_cnt;
assert(ptr != NULL || (uintptr_t)cnt <= (uintptr_t)1U);
if (ptr != NULL) {
assert(size == isalloc(ptr));
if (opt_lg_prof_sample != 0) {
if (prof_sample_accum_update(size)) {
/*
* Don't sample. The size passed to
* PROF_ALLOC_PREP() was larger than what
* actually got allocated, so a backtrace was
* captured for this allocation, even though
* its actual size was insufficient to cross
* the sample threshold.
*/
cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
}
}
}
if ((uintptr_t)old_ctx > (uintptr_t)1U) {
told_cnt = prof_lookup(old_ctx->bt);
if (told_cnt == NULL) {
/*
* It's too late to propagate OOM for this realloc(),
* so operate directly on old_cnt->ctx->cnt_merged.
*/
malloc_mutex_lock(&old_ctx->lock);
old_ctx->cnt_merged.curobjs--;
old_ctx->cnt_merged.curbytes -= old_size;
malloc_mutex_unlock(&old_ctx->lock);
told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
}
} else
told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
if ((uintptr_t)told_cnt > (uintptr_t)1U)
told_cnt->epoch++;
if ((uintptr_t)cnt > (uintptr_t)1U) {
prof_ctx_set(ptr, cnt->ctx);
cnt->epoch++;
} else
prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U);
/*********/
mb_write();
/*********/
if ((uintptr_t)told_cnt > (uintptr_t)1U) {
told_cnt->cnts.curobjs--;
told_cnt->cnts.curbytes -= old_size;
}
if ((uintptr_t)cnt > (uintptr_t)1U) {
cnt->cnts.curobjs++;
cnt->cnts.curbytes += size;
if (opt_prof_accum) {
cnt->cnts.accumobjs++;
cnt->cnts.accumbytes += size;
}
}
/*********/
mb_write();
/*********/
if ((uintptr_t)told_cnt > (uintptr_t)1U)
told_cnt->epoch++;
if ((uintptr_t)cnt > (uintptr_t)1U)
cnt->epoch++;
/*********/
mb_write(); /* Not strictly necessary. */
}
JEMALLOC_INLINE void
prof_free(const void *ptr, size_t size)
{
prof_ctx_t *ctx = prof_ctx_get(ptr);
if ((uintptr_t)ctx > (uintptr_t)1) {
assert(size == isalloc(ptr));
prof_thr_cnt_t *tcnt = prof_lookup(ctx->bt);
if (tcnt != NULL) {
tcnt->epoch++;
/*********/
mb_write();
/*********/
tcnt->cnts.curobjs--;
tcnt->cnts.curbytes -= size;
/*********/
mb_write();
/*********/
tcnt->epoch++;
/*********/
mb_write();
/*********/
} else {
/*
* OOM during free() cannot be propagated, so operate
* directly on cnt->ctx->cnt_merged.
*/
malloc_mutex_lock(&ctx->lock);
ctx->cnt_merged.curobjs--;
ctx->cnt_merged.curbytes -= size;
malloc_mutex_unlock(&ctx->lock);
}
}
}
#endif
#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/
#endif /* JEMALLOC_PROF */


@ -0,0 +1,83 @@
/*
* List definitions.
*/
#define ql_head(a_type) \
struct { \
a_type *qlh_first; \
}
#define ql_head_initializer(a_head) {NULL}
#define ql_elm(a_type) qr(a_type)
/* List functions. */
#define ql_new(a_head) do { \
(a_head)->qlh_first = NULL; \
} while (0)
#define ql_elm_new(a_elm, a_field) qr_new((a_elm), a_field)
#define ql_first(a_head) ((a_head)->qlh_first)
#define ql_last(a_head, a_field) \
((ql_first(a_head) != NULL) \
? qr_prev(ql_first(a_head), a_field) : NULL)
#define ql_next(a_head, a_elm, a_field) \
((ql_last(a_head, a_field) != (a_elm)) \
? qr_next((a_elm), a_field) : NULL)
#define ql_prev(a_head, a_elm, a_field) \
((ql_first(a_head) != (a_elm)) ? qr_prev((a_elm), a_field) \
: NULL)
#define ql_before_insert(a_head, a_qlelm, a_elm, a_field) do { \
qr_before_insert((a_qlelm), (a_elm), a_field); \
if (ql_first(a_head) == (a_qlelm)) { \
ql_first(a_head) = (a_elm); \
} \
} while (0)
#define ql_after_insert(a_qlelm, a_elm, a_field) \
qr_after_insert((a_qlelm), (a_elm), a_field)
#define ql_head_insert(a_head, a_elm, a_field) do { \
if (ql_first(a_head) != NULL) { \
qr_before_insert(ql_first(a_head), (a_elm), a_field); \
} \
ql_first(a_head) = (a_elm); \
} while (0)
#define ql_tail_insert(a_head, a_elm, a_field) do { \
if (ql_first(a_head) != NULL) { \
qr_before_insert(ql_first(a_head), (a_elm), a_field); \
} \
ql_first(a_head) = qr_next((a_elm), a_field); \
} while (0)
#define ql_remove(a_head, a_elm, a_field) do { \
if (ql_first(a_head) == (a_elm)) { \
ql_first(a_head) = qr_next(ql_first(a_head), a_field); \
} \
if (ql_first(a_head) != (a_elm)) { \
qr_remove((a_elm), a_field); \
} else { \
ql_first(a_head) = NULL; \
} \
} while (0)
#define ql_head_remove(a_head, a_type, a_field) do { \
a_type *t = ql_first(a_head); \
ql_remove((a_head), t, a_field); \
} while (0)
#define ql_tail_remove(a_head, a_type, a_field) do { \
a_type *t = ql_last(a_head, a_field); \
ql_remove((a_head), t, a_field); \
} while (0)
#define ql_foreach(a_var, a_head, a_field) \
qr_foreach((a_var), ql_first(a_head), a_field)
#define ql_reverse_foreach(a_var, a_head, a_field) \
qr_reverse_foreach((a_var), ql_first(a_head), a_field)
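/*
 * Minimal usage sketch for the list macros above (hypothetical widget_t type,
 * for illustration only): embed the linkage in the element, initialize the
 * head, append, and iterate.
 */
#if 0
typedef struct widget_s widget_t;
struct widget_s {
int val;
ql_elm(widget_t) link; /* Embedded ring linkage (see qr.h). */
};
static ql_head(widget_t) widgets = ql_head_initializer(widgets);

static void
widget_example(widget_t *w)
{
widget_t *iter;

ql_elm_new(w, link); /* Self-link the new element. */
ql_tail_insert(&widgets, w, link);
ql_foreach(iter, &widgets, link) {
/* Visit iter->val. */
}
}
#endif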


@ -0,0 +1,67 @@
/* Ring definitions. */
#define qr(a_type) \
struct { \
a_type *qre_next; \
a_type *qre_prev; \
}
/* Ring functions. */
#define qr_new(a_qr, a_field) do { \
(a_qr)->a_field.qre_next = (a_qr); \
(a_qr)->a_field.qre_prev = (a_qr); \
} while (0)
#define qr_next(a_qr, a_field) ((a_qr)->a_field.qre_next)
#define qr_prev(a_qr, a_field) ((a_qr)->a_field.qre_prev)
#define qr_before_insert(a_qrelm, a_qr, a_field) do { \
(a_qr)->a_field.qre_prev = (a_qrelm)->a_field.qre_prev; \
(a_qr)->a_field.qre_next = (a_qrelm); \
(a_qr)->a_field.qre_prev->a_field.qre_next = (a_qr); \
(a_qrelm)->a_field.qre_prev = (a_qr); \
} while (0)
#define qr_after_insert(a_qrelm, a_qr, a_field) \
do \
{ \
(a_qr)->a_field.qre_next = (a_qrelm)->a_field.qre_next; \
(a_qr)->a_field.qre_prev = (a_qrelm); \
(a_qr)->a_field.qre_next->a_field.qre_prev = (a_qr); \
(a_qrelm)->a_field.qre_next = (a_qr); \
} while (0)
#define qr_meld(a_qr_a, a_qr_b, a_field) do { \
void *t; \
(a_qr_a)->a_field.qre_prev->a_field.qre_next = (a_qr_b); \
(a_qr_b)->a_field.qre_prev->a_field.qre_next = (a_qr_a); \
t = (a_qr_a)->a_field.qre_prev; \
(a_qr_a)->a_field.qre_prev = (a_qr_b)->a_field.qre_prev; \
(a_qr_b)->a_field.qre_prev = t; \
} while (0)
/* qr_meld() and qr_split() are functionally equivalent, so there's no need to
* have two copies of the code. */
#define qr_split(a_qr_a, a_qr_b, a_field) \
qr_meld((a_qr_a), (a_qr_b), a_field)
#define qr_remove(a_qr, a_field) do { \
(a_qr)->a_field.qre_prev->a_field.qre_next \
= (a_qr)->a_field.qre_next; \
(a_qr)->a_field.qre_next->a_field.qre_prev \
= (a_qr)->a_field.qre_prev; \
(a_qr)->a_field.qre_next = (a_qr); \
(a_qr)->a_field.qre_prev = (a_qr); \
} while (0)
#define qr_foreach(var, a_qr, a_field) \
for ((var) = (a_qr); \
(var) != NULL; \
(var) = (((var)->a_field.qre_next != (a_qr)) \
? (var)->a_field.qre_next : NULL))
#define qr_reverse_foreach(var, a_qr, a_field) \
for ((var) = ((a_qr) != NULL) ? qr_prev(a_qr, a_field) : NULL; \
(var) != NULL; \
(var) = (((var) != (a_qr)) \
? (var)->a_field.qre_prev : NULL))


@ -0,0 +1,973 @@
/*-
*******************************************************************************
*
* cpp macro implementation of left-leaning 2-3 red-black trees. Parent
* pointers are not used, and color bits are stored in the least significant
* bit of right-child pointers (if RB_COMPACT is defined), thus making node
* linkage as compact as is possible for red-black trees.
*
* Usage:
*
* #include <stdint.h>
* #include <stdbool.h>
* #define NDEBUG // (Optional, see assert(3).)
* #include <assert.h>
* #define RB_COMPACT // (Optional, embed color bits in right-child pointers.)
* #include <rb.h>
* ...
*
*******************************************************************************
*/
#ifndef RB_H_
#define RB_H_
#if 0
__FBSDID("$FreeBSD: head/lib/libc/stdlib/rb.h 204493 2010-02-28 22:57:13Z jasone $");
#endif
#ifdef RB_COMPACT
/* Node structure. */
#define rb_node(a_type) \
struct { \
a_type *rbn_left; \
a_type *rbn_right_red; \
}
#else
#define rb_node(a_type) \
struct { \
a_type *rbn_left; \
a_type *rbn_right; \
bool rbn_red; \
}
#endif
/* Root structure. */
#define rb_tree(a_type) \
struct { \
a_type *rbt_root; \
a_type rbt_nil; \
}
/* Left accessors. */
#define rbtn_left_get(a_type, a_field, a_node) \
((a_node)->a_field.rbn_left)
#define rbtn_left_set(a_type, a_field, a_node, a_left) do { \
(a_node)->a_field.rbn_left = a_left; \
} while (0)
#ifdef RB_COMPACT
/* Right accessors. */
#define rbtn_right_get(a_type, a_field, a_node) \
((a_type *) (((intptr_t) (a_node)->a_field.rbn_right_red) \
& ((ssize_t)-2)))
#define rbtn_right_set(a_type, a_field, a_node, a_right) do { \
(a_node)->a_field.rbn_right_red = (a_type *) (((uintptr_t) a_right) \
| (((uintptr_t) (a_node)->a_field.rbn_right_red) & ((size_t)1))); \
} while (0)
/* Color accessors. */
#define rbtn_red_get(a_type, a_field, a_node) \
((bool) (((uintptr_t) (a_node)->a_field.rbn_right_red) \
& ((size_t)1)))
#define rbtn_color_set(a_type, a_field, a_node, a_red) do { \
(a_node)->a_field.rbn_right_red = (a_type *) ((((intptr_t) \
(a_node)->a_field.rbn_right_red) & ((ssize_t)-2)) \
| ((ssize_t)a_red)); \
} while (0)
#define rbtn_red_set(a_type, a_field, a_node) do { \
(a_node)->a_field.rbn_right_red = (a_type *) (((uintptr_t) \
(a_node)->a_field.rbn_right_red) | ((size_t)1)); \
} while (0)
#define rbtn_black_set(a_type, a_field, a_node) do { \
(a_node)->a_field.rbn_right_red = (a_type *) (((intptr_t) \
(a_node)->a_field.rbn_right_red) & ((ssize_t)-2)); \
} while (0)
#else
/* Right accessors. */
#define rbtn_right_get(a_type, a_field, a_node) \
((a_node)->a_field.rbn_right)
#define rbtn_right_set(a_type, a_field, a_node, a_right) do { \
(a_node)->a_field.rbn_right = a_right; \
} while (0)
/* Color accessors. */
#define rbtn_red_get(a_type, a_field, a_node) \
((a_node)->a_field.rbn_red)
#define rbtn_color_set(a_type, a_field, a_node, a_red) do { \
(a_node)->a_field.rbn_red = (a_red); \
} while (0)
#define rbtn_red_set(a_type, a_field, a_node) do { \
(a_node)->a_field.rbn_red = true; \
} while (0)
#define rbtn_black_set(a_type, a_field, a_node) do { \
(a_node)->a_field.rbn_red = false; \
} while (0)
#endif
/* Node initializer. */
#define rbt_node_new(a_type, a_field, a_rbt, a_node) do { \
rbtn_left_set(a_type, a_field, (a_node), &(a_rbt)->rbt_nil); \
rbtn_right_set(a_type, a_field, (a_node), &(a_rbt)->rbt_nil); \
rbtn_red_set(a_type, a_field, (a_node)); \
} while (0)
/* Tree initializer. */
#define rb_new(a_type, a_field, a_rbt) do { \
(a_rbt)->rbt_root = &(a_rbt)->rbt_nil; \
rbt_node_new(a_type, a_field, a_rbt, &(a_rbt)->rbt_nil); \
rbtn_black_set(a_type, a_field, &(a_rbt)->rbt_nil); \
} while (0)
/* Internal utility macros. */
#define rbtn_first(a_type, a_field, a_rbt, a_root, r_node) do { \
(r_node) = (a_root); \
if ((r_node) != &(a_rbt)->rbt_nil) { \
for (; \
rbtn_left_get(a_type, a_field, (r_node)) != &(a_rbt)->rbt_nil;\
(r_node) = rbtn_left_get(a_type, a_field, (r_node))) { \
} \
} \
} while (0)
#define rbtn_last(a_type, a_field, a_rbt, a_root, r_node) do { \
(r_node) = (a_root); \
if ((r_node) != &(a_rbt)->rbt_nil) { \
for (; rbtn_right_get(a_type, a_field, (r_node)) != \
&(a_rbt)->rbt_nil; (r_node) = rbtn_right_get(a_type, a_field, \
(r_node))) { \
} \
} \
} while (0)
#define rbtn_rotate_left(a_type, a_field, a_node, r_node) do { \
(r_node) = rbtn_right_get(a_type, a_field, (a_node)); \
rbtn_right_set(a_type, a_field, (a_node), \
rbtn_left_get(a_type, a_field, (r_node))); \
rbtn_left_set(a_type, a_field, (r_node), (a_node)); \
} while (0)
#define rbtn_rotate_right(a_type, a_field, a_node, r_node) do { \
(r_node) = rbtn_left_get(a_type, a_field, (a_node)); \
rbtn_left_set(a_type, a_field, (a_node), \
rbtn_right_get(a_type, a_field, (r_node))); \
rbtn_right_set(a_type, a_field, (r_node), (a_node)); \
} while (0)
/*
* The rb_proto() macro generates function prototypes that correspond to the
* functions generated by an equivalently parameterized call to rb_gen().
*/
#define rb_proto(a_attr, a_prefix, a_rbt_type, a_type) \
a_attr void \
a_prefix##new(a_rbt_type *rbtree); \
a_attr a_type * \
a_prefix##first(a_rbt_type *rbtree); \
a_attr a_type * \
a_prefix##last(a_rbt_type *rbtree); \
a_attr a_type * \
a_prefix##next(a_rbt_type *rbtree, a_type *node); \
a_attr a_type * \
a_prefix##prev(a_rbt_type *rbtree, a_type *node); \
a_attr a_type * \
a_prefix##search(a_rbt_type *rbtree, a_type *key); \
a_attr a_type * \
a_prefix##nsearch(a_rbt_type *rbtree, a_type *key); \
a_attr a_type * \
a_prefix##psearch(a_rbt_type *rbtree, a_type *key); \
a_attr void \
a_prefix##insert(a_rbt_type *rbtree, a_type *node); \
a_attr void \
a_prefix##remove(a_rbt_type *rbtree, a_type *node); \
a_attr a_type * \
a_prefix##iter(a_rbt_type *rbtree, a_type *start, a_type *(*cb)( \
a_rbt_type *, a_type *, void *), void *arg); \
a_attr a_type * \
a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \
a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg);
/*
* The rb_gen() macro generates a type-specific red-black tree implementation,
* based on the above cpp macros.
*
* Arguments:
*
* a_attr : Function attribute for generated functions (ex: static).
* a_prefix : Prefix for generated functions (ex: ex_).
* a_rbt_type : Type for red-black tree data structure (ex: ex_t).
* a_type : Type for red-black tree node data structure (ex: ex_node_t).
* a_field : Name of red-black tree node linkage (ex: ex_link).
* a_cmp : Node comparison function name, with the following prototype:
* int (a_cmp *)(a_type *a_node, a_type *a_other);
* ^^^^^^
* or a_key
* Interpretation of comparison function return values:
* -1 : a_node < a_other
* 0 : a_node == a_other
* 1 : a_node > a_other
* In all cases, the a_node or a_key macro argument is the first
* argument to the comparison function, which makes it possible
* to write comparison functions that treat the first argument
* specially.
*
* Assuming the following setup:
*
* typedef struct ex_node_s ex_node_t;
* struct ex_node_s {
* rb_node(ex_node_t) ex_link;
* };
* typedef rb_tree(ex_node_t) ex_t;
* rb_gen(static, ex_, ex_t, ex_node_t, ex_link, ex_cmp)
*
* The following API is generated:
*
* static void
* ex_new(ex_t *extree);
* Description: Initialize a red-black tree structure.
* Args:
* extree: Pointer to an uninitialized red-black tree object.
*
* static ex_node_t *
* ex_first(ex_t *extree);
* static ex_node_t *
* ex_last(ex_t *extree);
* Description: Get the first/last node in extree.
* Args:
* extree: Pointer to an initialized red-black tree object.
* Ret: First/last node in extree, or NULL if extree is empty.
*
* static ex_node_t *
* ex_next(ex_t *extree, ex_node_t *node);
* static ex_node_t *
* ex_prev(ex_t *extree, ex_node_t *node);
* Description: Get node's successor/predecessor.
* Args:
* extree: Pointer to an initialized red-black tree object.
* node : A node in extree.
* Ret: node's successor/predecessor in extree, or NULL if node is
* last/first.
*
* static ex_node_t *
* ex_search(ex_t *extree, ex_node_t *key);
* Description: Search for node that matches key.
* Args:
* extree: Pointer to an initialized red-black tree object.
* key : Search key.
* Ret: Node in extree that matches key, or NULL if no match.
*
* static ex_node_t *
* ex_nsearch(ex_t *extree, ex_node_t *key);
* static ex_node_t *
* ex_psearch(ex_t *extree, ex_node_t *key);
* Description: Search for node that matches key. If no match is found,
* return what would be key's successor/predecessor, were
* key in extree.
* Args:
* extree: Pointer to an initialized red-black tree object.
* key : Search key.
* Ret: Node in extree that matches key, or if no match, hypothetical
* node's successor/predecessor (NULL if no successor/predecessor).
*
* static void
* ex_insert(ex_t *extree, ex_node_t *node);
* Description: Insert node into extree.
* Args:
* extree: Pointer to an initialized red-black tree object.
* node : Node to be inserted into extree.
*
* static void
* ex_remove(ex_t *extree, ex_node_t *node);
* Description: Remove node from extree.
* Args:
* extree: Pointer to an initialized red-black tree object.
* node : Node in extree to be removed.
*
* static ex_node_t *
* ex_iter(ex_t *extree, ex_node_t *start, ex_node_t *(*cb)(ex_t *,
* ex_node_t *, void *), void *arg);
* static ex_node_t *
* ex_reverse_iter(ex_t *extree, ex_node_t *start, ex_node_t *(*cb)(ex_t *,
* ex_node_t *, void *), void *arg);
* Description: Iterate forward/backward over extree, starting at node.
* If extree is modified, iteration must be immediately
* terminated by the callback function that causes the
* modification.
* Args:
* extree: Pointer to an initialized red-black tree object.
* start : Node at which to start iteration, or NULL to start at
* first/last node.
* cb : Callback function, which is called for each node during
* iteration. Under normal circumstances the callback function
* should return NULL, which causes iteration to continue. If a
* callback function returns non-NULL, iteration is immediately
* terminated and the non-NULL return value is returned by the
* iterator. This is useful for re-starting iteration after
* modifying extree.
* arg : Opaque pointer passed to cb().
* Ret: NULL if iteration completed, or the non-NULL callback return value
* that caused termination of the iteration.
*/
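/*
 * Hypothetical comparison function completing the ex_ setup shown above (for
 * illustration only): order nodes by address and return -1, 0, or 1 as
 * expected by rb_gen().
 */
#if 0
static int
ex_cmp(ex_node_t *a, ex_node_t *b)
{
uintptr_t ka = (uintptr_t)a;
uintptr_t kb = (uintptr_t)b;

return ((ka > kb) - (ka < kb));
}
#endif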
#define rb_gen(a_attr, a_prefix, a_rbt_type, a_type, a_field, a_cmp) \
a_attr void \
a_prefix##new(a_rbt_type *rbtree) { \
rb_new(a_type, a_field, rbtree); \
} \
a_attr a_type * \
a_prefix##first(a_rbt_type *rbtree) { \
a_type *ret; \
rbtn_first(a_type, a_field, rbtree, rbtree->rbt_root, ret); \
if (ret == &rbtree->rbt_nil) { \
ret = NULL; \
} \
return (ret); \
} \
a_attr a_type * \
a_prefix##last(a_rbt_type *rbtree) { \
a_type *ret; \
rbtn_last(a_type, a_field, rbtree, rbtree->rbt_root, ret); \
if (ret == &rbtree->rbt_nil) { \
ret = NULL; \
} \
return (ret); \
} \
a_attr a_type * \
a_prefix##next(a_rbt_type *rbtree, a_type *node) { \
a_type *ret; \
if (rbtn_right_get(a_type, a_field, node) != &rbtree->rbt_nil) { \
rbtn_first(a_type, a_field, rbtree, rbtn_right_get(a_type, \
a_field, node), ret); \
} else { \
a_type *tnode = rbtree->rbt_root; \
assert(tnode != &rbtree->rbt_nil); \
ret = &rbtree->rbt_nil; \
while (true) { \
int cmp = (a_cmp)(node, tnode); \
if (cmp < 0) { \
ret = tnode; \
tnode = rbtn_left_get(a_type, a_field, tnode); \
} else if (cmp > 0) { \
tnode = rbtn_right_get(a_type, a_field, tnode); \
} else { \
break; \
} \
assert(tnode != &rbtree->rbt_nil); \
} \
} \
if (ret == &rbtree->rbt_nil) { \
ret = (NULL); \
} \
return (ret); \
} \
a_attr a_type * \
a_prefix##prev(a_rbt_type *rbtree, a_type *node) { \
a_type *ret; \
if (rbtn_left_get(a_type, a_field, node) != &rbtree->rbt_nil) { \
rbtn_last(a_type, a_field, rbtree, rbtn_left_get(a_type, \
a_field, node), ret); \
} else { \
a_type *tnode = rbtree->rbt_root; \
assert(tnode != &rbtree->rbt_nil); \
ret = &rbtree->rbt_nil; \
while (true) { \
int cmp = (a_cmp)(node, tnode); \
if (cmp < 0) { \
tnode = rbtn_left_get(a_type, a_field, tnode); \
} else if (cmp > 0) { \
ret = tnode; \
tnode = rbtn_right_get(a_type, a_field, tnode); \
} else { \
break; \
} \
assert(tnode != &rbtree->rbt_nil); \
} \
} \
if (ret == &rbtree->rbt_nil) { \
ret = (NULL); \
} \
return (ret); \
} \
a_attr a_type * \
a_prefix##search(a_rbt_type *rbtree, a_type *key) { \
a_type *ret; \
int cmp; \
ret = rbtree->rbt_root; \
while (ret != &rbtree->rbt_nil \
&& (cmp = (a_cmp)(key, ret)) != 0) { \
if (cmp < 0) { \
ret = rbtn_left_get(a_type, a_field, ret); \
} else { \
ret = rbtn_right_get(a_type, a_field, ret); \
} \
} \
if (ret == &rbtree->rbt_nil) { \
ret = (NULL); \
} \
return (ret); \
} \
a_attr a_type * \
a_prefix##nsearch(a_rbt_type *rbtree, a_type *key) { \
a_type *ret; \
a_type *tnode = rbtree->rbt_root; \
ret = &rbtree->rbt_nil; \
while (tnode != &rbtree->rbt_nil) { \
int cmp = (a_cmp)(key, tnode); \
if (cmp < 0) { \
ret = tnode; \
tnode = rbtn_left_get(a_type, a_field, tnode); \
} else if (cmp > 0) { \
tnode = rbtn_right_get(a_type, a_field, tnode); \
} else { \
ret = tnode; \
break; \
} \
} \
if (ret == &rbtree->rbt_nil) { \
ret = (NULL); \
} \
return (ret); \
} \
a_attr a_type * \
a_prefix##psearch(a_rbt_type *rbtree, a_type *key) { \
a_type *ret; \
a_type *tnode = rbtree->rbt_root; \
ret = &rbtree->rbt_nil; \
while (tnode != &rbtree->rbt_nil) { \
int cmp = (a_cmp)(key, tnode); \
if (cmp < 0) { \
tnode = rbtn_left_get(a_type, a_field, tnode); \
} else if (cmp > 0) { \
ret = tnode; \
tnode = rbtn_right_get(a_type, a_field, tnode); \
} else { \
ret = tnode; \
break; \
} \
} \
if (ret == &rbtree->rbt_nil) { \
ret = (NULL); \
} \
return (ret); \
} \
a_attr void \
a_prefix##insert(a_rbt_type *rbtree, a_type *node) { \
struct { \
a_type *node; \
int cmp; \
} path[sizeof(void *) << 4], *pathp; \
rbt_node_new(a_type, a_field, rbtree, node); \
/* Wind. */ \
path->node = rbtree->rbt_root; \
for (pathp = path; pathp->node != &rbtree->rbt_nil; pathp++) { \
int cmp = pathp->cmp = a_cmp(node, pathp->node); \
assert(cmp != 0); \
if (cmp < 0) { \
pathp[1].node = rbtn_left_get(a_type, a_field, \
pathp->node); \
} else { \
pathp[1].node = rbtn_right_get(a_type, a_field, \
pathp->node); \
} \
} \
pathp->node = node; \
/* Unwind. */ \
for (pathp--; (uintptr_t)pathp >= (uintptr_t)path; pathp--) { \
a_type *cnode = pathp->node; \
if (pathp->cmp < 0) { \
a_type *left = pathp[1].node; \
rbtn_left_set(a_type, a_field, cnode, left); \
if (rbtn_red_get(a_type, a_field, left)) { \
a_type *leftleft = rbtn_left_get(a_type, a_field, left);\
if (rbtn_red_get(a_type, a_field, leftleft)) { \
/* Fix up 4-node. */ \
a_type *tnode; \
rbtn_black_set(a_type, a_field, leftleft); \
rbtn_rotate_right(a_type, a_field, cnode, tnode); \
cnode = tnode; \
} \
} else { \
return; \
} \
} else { \
a_type *right = pathp[1].node; \
rbtn_right_set(a_type, a_field, cnode, right); \
if (rbtn_red_get(a_type, a_field, right)) { \
a_type *left = rbtn_left_get(a_type, a_field, cnode); \
if (rbtn_red_get(a_type, a_field, left)) { \
/* Split 4-node. */ \
rbtn_black_set(a_type, a_field, left); \
rbtn_black_set(a_type, a_field, right); \
rbtn_red_set(a_type, a_field, cnode); \
} else { \
/* Lean left. */ \
a_type *tnode; \
bool tred = rbtn_red_get(a_type, a_field, cnode); \
rbtn_rotate_left(a_type, a_field, cnode, tnode); \
rbtn_color_set(a_type, a_field, tnode, tred); \
rbtn_red_set(a_type, a_field, cnode); \
cnode = tnode; \
} \
} else { \
return; \
} \
} \
pathp->node = cnode; \
} \
/* Set root, and make it black. */ \
rbtree->rbt_root = path->node; \
rbtn_black_set(a_type, a_field, rbtree->rbt_root); \
} \
a_attr void \
a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \
struct { \
a_type *node; \
int cmp; \
} *pathp, *nodep, path[sizeof(void *) << 4]; \
/* Wind. */ \
nodep = NULL; /* Silence compiler warning. */ \
path->node = rbtree->rbt_root; \
for (pathp = path; pathp->node != &rbtree->rbt_nil; pathp++) { \
int cmp = pathp->cmp = a_cmp(node, pathp->node); \
if (cmp < 0) { \
pathp[1].node = rbtn_left_get(a_type, a_field, \
pathp->node); \
} else { \
pathp[1].node = rbtn_right_get(a_type, a_field, \
pathp->node); \
if (cmp == 0) { \
/* Find node's successor, in preparation for swap. */ \
pathp->cmp = 1; \
nodep = pathp; \
for (pathp++; pathp->node != &rbtree->rbt_nil; \
pathp++) { \
pathp->cmp = -1; \
pathp[1].node = rbtn_left_get(a_type, a_field, \
pathp->node); \
} \
break; \
} \
} \
} \
assert(nodep->node == node); \
pathp--; \
if (pathp->node != node) { \
/* Swap node with its successor. */ \
bool tred = rbtn_red_get(a_type, a_field, pathp->node); \
rbtn_color_set(a_type, a_field, pathp->node, \
rbtn_red_get(a_type, a_field, node)); \
rbtn_left_set(a_type, a_field, pathp->node, \
rbtn_left_get(a_type, a_field, node)); \
/* If node's successor is its right child, the following code */\
/* will do the wrong thing for the right child pointer. */\
/* However, it doesn't matter, because the pointer will be */\
/* properly set when the successor is pruned. */\
rbtn_right_set(a_type, a_field, pathp->node, \
rbtn_right_get(a_type, a_field, node)); \
rbtn_color_set(a_type, a_field, node, tred); \
/* The pruned leaf node's child pointers are never accessed */\
/* again, so don't bother setting them to nil. */\
nodep->node = pathp->node; \
pathp->node = node; \
if (nodep == path) { \
rbtree->rbt_root = nodep->node; \
} else { \
if (nodep[-1].cmp < 0) { \
rbtn_left_set(a_type, a_field, nodep[-1].node, \
nodep->node); \
} else { \
rbtn_right_set(a_type, a_field, nodep[-1].node, \
nodep->node); \
} \
} \
} else { \
a_type *left = rbtn_left_get(a_type, a_field, node); \
if (left != &rbtree->rbt_nil) { \
/* node has no successor, but it has a left child. */\
/* Splice node out, without losing the left child. */\
assert(rbtn_red_get(a_type, a_field, node) == false); \
assert(rbtn_red_get(a_type, a_field, left)); \
rbtn_black_set(a_type, a_field, left); \
if (pathp == path) { \
rbtree->rbt_root = left; \
} else { \
if (pathp[-1].cmp < 0) { \
rbtn_left_set(a_type, a_field, pathp[-1].node, \
left); \
} else { \
rbtn_right_set(a_type, a_field, pathp[-1].node, \
left); \
} \
} \
return; \
} else if (pathp == path) { \
/* The tree only contained one node. */ \
rbtree->rbt_root = &rbtree->rbt_nil; \
return; \
} \
} \
if (rbtn_red_get(a_type, a_field, pathp->node)) { \
/* Prune red node, which requires no fixup. */ \
assert(pathp[-1].cmp < 0); \
rbtn_left_set(a_type, a_field, pathp[-1].node, \
&rbtree->rbt_nil); \
return; \
} \
/* The node to be pruned is black, so unwind until balance is */\
/* restored. */\
pathp->node = &rbtree->rbt_nil; \
for (pathp--; (uintptr_t)pathp >= (uintptr_t)path; pathp--) { \
assert(pathp->cmp != 0); \
if (pathp->cmp < 0) { \
rbtn_left_set(a_type, a_field, pathp->node, \
pathp[1].node); \
assert(rbtn_red_get(a_type, a_field, pathp[1].node) \
== false); \
if (rbtn_red_get(a_type, a_field, pathp->node)) { \
a_type *right = rbtn_right_get(a_type, a_field, \
pathp->node); \
a_type *rightleft = rbtn_left_get(a_type, a_field, \
right); \
a_type *tnode; \
if (rbtn_red_get(a_type, a_field, rightleft)) { \
/* In the following diagrams, ||, //, and \\ */\
/* indicate the path to the removed node. */\
/* */\
/* || */\
/* pathp(r) */\
/* // \ */\
/* (b) (b) */\
/* / */\
/* (r) */\
/* */\
rbtn_black_set(a_type, a_field, pathp->node); \
rbtn_rotate_right(a_type, a_field, right, tnode); \
rbtn_right_set(a_type, a_field, pathp->node, tnode);\
rbtn_rotate_left(a_type, a_field, pathp->node, \
tnode); \
} else { \
/* || */\
/* pathp(r) */\
/* // \ */\
/* (b) (b) */\
/* / */\
/* (b) */\
/* */\
rbtn_rotate_left(a_type, a_field, pathp->node, \
tnode); \
} \
/* Balance restored, but rotation modified subtree */\
/* root. */\
assert((uintptr_t)pathp > (uintptr_t)path); \
if (pathp[-1].cmp < 0) { \
rbtn_left_set(a_type, a_field, pathp[-1].node, \
tnode); \
} else { \
rbtn_right_set(a_type, a_field, pathp[-1].node, \
tnode); \
} \
return; \
} else { \
a_type *right = rbtn_right_get(a_type, a_field, \
pathp->node); \
a_type *rightleft = rbtn_left_get(a_type, a_field, \
right); \
if (rbtn_red_get(a_type, a_field, rightleft)) { \
/* || */\
/* pathp(b) */\
/* // \ */\
/* (b) (b) */\
/* / */\
/* (r) */\
a_type *tnode; \
rbtn_black_set(a_type, a_field, rightleft); \
rbtn_rotate_right(a_type, a_field, right, tnode); \
rbtn_right_set(a_type, a_field, pathp->node, tnode);\
rbtn_rotate_left(a_type, a_field, pathp->node, \
tnode); \
/* Balance restored, but rotation modified */\
/* subtree root, which may actually be the tree */\
/* root. */\
if (pathp == path) { \
/* Set root. */ \
rbtree->rbt_root = tnode; \
} else { \
if (pathp[-1].cmp < 0) { \
rbtn_left_set(a_type, a_field, \
pathp[-1].node, tnode); \
} else { \
rbtn_right_set(a_type, a_field, \
pathp[-1].node, tnode); \
} \
} \
return; \
} else { \
/* || */\
/* pathp(b) */\
/* // \ */\
/* (b) (b) */\
/* / */\
/* (b) */\
a_type *tnode; \
rbtn_red_set(a_type, a_field, pathp->node); \
rbtn_rotate_left(a_type, a_field, pathp->node, \
tnode); \
pathp->node = tnode; \
} \
} \
} else { \
a_type *left; \
rbtn_right_set(a_type, a_field, pathp->node, \
pathp[1].node); \
left = rbtn_left_get(a_type, a_field, pathp->node); \
if (rbtn_red_get(a_type, a_field, left)) { \
a_type *tnode; \
a_type *leftright = rbtn_right_get(a_type, a_field, \
left); \
a_type *leftrightleft = rbtn_left_get(a_type, a_field, \
leftright); \
if (rbtn_red_get(a_type, a_field, leftrightleft)) { \
/* || */\
/* pathp(b) */\
/* / \\ */\
/* (r) (b) */\
/* \ */\
/* (b) */\
/* / */\
/* (r) */\
a_type *unode; \
rbtn_black_set(a_type, a_field, leftrightleft); \
rbtn_rotate_right(a_type, a_field, pathp->node, \
unode); \
rbtn_rotate_right(a_type, a_field, pathp->node, \
tnode); \
rbtn_right_set(a_type, a_field, unode, tnode); \
rbtn_rotate_left(a_type, a_field, unode, tnode); \
} else { \
/* || */\
/* pathp(b) */\
/* / \\ */\
/* (r) (b) */\
/* \ */\
/* (b) */\
/* / */\
/* (b) */\
assert(leftright != &rbtree->rbt_nil); \
rbtn_red_set(a_type, a_field, leftright); \
rbtn_rotate_right(a_type, a_field, pathp->node, \
tnode); \
rbtn_black_set(a_type, a_field, tnode); \
} \
/* Balance restored, but rotation modified subtree */\
/* root, which may actually be the tree root. */\
if (pathp == path) { \
/* Set root. */ \
rbtree->rbt_root = tnode; \
} else { \
if (pathp[-1].cmp < 0) { \
rbtn_left_set(a_type, a_field, pathp[-1].node, \
tnode); \
} else { \
rbtn_right_set(a_type, a_field, pathp[-1].node, \
tnode); \
} \
} \
return; \
} else if (rbtn_red_get(a_type, a_field, pathp->node)) { \
a_type *leftleft = rbtn_left_get(a_type, a_field, left);\
if (rbtn_red_get(a_type, a_field, leftleft)) { \
/* || */\
/* pathp(r) */\
/* / \\ */\
/* (b) (b) */\
/* / */\
/* (r) */\
a_type *tnode; \
rbtn_black_set(a_type, a_field, pathp->node); \
rbtn_red_set(a_type, a_field, left); \
rbtn_black_set(a_type, a_field, leftleft); \
rbtn_rotate_right(a_type, a_field, pathp->node, \
tnode); \
/* Balance restored, but rotation modified */\
/* subtree root. */\
assert((uintptr_t)pathp > (uintptr_t)path); \
if (pathp[-1].cmp < 0) { \
rbtn_left_set(a_type, a_field, pathp[-1].node, \
tnode); \
} else { \
rbtn_right_set(a_type, a_field, pathp[-1].node, \
tnode); \
} \
return; \
} else { \
/* || */\
/* pathp(r) */\
/* / \\ */\
/* (b) (b) */\
/* / */\
/* (b) */\
rbtn_red_set(a_type, a_field, left); \
rbtn_black_set(a_type, a_field, pathp->node); \
/* Balance restored. */ \
return; \
} \
} else { \
a_type *leftleft = rbtn_left_get(a_type, a_field, left);\
if (rbtn_red_get(a_type, a_field, leftleft)) { \
/* || */\
/* pathp(b) */\
/* / \\ */\
/* (b) (b) */\
/* / */\
/* (r) */\
a_type *tnode; \
rbtn_black_set(a_type, a_field, leftleft); \
rbtn_rotate_right(a_type, a_field, pathp->node, \
tnode); \
/* Balance restored, but rotation modified */\
/* subtree root, which may actually be the tree */\
/* root. */\
if (pathp == path) { \
/* Set root. */ \
rbtree->rbt_root = tnode; \
} else { \
if (pathp[-1].cmp < 0) { \
rbtn_left_set(a_type, a_field, \
pathp[-1].node, tnode); \
} else { \
rbtn_right_set(a_type, a_field, \
pathp[-1].node, tnode); \
} \
} \
return; \
} else { \
/* || */\
/* pathp(b) */\
/* / \\ */\
/* (b) (b) */\
/* / */\
/* (b) */\
rbtn_red_set(a_type, a_field, left); \
} \
} \
} \
} \
/* Set root. */ \
rbtree->rbt_root = path->node; \
assert(rbtn_red_get(a_type, a_field, rbtree->rbt_root) == false); \
} \
a_attr a_type * \
a_prefix##iter_recurse(a_rbt_type *rbtree, a_type *node, \
a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) { \
if (node == &rbtree->rbt_nil) { \
return (&rbtree->rbt_nil); \
} else { \
a_type *ret; \
if ((ret = a_prefix##iter_recurse(rbtree, rbtn_left_get(a_type, \
a_field, node), cb, arg)) != &rbtree->rbt_nil \
|| (ret = cb(rbtree, node, arg)) != NULL) { \
return (ret); \
} \
return (a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \
a_field, node), cb, arg)); \
} \
} \
a_attr a_type * \
a_prefix##iter_start(a_rbt_type *rbtree, a_type *start, a_type *node, \
a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) { \
int cmp = a_cmp(start, node); \
if (cmp < 0) { \
a_type *ret; \
if ((ret = a_prefix##iter_start(rbtree, start, \
rbtn_left_get(a_type, a_field, node), cb, arg)) != \
&rbtree->rbt_nil || (ret = cb(rbtree, node, arg)) != NULL) { \
return (ret); \
} \
return (a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \
a_field, node), cb, arg)); \
} else if (cmp > 0) { \
return (a_prefix##iter_start(rbtree, start, \
rbtn_right_get(a_type, a_field, node), cb, arg)); \
} else { \
a_type *ret; \
if ((ret = cb(rbtree, node, arg)) != NULL) { \
return (ret); \
} \
return (a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \
a_field, node), cb, arg)); \
} \
} \
a_attr a_type * \
a_prefix##iter(a_rbt_type *rbtree, a_type *start, a_type *(*cb)( \
a_rbt_type *, a_type *, void *), void *arg) { \
a_type *ret; \
if (start != NULL) { \
ret = a_prefix##iter_start(rbtree, start, rbtree->rbt_root, \
cb, arg); \
} else { \
ret = a_prefix##iter_recurse(rbtree, rbtree->rbt_root, cb, arg);\
} \
if (ret == &rbtree->rbt_nil) { \
ret = NULL; \
} \
return (ret); \
} \
a_attr a_type * \
a_prefix##reverse_iter_recurse(a_rbt_type *rbtree, a_type *node, \
a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) { \
if (node == &rbtree->rbt_nil) { \
return (&rbtree->rbt_nil); \
} else { \
a_type *ret; \
if ((ret = a_prefix##reverse_iter_recurse(rbtree, \
rbtn_right_get(a_type, a_field, node), cb, arg)) != \
&rbtree->rbt_nil || (ret = cb(rbtree, node, arg)) != NULL) { \
return (ret); \
} \
return (a_prefix##reverse_iter_recurse(rbtree, \
rbtn_left_get(a_type, a_field, node), cb, arg)); \
} \
} \
a_attr a_type * \
a_prefix##reverse_iter_start(a_rbt_type *rbtree, a_type *start, \
a_type *node, a_type *(*cb)(a_rbt_type *, a_type *, void *), \
void *arg) { \
int cmp = a_cmp(start, node); \
if (cmp > 0) { \
a_type *ret; \
if ((ret = a_prefix##reverse_iter_start(rbtree, start, \
rbtn_right_get(a_type, a_field, node), cb, arg)) != \
&rbtree->rbt_nil || (ret = cb(rbtree, node, arg)) != NULL) { \
return (ret); \
} \
return (a_prefix##reverse_iter_recurse(rbtree, \
rbtn_left_get(a_type, a_field, node), cb, arg)); \
} else if (cmp < 0) { \
return (a_prefix##reverse_iter_start(rbtree, start, \
rbtn_left_get(a_type, a_field, node), cb, arg)); \
} else { \
a_type *ret; \
if ((ret = cb(rbtree, node, arg)) != NULL) { \
return (ret); \
} \
return (a_prefix##reverse_iter_recurse(rbtree, \
rbtn_left_get(a_type, a_field, node), cb, arg)); \
} \
} \
a_attr a_type * \
a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \
a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) { \
a_type *ret; \
if (start != NULL) { \
ret = a_prefix##reverse_iter_start(rbtree, start, \
rbtree->rbt_root, cb, arg); \
} else { \
ret = a_prefix##reverse_iter_recurse(rbtree, rbtree->rbt_root, \
cb, arg); \
} \
if (ret == &rbtree->rbt_nil) { \
ret = NULL; \
} \
return (ret); \
}
#endif /* RB_H_ */

View File

@ -0,0 +1,161 @@
/*
* This radix tree implementation is tailored to the singular purpose of
* tracking which chunks are currently owned by jemalloc. This functionality
* is mandatory for OS X, where jemalloc must be able to respond to object
* ownership queries.
*
*******************************************************************************
*/
#ifdef JEMALLOC_H_TYPES
typedef struct rtree_s rtree_t;
/*
* Size of each radix tree node (must be a power of 2). This impacts tree
* depth.
*/
#if (LG_SIZEOF_PTR == 2)
# define RTREE_NODESIZE (1U << 14)
#else
# define RTREE_NODESIZE CACHELINE
#endif
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
struct rtree_s {
malloc_mutex_t mutex;
void **root;
unsigned height;
unsigned level2bits[1]; /* Dynamically sized. */
};
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
rtree_t *rtree_new(unsigned bits);
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
#ifndef JEMALLOC_ENABLE_INLINE
#ifndef JEMALLOC_DEBUG
void *rtree_get_locked(rtree_t *rtree, uintptr_t key);
#endif
void *rtree_get(rtree_t *rtree, uintptr_t key);
bool rtree_set(rtree_t *rtree, uintptr_t key, void *val);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_RTREE_C_))
#define RTREE_GET_GENERATE(f) \
/* The least significant bits of the key are ignored. */ \
JEMALLOC_INLINE void * \
f(rtree_t *rtree, uintptr_t key) \
{ \
void *ret; \
uintptr_t subkey; \
unsigned i, lshift, height, bits; \
void **node, **child; \
\
RTREE_LOCK(&rtree->mutex); \
for (i = lshift = 0, height = rtree->height, node = rtree->root;\
i < height - 1; \
i++, lshift += bits, node = child) { \
bits = rtree->level2bits[i]; \
subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR + \
3)) - bits); \
child = (void**)node[subkey]; \
if (child == NULL) { \
RTREE_UNLOCK(&rtree->mutex); \
return (NULL); \
} \
} \
\
/* \
* node is a leaf, so it contains values rather than node \
* pointers. \
*/ \
bits = rtree->level2bits[i]; \
subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - \
bits); \
ret = node[subkey]; \
RTREE_UNLOCK(&rtree->mutex); \
\
RTREE_GET_VALIDATE \
return (ret); \
}
#ifdef JEMALLOC_DEBUG
# define RTREE_LOCK(l) malloc_mutex_lock(l)
# define RTREE_UNLOCK(l) malloc_mutex_unlock(l)
# define RTREE_GET_VALIDATE
RTREE_GET_GENERATE(rtree_get_locked)
# undef RTREE_LOCK
# undef RTREE_UNLOCK
# undef RTREE_GET_VALIDATE
#endif
#define RTREE_LOCK(l)
#define RTREE_UNLOCK(l)
#ifdef JEMALLOC_DEBUG
/*
* Suppose that it were possible for a jemalloc-allocated chunk to be
* munmap()ped, followed by a different allocator in another thread re-using
* overlapping virtual memory, all without invalidating the cached rtree
* value. The result would be a false positive (the rtree would claim that
* jemalloc owns memory that it had actually discarded). This scenario
* seems impossible, but the following assertion is a prudent sanity check.
*/
# define RTREE_GET_VALIDATE \
assert(rtree_get_locked(rtree, key) == ret);
#else
# define RTREE_GET_VALIDATE
#endif
RTREE_GET_GENERATE(rtree_get)
#undef RTREE_LOCK
#undef RTREE_UNLOCK
#undef RTREE_GET_VALIDATE
JEMALLOC_INLINE bool
rtree_set(rtree_t *rtree, uintptr_t key, void *val)
{
uintptr_t subkey;
unsigned i, lshift, height, bits;
void **node, **child;
malloc_mutex_lock(&rtree->mutex);
for (i = lshift = 0, height = rtree->height, node = rtree->root;
i < height - 1;
i++, lshift += bits, node = child) {
bits = rtree->level2bits[i];
subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) -
bits);
child = (void**)node[subkey];
if (child == NULL) {
child = (void**)base_alloc(sizeof(void *) <<
rtree->level2bits[i+1]);
if (child == NULL) {
malloc_mutex_unlock(&rtree->mutex);
return (true);
}
memset(child, 0, sizeof(void *) <<
rtree->level2bits[i+1]);
node[subkey] = child;
}
}
/* node is a leaf, so it contains values rather than node pointers. */
bits = rtree->level2bits[i];
subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - bits);
node[subkey] = val;
malloc_mutex_unlock(&rtree->mutex);
return (false);
}
#endif
#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/
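/*
 * Minimal usage sketch (variable names are illustrative; chunk.c uses this
 * same interface to track chunk ownership).  rtree_set() returns true on
 * error, and rtree_get() returns NULL for keys that were never set.
 *
 *   rtree_t *rt = rtree_new((ZU(1) << (LG_SIZEOF_PTR+3)) - opt_lg_chunk);
 *   if (rtree_set(rt, (uintptr_t)chunk, chunk)) {
 *           ...an interior node allocation failed...
 *   }
 *   if (rtree_get(rt, (uintptr_t)ptr) != NULL) {
 *           ...ptr lies within a tracked chunk...
 *   }
 */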

View File

@ -0,0 +1,207 @@
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES
#define UMAX2S_BUFSIZE 65
#ifdef JEMALLOC_STATS
typedef struct tcache_bin_stats_s tcache_bin_stats_t;
typedef struct malloc_bin_stats_s malloc_bin_stats_t;
typedef struct malloc_large_stats_s malloc_large_stats_t;
typedef struct arena_stats_s arena_stats_t;
#endif
#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
typedef struct chunk_stats_s chunk_stats_t;
#endif
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
#ifdef JEMALLOC_STATS
#ifdef JEMALLOC_TCACHE
struct tcache_bin_stats_s {
/*
* Number of allocation requests that corresponded to the size of this
* bin.
*/
uint64_t nrequests;
};
#endif
struct malloc_bin_stats_s {
/*
* Current number of bytes allocated, including objects currently
* cached by tcache.
*/
size_t allocated;
/*
* Total number of allocation/deallocation requests served directly by
* the bin. Note that tcache may allocate an object, then recycle it
 * many times, resulting in many increments to nrequests, but only one
* each to nmalloc and ndalloc.
*/
uint64_t nmalloc;
uint64_t ndalloc;
/*
* Number of allocation requests that correspond to the size of this
* bin. This includes requests served by tcache, though tcache only
* periodically merges into this counter.
*/
uint64_t nrequests;
#ifdef JEMALLOC_TCACHE
/* Number of tcache fills from this bin. */
uint64_t nfills;
/* Number of tcache flushes to this bin. */
uint64_t nflushes;
#endif
/* Total number of runs created for this bin's size class. */
uint64_t nruns;
/*
* Total number of runs reused by extracting them from the runs tree for
* this bin's size class.
*/
uint64_t reruns;
/* High-water mark for this bin. */
size_t highruns;
/* Current number of runs in this bin. */
size_t curruns;
};
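/*
 * Worked example of the distinction above (illustrative numbers): if the
 * tcache pulls a single object out of this bin and the application then
 * allocates and frees it ten times from the cache, nmalloc and ndalloc
 * each end up incremented once, while nrequests eventually reflects all
 * ten requests once the tcache merges its per-bin counter in.
 */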
struct malloc_large_stats_s {
/*
* Total number of allocation/deallocation requests served directly by
* the arena. Note that tcache may allocate an object, then recycle it
 * many times, resulting in many increments to nrequests, but only one
* each to nmalloc and ndalloc.
*/
uint64_t nmalloc;
uint64_t ndalloc;
/*
* Number of allocation requests that correspond to this size class.
* This includes requests served by tcache, though tcache only
* periodically merges into this counter.
*/
uint64_t nrequests;
/* High-water mark for this size class. */
size_t highruns;
/* Current number of runs of this size class. */
size_t curruns;
};
struct arena_stats_s {
/* Number of bytes currently mapped. */
size_t mapped;
/*
* Total number of purge sweeps, total number of madvise calls made,
* and total pages purged in order to keep dirty unused memory under
* control.
*/
uint64_t npurge;
uint64_t nmadvise;
uint64_t purged;
/* Per-size-category statistics. */
size_t allocated_large;
uint64_t nmalloc_large;
uint64_t ndalloc_large;
uint64_t nrequests_large;
/*
* One element for each possible size class, including sizes that
* overlap with bin size classes. This is necessary because ipalloc()
* sometimes has to use such large objects in order to assure proper
* alignment.
*/
malloc_large_stats_t *lstats;
};
#endif /* JEMALLOC_STATS */
#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
struct chunk_stats_s {
# ifdef JEMALLOC_STATS
/* Number of chunks that were allocated. */
uint64_t nchunks;
# endif
/* High-water mark for number of chunks allocated. */
size_t highchunks;
/*
 * Current number of chunks allocated.  This value is not needed for any
 * other purpose; it is tracked only so that highchunks can be kept up to
 * date.
*/
size_t curchunks;
};
#endif /* JEMALLOC_STATS || JEMALLOC_PROF */
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
extern bool opt_stats_print;
#ifdef JEMALLOC_STATS
extern size_t stats_cactive;
#endif
char *u2s(uint64_t x, unsigned base, char *s);
#ifdef JEMALLOC_STATS
void malloc_cprintf(void (*write)(void *, const char *), void *cbopaque,
const char *format, ...) JEMALLOC_ATTR(format(printf, 3, 4));
void malloc_printf(const char *format, ...)
JEMALLOC_ATTR(format(printf, 1, 2));
#endif
void stats_print(void (*write)(void *, const char *), void *cbopaque,
const char *opts);
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
#ifdef JEMALLOC_STATS
#ifndef JEMALLOC_ENABLE_INLINE
size_t stats_cactive_get(void);
void stats_cactive_add(size_t size);
void stats_cactive_sub(size_t size);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_STATS_C_))
JEMALLOC_INLINE size_t
stats_cactive_get(void)
{
return (atomic_read_z(&stats_cactive));
}
JEMALLOC_INLINE void
stats_cactive_add(size_t size)
{
atomic_add_z(&stats_cactive, size);
}
JEMALLOC_INLINE void
stats_cactive_sub(size_t size)
{
atomic_sub_z(&stats_cactive, size);
}
#endif
#endif /* JEMALLOC_STATS */
#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/

View File

@ -0,0 +1,431 @@
#ifdef JEMALLOC_TCACHE
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES
typedef struct tcache_bin_info_s tcache_bin_info_t;
typedef struct tcache_bin_s tcache_bin_t;
typedef struct tcache_s tcache_t;
/*
* Absolute maximum number of cache slots for each small bin in the thread
 * cache.  This is an additional constraint beyond the limit of twice the
 * number of regions per run for this size class.
*
* This constant must be an even number.
*/
#define TCACHE_NSLOTS_SMALL_MAX 200
/* Number of cache slots for large size classes. */
#define TCACHE_NSLOTS_LARGE 20
/* (1U << opt_lg_tcache_max) is used to compute tcache_maxclass. */
#define LG_TCACHE_MAXCLASS_DEFAULT 15
/*
* (1U << opt_lg_tcache_gc_sweep) is the approximate number of allocation
* events between full GC sweeps (-1: disabled). Integer rounding may cause
* the actual number to be slightly higher, since GC is performed
* incrementally.
*/
#define LG_TCACHE_GC_SWEEP_DEFAULT 13
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
/*
* Read-only information associated with each element of tcache_t's tbins array
* is stored separately, mainly to reduce memory usage.
*/
struct tcache_bin_info_s {
unsigned ncached_max; /* Upper limit on ncached. */
};
struct tcache_bin_s {
# ifdef JEMALLOC_STATS
tcache_bin_stats_t tstats;
# endif
int low_water; /* Min # cached since last GC. */
unsigned lg_fill_div; /* Fill (ncached_max >> lg_fill_div). */
unsigned ncached; /* # of cached objects. */
void **avail; /* Stack of available objects. */
};
struct tcache_s {
# ifdef JEMALLOC_STATS
ql_elm(tcache_t) link; /* Used for aggregating stats. */
# endif
# ifdef JEMALLOC_PROF
uint64_t prof_accumbytes;/* Cleared after arena_prof_accum() */
# endif
arena_t *arena; /* This thread's arena. */
unsigned ev_cnt; /* Event count since incremental GC. */
unsigned next_gc_bin; /* Next bin to GC. */
tcache_bin_t tbins[1]; /* Dynamically sized. */
/*
* The pointer stacks associated with tbins follow as a contiguous
* array. During tcache initialization, the avail pointer in each
* element of tbins is initialized to point to the proper offset within
* this array.
*/
};
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
extern bool opt_tcache;
extern ssize_t opt_lg_tcache_max;
extern ssize_t opt_lg_tcache_gc_sweep;
extern tcache_bin_info_t *tcache_bin_info;
/* Map of thread-specific caches. */
#ifndef NO_TLS
extern __thread tcache_t *tcache_tls
JEMALLOC_ATTR(tls_model("initial-exec"));
# define TCACHE_GET() tcache_tls
# define TCACHE_SET(v) do { \
tcache_tls = (tcache_t *)(v); \
pthread_setspecific(tcache_tsd, (void *)(v)); \
} while (0)
#else
# define TCACHE_GET() ((tcache_t *)pthread_getspecific(tcache_tsd))
# define TCACHE_SET(v) do { \
pthread_setspecific(tcache_tsd, (void *)(v)); \
} while (0)
#endif
extern pthread_key_t tcache_tsd;
/*
* Number of tcache bins. There are nbins small-object bins, plus 0 or more
* large-object bins.
*/
extern size_t nhbins;
/* Maximum cached size class. */
extern size_t tcache_maxclass;
/* Number of tcache allocation/deallocation events between incremental GCs. */
extern unsigned tcache_gc_incr;
void tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem
#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
, tcache_t *tcache
#endif
);
void tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem
#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
, tcache_t *tcache
#endif
);
tcache_t *tcache_create(arena_t *arena);
void *tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin,
size_t binind);
void tcache_destroy(tcache_t *tcache);
#ifdef JEMALLOC_STATS
void tcache_stats_merge(tcache_t *tcache, arena_t *arena);
#endif
bool tcache_boot(void);
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
#ifndef JEMALLOC_ENABLE_INLINE
void tcache_event(tcache_t *tcache);
tcache_t *tcache_get(void);
void *tcache_alloc_easy(tcache_bin_t *tbin);
void *tcache_alloc_small(tcache_t *tcache, size_t size, bool zero);
void *tcache_alloc_large(tcache_t *tcache, size_t size, bool zero);
void tcache_dalloc_small(tcache_t *tcache, void *ptr);
void tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TCACHE_C_))
JEMALLOC_INLINE tcache_t *
tcache_get(void)
{
tcache_t *tcache;
if ((isthreaded & opt_tcache) == false)
return (NULL);
tcache = TCACHE_GET();
if ((uintptr_t)tcache <= (uintptr_t)2) {
if (tcache == NULL) {
tcache = tcache_create(choose_arena());
if (tcache == NULL)
return (NULL);
} else {
if (tcache == (void *)(uintptr_t)1) {
/*
* Make a note that an allocator function was
 * called after tcache_thread_cleanup() was
* called.
*/
TCACHE_SET((uintptr_t)2);
}
return (NULL);
}
}
return (tcache);
}
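/*
 * Note on the sentinel values tested above: besides pointing at a live
 * tcache, the thread-specific value can be NULL (no tcache created yet),
 * (uintptr_t)1 (tcache_thread_cleanup() has already run), or (uintptr_t)2
 * (an allocation was attempted after cleanup, so no new tcache is
 * created).
 */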
JEMALLOC_INLINE void
tcache_event(tcache_t *tcache)
{
if (tcache_gc_incr == 0)
return;
tcache->ev_cnt++;
assert(tcache->ev_cnt <= tcache_gc_incr);
if (tcache->ev_cnt == tcache_gc_incr) {
size_t binind = tcache->next_gc_bin;
tcache_bin_t *tbin = &tcache->tbins[binind];
tcache_bin_info_t *tbin_info = &tcache_bin_info[binind];
if (tbin->low_water > 0) {
/*
* Flush (ceiling) 3/4 of the objects below the low
* water mark.
*/
if (binind < nbins) {
tcache_bin_flush_small(tbin, binind,
tbin->ncached - tbin->low_water +
(tbin->low_water >> 2)
#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
, tcache
#endif
);
} else {
tcache_bin_flush_large(tbin, binind,
tbin->ncached - tbin->low_water +
(tbin->low_water >> 2)
#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
, tcache
#endif
);
}
/*
* Reduce fill count by 2X. Limit lg_fill_div such that
* the fill count is always at least 1.
*/
if ((tbin_info->ncached_max >> (tbin->lg_fill_div+1))
>= 1)
tbin->lg_fill_div++;
} else if (tbin->low_water < 0) {
/*
* Increase fill count by 2X. Make sure lg_fill_div
* stays greater than 0.
*/
if (tbin->lg_fill_div > 1)
tbin->lg_fill_div--;
}
tbin->low_water = tbin->ncached;
tcache->next_gc_bin++;
if (tcache->next_gc_bin == nhbins)
tcache->next_gc_bin = 0;
tcache->ev_cnt = 0;
}
}
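/*
 * Worked example of the flush sizing above (illustrative numbers): with
 * ncached == 20 and low_water == 8 when the GC event fires, rem is
 * 20 - 8 + (8 >> 2) == 14, so 6 objects (roughly 3/4 of the 8 that sat
 * unused below the low water mark) are flushed back to the arena.
 */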
JEMALLOC_INLINE void *
tcache_alloc_easy(tcache_bin_t *tbin)
{
void *ret;
if (tbin->ncached == 0) {
tbin->low_water = -1;
return (NULL);
}
tbin->ncached--;
if ((int)tbin->ncached < tbin->low_water)
tbin->low_water = tbin->ncached;
ret = tbin->avail[tbin->ncached];
return (ret);
}
JEMALLOC_INLINE void *
tcache_alloc_small(tcache_t *tcache, size_t size, bool zero)
{
void *ret;
size_t binind;
tcache_bin_t *tbin;
binind = SMALL_SIZE2BIN(size);
assert(binind < nbins);
tbin = &tcache->tbins[binind];
ret = tcache_alloc_easy(tbin);
if (ret == NULL) {
ret = tcache_alloc_small_hard(tcache, tbin, binind);
if (ret == NULL)
return (NULL);
}
assert(arena_salloc(ret) == arena_bin_info[binind].reg_size);
if (zero == false) {
#ifdef JEMALLOC_FILL
if (opt_junk)
memset(ret, 0xa5, size);
else if (opt_zero)
memset(ret, 0, size);
#endif
} else
memset(ret, 0, size);
#ifdef JEMALLOC_STATS
tbin->tstats.nrequests++;
#endif
#ifdef JEMALLOC_PROF
tcache->prof_accumbytes += arena_bin_info[binind].reg_size;
#endif
tcache_event(tcache);
return (ret);
}
JEMALLOC_INLINE void *
tcache_alloc_large(tcache_t *tcache, size_t size, bool zero)
{
void *ret;
size_t binind;
tcache_bin_t *tbin;
size = PAGE_CEILING(size);
assert(size <= tcache_maxclass);
binind = nbins + (size >> PAGE_SHIFT) - 1;
assert(binind < nhbins);
tbin = &tcache->tbins[binind];
ret = tcache_alloc_easy(tbin);
if (ret == NULL) {
/*
* Only allocate one large object at a time, because it's quite
* expensive to create one and not use it.
*/
ret = arena_malloc_large(tcache->arena, size, zero);
if (ret == NULL)
return (NULL);
} else {
#ifdef JEMALLOC_PROF
arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret);
size_t pageind = (((uintptr_t)ret - (uintptr_t)chunk) >>
PAGE_SHIFT);
chunk->map[pageind-map_bias].bits &= ~CHUNK_MAP_CLASS_MASK;
#endif
if (zero == false) {
#ifdef JEMALLOC_FILL
if (opt_junk)
memset(ret, 0xa5, size);
else if (opt_zero)
memset(ret, 0, size);
#endif
} else
memset(ret, 0, size);
#ifdef JEMALLOC_STATS
tbin->tstats.nrequests++;
#endif
#ifdef JEMALLOC_PROF
tcache->prof_accumbytes += size;
#endif
}
tcache_event(tcache);
return (ret);
}
JEMALLOC_INLINE void
tcache_dalloc_small(tcache_t *tcache, void *ptr)
{
arena_t *arena;
arena_chunk_t *chunk;
arena_run_t *run;
arena_bin_t *bin;
tcache_bin_t *tbin;
tcache_bin_info_t *tbin_info;
size_t pageind, binind;
arena_chunk_map_t *mapelm;
assert(arena_salloc(ptr) <= small_maxclass);
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
arena = chunk->arena;
pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
mapelm = &chunk->map[pageind-map_bias];
run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind -
(mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT));
dassert(run->magic == ARENA_RUN_MAGIC);
bin = run->bin;
binind = ((uintptr_t)bin - (uintptr_t)&arena->bins) /
sizeof(arena_bin_t);
assert(binind < nbins);
#ifdef JEMALLOC_FILL
if (opt_junk)
memset(ptr, 0x5a, arena_bin_info[binind].reg_size);
#endif
tbin = &tcache->tbins[binind];
tbin_info = &tcache_bin_info[binind];
if (tbin->ncached == tbin_info->ncached_max) {
tcache_bin_flush_small(tbin, binind, (tbin_info->ncached_max >>
1)
#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
, tcache
#endif
);
}
assert(tbin->ncached < tbin_info->ncached_max);
tbin->avail[tbin->ncached] = ptr;
tbin->ncached++;
tcache_event(tcache);
}
JEMALLOC_INLINE void
tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size)
{
arena_t *arena;
arena_chunk_t *chunk;
size_t pageind, binind;
tcache_bin_t *tbin;
tcache_bin_info_t *tbin_info;
assert((size & PAGE_MASK) == 0);
assert(arena_salloc(ptr) > small_maxclass);
assert(arena_salloc(ptr) <= tcache_maxclass);
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
arena = chunk->arena;
pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
binind = nbins + (size >> PAGE_SHIFT) - 1;
#ifdef JEMALLOC_FILL
if (opt_junk)
memset(ptr, 0x5a, size);
#endif
tbin = &tcache->tbins[binind];
tbin_info = &tcache_bin_info[binind];
if (tbin->ncached == tbin_info->ncached_max) {
tcache_bin_flush_large(tbin, binind, (tbin_info->ncached_max >>
1)
#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
, tcache
#endif
);
}
assert(tbin->ncached < tbin_info->ncached_max);
tbin->avail[tbin->ncached] = ptr;
tbin->ncached++;
tcache_event(tcache);
}
#endif
#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/
#endif /* JEMALLOC_TCACHE */

View File

@ -0,0 +1,66 @@
#ifndef JEMALLOC_H_
#define JEMALLOC_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <limits.h>
#include <strings.h>
#define JEMALLOC_VERSION "@jemalloc_version@"
#define JEMALLOC_VERSION_MAJOR @jemalloc_version_major@
#define JEMALLOC_VERSION_MINOR @jemalloc_version_minor@
#define JEMALLOC_VERSION_BUGFIX @jemalloc_version_bugfix@
#define JEMALLOC_VERSION_NREV @jemalloc_version_nrev@
#define JEMALLOC_VERSION_GID "@jemalloc_version_gid@"
#include "jemalloc_defs@install_suffix@.h"
#ifndef JEMALLOC_P
# define JEMALLOC_P(s) s
#endif
#define ALLOCM_LG_ALIGN(la) (la)
#if LG_SIZEOF_PTR == 2
#define ALLOCM_ALIGN(a) (ffs(a)-1)
#else
#define ALLOCM_ALIGN(a) ((a < (size_t)INT_MAX) ? ffs(a)-1 : ffs(a>>32)+31)
#endif
#define ALLOCM_ZERO ((int)0x40)
#define ALLOCM_NO_MOVE ((int)0x80)
#define ALLOCM_SUCCESS 0
#define ALLOCM_ERR_OOM 1
#define ALLOCM_ERR_NOT_MOVED 2
extern const char *JEMALLOC_P(malloc_conf);
extern void (*JEMALLOC_P(malloc_message))(void *, const char *);
void *JEMALLOC_P(malloc)(size_t size) JEMALLOC_ATTR(malloc);
void *JEMALLOC_P(calloc)(size_t num, size_t size) JEMALLOC_ATTR(malloc);
int JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size)
JEMALLOC_ATTR(nonnull(1));
void *JEMALLOC_P(realloc)(void *ptr, size_t size);
void JEMALLOC_P(free)(void *ptr);
size_t JEMALLOC_P(malloc_usable_size)(const void *ptr);
void JEMALLOC_P(malloc_stats_print)(void (*write_cb)(void *, const char *),
void *cbopaque, const char *opts);
int JEMALLOC_P(mallctl)(const char *name, void *oldp, size_t *oldlenp,
void *newp, size_t newlen);
int JEMALLOC_P(mallctlnametomib)(const char *name, size_t *mibp,
size_t *miblenp);
int JEMALLOC_P(mallctlbymib)(const size_t *mib, size_t miblen, void *oldp,
size_t *oldlenp, void *newp, size_t newlen);
int JEMALLOC_P(allocm)(void **ptr, size_t *rsize, size_t size, int flags)
JEMALLOC_ATTR(nonnull(1));
int JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size,
size_t extra, int flags) JEMALLOC_ATTR(nonnull(1));
int JEMALLOC_P(sallocm)(const void *ptr, size_t *rsize, int flags)
JEMALLOC_ATTR(nonnull(1));
int JEMALLOC_P(dallocm)(void *ptr, int flags) JEMALLOC_ATTR(nonnull(1));
#ifdef __cplusplus
};
#endif
#endif /* JEMALLOC_H_ */
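/*
 * Minimal usage sketch for the experimental allocm() interface declared
 * above (illustrative; most error handling elided).  ALLOCM_LG_ALIGN(6)
 * requests 64-byte alignment and ALLOCM_ZERO requests zeroed memory:
 *
 *   void *p;
 *   size_t rsize;
 *   if (JEMALLOC_P(allocm)(&p, &rsize, 4096,
 *       ALLOCM_LG_ALIGN(6) | ALLOCM_ZERO) != ALLOCM_SUCCESS) {
 *           ...allocation failed...
 *   }
 *   ...rsize now holds the usable size actually provided...
 *   JEMALLOC_P(dallocm)(p, 0);
 */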

View File

@ -0,0 +1,167 @@
#ifndef JEMALLOC_DEFS_H_
#define JEMALLOC_DEFS_H_
/*
* If JEMALLOC_PREFIX is defined, it will cause all public APIs to be prefixed.
* This makes it possible, with some care, to use multiple allocators
* simultaneously.
*
* In many cases it is more convenient to manually prefix allocator function
* calls than to let macros do it automatically, particularly when using
* multiple allocators simultaneously. Define JEMALLOC_MANGLE before
* #include'ing jemalloc.h in order to cause name mangling that corresponds to
* the API prefixing.
*/
#undef JEMALLOC_PREFIX
#undef JEMALLOC_CPREFIX
#if (defined(JEMALLOC_PREFIX) && defined(JEMALLOC_MANGLE))
#undef JEMALLOC_P
#endif
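/*
 * Illustration (the prefix name is hypothetical): with no prefix
 * configured, JEMALLOC_P(malloc) expands to plain malloc.  If the library
 * were configured with a prefix such as "je_" and JEMALLOC_MANGLE were
 * defined before including jemalloc.h, the same call site would expand to
 * je_malloc instead, which is what lets several allocators coexist in one
 * process.
 */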
/*
* JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs.
* For shared libraries, symbol visibility mechanisms prevent these symbols
* from being exported, but for static libraries, naming collisions are a real
* possibility.
*/
#undef JEMALLOC_PRIVATE_NAMESPACE
#undef JEMALLOC_N
/*
* Hyper-threaded CPUs may need a special instruction inside spin loops in
* order to yield to another virtual CPU.
*/
#undef CPU_SPINWAIT
/*
* Defined if OSAtomic*() functions are available, as provided by Darwin, and
* documented in the atomic(3) manual page.
*/
#undef JEMALLOC_OSATOMIC
/*
* Defined if OSSpin*() functions are available, as provided by Darwin, and
* documented in the spinlock(3) manual page.
*/
#undef JEMALLOC_OSSPIN
/* Defined if __attribute__((...)) syntax is supported. */
#undef JEMALLOC_HAVE_ATTR
#ifdef JEMALLOC_HAVE_ATTR
# define JEMALLOC_ATTR(s) __attribute__((s))
#else
# define JEMALLOC_ATTR(s)
#endif
/* JEMALLOC_CC_SILENCE enables code that silences unhelpful compiler warnings. */
#undef JEMALLOC_CC_SILENCE
/*
* JEMALLOC_DEBUG enables assertions and other sanity checks, and disables
* inline functions.
*/
#undef JEMALLOC_DEBUG
/* JEMALLOC_STATS enables statistics calculation. */
#undef JEMALLOC_STATS
/* JEMALLOC_PROF enables allocation profiling. */
#undef JEMALLOC_PROF
/* Use libunwind for profile backtracing if defined. */
#undef JEMALLOC_PROF_LIBUNWIND
/* Use libgcc for profile backtracing if defined. */
#undef JEMALLOC_PROF_LIBGCC
/* Use gcc intrinsics for profile backtracing if defined. */
#undef JEMALLOC_PROF_GCC
/*
* JEMALLOC_TINY enables support for tiny objects, which are smaller than one
* quantum.
*/
#undef JEMALLOC_TINY
/*
* JEMALLOC_TCACHE enables a thread-specific caching layer for small objects.
* This makes it possible to allocate/deallocate objects without any locking
* when the cache is in the steady state.
*/
#undef JEMALLOC_TCACHE
/*
* JEMALLOC_DSS enables use of sbrk(2) to allocate chunks from the data storage
* segment (DSS).
*/
#undef JEMALLOC_DSS
/* JEMALLOC_SWAP enables mmap()ed swap file support. */
#undef JEMALLOC_SWAP
/* Support memory filling (junk/zero). */
#undef JEMALLOC_FILL
/* Support optional abort() on OOM. */
#undef JEMALLOC_XMALLOC
/* Support SYSV semantics. */
#undef JEMALLOC_SYSV
/* Support lazy locking (avoid locking unless a second thread is launched). */
#undef JEMALLOC_LAZY_LOCK
/* Determine page size at run time if defined. */
#undef DYNAMIC_PAGE_SHIFT
/* One page is 2^STATIC_PAGE_SHIFT bytes. */
#undef STATIC_PAGE_SHIFT
/* TLS is used to map arenas and magazine caches to threads. */
#undef NO_TLS
/*
* JEMALLOC_IVSALLOC enables ivsalloc(), which verifies that pointers reside
* within jemalloc-owned chunks before dereferencing them.
*/
#undef JEMALLOC_IVSALLOC
/*
* Define overrides for non-standard allocator-related functions if they
* are present on the system.
*/
#undef JEMALLOC_OVERRIDE_MEMALIGN
#undef JEMALLOC_OVERRIDE_VALLOC
/*
* Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings.
*/
#undef JEMALLOC_ZONE
#undef JEMALLOC_ZONE_VERSION
/* If defined, use mremap(...MREMAP_FIXED...) for huge realloc(). */
#undef JEMALLOC_MREMAP_FIXED
/*
* Methods for purging unused pages differ between operating systems.
*
* madvise(..., MADV_DONTNEED) : On Linux, this immediately discards pages,
* such that new pages will be demand-zeroed if
* the address region is later touched.
* madvise(..., MADV_FREE) : On FreeBSD and Darwin, this marks pages as being
* unused, such that they will be discarded rather
* than swapped out.
*/
#undef JEMALLOC_PURGE_MADVISE_DONTNEED
#undef JEMALLOC_PURGE_MADVISE_FREE
/* sizeof(void *) == 2^LG_SIZEOF_PTR. */
#undef LG_SIZEOF_PTR
/* sizeof(int) == 2^LG_SIZEOF_INT. */
#undef LG_SIZEOF_INT
/* sizeof(long) == 2^LG_SIZEOF_LONG. */
#undef LG_SIZEOF_LONG
#endif /* JEMALLOC_DEFS_H_ */

250
deps/jemalloc.orig/install-sh vendored Executable file
View File

@ -0,0 +1,250 @@
#! /bin/sh
#
# install - install a program, script, or datafile
# This comes from X11R5 (mit/util/scripts/install.sh).
#
# Copyright 1991 by the Massachusetts Institute of Technology
#
# Permission to use, copy, modify, distribute, and sell this software and its
# documentation for any purpose is hereby granted without fee, provided that
# the above copyright notice appear in all copies and that both that
# copyright notice and this permission notice appear in supporting
# documentation, and that the name of M.I.T. not be used in advertising or
# publicity pertaining to distribution of the software without specific,
# written prior permission. M.I.T. makes no representations about the
# suitability of this software for any purpose. It is provided "as is"
# without express or implied warranty.
#
# Calling this script install-sh is preferred over install.sh, to prevent
# `make' implicit rules from creating a file called install from it
# when there is no Makefile.
#
# This script is compatible with the BSD install script, but was written
# from scratch. It can only install one file at a time, a restriction
# shared with many OS's install programs.
# set DOITPROG to echo to test this script
# Don't use :- since 4.3BSD and earlier shells don't like it.
doit="${DOITPROG-}"
# put in absolute paths if you don't have them in your path; or use env. vars.
mvprog="${MVPROG-mv}"
cpprog="${CPPROG-cp}"
chmodprog="${CHMODPROG-chmod}"
chownprog="${CHOWNPROG-chown}"
chgrpprog="${CHGRPPROG-chgrp}"
stripprog="${STRIPPROG-strip}"
rmprog="${RMPROG-rm}"
mkdirprog="${MKDIRPROG-mkdir}"
transformbasename=""
transform_arg=""
instcmd="$mvprog"
chmodcmd="$chmodprog 0755"
chowncmd=""
chgrpcmd=""
stripcmd=""
rmcmd="$rmprog -f"
mvcmd="$mvprog"
src=""
dst=""
dir_arg=""
while [ x"$1" != x ]; do
case $1 in
-c) instcmd="$cpprog"
shift
continue;;
-d) dir_arg=true
shift
continue;;
-m) chmodcmd="$chmodprog $2"
shift
shift
continue;;
-o) chowncmd="$chownprog $2"
shift
shift
continue;;
-g) chgrpcmd="$chgrpprog $2"
shift
shift
continue;;
-s) stripcmd="$stripprog"
shift
continue;;
-t=*) transformarg=`echo $1 | sed 's/-t=//'`
shift
continue;;
-b=*) transformbasename=`echo $1 | sed 's/-b=//'`
shift
continue;;
*) if [ x"$src" = x ]
then
src=$1
else
# this colon is to work around a 386BSD /bin/sh bug
:
dst=$1
fi
shift
continue;;
esac
done
if [ x"$src" = x ]
then
echo "install: no input file specified"
exit 1
else
true
fi
if [ x"$dir_arg" != x ]; then
dst=$src
src=""
if [ -d $dst ]; then
instcmd=:
else
instcmd=mkdir
fi
else
# Waiting for this to be detected by the "$instcmd $src $dsttmp" command
# might cause directories to be created, which would be especially bad
# if $src (and thus $dsttmp) contains '*'.
if [ -f $src -o -d $src ]
then
true
else
echo "install: $src does not exist"
exit 1
fi
if [ x"$dst" = x ]
then
echo "install: no destination specified"
exit 1
else
true
fi
# If destination is a directory, append the input filename; if your system
# does not like double slashes in filenames, you may need to add some logic
if [ -d $dst ]
then
dst="$dst"/`basename $src`
else
true
fi
fi
## this sed command emulates the dirname command
dstdir=`echo $dst | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'`
# Make sure that the destination directory exists.
# this part is taken from Noah Friedman's mkinstalldirs script
# Skip lots of stat calls in the usual case.
if [ ! -d "$dstdir" ]; then
defaultIFS='
'
IFS="${IFS-${defaultIFS}}"
oIFS="${IFS}"
# Some sh's can't handle IFS=/ for some reason.
IFS='%'
set - `echo ${dstdir} | sed -e 's@/@%@g' -e 's@^%@/@'`
IFS="${oIFS}"
pathcomp=''
while [ $# -ne 0 ] ; do
pathcomp="${pathcomp}${1}"
shift
if [ ! -d "${pathcomp}" ] ;
then
$mkdirprog "${pathcomp}"
else
true
fi
pathcomp="${pathcomp}/"
done
fi
if [ x"$dir_arg" != x ]
then
$doit $instcmd $dst &&
if [ x"$chowncmd" != x ]; then $doit $chowncmd $dst; else true ; fi &&
if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dst; else true ; fi &&
if [ x"$stripcmd" != x ]; then $doit $stripcmd $dst; else true ; fi &&
if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dst; else true ; fi
else
# If we're going to rename the final executable, determine the name now.
if [ x"$transformarg" = x ]
then
dstfile=`basename $dst`
else
dstfile=`basename $dst $transformbasename |
sed $transformarg`$transformbasename
fi
# don't allow the sed command to completely eliminate the filename
if [ x"$dstfile" = x ]
then
dstfile=`basename $dst`
else
true
fi
# Make a temp file name in the proper directory.
dsttmp=$dstdir/#inst.$$#
# Move or copy the file name to the temp name
$doit $instcmd $src $dsttmp &&
trap "rm -f ${dsttmp}" 0 &&
# and set any options; do chmod last to preserve setuid bits
# If any of these fail, we abort the whole thing. If we want to
# ignore errors from any of these, just make sure not to ignore
# errors from the above "$doit $instcmd $src $dsttmp" command.
if [ x"$chowncmd" != x ]; then $doit $chowncmd $dsttmp; else true;fi &&
if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dsttmp; else true;fi &&
if [ x"$stripcmd" != x ]; then $doit $stripcmd $dsttmp; else true;fi &&
if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dsttmp; else true;fi &&
# Now rename the file to the real destination.
$doit $rmcmd -f $dstdir/$dstfile &&
$doit $mvcmd $dsttmp $dstdir/$dstfile
fi &&
exit 0

2704
deps/jemalloc.orig/src/arena.c vendored Normal file

File diff suppressed because it is too large

2
deps/jemalloc.orig/src/atomic.c vendored Normal file
View File

@ -0,0 +1,2 @@
#define JEMALLOC_ATOMIC_C_
#include "jemalloc/internal/jemalloc_internal.h"

106
deps/jemalloc.orig/src/base.c vendored Normal file
View File

@ -0,0 +1,106 @@
#define JEMALLOC_BASE_C_
#include "jemalloc/internal/jemalloc_internal.h"
/******************************************************************************/
/* Data. */
malloc_mutex_t base_mtx;
/*
* Current pages that are being used for internal memory allocations. These
* pages are carved up in cacheline-size quanta, so that there is no chance of
* false cache line sharing.
*/
static void *base_pages;
static void *base_next_addr;
static void *base_past_addr; /* Addr immediately past base_pages. */
static extent_node_t *base_nodes;
/******************************************************************************/
/* Function prototypes for non-inline static functions. */
static bool base_pages_alloc(size_t minsize);
/******************************************************************************/
static bool
base_pages_alloc(size_t minsize)
{
size_t csize;
bool zero;
assert(minsize != 0);
csize = CHUNK_CEILING(minsize);
zero = false;
base_pages = chunk_alloc(csize, true, &zero);
if (base_pages == NULL)
return (true);
base_next_addr = base_pages;
base_past_addr = (void *)((uintptr_t)base_pages + csize);
return (false);
}
void *
base_alloc(size_t size)
{
void *ret;
size_t csize;
/* Round size up to nearest multiple of the cacheline size. */
csize = CACHELINE_CEILING(size);
malloc_mutex_lock(&base_mtx);
/* Make sure there's enough space for the allocation. */
if ((uintptr_t)base_next_addr + csize > (uintptr_t)base_past_addr) {
if (base_pages_alloc(csize)) {
malloc_mutex_unlock(&base_mtx);
return (NULL);
}
}
/* Allocate. */
ret = base_next_addr;
base_next_addr = (void *)((uintptr_t)base_next_addr + csize);
malloc_mutex_unlock(&base_mtx);
return (ret);
}
extent_node_t *
base_node_alloc(void)
{
extent_node_t *ret;
malloc_mutex_lock(&base_mtx);
if (base_nodes != NULL) {
ret = base_nodes;
base_nodes = *(extent_node_t **)ret;
malloc_mutex_unlock(&base_mtx);
} else {
malloc_mutex_unlock(&base_mtx);
ret = (extent_node_t *)base_alloc(sizeof(extent_node_t));
}
return (ret);
}
void
base_node_dealloc(extent_node_t *node)
{
malloc_mutex_lock(&base_mtx);
*(extent_node_t **)node = base_nodes;
base_nodes = node;
malloc_mutex_unlock(&base_mtx);
}
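/*
 * Note on the free list above: deallocated extent nodes are threaded onto
 * a singly linked list by reusing the first word of the node itself as the
 * link pointer (hence the *(extent_node_t **) casts), so no additional
 * memory is needed to track them.
 */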
bool
base_boot(void)
{
base_nodes = NULL;
if (malloc_mutex_init(&base_mtx))
return (true);
return (false);
}

90
deps/jemalloc.orig/src/bitmap.c vendored Normal file
View File

@ -0,0 +1,90 @@
#define JEMALLOC_BITMAP_C_
#include "jemalloc/internal/jemalloc_internal.h"
/******************************************************************************/
/* Function prototypes for non-inline static functions. */
static size_t bits2groups(size_t nbits);
/******************************************************************************/
static size_t
bits2groups(size_t nbits)
{
return ((nbits >> LG_BITMAP_GROUP_NBITS) +
!!(nbits & BITMAP_GROUP_NBITS_MASK));
}
void
bitmap_info_init(bitmap_info_t *binfo, size_t nbits)
{
unsigned i;
size_t group_count;
assert(nbits > 0);
assert(nbits <= (ZU(1) << LG_BITMAP_MAXBITS));
/*
* Compute the number of groups necessary to store nbits bits, and
* progressively work upward through the levels until reaching a level
* that requires only one group.
*/
binfo->levels[0].group_offset = 0;
group_count = bits2groups(nbits);
for (i = 1; group_count > 1; i++) {
assert(i < BITMAP_MAX_LEVELS);
binfo->levels[i].group_offset = binfo->levels[i-1].group_offset
+ group_count;
group_count = bits2groups(group_count);
}
binfo->levels[i].group_offset = binfo->levels[i-1].group_offset
+ group_count;
binfo->nlevels = i;
binfo->nbits = nbits;
}
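/*
 * Worked example (assuming 64-bit bitmap groups, i.e. LG_BITMAP_GROUP_NBITS
 * == 6): for nbits == 1000, bits2groups(1000) == 16 and bits2groups(16) ==
 * 1, so the loop terminates with nlevels == 2 and group offsets {0, 16, 17};
 * the single level-1 group summarizes the 16 level-0 groups, one bit per
 * group.
 */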
size_t
bitmap_info_ngroups(const bitmap_info_t *binfo)
{
return (binfo->levels[binfo->nlevels].group_offset << LG_SIZEOF_BITMAP);
}
size_t
bitmap_size(size_t nbits)
{
bitmap_info_t binfo;
bitmap_info_init(&binfo, nbits);
return (bitmap_info_ngroups(&binfo));
}
void
bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo)
{
size_t extra;
unsigned i;
/*
* Bits are actually inverted with regard to the external bitmap
* interface, so the bitmap starts out with all 1 bits, except for
* trailing unused bits (if any). Note that each group uses bit 0 to
* correspond to the first logical bit in the group, so extra bits
* are the most significant bits of the last group.
*/
memset(bitmap, 0xffU, binfo->levels[binfo->nlevels].group_offset <<
LG_SIZEOF_BITMAP);
extra = (BITMAP_GROUP_NBITS - (binfo->nbits & BITMAP_GROUP_NBITS_MASK))
& BITMAP_GROUP_NBITS_MASK;
if (extra != 0)
bitmap[binfo->levels[1].group_offset - 1] >>= extra;
for (i = 1; i < binfo->nlevels; i++) {
size_t group_count = binfo->levels[i].group_offset -
binfo->levels[i-1].group_offset;
extra = (BITMAP_GROUP_NBITS - (group_count &
BITMAP_GROUP_NBITS_MASK)) & BITMAP_GROUP_NBITS_MASK;
if (extra != 0)
bitmap[binfo->levels[i+1].group_offset - 1] >>= extra;
}
}

173
deps/jemalloc.orig/src/chunk.c vendored Normal file
View File

@ -0,0 +1,173 @@
#define JEMALLOC_CHUNK_C_
#include "jemalloc/internal/jemalloc_internal.h"
/******************************************************************************/
/* Data. */
size_t opt_lg_chunk = LG_CHUNK_DEFAULT;
#ifdef JEMALLOC_SWAP
bool opt_overcommit = true;
#endif
#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
malloc_mutex_t chunks_mtx;
chunk_stats_t stats_chunks;
#endif
#ifdef JEMALLOC_IVSALLOC
rtree_t *chunks_rtree;
#endif
/* Various chunk-related settings. */
size_t chunksize;
size_t chunksize_mask; /* (chunksize - 1). */
size_t chunk_npages;
size_t map_bias;
size_t arena_maxclass; /* Max size class for arenas. */
/******************************************************************************/
/*
* If the caller specifies (*zero == false), it is still possible to receive
* zeroed memory, in which case *zero is toggled to true. arena_chunk_alloc()
* takes advantage of this to avoid demanding zeroed chunks, but taking
* advantage of them if they are returned.
*/
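/*
 * Minimal caller sketch (illustrative): a caller that does not require
 * zeroed memory can still learn whether it received it for free.
 *
 *   bool zero = false;
 *   void *chunk = chunk_alloc(chunksize, false, &zero);
 *   if (chunk != NULL && zero) {
 *           ...already zeroed; an explicit memset() can be skipped...
 *   }
 */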
void *
chunk_alloc(size_t size, bool base, bool *zero)
{
void *ret;
assert(size != 0);
assert((size & chunksize_mask) == 0);
#ifdef JEMALLOC_SWAP
if (swap_enabled) {
ret = chunk_alloc_swap(size, zero);
if (ret != NULL)
goto RETURN;
}
if (swap_enabled == false || opt_overcommit) {
#endif
#ifdef JEMALLOC_DSS
ret = chunk_alloc_dss(size, zero);
if (ret != NULL)
goto RETURN;
#endif
ret = chunk_alloc_mmap(size);
if (ret != NULL) {
*zero = true;
goto RETURN;
}
#ifdef JEMALLOC_SWAP
}
#endif
/* All strategies for allocation failed. */
ret = NULL;
RETURN:
#ifdef JEMALLOC_IVSALLOC
if (base == false && ret != NULL) {
if (rtree_set(chunks_rtree, (uintptr_t)ret, ret)) {
chunk_dealloc(ret, size, true);
return (NULL);
}
}
#endif
#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
if (ret != NULL) {
# ifdef JEMALLOC_PROF
bool gdump;
# endif
malloc_mutex_lock(&chunks_mtx);
# ifdef JEMALLOC_STATS
stats_chunks.nchunks += (size / chunksize);
# endif
stats_chunks.curchunks += (size / chunksize);
if (stats_chunks.curchunks > stats_chunks.highchunks) {
stats_chunks.highchunks = stats_chunks.curchunks;
# ifdef JEMALLOC_PROF
gdump = true;
# endif
}
# ifdef JEMALLOC_PROF
else
gdump = false;
# endif
malloc_mutex_unlock(&chunks_mtx);
# ifdef JEMALLOC_PROF
if (opt_prof && opt_prof_gdump && gdump)
prof_gdump();
# endif
}
#endif
assert(CHUNK_ADDR2BASE(ret) == ret);
return (ret);
}
void
chunk_dealloc(void *chunk, size_t size, bool unmap)
{
assert(chunk != NULL);
assert(CHUNK_ADDR2BASE(chunk) == chunk);
assert(size != 0);
assert((size & chunksize_mask) == 0);
#ifdef JEMALLOC_IVSALLOC
rtree_set(chunks_rtree, (uintptr_t)chunk, NULL);
#endif
#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
malloc_mutex_lock(&chunks_mtx);
stats_chunks.curchunks -= (size / chunksize);
malloc_mutex_unlock(&chunks_mtx);
#endif
if (unmap) {
#ifdef JEMALLOC_SWAP
if (swap_enabled && chunk_dealloc_swap(chunk, size) == false)
return;
#endif
#ifdef JEMALLOC_DSS
if (chunk_dealloc_dss(chunk, size) == false)
return;
#endif
chunk_dealloc_mmap(chunk, size);
}
}
bool
chunk_boot(void)
{
/* Set variables according to the value of opt_lg_chunk. */
chunksize = (ZU(1) << opt_lg_chunk);
assert(chunksize >= PAGE_SIZE);
chunksize_mask = chunksize - 1;
chunk_npages = (chunksize >> PAGE_SHIFT);
#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
if (malloc_mutex_init(&chunks_mtx))
return (true);
memset(&stats_chunks, 0, sizeof(chunk_stats_t));
#endif
#ifdef JEMALLOC_SWAP
if (chunk_swap_boot())
return (true);
#endif
if (chunk_mmap_boot())
return (true);
#ifdef JEMALLOC_DSS
if (chunk_dss_boot())
return (true);
#endif
#ifdef JEMALLOC_IVSALLOC
chunks_rtree = rtree_new((ZU(1) << (LG_SIZEOF_PTR+3)) - opt_lg_chunk);
if (chunks_rtree == NULL)
return (true);
#endif
return (false);
}

284
deps/jemalloc.orig/src/chunk_dss.c vendored Normal file
View File

@ -0,0 +1,284 @@
#define JEMALLOC_CHUNK_DSS_C_
#include "jemalloc/internal/jemalloc_internal.h"
#ifdef JEMALLOC_DSS
/******************************************************************************/
/* Data. */
malloc_mutex_t dss_mtx;
/* Base address of the DSS. */
static void *dss_base;
/* Current end of the DSS, or ((void *)-1) if the DSS is exhausted. */
static void *dss_prev;
/* Current upper limit on DSS addresses. */
static void *dss_max;
/*
* Trees of chunks that were previously allocated (trees differ only in node
* ordering). These are used when allocating chunks, in an attempt to re-use
* address space. Depending on function, different tree orderings are needed,
* which is why there are two trees with the same contents.
*/
static extent_tree_t dss_chunks_szad;
static extent_tree_t dss_chunks_ad;
/******************************************************************************/
/* Function prototypes for non-inline static functions. */
static void *chunk_recycle_dss(size_t size, bool *zero);
static extent_node_t *chunk_dealloc_dss_record(void *chunk, size_t size);
/******************************************************************************/
static void *
chunk_recycle_dss(size_t size, bool *zero)
{
extent_node_t *node, key;
key.addr = NULL;
key.size = size;
malloc_mutex_lock(&dss_mtx);
node = extent_tree_szad_nsearch(&dss_chunks_szad, &key);
if (node != NULL) {
void *ret = node->addr;
/* Remove node from the tree. */
extent_tree_szad_remove(&dss_chunks_szad, node);
if (node->size == size) {
extent_tree_ad_remove(&dss_chunks_ad, node);
base_node_dealloc(node);
} else {
/*
* Insert the remainder of node's address range as a
* smaller chunk. Its position within dss_chunks_ad
* does not change.
*/
assert(node->size > size);
node->addr = (void *)((uintptr_t)node->addr + size);
node->size -= size;
extent_tree_szad_insert(&dss_chunks_szad, node);
}
malloc_mutex_unlock(&dss_mtx);
if (*zero)
memset(ret, 0, size);
return (ret);
}
malloc_mutex_unlock(&dss_mtx);
return (NULL);
}
void *
chunk_alloc_dss(size_t size, bool *zero)
{
void *ret;
ret = chunk_recycle_dss(size, zero);
if (ret != NULL)
return (ret);
/*
* sbrk() uses a signed increment argument, so take care not to
* interpret a huge allocation request as a negative increment.
*/
if ((intptr_t)size < 0)
return (NULL);
malloc_mutex_lock(&dss_mtx);
if (dss_prev != (void *)-1) {
intptr_t incr;
/*
* The loop is necessary to recover from races with other
* threads that are using the DSS for something other than
* malloc.
*/
do {
/* Get the current end of the DSS. */
dss_max = sbrk(0);
/*
* Calculate how much padding is necessary to
* chunk-align the end of the DSS.
*/
incr = (intptr_t)size
- (intptr_t)CHUNK_ADDR2OFFSET(dss_max);
if (incr == (intptr_t)size)
ret = dss_max;
else {
ret = (void *)((intptr_t)dss_max + incr);
incr += size;
}
dss_prev = sbrk(incr);
if (dss_prev == dss_max) {
/* Success. */
dss_max = (void *)((intptr_t)dss_prev + incr);
malloc_mutex_unlock(&dss_mtx);
*zero = true;
return (ret);
}
} while (dss_prev != (void *)-1);
}
malloc_mutex_unlock(&dss_mtx);
return (NULL);
}
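/*
 * Editor's note -- worked example of the increment arithmetic above, not
 * part of the original source.  Assume chunksize == 0x400000 (4 MiB), the
 * request is for a single chunk (size == 0x400000), and sbrk(0) reports
 * dss_max == 0x1234000:
 *
 *	CHUNK_ADDR2OFFSET(dss_max) == 0x234000
 *	incr = 0x400000 - 0x234000 == 0x1cc000	(skip to the next chunk boundary)
 *	ret  = 0x1234000 + 0x1cc000 == 0x1400000	(chunk-aligned)
 *	incr += size, so sbrk(0x5cc000) moves the break to 0x1800000,
 *	which is exactly ret + size.
 *
 * Had dss_max already been chunk-aligned, incr would have equaled size and
 * ret would have been dss_max itself.
 */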
static extent_node_t *
chunk_dealloc_dss_record(void *chunk, size_t size)
{
extent_node_t *xnode, *node, *prev, key;
xnode = NULL;
while (true) {
key.addr = (void *)((uintptr_t)chunk + size);
node = extent_tree_ad_nsearch(&dss_chunks_ad, &key);
/* Try to coalesce forward. */
if (node != NULL && node->addr == key.addr) {
/*
* Coalesce chunk with the following address range.
* This does not change the position within
* dss_chunks_ad, so only remove/insert from/into
* dss_chunks_szad.
*/
extent_tree_szad_remove(&dss_chunks_szad, node);
node->addr = chunk;
node->size += size;
extent_tree_szad_insert(&dss_chunks_szad, node);
break;
} else if (xnode == NULL) {
/*
* It is possible that base_node_alloc() will cause a
* new base chunk to be allocated, so take care not to
* deadlock on dss_mtx, and recover if another thread
* deallocates an adjacent chunk while this one is busy
* allocating xnode.
*/
malloc_mutex_unlock(&dss_mtx);
xnode = base_node_alloc();
malloc_mutex_lock(&dss_mtx);
if (xnode == NULL)
return (NULL);
} else {
/* Coalescing forward failed, so insert a new node. */
node = xnode;
xnode = NULL;
node->addr = chunk;
node->size = size;
extent_tree_ad_insert(&dss_chunks_ad, node);
extent_tree_szad_insert(&dss_chunks_szad, node);
break;
}
}
/* Discard xnode if it ended up unused due to a race. */
if (xnode != NULL)
base_node_dealloc(xnode);
/* Try to coalesce backward. */
prev = extent_tree_ad_prev(&dss_chunks_ad, node);
if (prev != NULL && (void *)((uintptr_t)prev->addr + prev->size) ==
chunk) {
/*
* Coalesce chunk with the previous address range. This does
* not change the position within dss_chunks_ad, so only
* remove/insert node from/into dss_chunks_szad.
*/
extent_tree_szad_remove(&dss_chunks_szad, prev);
extent_tree_ad_remove(&dss_chunks_ad, prev);
extent_tree_szad_remove(&dss_chunks_szad, node);
node->addr = prev->addr;
node->size += prev->size;
extent_tree_szad_insert(&dss_chunks_szad, node);
base_node_dealloc(prev);
}
return (node);
}
bool
chunk_in_dss(void *chunk)
{
bool ret;
malloc_mutex_lock(&dss_mtx);
if ((uintptr_t)chunk >= (uintptr_t)dss_base
&& (uintptr_t)chunk < (uintptr_t)dss_max)
ret = true;
else
ret = false;
malloc_mutex_unlock(&dss_mtx);
return (ret);
}
bool
chunk_dealloc_dss(void *chunk, size_t size)
{
bool ret;
malloc_mutex_lock(&dss_mtx);
if ((uintptr_t)chunk >= (uintptr_t)dss_base
&& (uintptr_t)chunk < (uintptr_t)dss_max) {
extent_node_t *node;
/* Try to coalesce with other unused chunks. */
node = chunk_dealloc_dss_record(chunk, size);
if (node != NULL) {
chunk = node->addr;
size = node->size;
}
/* Get the current end of the DSS. */
dss_max = sbrk(0);
/*
* Try to shrink the DSS if this chunk is at the end of the
* DSS. The sbrk() call here is subject to a race condition
* with threads that use brk(2) or sbrk(2) directly, but the
* alternative would be to leak memory for the sake of poorly
* designed multi-threaded programs.
*/
if ((void *)((uintptr_t)chunk + size) == dss_max
&& (dss_prev = sbrk(-(intptr_t)size)) == dss_max) {
/* Success. */
dss_max = (void *)((intptr_t)dss_prev - (intptr_t)size);
if (node != NULL) {
extent_tree_szad_remove(&dss_chunks_szad, node);
extent_tree_ad_remove(&dss_chunks_ad, node);
base_node_dealloc(node);
}
} else
madvise(chunk, size, MADV_DONTNEED);
ret = false;
goto RETURN;
}
ret = true;
RETURN:
malloc_mutex_unlock(&dss_mtx);
return (ret);
}
bool
chunk_dss_boot(void)
{
if (malloc_mutex_init(&dss_mtx))
return (true);
dss_base = sbrk(0);
dss_prev = dss_base;
dss_max = dss_base;
extent_tree_szad_new(&dss_chunks_szad);
extent_tree_ad_new(&dss_chunks_ad);
return (false);
}
/******************************************************************************/
#endif /* JEMALLOC_DSS */

239
deps/jemalloc.orig/src/chunk_mmap.c vendored Normal file

@ -0,0 +1,239 @@
#define JEMALLOC_CHUNK_MMAP_C_
#include "jemalloc/internal/jemalloc_internal.h"
/******************************************************************************/
/* Data. */
/*
* Used by chunk_alloc_mmap() to decide whether to attempt the fast path and
* potentially avoid some system calls.
*/
#ifndef NO_TLS
static __thread bool mmap_unaligned_tls
JEMALLOC_ATTR(tls_model("initial-exec"));
#define MMAP_UNALIGNED_GET() mmap_unaligned_tls
#define MMAP_UNALIGNED_SET(v) do { \
mmap_unaligned_tls = (v); \
} while (0)
#else
static pthread_key_t mmap_unaligned_tsd;
#define MMAP_UNALIGNED_GET() ((bool)pthread_getspecific(mmap_unaligned_tsd))
#define MMAP_UNALIGNED_SET(v) do { \
pthread_setspecific(mmap_unaligned_tsd, (void *)(v)); \
} while (0)
#endif
/******************************************************************************/
/* Function prototypes for non-inline static functions. */
static void *pages_map(void *addr, size_t size, bool noreserve);
static void pages_unmap(void *addr, size_t size);
static void *chunk_alloc_mmap_slow(size_t size, bool unaligned,
bool noreserve);
static void *chunk_alloc_mmap_internal(size_t size, bool noreserve);
/******************************************************************************/
static void *
pages_map(void *addr, size_t size, bool noreserve)
{
void *ret;
/*
* We don't use MAP_FIXED here, because it can cause the *replacement*
* of existing mappings, and we only want to create new mappings.
*/
int flags = MAP_PRIVATE | MAP_ANON;
#ifdef MAP_NORESERVE
if (noreserve)
flags |= MAP_NORESERVE;
#endif
ret = mmap(addr, size, PROT_READ | PROT_WRITE, flags, -1, 0);
assert(ret != NULL);
if (ret == MAP_FAILED)
ret = NULL;
else if (addr != NULL && ret != addr) {
/*
* We succeeded in mapping memory, but not in the right place.
*/
if (munmap(ret, size) == -1) {
char buf[BUFERROR_BUF];
buferror(errno, buf, sizeof(buf));
malloc_write("<jemalloc>: Error in munmap(): ");
malloc_write(buf);
malloc_write("\n");
if (opt_abort)
abort();
}
ret = NULL;
}
assert(ret == NULL || (addr == NULL && ret != addr)
|| (addr != NULL && ret == addr));
return (ret);
}
static void
pages_unmap(void *addr, size_t size)
{
if (munmap(addr, size) == -1) {
char buf[BUFERROR_BUF];
buferror(errno, buf, sizeof(buf));
malloc_write("<jemalloc>: Error in munmap(): ");
malloc_write(buf);
malloc_write("\n");
if (opt_abort)
abort();
}
}
static void *
chunk_alloc_mmap_slow(size_t size, bool unaligned, bool noreserve)
{
void *ret;
size_t offset;
/* Beware size_t wrap-around. */
if (size + chunksize <= size)
return (NULL);
ret = pages_map(NULL, size + chunksize, noreserve);
if (ret == NULL)
return (NULL);
/* Clean up unneeded leading/trailing space. */
offset = CHUNK_ADDR2OFFSET(ret);
if (offset != 0) {
/* Note that mmap() returned an unaligned mapping. */
unaligned = true;
/* Leading space. */
pages_unmap(ret, chunksize - offset);
ret = (void *)((uintptr_t)ret +
(chunksize - offset));
/* Trailing space. */
pages_unmap((void *)((uintptr_t)ret + size),
offset);
} else {
/* Trailing space only. */
pages_unmap((void *)((uintptr_t)ret + size),
chunksize);
}
/*
* If mmap() returned an aligned mapping, reset mmap_unaligned so that
* the next chunk_alloc_mmap() execution tries the fast allocation
* method.
*/
if (unaligned == false)
MMAP_UNALIGNED_SET(false);
return (ret);
}
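/*
 * Editor's note -- worked example of the trimming above, not part of the
 * original source.  Assume chunksize == 0x400000 and size == 0x800000 (two
 * chunks), and pages_map() returns 0x2350000 for the over-sized 0xc00000
 * request:
 *
 *	offset = CHUNK_ADDR2OFFSET(0x2350000) == 0x350000
 *	leading trim:  0xb0000 bytes at 0x2350000 (chunksize - offset)
 *	ret moves to   0x2400000 (chunk-aligned)
 *	trailing trim: 0x350000 bytes at 0x2c00000 (ret + size)
 *
 * The 0xb0000 + 0x800000 + 0x350000 bytes account for the whole 0xc00000
 * mapping, and exactly size bytes remain mapped, starting at a chunk
 * boundary.
 */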
static void *
chunk_alloc_mmap_internal(size_t size, bool noreserve)
{
void *ret;
/*
* Ideally, there would be a way to specify alignment to mmap() (like
* NetBSD has), but in the absence of such a feature, we have to work
* hard to efficiently create aligned mappings. The reliable, but
* slow method is to create a mapping that is over-sized, then trim the
* excess. However, that always results in at least one call to
* pages_unmap().
*
* A more optimistic approach is to try mapping precisely the right
* amount, then try to append another mapping if alignment is off. In
* practice, this works out well as long as the application is not
* interleaving mappings via direct mmap() calls. If we do run into a
* situation where there is an interleaved mapping and we are unable to
* extend an unaligned mapping, our best option is to switch to the
* slow method until mmap() returns another aligned mapping. This will
* tend to leave a gap in the memory map that is too small to cause
* later problems for the optimistic method.
*
* Another possible confounding factor is address space layout
* randomization (ASLR), which causes mmap(2) to disregard the
* requested address. mmap_unaligned tracks whether the previous
* chunk_alloc_mmap() execution received any unaligned or relocated
* mappings, and if so, the current execution will immediately fall
* back to the slow method. However, we keep track of whether the fast
* method would have succeeded, and if so, we make a note to try the
* fast method next time.
*/
if (MMAP_UNALIGNED_GET() == false) {
size_t offset;
ret = pages_map(NULL, size, noreserve);
if (ret == NULL)
return (NULL);
offset = CHUNK_ADDR2OFFSET(ret);
if (offset != 0) {
MMAP_UNALIGNED_SET(true);
/* Try to extend chunk boundary. */
if (pages_map((void *)((uintptr_t)ret + size),
chunksize - offset, noreserve) == NULL) {
/*
* Extension failed. Clean up, then revert to
* the reliable-but-expensive method.
*/
pages_unmap(ret, size);
ret = chunk_alloc_mmap_slow(size, true,
noreserve);
} else {
/* Clean up unneeded leading space. */
pages_unmap(ret, chunksize - offset);
ret = (void *)((uintptr_t)ret + (chunksize -
offset));
}
}
} else
ret = chunk_alloc_mmap_slow(size, false, noreserve);
return (ret);
}
void *
chunk_alloc_mmap(size_t size)
{
return (chunk_alloc_mmap_internal(size, false));
}
void *
chunk_alloc_mmap_noreserve(size_t size)
{
return (chunk_alloc_mmap_internal(size, true));
}
void
chunk_dealloc_mmap(void *chunk, size_t size)
{
pages_unmap(chunk, size);
}
bool
chunk_mmap_boot(void)
{
#ifdef NO_TLS
if (pthread_key_create(&mmap_unaligned_tsd, NULL) != 0) {
malloc_write("<jemalloc>: Error in pthread_key_create()\n");
return (true);
}
#endif
return (false);
}

619
deps/jemalloc.orig/src/ckh.c vendored Normal file

@ -0,0 +1,619 @@
/*
*******************************************************************************
* Implementation of (2^1+,2) cuckoo hashing, where 2^1+ indicates that each
* hash bucket contains 2^n cells, for n >= 1, and 2 indicates that two hash
* functions are employed. The original cuckoo hashing algorithm was described
* in:
*
* Pagh, R., F.F. Rodler (2004) Cuckoo Hashing. Journal of Algorithms
* 51(2):122-144.
*
* Generalization of cuckoo hashing was discussed in:
*
* Erlingsson, U., M. Manasse, F. McSherry (2006) A cool and practical
* alternative to traditional hash tables. In Proceedings of the 7th
* Workshop on Distributed Data and Structures (WDAS'06), Santa Clara, CA,
* January 2006.
*
* This implementation uses precisely two hash functions because that is the
* fewest that can work, and supporting multiple hashes is an implementation
* burden. Here is a reproduction of Figure 1 from Erlingsson et al. (2006)
* that shows approximate expected maximum load factors for various
* configurations:
*
* | #cells/bucket |
* #hashes | 1 | 2 | 4 | 8 |
* --------+-------+-------+-------+-------+
* 1 | 0.006 | 0.006 | 0.03 | 0.12 |
* 2 | 0.49 | 0.86 |>0.93< |>0.96< |
* 3 | 0.91 | 0.97 | 0.98 | 0.999 |
* 4 | 0.97 | 0.99 | 0.999 | |
*
* The number of cells per bucket is chosen such that a bucket fits in one cache
* line. So, on 32- and 64-bit systems, we use (8,2) and (4,2) cuckoo hashing,
* respectively.
*
******************************************************************************/
#define JEMALLOC_CKH_C_
#include "jemalloc/internal/jemalloc_internal.h"
/******************************************************************************/
/* Function prototypes for non-inline static functions. */
static bool ckh_grow(ckh_t *ckh);
static void ckh_shrink(ckh_t *ckh);
/******************************************************************************/
/*
* Search bucket for key and return the cell number if found; SIZE_T_MAX
* otherwise.
*/
JEMALLOC_INLINE size_t
ckh_bucket_search(ckh_t *ckh, size_t bucket, const void *key)
{
ckhc_t *cell;
unsigned i;
for (i = 0; i < (ZU(1) << LG_CKH_BUCKET_CELLS); i++) {
cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + i];
if (cell->key != NULL && ckh->keycomp(key, cell->key))
return ((bucket << LG_CKH_BUCKET_CELLS) + i);
}
return (SIZE_T_MAX);
}
/*
* Search table for key and return cell number if found; SIZE_T_MAX otherwise.
*/
JEMALLOC_INLINE size_t
ckh_isearch(ckh_t *ckh, const void *key)
{
size_t hash1, hash2, bucket, cell;
assert(ckh != NULL);
dassert(ckh->magic == CKH_MAGIC);
ckh->hash(key, ckh->lg_curbuckets, &hash1, &hash2);
/* Search primary bucket. */
bucket = hash1 & ((ZU(1) << ckh->lg_curbuckets) - 1);
cell = ckh_bucket_search(ckh, bucket, key);
if (cell != SIZE_T_MAX)
return (cell);
/* Search secondary bucket. */
bucket = hash2 & ((ZU(1) << ckh->lg_curbuckets) - 1);
cell = ckh_bucket_search(ckh, bucket, key);
return (cell);
}
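/*
 * Editor's note -- illustrative sketch, not part of the original source.
 * With the string hash defined later in this file, the two candidate
 * buckets that ckh_isearch() probes are derived as:
 *
 *	size_t h1, h2;
 *	ckh_string_hash("example", ckh->lg_curbuckets, &h1, &h2);
 *	primary   = h1 & ((ZU(1) << ckh->lg_curbuckets) - 1);
 *	secondary = h2 & ((ZU(1) << ckh->lg_curbuckets) - 1);
 *
 * An item can only ever live in one of those two buckets, which bounds a
 * lookup to at most 2 * (1 << LG_CKH_BUCKET_CELLS) cell comparisons.
 */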
JEMALLOC_INLINE bool
ckh_try_bucket_insert(ckh_t *ckh, size_t bucket, const void *key,
const void *data)
{
ckhc_t *cell;
unsigned offset, i;
/*
* Cycle through the cells in the bucket, starting at a random position.
* The randomness avoids worst-case search overhead as buckets fill up.
*/
prn32(offset, LG_CKH_BUCKET_CELLS, ckh->prn_state, CKH_A, CKH_C);
for (i = 0; i < (ZU(1) << LG_CKH_BUCKET_CELLS); i++) {
cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) +
((i + offset) & ((ZU(1) << LG_CKH_BUCKET_CELLS) - 1))];
if (cell->key == NULL) {
cell->key = key;
cell->data = data;
ckh->count++;
return (false);
}
}
return (true);
}
/*
* No space is available in bucket. Randomly evict an item, then try to find an
* alternate location for that item. Iteratively repeat this
* eviction/relocation procedure until either success or detection of an
* eviction/relocation bucket cycle.
*/
JEMALLOC_INLINE bool
ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey,
void const **argdata)
{
const void *key, *data, *tkey, *tdata;
ckhc_t *cell;
size_t hash1, hash2, bucket, tbucket;
unsigned i;
bucket = argbucket;
key = *argkey;
data = *argdata;
while (true) {
/*
* Choose a random item within the bucket to evict. This is
* critical to correct function, because without (eventually)
* evicting all items within a bucket during iteration, it
* would be possible to get stuck in an infinite loop if there
* were an item for which both hashes indicated the same
* bucket.
*/
prn32(i, LG_CKH_BUCKET_CELLS, ckh->prn_state, CKH_A, CKH_C);
cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + i];
assert(cell->key != NULL);
/* Swap cell->{key,data} and {key,data} (evict). */
tkey = cell->key; tdata = cell->data;
cell->key = key; cell->data = data;
key = tkey; data = tdata;
#ifdef CKH_COUNT
ckh->nrelocs++;
#endif
/* Find the alternate bucket for the evicted item. */
ckh->hash(key, ckh->lg_curbuckets, &hash1, &hash2);
tbucket = hash2 & ((ZU(1) << ckh->lg_curbuckets) - 1);
if (tbucket == bucket) {
tbucket = hash1 & ((ZU(1) << ckh->lg_curbuckets) - 1);
/*
* It may be that (tbucket == bucket) still, if the
* item's hashes both indicate this bucket. However,
* we are guaranteed to eventually escape this bucket
* during iteration, assuming pseudo-random item
* selection (true randomness would make infinite
* looping a remote possibility). The reason we can
* never get trapped forever is that there are two
* cases:
*
* 1) This bucket == argbucket, so we will quickly
* detect an eviction cycle and terminate.
* 2) An item was evicted to this bucket from another,
* which means that at least one item in this bucket
* has hashes that indicate distinct buckets.
*/
}
/* Check for a cycle. */
if (tbucket == argbucket) {
*argkey = key;
*argdata = data;
return (true);
}
bucket = tbucket;
if (ckh_try_bucket_insert(ckh, bucket, key, data) == false)
return (false);
}
}
JEMALLOC_INLINE bool
ckh_try_insert(ckh_t *ckh, void const**argkey, void const**argdata)
{
size_t hash1, hash2, bucket;
const void *key = *argkey;
const void *data = *argdata;
ckh->hash(key, ckh->lg_curbuckets, &hash1, &hash2);
/* Try to insert in primary bucket. */
bucket = hash1 & ((ZU(1) << ckh->lg_curbuckets) - 1);
if (ckh_try_bucket_insert(ckh, bucket, key, data) == false)
return (false);
/* Try to insert in secondary bucket. */
bucket = hash2 & ((ZU(1) << ckh->lg_curbuckets) - 1);
if (ckh_try_bucket_insert(ckh, bucket, key, data) == false)
return (false);
/*
* Try to find a place for this item via iterative eviction/relocation.
*/
return (ckh_evict_reloc_insert(ckh, bucket, argkey, argdata));
}
/*
* Try to rebuild the hash table from scratch by inserting all items from the
* old table into the new.
*/
JEMALLOC_INLINE bool
ckh_rebuild(ckh_t *ckh, ckhc_t *aTab)
{
size_t count, i, nins;
const void *key, *data;
count = ckh->count;
ckh->count = 0;
for (i = nins = 0; nins < count; i++) {
if (aTab[i].key != NULL) {
key = aTab[i].key;
data = aTab[i].data;
if (ckh_try_insert(ckh, &key, &data)) {
ckh->count = count;
return (true);
}
nins++;
}
}
return (false);
}
static bool
ckh_grow(ckh_t *ckh)
{
bool ret;
ckhc_t *tab, *ttab;
size_t lg_curcells;
unsigned lg_prevbuckets;
#ifdef CKH_COUNT
ckh->ngrows++;
#endif
/*
* It is possible (though unlikely, given well behaved hashes) that the
* table will have to be doubled more than once in order to create a
* usable table.
*/
lg_prevbuckets = ckh->lg_curbuckets;
lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS;
while (true) {
size_t usize;
lg_curcells++;
usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE, NULL);
if (usize == 0) {
ret = true;
goto RETURN;
}
tab = (ckhc_t *)ipalloc(usize, CACHELINE, true);
if (tab == NULL) {
ret = true;
goto RETURN;
}
/* Swap in new table. */
ttab = ckh->tab;
ckh->tab = tab;
tab = ttab;
ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS;
if (ckh_rebuild(ckh, tab) == false) {
idalloc(tab);
break;
}
/* Rebuilding failed, so back out partially rebuilt table. */
idalloc(ckh->tab);
ckh->tab = tab;
ckh->lg_curbuckets = lg_prevbuckets;
}
ret = false;
RETURN:
return (ret);
}
static void
ckh_shrink(ckh_t *ckh)
{
ckhc_t *tab, *ttab;
size_t lg_curcells, usize;
unsigned lg_prevbuckets;
/*
* It is possible (though unlikely, given well behaved hashes) that the
* table rebuild will fail.
*/
lg_prevbuckets = ckh->lg_curbuckets;
lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS - 1;
usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE, NULL);
if (usize == 0)
return;
tab = (ckhc_t *)ipalloc(usize, CACHELINE, true);
if (tab == NULL) {
/*
* An OOM error isn't worth propagating, since it doesn't
* prevent this or future operations from proceeding.
*/
return;
}
/* Swap in new table. */
ttab = ckh->tab;
ckh->tab = tab;
tab = ttab;
ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS;
if (ckh_rebuild(ckh, tab) == false) {
idalloc(tab);
#ifdef CKH_COUNT
ckh->nshrinks++;
#endif
return;
}
/* Rebuilding failed, so back out partially rebuilt table. */
idalloc(ckh->tab);
ckh->tab = tab;
ckh->lg_curbuckets = lg_prevbuckets;
#ifdef CKH_COUNT
ckh->nshrinkfails++;
#endif
}
bool
ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp)
{
bool ret;
size_t mincells, usize;
unsigned lg_mincells;
assert(minitems > 0);
assert(hash != NULL);
assert(keycomp != NULL);
#ifdef CKH_COUNT
ckh->ngrows = 0;
ckh->nshrinks = 0;
ckh->nshrinkfails = 0;
ckh->ninserts = 0;
ckh->nrelocs = 0;
#endif
ckh->prn_state = 42; /* Value doesn't really matter. */
ckh->count = 0;
/*
* Find the minimum power of 2 that is large enough to fit aBaseCount
* entries. We are using (2+,2) cuckoo hashing, which has an expected
* maximum load factor of at least ~0.86, so 0.75 is a conservative load
* factor that will typically allow 2^aLgMinItems to fit without ever
* growing the table.
*/
assert(LG_CKH_BUCKET_CELLS > 0);
mincells = ((minitems + (3 - (minitems % 3))) / 3) << 2;
for (lg_mincells = LG_CKH_BUCKET_CELLS;
(ZU(1) << lg_mincells) < mincells;
lg_mincells++)
; /* Do nothing. */
ckh->lg_minbuckets = lg_mincells - LG_CKH_BUCKET_CELLS;
ckh->lg_curbuckets = lg_mincells - LG_CKH_BUCKET_CELLS;
ckh->hash = hash;
ckh->keycomp = keycomp;
usize = sa2u(sizeof(ckhc_t) << lg_mincells, CACHELINE, NULL);
if (usize == 0) {
ret = true;
goto RETURN;
}
ckh->tab = (ckhc_t *)ipalloc(usize, CACHELINE, true);
if (ckh->tab == NULL) {
ret = true;
goto RETURN;
}
#ifdef JEMALLOC_DEBUG
ckh->magic = CKH_MAGIC;
#endif
ret = false;
RETURN:
return (ret);
}
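/*
 * Editor's note -- worked example of the sizing above, not part of the
 * original source.  For minitems == 10 the formula rounds up to a multiple
 * of 3 and scales by 4/3 (i.e. a 0.75 load factor):
 *
 *	mincells = ((10 + 2) / 3) << 2 == 16
 *
 * Assuming LG_CKH_BUCKET_CELLS == 2 (4-cell buckets, per the header comment
 * for 64-bit systems), lg_mincells settles at 4, giving 4 buckets of 4
 * cells and an initial load factor of 10/16 == 0.625, comfortably under
 * both the 0.75 target and the ~0.86 expected maximum.
 */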
void
ckh_delete(ckh_t *ckh)
{
assert(ckh != NULL);
dassert(ckh->magic == CKH_MAGIC);
#ifdef CKH_VERBOSE
malloc_printf(
"%s(%p): ngrows: %"PRIu64", nshrinks: %"PRIu64","
" nshrinkfails: %"PRIu64", ninserts: %"PRIu64","
" nrelocs: %"PRIu64"\n", __func__, ckh,
(unsigned long long)ckh->ngrows,
(unsigned long long)ckh->nshrinks,
(unsigned long long)ckh->nshrinkfails,
(unsigned long long)ckh->ninserts,
(unsigned long long)ckh->nrelocs);
#endif
idalloc(ckh->tab);
#ifdef JEMALLOC_DEBUG
memset(ckh, 0x5a, sizeof(ckh_t));
#endif
}
size_t
ckh_count(ckh_t *ckh)
{
assert(ckh != NULL);
dassert(ckh->magic == CKH_MAGIC);
return (ckh->count);
}
bool
ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data)
{
size_t i, ncells;
for (i = *tabind, ncells = (ZU(1) << (ckh->lg_curbuckets +
LG_CKH_BUCKET_CELLS)); i < ncells; i++) {
if (ckh->tab[i].key != NULL) {
if (key != NULL)
*key = (void *)ckh->tab[i].key;
if (data != NULL)
*data = (void *)ckh->tab[i].data;
*tabind = i + 1;
return (false);
}
}
return (true);
}
bool
ckh_insert(ckh_t *ckh, const void *key, const void *data)
{
bool ret;
assert(ckh != NULL);
dassert(ckh->magic == CKH_MAGIC);
assert(ckh_search(ckh, key, NULL, NULL));
#ifdef CKH_COUNT
ckh->ninserts++;
#endif
while (ckh_try_insert(ckh, &key, &data)) {
if (ckh_grow(ckh)) {
ret = true;
goto RETURN;
}
}
ret = false;
RETURN:
return (ret);
}
bool
ckh_remove(ckh_t *ckh, const void *searchkey, void **key, void **data)
{
size_t cell;
assert(ckh != NULL);
dassert(ckh->magic == CKH_MAGIC);
cell = ckh_isearch(ckh, searchkey);
if (cell != SIZE_T_MAX) {
if (key != NULL)
*key = (void *)ckh->tab[cell].key;
if (data != NULL)
*data = (void *)ckh->tab[cell].data;
ckh->tab[cell].key = NULL;
ckh->tab[cell].data = NULL; /* Not necessary. */
ckh->count--;
/* Try to halve the table if it is less than 1/4 full. */
if (ckh->count < (ZU(1) << (ckh->lg_curbuckets
+ LG_CKH_BUCKET_CELLS - 2)) && ckh->lg_curbuckets
> ckh->lg_minbuckets) {
/* Ignore error due to OOM. */
ckh_shrink(ckh);
}
return (false);
}
return (true);
}
bool
ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data)
{
size_t cell;
assert(ckh != NULL);
dassert(ckh->magic == CKH_MAGIC);
cell = ckh_isearch(ckh, searchkey);
if (cell != SIZE_T_MAX) {
if (key != NULL)
*key = (void *)ckh->tab[cell].key;
if (data != NULL)
*data = (void *)ckh->tab[cell].data;
return (false);
}
return (true);
}
void
ckh_string_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2)
{
size_t ret1, ret2;
uint64_t h;
assert(minbits <= 32 || (SIZEOF_PTR == 8 && minbits <= 64));
assert(hash1 != NULL);
assert(hash2 != NULL);
h = hash(key, strlen((const char *)key), 0x94122f335b332aeaLLU);
if (minbits <= 32) {
/*
* Avoid doing multiple hashes, since a single hash provides
* enough bits.
*/
ret1 = h & ZU(0xffffffffU);
ret2 = h >> 32;
} else {
ret1 = h;
ret2 = hash(key, strlen((const char *)key),
0x8432a476666bbc13LLU);
}
*hash1 = ret1;
*hash2 = ret2;
}
bool
ckh_string_keycomp(const void *k1, const void *k2)
{
assert(k1 != NULL);
assert(k2 != NULL);
return (strcmp((char *)k1, (char *)k2) ? false : true);
}
void
ckh_pointer_hash(const void *key, unsigned minbits, size_t *hash1,
size_t *hash2)
{
size_t ret1, ret2;
uint64_t h;
union {
const void *v;
uint64_t i;
} u;
assert(minbits <= 32 || (SIZEOF_PTR == 8 && minbits <= 64));
assert(hash1 != NULL);
assert(hash2 != NULL);
assert(sizeof(u.v) == sizeof(u.i));
#if (LG_SIZEOF_PTR != LG_SIZEOF_INT)
u.i = 0;
#endif
u.v = key;
h = hash(&u.i, sizeof(u.i), 0xd983396e68886082LLU);
if (minbits <= 32) {
/*
* Avoid doing multiple hashes, since a single hash provides
* enough bits.
*/
ret1 = h & ZU(0xffffffffU);
ret2 = h >> 32;
} else {
assert(SIZEOF_PTR == 8);
ret1 = h;
ret2 = hash(&u.i, sizeof(u.i), 0x5e2be9aff8709a5dLLU);
}
*hash1 = ret1;
*hash2 = ret2;
}
bool
ckh_pointer_keycomp(const void *k1, const void *k2)
{
return ((k1 == k2) ? true : false);
}
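/*
 * Editor's illustrative usage sketch -- not part of the original source.
 * It strings together the ckh operations defined above, using the string
 * hash/compare pair; remember that in this API false means success.
 * ckh_usage_example() is a hypothetical helper added only for illustration.
 */
static void
ckh_usage_example(void)
{
	ckh_t ckh;
	void *data;

	if (ckh_new(&ckh, 16, ckh_string_hash, ckh_string_keycomp))
		return;	/* OOM while creating the table. */
	if (ckh_insert(&ckh, "key", "value") == false &&
	    ckh_search(&ckh, "key", NULL, &data) == false) {
		/* data now points at the stored "value" string. */
	}
	ckh_delete(&ckh);
}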

1670
deps/jemalloc.orig/src/ctl.c vendored Normal file

File diff suppressed because it is too large

41
deps/jemalloc.orig/src/extent.c vendored Normal file

@ -0,0 +1,41 @@
#define JEMALLOC_EXTENT_C_
#include "jemalloc/internal/jemalloc_internal.h"
/******************************************************************************/
#if (defined(JEMALLOC_SWAP) || defined(JEMALLOC_DSS))
static inline int
extent_szad_comp(extent_node_t *a, extent_node_t *b)
{
int ret;
size_t a_size = a->size;
size_t b_size = b->size;
ret = (a_size > b_size) - (a_size < b_size);
if (ret == 0) {
uintptr_t a_addr = (uintptr_t)a->addr;
uintptr_t b_addr = (uintptr_t)b->addr;
ret = (a_addr > b_addr) - (a_addr < b_addr);
}
return (ret);
}
/* Generate red-black tree functions. */
rb_gen(, extent_tree_szad_, extent_tree_t, extent_node_t, link_szad,
extent_szad_comp)
#endif
static inline int
extent_ad_comp(extent_node_t *a, extent_node_t *b)
{
uintptr_t a_addr = (uintptr_t)a->addr;
uintptr_t b_addr = (uintptr_t)b->addr;
return ((a_addr > b_addr) - (a_addr < b_addr));
}
/* Generate red-black tree functions. */
rb_gen(, extent_tree_ad_, extent_tree_t, extent_node_t, link_ad,
extent_ad_comp)

2
deps/jemalloc.orig/src/hash.c vendored Normal file

@ -0,0 +1,2 @@
#define JEMALLOC_HASH_C_
#include "jemalloc/internal/jemalloc_internal.h"

386
deps/jemalloc.orig/src/huge.c vendored Normal file

@ -0,0 +1,386 @@
#define JEMALLOC_HUGE_C_
#include "jemalloc/internal/jemalloc_internal.h"
/******************************************************************************/
/* Data. */
#ifdef JEMALLOC_STATS
uint64_t huge_nmalloc;
uint64_t huge_ndalloc;
size_t huge_allocated;
#endif
malloc_mutex_t huge_mtx;
/******************************************************************************/
/* Tree of chunks that are stand-alone huge allocations. */
static extent_tree_t huge;
void *
huge_malloc(size_t size, bool zero)
{
void *ret;
size_t csize;
extent_node_t *node;
/* Allocate one or more contiguous chunks for this request. */
csize = CHUNK_CEILING(size);
if (csize == 0) {
/* size is large enough to cause size_t wrap-around. */
return (NULL);
}
/* Allocate an extent node with which to track the chunk. */
node = base_node_alloc();
if (node == NULL)
return (NULL);
ret = chunk_alloc(csize, false, &zero);
if (ret == NULL) {
base_node_dealloc(node);
return (NULL);
}
/* Insert node into huge. */
node->addr = ret;
node->size = csize;
malloc_mutex_lock(&huge_mtx);
extent_tree_ad_insert(&huge, node);
#ifdef JEMALLOC_STATS
stats_cactive_add(csize);
huge_nmalloc++;
huge_allocated += csize;
#endif
malloc_mutex_unlock(&huge_mtx);
#ifdef JEMALLOC_FILL
if (zero == false) {
if (opt_junk)
memset(ret, 0xa5, csize);
else if (opt_zero)
memset(ret, 0, csize);
}
#endif
return (ret);
}
/* Only handles large allocations that require more than chunk alignment. */
void *
huge_palloc(size_t size, size_t alignment, bool zero)
{
void *ret;
size_t alloc_size, chunk_size, offset;
extent_node_t *node;
/*
* This allocation requires alignment that is even larger than chunk
* alignment. This means that huge_malloc() isn't good enough.
*
* Allocate almost twice as many chunks as are demanded by the size or
* alignment, in order to assure the alignment can be achieved, then
* unmap leading and trailing chunks.
*/
assert(alignment > chunksize);
chunk_size = CHUNK_CEILING(size);
if (size >= alignment)
alloc_size = chunk_size + alignment - chunksize;
else
alloc_size = (alignment << 1) - chunksize;
/* Allocate an extent node with which to track the chunk. */
node = base_node_alloc();
if (node == NULL)
return (NULL);
ret = chunk_alloc(alloc_size, false, &zero);
if (ret == NULL) {
base_node_dealloc(node);
return (NULL);
}
offset = (uintptr_t)ret & (alignment - 1);
assert((offset & chunksize_mask) == 0);
assert(offset < alloc_size);
if (offset == 0) {
/* Trim trailing space. */
chunk_dealloc((void *)((uintptr_t)ret + chunk_size), alloc_size
- chunk_size, true);
} else {
size_t trailsize;
/* Trim leading space. */
chunk_dealloc(ret, alignment - offset, true);
ret = (void *)((uintptr_t)ret + (alignment - offset));
trailsize = alloc_size - (alignment - offset) - chunk_size;
if (trailsize != 0) {
/* Trim trailing space. */
assert(trailsize < alloc_size);
chunk_dealloc((void *)((uintptr_t)ret + chunk_size),
trailsize, true);
}
}
/* Insert node into huge. */
node->addr = ret;
node->size = chunk_size;
malloc_mutex_lock(&huge_mtx);
extent_tree_ad_insert(&huge, node);
#ifdef JEMALLOC_STATS
stats_cactive_add(chunk_size);
huge_nmalloc++;
huge_allocated += chunk_size;
#endif
malloc_mutex_unlock(&huge_mtx);
#ifdef JEMALLOC_FILL
if (zero == false) {
if (opt_junk)
memset(ret, 0xa5, chunk_size);
else if (opt_zero)
memset(ret, 0, chunk_size);
}
#endif
return (ret);
}
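/*
 * Editor's note -- worked example of the trimming above, not part of the
 * original source.  Assume chunksize == 0x400000, size == 0x400000 and
 * alignment == 0x800000, so alloc_size == (alignment << 1) - chunksize ==
 * 0xc00000.  If chunk_alloc() returns 0x2c00000:
 *
 *	offset = 0x2c00000 & 0x7fffff == 0x400000
 *	leading trim:  alignment - offset == 0x400000 bytes at 0x2c00000
 *	ret moves to   0x3000000 (aligned to 0x800000)
 *	trailsize = 0xc00000 - 0x400000 - 0x400000 == 0x400000,
 *	trimmed at ret + chunk_size == 0x3400000
 *
 * leaving exactly one chunk mapped at an 8 MiB boundary.
 */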
void *
huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra)
{
/*
* Avoid moving the allocation if the size class can be left the same.
*/
if (oldsize > arena_maxclass
&& CHUNK_CEILING(oldsize) >= CHUNK_CEILING(size)
&& CHUNK_CEILING(oldsize) <= CHUNK_CEILING(size+extra)) {
assert(CHUNK_CEILING(oldsize) == oldsize);
#ifdef JEMALLOC_FILL
if (opt_junk && size < oldsize) {
memset((void *)((uintptr_t)ptr + size), 0x5a,
oldsize - size);
}
#endif
return (ptr);
}
/* Reallocation would require a move. */
return (NULL);
}
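/*
 * Editor's note -- worked example, not part of the original source.  With
 * chunksize == 4 MiB and an existing huge allocation of oldsize == 8 MiB,
 * a request for size == 5 MiB (extra == 0) satisfies the test above
 * because CHUNK_CEILING(5 MiB) == 8 MiB, so the allocation is kept in
 * place (optionally junk-filling the now-unused trailing 3 MiB).  A
 * request for 9 MiB needs CHUNK_CEILING == 12 MiB and therefore returns
 * NULL, forcing huge_ralloc() below to allocate and copy.
 */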
void *
huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra,
size_t alignment, bool zero)
{
void *ret;
size_t copysize;
/* Try to avoid moving the allocation. */
ret = huge_ralloc_no_move(ptr, oldsize, size, extra);
if (ret != NULL)
return (ret);
/*
* size and oldsize are different enough that we need to use a
* different size class. In that case, fall back to allocating new
* space and copying.
*/
if (alignment > chunksize)
ret = huge_palloc(size + extra, alignment, zero);
else
ret = huge_malloc(size + extra, zero);
if (ret == NULL) {
if (extra == 0)
return (NULL);
/* Try again, this time without extra. */
if (alignment > chunksize)
ret = huge_palloc(size, alignment, zero);
else
ret = huge_malloc(size, zero);
if (ret == NULL)
return (NULL);
}
/*
* Copy at most size bytes (not size+extra), since the caller has no
* expectation that the extra bytes will be reliably preserved.
*/
copysize = (size < oldsize) ? size : oldsize;
/*
* Use mremap(2) if this is a huge-->huge reallocation, and neither the
* source nor the destination are in swap or dss.
*/
#ifdef JEMALLOC_MREMAP_FIXED
if (oldsize >= chunksize
# ifdef JEMALLOC_SWAP
&& (swap_enabled == false || (chunk_in_swap(ptr) == false &&
chunk_in_swap(ret) == false))
# endif
# ifdef JEMALLOC_DSS
&& chunk_in_dss(ptr) == false && chunk_in_dss(ret) == false
# endif
) {
size_t newsize = huge_salloc(ret);
/*
* Remove ptr from the tree of huge allocations before
* performing the remap operation, in order to avoid the
* possibility of another thread acquiring that mapping before
* this one removes it from the tree.
*/
huge_dalloc(ptr, false);
if (mremap(ptr, oldsize, newsize, MREMAP_MAYMOVE|MREMAP_FIXED,
ret) == MAP_FAILED) {
/*
* Assuming no chunk management bugs in the allocator,
* the only documented way an error can occur here is
* if the application changed the map type for a
* portion of the old allocation. This is firmly in
* undefined behavior territory, so write a diagnostic
* message, and optionally abort.
*/
char buf[BUFERROR_BUF];
buferror(errno, buf, sizeof(buf));
malloc_write("<jemalloc>: Error in mremap(): ");
malloc_write(buf);
malloc_write("\n");
if (opt_abort)
abort();
memcpy(ret, ptr, copysize);
chunk_dealloc_mmap(ptr, oldsize);
}
} else
#endif
{
memcpy(ret, ptr, copysize);
idalloc(ptr);
}
return (ret);
}
void
huge_dalloc(void *ptr, bool unmap)
{
extent_node_t *node, key;
malloc_mutex_lock(&huge_mtx);
/* Extract from tree of huge allocations. */
key.addr = ptr;
node = extent_tree_ad_search(&huge, &key);
assert(node != NULL);
assert(node->addr == ptr);
extent_tree_ad_remove(&huge, node);
#ifdef JEMALLOC_STATS
stats_cactive_sub(node->size);
huge_ndalloc++;
huge_allocated -= node->size;
#endif
malloc_mutex_unlock(&huge_mtx);
if (unmap) {
/* Unmap chunk. */
#ifdef JEMALLOC_FILL
#if (defined(JEMALLOC_SWAP) || defined(JEMALLOC_DSS))
if (opt_junk)
memset(node->addr, 0x5a, node->size);
#endif
#endif
}
chunk_dealloc(node->addr, node->size, unmap);
base_node_dealloc(node);
}
size_t
huge_salloc(const void *ptr)
{
size_t ret;
extent_node_t *node, key;
malloc_mutex_lock(&huge_mtx);
/* Extract from tree of huge allocations. */
key.addr = __DECONST(void *, ptr);
node = extent_tree_ad_search(&huge, &key);
assert(node != NULL);
ret = node->size;
malloc_mutex_unlock(&huge_mtx);
return (ret);
}
#ifdef JEMALLOC_PROF
prof_ctx_t *
huge_prof_ctx_get(const void *ptr)
{
prof_ctx_t *ret;
extent_node_t *node, key;
malloc_mutex_lock(&huge_mtx);
/* Extract from tree of huge allocations. */
key.addr = __DECONST(void *, ptr);
node = extent_tree_ad_search(&huge, &key);
assert(node != NULL);
ret = node->prof_ctx;
malloc_mutex_unlock(&huge_mtx);
return (ret);
}
void
huge_prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
{
extent_node_t *node, key;
malloc_mutex_lock(&huge_mtx);
/* Extract from tree of huge allocations. */
key.addr = __DECONST(void *, ptr);
node = extent_tree_ad_search(&huge, &key);
assert(node != NULL);
node->prof_ctx = ctx;
malloc_mutex_unlock(&huge_mtx);
}
#endif
bool
huge_boot(void)
{
/* Initialize chunks data. */
if (malloc_mutex_init(&huge_mtx))
return (true);
extent_tree_ad_new(&huge);
#ifdef JEMALLOC_STATS
huge_nmalloc = 0;
huge_ndalloc = 0;
huge_allocated = 0;
#endif
return (false);
}

1881
deps/jemalloc.orig/src/jemalloc.c vendored Normal file

File diff suppressed because it is too large

2
deps/jemalloc.orig/src/mb.c vendored Normal file

@ -0,0 +1,2 @@
#define JEMALLOC_MB_C_
#include "jemalloc/internal/jemalloc_internal.h"

90
deps/jemalloc.orig/src/mutex.c vendored Normal file

@ -0,0 +1,90 @@
#define JEMALLOC_MUTEX_C_
#include "jemalloc/internal/jemalloc_internal.h"
/******************************************************************************/
/* Data. */
#ifdef JEMALLOC_LAZY_LOCK
bool isthreaded = false;
#endif
#ifdef JEMALLOC_LAZY_LOCK
static void pthread_create_once(void);
#endif
/******************************************************************************/
/*
* We intercept pthread_create() calls in order to toggle isthreaded if the
* process goes multi-threaded.
*/
#ifdef JEMALLOC_LAZY_LOCK
static int (*pthread_create_fptr)(pthread_t *__restrict, const pthread_attr_t *,
void *(*)(void *), void *__restrict);
static void
pthread_create_once(void)
{
pthread_create_fptr = dlsym(RTLD_NEXT, "pthread_create");
if (pthread_create_fptr == NULL) {
malloc_write("<jemalloc>: Error in dlsym(RTLD_NEXT, "
"\"pthread_create\")\n");
abort();
}
isthreaded = true;
}
JEMALLOC_ATTR(visibility("default"))
int
pthread_create(pthread_t *__restrict thread,
const pthread_attr_t *__restrict attr, void *(*start_routine)(void *),
void *__restrict arg)
{
static pthread_once_t once_control = PTHREAD_ONCE_INIT;
pthread_once(&once_control, pthread_create_once);
return (pthread_create_fptr(thread, attr, start_routine, arg));
}
#endif
/******************************************************************************/
bool
malloc_mutex_init(malloc_mutex_t *mutex)
{
#ifdef JEMALLOC_OSSPIN
*mutex = 0;
#else
pthread_mutexattr_t attr;
if (pthread_mutexattr_init(&attr) != 0)
return (true);
#ifdef PTHREAD_MUTEX_ADAPTIVE_NP
pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP);
#else
pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_DEFAULT);
#endif
if (pthread_mutex_init(mutex, &attr) != 0) {
pthread_mutexattr_destroy(&attr);
return (true);
}
pthread_mutexattr_destroy(&attr);
#endif
return (false);
}
void
malloc_mutex_destroy(malloc_mutex_t *mutex)
{
#ifndef JEMALLOC_OSSPIN
if (pthread_mutex_destroy(mutex) != 0) {
malloc_write("<jemalloc>: Error in pthread_mutex_destroy()\n");
abort();
}
#endif
}

1244
deps/jemalloc.orig/src/prof.c vendored Normal file

File diff suppressed because it is too large

46
deps/jemalloc.orig/src/rtree.c vendored Normal file

@ -0,0 +1,46 @@
#define JEMALLOC_RTREE_C_
#include "jemalloc/internal/jemalloc_internal.h"
rtree_t *
rtree_new(unsigned bits)
{
rtree_t *ret;
unsigned bits_per_level, height, i;
bits_per_level = ffs(pow2_ceil((RTREE_NODESIZE / sizeof(void *)))) - 1;
height = bits / bits_per_level;
if (height * bits_per_level != bits)
height++;
assert(height * bits_per_level >= bits);
ret = (rtree_t*)base_alloc(offsetof(rtree_t, level2bits) +
(sizeof(unsigned) * height));
if (ret == NULL)
return (NULL);
memset(ret, 0, offsetof(rtree_t, level2bits) + (sizeof(unsigned) *
height));
if (malloc_mutex_init(&ret->mutex)) {
/* Leak the rtree. */
return (NULL);
}
ret->height = height;
if (bits_per_level * height > bits)
ret->level2bits[0] = bits % bits_per_level;
else
ret->level2bits[0] = bits_per_level;
for (i = 1; i < height; i++)
ret->level2bits[i] = bits_per_level;
ret->root = (void**)base_alloc(sizeof(void *) << ret->level2bits[0]);
if (ret->root == NULL) {
/*
* We leak the rtree here, since there's no generic base
* deallocation.
*/
return (NULL);
}
memset(ret->root, 0, sizeof(void *) << ret->level2bits[0]);
return (ret);
}
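/*
 * Editor's note -- worked example, not part of the original source, with
 * an assumed node capacity.  chunk_boot() (in chunk.c above) creates the
 * chunk rtree with bits == (1 << (LG_SIZEOF_PTR+3)) - opt_lg_chunk, e.g.
 * 64 - 22 == 42 on a 64-bit system with 4 MiB chunks.  If RTREE_NODESIZE
 * held 512 pointers, bits_per_level would be 9, so:
 *
 *	height = 5			(42 does not divide evenly by 9)
 *	level2bits = {6, 9, 9, 9, 9}	(6 + 4*9 == 42)
 *
 * i.e. the root subdivides the 6 most significant key bits and each lower
 * level consumes 9 more, until every possible chunk address maps to one
 * leaf slot.
 */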

790
deps/jemalloc.orig/src/stats.c vendored Normal file

@ -0,0 +1,790 @@
#define JEMALLOC_STATS_C_
#include "jemalloc/internal/jemalloc_internal.h"
#define CTL_GET(n, v, t) do { \
size_t sz = sizeof(t); \
xmallctl(n, v, &sz, NULL, 0); \
} while (0)
#define CTL_I_GET(n, v, t) do { \
size_t mib[6]; \
size_t miblen = sizeof(mib) / sizeof(size_t); \
size_t sz = sizeof(t); \
xmallctlnametomib(n, mib, &miblen); \
mib[2] = i; \
xmallctlbymib(mib, miblen, v, &sz, NULL, 0); \
} while (0)
#define CTL_J_GET(n, v, t) do { \
size_t mib[6]; \
size_t miblen = sizeof(mib) / sizeof(size_t); \
size_t sz = sizeof(t); \
xmallctlnametomib(n, mib, &miblen); \
mib[2] = j; \
xmallctlbymib(mib, miblen, v, &sz, NULL, 0); \
} while (0)
#define CTL_IJ_GET(n, v, t) do { \
size_t mib[6]; \
size_t miblen = sizeof(mib) / sizeof(size_t); \
size_t sz = sizeof(t); \
xmallctlnametomib(n, mib, &miblen); \
mib[2] = i; \
mib[4] = j; \
xmallctlbymib(mib, miblen, v, &sz, NULL, 0); \
} while (0)
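/*
 * Editor's note -- not part of the original source.  The indexed variants
 * above resolve the dotted name to a MIB once and then patch in the
 * run-time indices: CTL_I_GET() overwrites mib[2] with the arena number i,
 * and CTL_IJ_GET() additionally overwrites mib[4] with the bin/run index j,
 * so the literal "0" components in names such as
 * "stats.arenas.0.bins.0.nruns" are only placeholders.
 */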
/******************************************************************************/
/* Data. */
bool opt_stats_print = false;
#ifdef JEMALLOC_STATS
size_t stats_cactive = 0;
#endif
/******************************************************************************/
/* Function prototypes for non-inline static functions. */
#ifdef JEMALLOC_STATS
static void malloc_vcprintf(void (*write_cb)(void *, const char *),
void *cbopaque, const char *format, va_list ap);
static void stats_arena_bins_print(void (*write_cb)(void *, const char *),
void *cbopaque, unsigned i);
static void stats_arena_lruns_print(void (*write_cb)(void *, const char *),
void *cbopaque, unsigned i);
static void stats_arena_print(void (*write_cb)(void *, const char *),
void *cbopaque, unsigned i);
#endif
/******************************************************************************/
/*
* We don't want to depend on vsnprintf() for production builds, since that can
* cause unnecessary bloat for static binaries. u2s() provides minimal integer
* printing functionality, so that malloc_printf() use can be limited to
* JEMALLOC_STATS code.
*/
char *
u2s(uint64_t x, unsigned base, char *s)
{
unsigned i;
i = UMAX2S_BUFSIZE - 1;
s[i] = '\0';
switch (base) {
case 10:
do {
i--;
s[i] = "0123456789"[x % (uint64_t)10];
x /= (uint64_t)10;
} while (x > 0);
break;
case 16:
do {
i--;
s[i] = "0123456789abcdef"[x & 0xf];
x >>= 4;
} while (x > 0);
break;
default:
do {
i--;
s[i] = "0123456789abcdefghijklmnopqrstuvwxyz"[x %
(uint64_t)base];
x /= (uint64_t)base;
} while (x > 0);
}
return (&s[i]);
}
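/*
 * Editor's note -- illustrative example, not part of the original source.
 * u2s() fills the tail of a caller-provided UMAX2S_BUFSIZE-byte buffer and
 * returns a pointer to the first digit rather than to the buffer start:
 *
 *	char s[UMAX2S_BUFSIZE];
 *	u2s(255, 10, s);	-> "255"
 *	u2s(255, 16, s);	-> "ff"
 *
 * which is why the callers below always print the return value, not s.
 */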
#ifdef JEMALLOC_STATS
static void
malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque,
const char *format, va_list ap)
{
char buf[4096];
if (write_cb == NULL) {
/*
* The caller did not provide an alternate write_cb callback
* function, so use the default one. malloc_write() is an
* inline function, so use malloc_message() directly here.
*/
write_cb = JEMALLOC_P(malloc_message);
cbopaque = NULL;
}
vsnprintf(buf, sizeof(buf), format, ap);
write_cb(cbopaque, buf);
}
/*
* Print to a callback function in such a way as to (hopefully) avoid memory
* allocation.
*/
JEMALLOC_ATTR(format(printf, 3, 4))
void
malloc_cprintf(void (*write_cb)(void *, const char *), void *cbopaque,
const char *format, ...)
{
va_list ap;
va_start(ap, format);
malloc_vcprintf(write_cb, cbopaque, format, ap);
va_end(ap);
}
/*
* Print to stderr in such a way as to (hopefully) avoid memory allocation.
*/
JEMALLOC_ATTR(format(printf, 1, 2))
void
malloc_printf(const char *format, ...)
{
va_list ap;
va_start(ap, format);
malloc_vcprintf(NULL, NULL, format, ap);
va_end(ap);
}
#endif
#ifdef JEMALLOC_STATS
static void
stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque,
unsigned i)
{
size_t pagesize;
bool config_tcache;
unsigned nbins, j, gap_start;
CTL_GET("arenas.pagesize", &pagesize, size_t);
CTL_GET("config.tcache", &config_tcache, bool);
if (config_tcache) {
malloc_cprintf(write_cb, cbopaque,
"bins: bin size regs pgs allocated nmalloc"
" ndalloc nrequests nfills nflushes"
" newruns reruns maxruns curruns\n");
} else {
malloc_cprintf(write_cb, cbopaque,
"bins: bin size regs pgs allocated nmalloc"
" ndalloc newruns reruns maxruns"
" curruns\n");
}
CTL_GET("arenas.nbins", &nbins, unsigned);
for (j = 0, gap_start = UINT_MAX; j < nbins; j++) {
uint64_t nruns;
CTL_IJ_GET("stats.arenas.0.bins.0.nruns", &nruns, uint64_t);
if (nruns == 0) {
if (gap_start == UINT_MAX)
gap_start = j;
} else {
unsigned ntbins_, nqbins, ncbins, nsbins;
size_t reg_size, run_size, allocated;
uint32_t nregs;
uint64_t nmalloc, ndalloc, nrequests, nfills, nflushes;
uint64_t reruns;
size_t highruns, curruns;
if (gap_start != UINT_MAX) {
if (j > gap_start + 1) {
/* Gap of more than one size class. */
malloc_cprintf(write_cb, cbopaque,
"[%u..%u]\n", gap_start,
j - 1);
} else {
/* Gap of one size class. */
malloc_cprintf(write_cb, cbopaque,
"[%u]\n", gap_start);
}
gap_start = UINT_MAX;
}
CTL_GET("arenas.ntbins", &ntbins_, unsigned);
CTL_GET("arenas.nqbins", &nqbins, unsigned);
CTL_GET("arenas.ncbins", &ncbins, unsigned);
CTL_GET("arenas.nsbins", &nsbins, unsigned);
CTL_J_GET("arenas.bin.0.size", &reg_size, size_t);
CTL_J_GET("arenas.bin.0.nregs", &nregs, uint32_t);
CTL_J_GET("arenas.bin.0.run_size", &run_size, size_t);
CTL_IJ_GET("stats.arenas.0.bins.0.allocated",
&allocated, size_t);
CTL_IJ_GET("stats.arenas.0.bins.0.nmalloc",
&nmalloc, uint64_t);
CTL_IJ_GET("stats.arenas.0.bins.0.ndalloc",
&ndalloc, uint64_t);
if (config_tcache) {
CTL_IJ_GET("stats.arenas.0.bins.0.nrequests",
&nrequests, uint64_t);
CTL_IJ_GET("stats.arenas.0.bins.0.nfills",
&nfills, uint64_t);
CTL_IJ_GET("stats.arenas.0.bins.0.nflushes",
&nflushes, uint64_t);
}
CTL_IJ_GET("stats.arenas.0.bins.0.nreruns", &reruns,
uint64_t);
CTL_IJ_GET("stats.arenas.0.bins.0.highruns", &highruns,
size_t);
CTL_IJ_GET("stats.arenas.0.bins.0.curruns", &curruns,
size_t);
if (config_tcache) {
malloc_cprintf(write_cb, cbopaque,
"%13u %1s %5zu %4u %3zu %12zu %12"PRIu64
" %12"PRIu64" %12"PRIu64" %12"PRIu64
" %12"PRIu64" %12"PRIu64" %12"PRIu64
" %12zu %12zu\n",
j,
j < ntbins_ ? "T" : j < ntbins_ + nqbins ?
"Q" : j < ntbins_ + nqbins + ncbins ? "C" :
"S",
reg_size, nregs, run_size / pagesize,
allocated, nmalloc, ndalloc, nrequests,
nfills, nflushes, nruns, reruns, highruns,
curruns);
} else {
malloc_cprintf(write_cb, cbopaque,
"%13u %1s %5zu %4u %3zu %12zu %12"PRIu64
" %12"PRIu64" %12"PRIu64" %12"PRIu64
" %12zu %12zu\n",
j,
j < ntbins_ ? "T" : j < ntbins_ + nqbins ?
"Q" : j < ntbins_ + nqbins + ncbins ? "C" :
"S",
reg_size, nregs, run_size / pagesize,
allocated, nmalloc, ndalloc, nruns, reruns,
highruns, curruns);
}
}
}
if (gap_start != UINT_MAX) {
if (j > gap_start + 1) {
/* Gap of more than one size class. */
malloc_cprintf(write_cb, cbopaque, "[%u..%u]\n",
gap_start, j - 1);
} else {
/* Gap of one size class. */
malloc_cprintf(write_cb, cbopaque, "[%u]\n", gap_start);
}
}
}
static void
stats_arena_lruns_print(void (*write_cb)(void *, const char *), void *cbopaque,
unsigned i)
{
size_t pagesize, nlruns, j;
ssize_t gap_start;
CTL_GET("arenas.pagesize", &pagesize, size_t);
malloc_cprintf(write_cb, cbopaque,
"large: size pages nmalloc ndalloc nrequests"
" maxruns curruns\n");
CTL_GET("arenas.nlruns", &nlruns, size_t);
for (j = 0, gap_start = -1; j < nlruns; j++) {
uint64_t nmalloc, ndalloc, nrequests;
size_t run_size, highruns, curruns;
CTL_IJ_GET("stats.arenas.0.lruns.0.nmalloc", &nmalloc,
uint64_t);
CTL_IJ_GET("stats.arenas.0.lruns.0.ndalloc", &ndalloc,
uint64_t);
CTL_IJ_GET("stats.arenas.0.lruns.0.nrequests", &nrequests,
uint64_t);
if (nrequests == 0) {
if (gap_start == -1)
gap_start = j;
} else {
CTL_J_GET("arenas.lrun.0.size", &run_size, size_t);
CTL_IJ_GET("stats.arenas.0.lruns.0.highruns", &highruns,
size_t);
CTL_IJ_GET("stats.arenas.0.lruns.0.curruns", &curruns,
size_t);
if (gap_start != -1) {
malloc_cprintf(write_cb, cbopaque, "[%zu]\n",
j - gap_start);
gap_start = -1;
}
malloc_cprintf(write_cb, cbopaque,
"%13zu %5zu %12"PRIu64" %12"PRIu64" %12"PRIu64
" %12zu %12zu\n",
run_size, run_size / pagesize, nmalloc, ndalloc,
nrequests, highruns, curruns);
}
}
if (gap_start != -1)
malloc_cprintf(write_cb, cbopaque, "[%zu]\n", j - gap_start);
}
static void
stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque,
unsigned i)
{
unsigned nthreads;
size_t pagesize, pactive, pdirty, mapped;
uint64_t npurge, nmadvise, purged;
size_t small_allocated;
uint64_t small_nmalloc, small_ndalloc, small_nrequests;
size_t large_allocated;
uint64_t large_nmalloc, large_ndalloc, large_nrequests;
CTL_GET("arenas.pagesize", &pagesize, size_t);
CTL_I_GET("stats.arenas.0.nthreads", &nthreads, unsigned);
malloc_cprintf(write_cb, cbopaque,
"assigned threads: %u\n", nthreads);
CTL_I_GET("stats.arenas.0.pactive", &pactive, size_t);
CTL_I_GET("stats.arenas.0.pdirty", &pdirty, size_t);
CTL_I_GET("stats.arenas.0.npurge", &npurge, uint64_t);
CTL_I_GET("stats.arenas.0.nmadvise", &nmadvise, uint64_t);
CTL_I_GET("stats.arenas.0.purged", &purged, uint64_t);
malloc_cprintf(write_cb, cbopaque,
"dirty pages: %zu:%zu active:dirty, %"PRIu64" sweep%s,"
" %"PRIu64" madvise%s, %"PRIu64" purged\n",
pactive, pdirty, npurge, npurge == 1 ? "" : "s",
nmadvise, nmadvise == 1 ? "" : "s", purged);
malloc_cprintf(write_cb, cbopaque,
" allocated nmalloc ndalloc nrequests\n");
CTL_I_GET("stats.arenas.0.small.allocated", &small_allocated, size_t);
CTL_I_GET("stats.arenas.0.small.nmalloc", &small_nmalloc, uint64_t);
CTL_I_GET("stats.arenas.0.small.ndalloc", &small_ndalloc, uint64_t);
CTL_I_GET("stats.arenas.0.small.nrequests", &small_nrequests, uint64_t);
malloc_cprintf(write_cb, cbopaque,
"small: %12zu %12"PRIu64" %12"PRIu64" %12"PRIu64"\n",
small_allocated, small_nmalloc, small_ndalloc, small_nrequests);
CTL_I_GET("stats.arenas.0.large.allocated", &large_allocated, size_t);
CTL_I_GET("stats.arenas.0.large.nmalloc", &large_nmalloc, uint64_t);
CTL_I_GET("stats.arenas.0.large.ndalloc", &large_ndalloc, uint64_t);
CTL_I_GET("stats.arenas.0.large.nrequests", &large_nrequests, uint64_t);
malloc_cprintf(write_cb, cbopaque,
"large: %12zu %12"PRIu64" %12"PRIu64" %12"PRIu64"\n",
large_allocated, large_nmalloc, large_ndalloc, large_nrequests);
malloc_cprintf(write_cb, cbopaque,
"total: %12zu %12"PRIu64" %12"PRIu64" %12"PRIu64"\n",
small_allocated + large_allocated,
small_nmalloc + large_nmalloc,
small_ndalloc + large_ndalloc,
small_nrequests + large_nrequests);
malloc_cprintf(write_cb, cbopaque, "active: %12zu\n",
pactive * pagesize );
CTL_I_GET("stats.arenas.0.mapped", &mapped, size_t);
malloc_cprintf(write_cb, cbopaque, "mapped: %12zu\n", mapped);
stats_arena_bins_print(write_cb, cbopaque, i);
stats_arena_lruns_print(write_cb, cbopaque, i);
}
#endif
void
stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
const char *opts)
{
int err;
uint64_t epoch;
size_t u64sz;
char s[UMAX2S_BUFSIZE];
bool general = true;
bool merged = true;
bool unmerged = true;
bool bins = true;
bool large = true;
/*
* Refresh stats, in case mallctl() was called by the application.
*
* Check for OOM here, since refreshing the ctl cache can trigger
* allocation. In practice, none of the subsequent mallctl()-related
* calls in this function will cause OOM if this one succeeds.
*/
epoch = 1;
u64sz = sizeof(uint64_t);
err = JEMALLOC_P(mallctl)("epoch", &epoch, &u64sz, &epoch,
sizeof(uint64_t));
if (err != 0) {
if (err == EAGAIN) {
malloc_write("<jemalloc>: Memory allocation failure in "
"mallctl(\"epoch\", ...)\n");
return;
}
malloc_write("<jemalloc>: Failure in mallctl(\"epoch\", "
"...)\n");
abort();
}
if (write_cb == NULL) {
/*
* The caller did not provide an alternate write_cb callback
* function, so use the default one. malloc_write() is an
* inline function, so use malloc_message() directly here.
*/
write_cb = JEMALLOC_P(malloc_message);
cbopaque = NULL;
}
if (opts != NULL) {
unsigned i;
for (i = 0; opts[i] != '\0'; i++) {
switch (opts[i]) {
case 'g':
general = false;
break;
case 'm':
merged = false;
break;
case 'a':
unmerged = false;
break;
case 'b':
bins = false;
break;
case 'l':
large = false;
break;
default:;
}
}
}
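	/*
	 * Editor's note -- usage example, not part of the original source.
	 * The opts letters parsed above subtract sections from the report:
	 * assuming the public JEMALLOC_P(malloc_stats_print)() entry point
	 * forwards its opts string here, calling it with "bl" still prints
	 * the general settings and the merged and per-arena summaries, but
	 * skips the per-bin and per-large-size-class tables.
	 */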
write_cb(cbopaque, "___ Begin jemalloc statistics ___\n");
if (general) {
int err;
const char *cpv;
bool bv;
unsigned uv;
ssize_t ssv;
size_t sv, bsz, ssz, sssz, cpsz;
bsz = sizeof(bool);
ssz = sizeof(size_t);
sssz = sizeof(ssize_t);
cpsz = sizeof(const char *);
CTL_GET("version", &cpv, const char *);
write_cb(cbopaque, "Version: ");
write_cb(cbopaque, cpv);
write_cb(cbopaque, "\n");
CTL_GET("config.debug", &bv, bool);
write_cb(cbopaque, "Assertions ");
write_cb(cbopaque, bv ? "enabled" : "disabled");
write_cb(cbopaque, "\n");
#define OPT_WRITE_BOOL(n) \
if ((err = JEMALLOC_P(mallctl)("opt."#n, &bv, &bsz, \
NULL, 0)) == 0) { \
write_cb(cbopaque, " opt."#n": "); \
write_cb(cbopaque, bv ? "true" : "false"); \
write_cb(cbopaque, "\n"); \
}
#define OPT_WRITE_SIZE_T(n) \
if ((err = JEMALLOC_P(mallctl)("opt."#n, &sv, &ssz, \
NULL, 0)) == 0) { \
write_cb(cbopaque, " opt."#n": "); \
write_cb(cbopaque, u2s(sv, 10, s)); \
write_cb(cbopaque, "\n"); \
}
#define OPT_WRITE_SSIZE_T(n) \
if ((err = JEMALLOC_P(mallctl)("opt."#n, &ssv, &sssz, \
NULL, 0)) == 0) { \
if (ssv >= 0) { \
write_cb(cbopaque, " opt."#n": "); \
write_cb(cbopaque, u2s(ssv, 10, s)); \
} else { \
write_cb(cbopaque, " opt."#n": -"); \
write_cb(cbopaque, u2s(-ssv, 10, s)); \
} \
write_cb(cbopaque, "\n"); \
}
#define OPT_WRITE_CHAR_P(n) \
if ((err = JEMALLOC_P(mallctl)("opt."#n, &cpv, &cpsz, \
NULL, 0)) == 0) { \
write_cb(cbopaque, " opt."#n": \""); \
write_cb(cbopaque, cpv); \
write_cb(cbopaque, "\"\n"); \
}
write_cb(cbopaque, "Run-time option settings:\n");
OPT_WRITE_BOOL(abort)
OPT_WRITE_SIZE_T(lg_qspace_max)
OPT_WRITE_SIZE_T(lg_cspace_max)
OPT_WRITE_SIZE_T(lg_chunk)
OPT_WRITE_SIZE_T(narenas)
OPT_WRITE_SSIZE_T(lg_dirty_mult)
OPT_WRITE_BOOL(stats_print)
OPT_WRITE_BOOL(junk)
OPT_WRITE_BOOL(zero)
OPT_WRITE_BOOL(sysv)
OPT_WRITE_BOOL(xmalloc)
OPT_WRITE_BOOL(tcache)
OPT_WRITE_SSIZE_T(lg_tcache_gc_sweep)
OPT_WRITE_SSIZE_T(lg_tcache_max)
OPT_WRITE_BOOL(prof)
OPT_WRITE_CHAR_P(prof_prefix)
OPT_WRITE_SIZE_T(lg_prof_bt_max)
OPT_WRITE_BOOL(prof_active)
OPT_WRITE_SSIZE_T(lg_prof_sample)
OPT_WRITE_BOOL(prof_accum)
OPT_WRITE_SSIZE_T(lg_prof_tcmax)
OPT_WRITE_SSIZE_T(lg_prof_interval)
OPT_WRITE_BOOL(prof_gdump)
OPT_WRITE_BOOL(prof_leak)
OPT_WRITE_BOOL(overcommit)
#undef OPT_WRITE_BOOL
#undef OPT_WRITE_SIZE_T
#undef OPT_WRITE_SSIZE_T
#undef OPT_WRITE_CHAR_P
write_cb(cbopaque, "CPUs: ");
write_cb(cbopaque, u2s(ncpus, 10, s));
write_cb(cbopaque, "\n");
CTL_GET("arenas.narenas", &uv, unsigned);
write_cb(cbopaque, "Max arenas: ");
write_cb(cbopaque, u2s(uv, 10, s));
write_cb(cbopaque, "\n");
write_cb(cbopaque, "Pointer size: ");
write_cb(cbopaque, u2s(sizeof(void *), 10, s));
write_cb(cbopaque, "\n");
CTL_GET("arenas.quantum", &sv, size_t);
write_cb(cbopaque, "Quantum size: ");
write_cb(cbopaque, u2s(sv, 10, s));
write_cb(cbopaque, "\n");
CTL_GET("arenas.cacheline", &sv, size_t);
write_cb(cbopaque, "Cacheline size (assumed): ");
write_cb(cbopaque, u2s(sv, 10, s));
write_cb(cbopaque, "\n");
CTL_GET("arenas.subpage", &sv, size_t);
write_cb(cbopaque, "Subpage spacing: ");
write_cb(cbopaque, u2s(sv, 10, s));
write_cb(cbopaque, "\n");
if ((err = JEMALLOC_P(mallctl)("arenas.tspace_min", &sv, &ssz,
NULL, 0)) == 0) {
write_cb(cbopaque, "Tiny 2^n-spaced sizes: [");
write_cb(cbopaque, u2s(sv, 10, s));
write_cb(cbopaque, "..");
CTL_GET("arenas.tspace_max", &sv, size_t);
write_cb(cbopaque, u2s(sv, 10, s));
write_cb(cbopaque, "]\n");
}
CTL_GET("arenas.qspace_min", &sv, size_t);
write_cb(cbopaque, "Quantum-spaced sizes: [");
write_cb(cbopaque, u2s(sv, 10, s));
write_cb(cbopaque, "..");
CTL_GET("arenas.qspace_max", &sv, size_t);
write_cb(cbopaque, u2s(sv, 10, s));
write_cb(cbopaque, "]\n");
CTL_GET("arenas.cspace_min", &sv, size_t);
write_cb(cbopaque, "Cacheline-spaced sizes: [");
write_cb(cbopaque, u2s(sv, 10, s));
write_cb(cbopaque, "..");
CTL_GET("arenas.cspace_max", &sv, size_t);
write_cb(cbopaque, u2s(sv, 10, s));
write_cb(cbopaque, "]\n");
CTL_GET("arenas.sspace_min", &sv, size_t);
write_cb(cbopaque, "Subpage-spaced sizes: [");
write_cb(cbopaque, u2s(sv, 10, s));
write_cb(cbopaque, "..");
CTL_GET("arenas.sspace_max", &sv, size_t);
write_cb(cbopaque, u2s(sv, 10, s));
write_cb(cbopaque, "]\n");
CTL_GET("opt.lg_dirty_mult", &ssv, ssize_t);
if (ssv >= 0) {
write_cb(cbopaque,
"Min active:dirty page ratio per arena: ");
write_cb(cbopaque, u2s((1U << ssv), 10, s));
write_cb(cbopaque, ":1\n");
} else {
write_cb(cbopaque,
"Min active:dirty page ratio per arena: N/A\n");
}
if ((err = JEMALLOC_P(mallctl)("arenas.tcache_max", &sv,
&ssz, NULL, 0)) == 0) {
write_cb(cbopaque,
"Maximum thread-cached size class: ");
write_cb(cbopaque, u2s(sv, 10, s));
write_cb(cbopaque, "\n");
}
if ((err = JEMALLOC_P(mallctl)("opt.lg_tcache_gc_sweep", &ssv,
&ssz, NULL, 0)) == 0) {
size_t tcache_gc_sweep = (1U << ssv);
bool tcache_enabled;
CTL_GET("opt.tcache", &tcache_enabled, bool);
write_cb(cbopaque, "Thread cache GC sweep interval: ");
write_cb(cbopaque, tcache_enabled && ssv >= 0 ?
u2s(tcache_gc_sweep, 10, s) : "N/A");
write_cb(cbopaque, "\n");
}
if ((err = JEMALLOC_P(mallctl)("opt.prof", &bv, &bsz, NULL, 0))
== 0 && bv) {
CTL_GET("opt.lg_prof_bt_max", &sv, size_t);
write_cb(cbopaque, "Maximum profile backtrace depth: ");
write_cb(cbopaque, u2s((1U << sv), 10, s));
write_cb(cbopaque, "\n");
CTL_GET("opt.lg_prof_tcmax", &ssv, ssize_t);
write_cb(cbopaque,
"Maximum per thread backtrace cache: ");
if (ssv >= 0) {
write_cb(cbopaque, u2s((1U << ssv), 10, s));
write_cb(cbopaque, " (2^");
write_cb(cbopaque, u2s(ssv, 10, s));
write_cb(cbopaque, ")\n");
} else
write_cb(cbopaque, "N/A\n");
CTL_GET("opt.lg_prof_sample", &sv, size_t);
write_cb(cbopaque, "Average profile sample interval: ");
write_cb(cbopaque, u2s((((uint64_t)1U) << sv), 10, s));
write_cb(cbopaque, " (2^");
write_cb(cbopaque, u2s(sv, 10, s));
write_cb(cbopaque, ")\n");
CTL_GET("opt.lg_prof_interval", &ssv, ssize_t);
write_cb(cbopaque, "Average profile dump interval: ");
if (ssv >= 0) {
write_cb(cbopaque, u2s((((uint64_t)1U) << ssv),
10, s));
write_cb(cbopaque, " (2^");
write_cb(cbopaque, u2s(ssv, 10, s));
write_cb(cbopaque, ")\n");
} else
write_cb(cbopaque, "N/A\n");
}
CTL_GET("arenas.chunksize", &sv, size_t);
write_cb(cbopaque, "Chunk size: ");
write_cb(cbopaque, u2s(sv, 10, s));
CTL_GET("opt.lg_chunk", &sv, size_t);
write_cb(cbopaque, " (2^");
write_cb(cbopaque, u2s(sv, 10, s));
write_cb(cbopaque, ")\n");
}
#ifdef JEMALLOC_STATS
{
int err;
size_t sszp, ssz;
size_t *cactive;
size_t allocated, active, mapped;
size_t chunks_current, chunks_high, swap_avail;
uint64_t chunks_total;
size_t huge_allocated;
uint64_t huge_nmalloc, huge_ndalloc;
sszp = sizeof(size_t *);
ssz = sizeof(size_t);
CTL_GET("stats.cactive", &cactive, size_t *);
CTL_GET("stats.allocated", &allocated, size_t);
CTL_GET("stats.active", &active, size_t);
CTL_GET("stats.mapped", &mapped, size_t);
malloc_cprintf(write_cb, cbopaque,
"Allocated: %zu, active: %zu, mapped: %zu\n",
allocated, active, mapped);
malloc_cprintf(write_cb, cbopaque,
"Current active ceiling: %zu\n", atomic_read_z(cactive));
/* Print chunk stats. */
CTL_GET("stats.chunks.total", &chunks_total, uint64_t);
CTL_GET("stats.chunks.high", &chunks_high, size_t);
CTL_GET("stats.chunks.current", &chunks_current, size_t);
if ((err = JEMALLOC_P(mallctl)("swap.avail", &swap_avail, &ssz,
NULL, 0)) == 0) {
size_t lg_chunk;
malloc_cprintf(write_cb, cbopaque, "chunks: nchunks "
"highchunks curchunks swap_avail\n");
CTL_GET("opt.lg_chunk", &lg_chunk, size_t);
malloc_cprintf(write_cb, cbopaque,
" %13"PRIu64"%13zu%13zu%13zu\n",
chunks_total, chunks_high, chunks_current,
swap_avail << lg_chunk);
} else {
malloc_cprintf(write_cb, cbopaque, "chunks: nchunks "
"highchunks curchunks\n");
malloc_cprintf(write_cb, cbopaque,
" %13"PRIu64"%13zu%13zu\n",
chunks_total, chunks_high, chunks_current);
}
/* Print huge stats. */
CTL_GET("stats.huge.nmalloc", &huge_nmalloc, uint64_t);
CTL_GET("stats.huge.ndalloc", &huge_ndalloc, uint64_t);
CTL_GET("stats.huge.allocated", &huge_allocated, size_t);
malloc_cprintf(write_cb, cbopaque,
"huge: nmalloc ndalloc allocated\n");
malloc_cprintf(write_cb, cbopaque,
" %12"PRIu64" %12"PRIu64" %12zu\n",
huge_nmalloc, huge_ndalloc, huge_allocated);
if (merged) {
unsigned narenas;
CTL_GET("arenas.narenas", &narenas, unsigned);
{
bool initialized[narenas];
size_t isz;
unsigned i, ninitialized;
isz = sizeof(initialized);
xmallctl("arenas.initialized", initialized,
&isz, NULL, 0);
for (i = ninitialized = 0; i < narenas; i++) {
if (initialized[i])
ninitialized++;
}
if (ninitialized > 1 || unmerged == false) {
/* Print merged arena stats. */
malloc_cprintf(write_cb, cbopaque,
"\nMerged arenas stats:\n");
stats_arena_print(write_cb, cbopaque,
narenas);
}
}
}
if (unmerged) {
unsigned narenas;
/* Print stats for each arena. */
CTL_GET("arenas.narenas", &narenas, unsigned);
{
bool initialized[narenas];
size_t isz;
unsigned i;
isz = sizeof(initialized);
xmallctl("arenas.initialized", initialized,
&isz, NULL, 0);
for (i = 0; i < narenas; i++) {
if (initialized[i]) {
malloc_cprintf(write_cb,
cbopaque,
"\narenas[%u]:\n", i);
stats_arena_print(write_cb,
cbopaque, i);
}
}
}
}
}
#endif /* #ifdef JEMALLOC_STATS */
write_cb(cbopaque, "--- End jemalloc statistics ---\n");
}
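
The statistics machinery above is driven entirely through the write_cb/cbopaque pair. As a reference for how an application feeds it, here is a minimal, hypothetical driver (not part of this diff): my_write_cb is a made-up name, and malloc_stats_print() is jemalloc's public entry point into this code, which does not appear in this hunk; the JEMALLOC_P() mangling and jemalloc_test.h include follow the bundled tests.

#include <stdio.h>
#define JEMALLOC_MANGLE
#include "jemalloc_test.h"

/* Collect the report on an arbitrary FILE * instead of stderr. */
static void
my_write_cb(void *cbopaque, const char *s)
{
    fputs(s, (FILE *)cbopaque);
}

int
main(void)
{
    void *p = JEMALLOC_P(malloc)(1);    /* Touch the allocator first. */

    /* NULL opts requests the full report, including the sections above. */
    JEMALLOC_P(malloc_stats_print)(my_write_cb, stdout, NULL);
    JEMALLOC_P(free)(p);
    return (0);
}

Passing NULL for write_cb instead falls back to malloc_message(), which writes the report to stderr.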


480
deps/jemalloc.orig/src/tcache.c vendored Normal file

@@ -0,0 +1,480 @@
#define JEMALLOC_TCACHE_C_
#include "jemalloc/internal/jemalloc_internal.h"
#ifdef JEMALLOC_TCACHE
/******************************************************************************/
/* Data. */
bool opt_tcache = true;
ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT;
ssize_t opt_lg_tcache_gc_sweep = LG_TCACHE_GC_SWEEP_DEFAULT;
tcache_bin_info_t *tcache_bin_info;
static unsigned stack_nelms; /* Total stack elms per tcache. */
/* Map of thread-specific caches. */
#ifndef NO_TLS
__thread tcache_t *tcache_tls JEMALLOC_ATTR(tls_model("initial-exec"));
#endif
/*
* Same contents as tcache, but initialized such that the TSD destructor is
* called when a thread exits, so that the cache can be cleaned up.
*/
pthread_key_t tcache_tsd;
size_t nhbins;
size_t tcache_maxclass;
unsigned tcache_gc_incr;
/******************************************************************************/
/* Function prototypes for non-inline static functions. */
static void tcache_thread_cleanup(void *arg);
/******************************************************************************/
void *
tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind)
{
void *ret;
arena_tcache_fill_small(tcache->arena, tbin, binind
#ifdef JEMALLOC_PROF
, tcache->prof_accumbytes
#endif
);
#ifdef JEMALLOC_PROF
tcache->prof_accumbytes = 0;
#endif
ret = tcache_alloc_easy(tbin);
return (ret);
}
void
tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem
#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
, tcache_t *tcache
#endif
)
{
void *ptr;
unsigned i, nflush, ndeferred;
#ifdef JEMALLOC_STATS
bool merged_stats = false;
#endif
assert(binind < nbins);
assert(rem <= tbin->ncached);
for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) {
/* Lock the arena bin associated with the first object. */
arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(
tbin->avail[0]);
arena_t *arena = chunk->arena;
arena_bin_t *bin = &arena->bins[binind];
#ifdef JEMALLOC_PROF
if (arena == tcache->arena) {
malloc_mutex_lock(&arena->lock);
arena_prof_accum(arena, tcache->prof_accumbytes);
malloc_mutex_unlock(&arena->lock);
tcache->prof_accumbytes = 0;
}
#endif
malloc_mutex_lock(&bin->lock);
#ifdef JEMALLOC_STATS
if (arena == tcache->arena) {
assert(merged_stats == false);
merged_stats = true;
bin->stats.nflushes++;
bin->stats.nrequests += tbin->tstats.nrequests;
tbin->tstats.nrequests = 0;
}
#endif
ndeferred = 0;
for (i = 0; i < nflush; i++) {
ptr = tbin->avail[i];
assert(ptr != NULL);
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
if (chunk->arena == arena) {
size_t pageind = ((uintptr_t)ptr -
(uintptr_t)chunk) >> PAGE_SHIFT;
arena_chunk_map_t *mapelm =
&chunk->map[pageind-map_bias];
arena_dalloc_bin(arena, chunk, ptr, mapelm);
} else {
/*
* This object was allocated via a different
* arena bin than the one that is currently
* locked. Stash the object, so that it can be
* handled in a future pass.
*/
tbin->avail[ndeferred] = ptr;
ndeferred++;
}
}
malloc_mutex_unlock(&bin->lock);
}
#ifdef JEMALLOC_STATS
if (merged_stats == false) {
/*
* The flush loop didn't happen to flush to this thread's
* arena, so the stats didn't get merged. Manually do so now.
*/
arena_bin_t *bin = &tcache->arena->bins[binind];
malloc_mutex_lock(&bin->lock);
bin->stats.nflushes++;
bin->stats.nrequests += tbin->tstats.nrequests;
tbin->tstats.nrequests = 0;
malloc_mutex_unlock(&bin->lock);
}
#endif
memmove(tbin->avail, &tbin->avail[tbin->ncached - rem],
rem * sizeof(void *));
tbin->ncached = rem;
if ((int)tbin->ncached < tbin->low_water)
tbin->low_water = tbin->ncached;
}
void
tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem
#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
, tcache_t *tcache
#endif
)
{
void *ptr;
unsigned i, nflush, ndeferred;
#ifdef JEMALLOC_STATS
bool merged_stats = false;
#endif
assert(binind < nhbins);
assert(rem <= tbin->ncached);
for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) {
/* Lock the arena associated with the first object. */
arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(
tbin->avail[0]);
arena_t *arena = chunk->arena;
malloc_mutex_lock(&arena->lock);
#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
if (arena == tcache->arena) {
#endif
#ifdef JEMALLOC_PROF
arena_prof_accum(arena, tcache->prof_accumbytes);
tcache->prof_accumbytes = 0;
#endif
#ifdef JEMALLOC_STATS
merged_stats = true;
arena->stats.nrequests_large += tbin->tstats.nrequests;
arena->stats.lstats[binind - nbins].nrequests +=
tbin->tstats.nrequests;
tbin->tstats.nrequests = 0;
#endif
#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
}
#endif
ndeferred = 0;
for (i = 0; i < nflush; i++) {
ptr = tbin->avail[i];
assert(ptr != NULL);
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
if (chunk->arena == arena)
arena_dalloc_large(arena, chunk, ptr);
else {
/*
* This object was allocated via a different
* arena than the one that is currently locked.
* Stash the object, so that it can be handled
* in a future pass.
*/
tbin->avail[ndeferred] = ptr;
ndeferred++;
}
}
malloc_mutex_unlock(&arena->lock);
}
#ifdef JEMALLOC_STATS
if (merged_stats == false) {
/*
* The flush loop didn't happen to flush to this thread's
* arena, so the stats didn't get merged. Manually do so now.
*/
arena_t *arena = tcache->arena;
malloc_mutex_lock(&arena->lock);
arena->stats.nrequests_large += tbin->tstats.nrequests;
arena->stats.lstats[binind - nbins].nrequests +=
tbin->tstats.nrequests;
tbin->tstats.nrequests = 0;
malloc_mutex_unlock(&arena->lock);
}
#endif
memmove(tbin->avail, &tbin->avail[tbin->ncached - rem],
rem * sizeof(void *));
tbin->ncached = rem;
if ((int)tbin->ncached < tbin->low_water)
tbin->low_water = tbin->ncached;
}
tcache_t *
tcache_create(arena_t *arena)
{
tcache_t *tcache;
size_t size, stack_offset;
unsigned i;
size = offsetof(tcache_t, tbins) + (sizeof(tcache_bin_t) * nhbins);
/* Naturally align the pointer stacks. */
size = PTR_CEILING(size);
stack_offset = size;
size += stack_nelms * sizeof(void *);
/*
* Round up to the nearest multiple of the cacheline size, in order to
* avoid the possibility of false cacheline sharing.
*
* That this works relies on the same logic as in ipalloc(), but we
* cannot directly call ipalloc() here due to tcache bootstrapping
* issues.
*/
size = (size + CACHELINE_MASK) & (-CACHELINE);
if (size <= small_maxclass)
tcache = (tcache_t *)arena_malloc_small(arena, size, true);
else if (size <= tcache_maxclass)
tcache = (tcache_t *)arena_malloc_large(arena, size, true);
else
tcache = (tcache_t *)icalloc(size);
if (tcache == NULL)
return (NULL);
#ifdef JEMALLOC_STATS
/* Link into list of extant tcaches. */
malloc_mutex_lock(&arena->lock);
ql_elm_new(tcache, link);
ql_tail_insert(&arena->tcache_ql, tcache, link);
malloc_mutex_unlock(&arena->lock);
#endif
tcache->arena = arena;
assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0);
for (i = 0; i < nhbins; i++) {
tcache->tbins[i].lg_fill_div = 1;
tcache->tbins[i].avail = (void **)((uintptr_t)tcache +
(uintptr_t)stack_offset);
stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *);
}
TCACHE_SET(tcache);
return (tcache);
}
void
tcache_destroy(tcache_t *tcache)
{
unsigned i;
size_t tcache_size;
#ifdef JEMALLOC_STATS
/* Unlink from list of extant tcaches. */
malloc_mutex_lock(&tcache->arena->lock);
ql_remove(&tcache->arena->tcache_ql, tcache, link);
malloc_mutex_unlock(&tcache->arena->lock);
tcache_stats_merge(tcache, tcache->arena);
#endif
for (i = 0; i < nbins; i++) {
tcache_bin_t *tbin = &tcache->tbins[i];
tcache_bin_flush_small(tbin, i, 0
#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
, tcache
#endif
);
#ifdef JEMALLOC_STATS
if (tbin->tstats.nrequests != 0) {
arena_t *arena = tcache->arena;
arena_bin_t *bin = &arena->bins[i];
malloc_mutex_lock(&bin->lock);
bin->stats.nrequests += tbin->tstats.nrequests;
malloc_mutex_unlock(&bin->lock);
}
#endif
}
for (; i < nhbins; i++) {
tcache_bin_t *tbin = &tcache->tbins[i];
tcache_bin_flush_large(tbin, i, 0
#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
, tcache
#endif
);
#ifdef JEMALLOC_STATS
if (tbin->tstats.nrequests != 0) {
arena_t *arena = tcache->arena;
malloc_mutex_lock(&arena->lock);
arena->stats.nrequests_large += tbin->tstats.nrequests;
arena->stats.lstats[i - nbins].nrequests +=
tbin->tstats.nrequests;
malloc_mutex_unlock(&arena->lock);
}
#endif
}
#ifdef JEMALLOC_PROF
if (tcache->prof_accumbytes > 0) {
malloc_mutex_lock(&tcache->arena->lock);
arena_prof_accum(tcache->arena, tcache->prof_accumbytes);
malloc_mutex_unlock(&tcache->arena->lock);
}
#endif
tcache_size = arena_salloc(tcache);
if (tcache_size <= small_maxclass) {
arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache);
arena_t *arena = chunk->arena;
size_t pageind = ((uintptr_t)tcache - (uintptr_t)chunk) >>
PAGE_SHIFT;
arena_chunk_map_t *mapelm = &chunk->map[pageind-map_bias];
arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
(uintptr_t)((pageind - (mapelm->bits >> PAGE_SHIFT)) <<
PAGE_SHIFT));
arena_bin_t *bin = run->bin;
malloc_mutex_lock(&bin->lock);
arena_dalloc_bin(arena, chunk, tcache, mapelm);
malloc_mutex_unlock(&bin->lock);
} else if (tcache_size <= tcache_maxclass) {
arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache);
arena_t *arena = chunk->arena;
malloc_mutex_lock(&arena->lock);
arena_dalloc_large(arena, chunk, tcache);
malloc_mutex_unlock(&arena->lock);
} else
idalloc(tcache);
}
static void
tcache_thread_cleanup(void *arg)
{
tcache_t *tcache = (tcache_t *)arg;
if (tcache == (void *)(uintptr_t)1) {
/*
* The previous time this destructor was called, we set the key
* to 1 so that other destructors wouldn't cause re-creation of
* the tcache. This time, do nothing, so that the destructor
* will not be called again.
*/
} else if (tcache == (void *)(uintptr_t)2) {
/*
* Another destructor called an allocator function after this
* destructor was called. Reset tcache to 1 in order to
* receive another callback.
*/
TCACHE_SET((uintptr_t)1);
} else if (tcache != NULL) {
assert(tcache != (void *)(uintptr_t)1);
tcache_destroy(tcache);
TCACHE_SET((uintptr_t)1);
}
}
#ifdef JEMALLOC_STATS
void
tcache_stats_merge(tcache_t *tcache, arena_t *arena)
{
unsigned i;
/* Merge and reset tcache stats. */
for (i = 0; i < nbins; i++) {
arena_bin_t *bin = &arena->bins[i];
tcache_bin_t *tbin = &tcache->tbins[i];
malloc_mutex_lock(&bin->lock);
bin->stats.nrequests += tbin->tstats.nrequests;
malloc_mutex_unlock(&bin->lock);
tbin->tstats.nrequests = 0;
}
for (; i < nhbins; i++) {
malloc_large_stats_t *lstats = &arena->stats.lstats[i - nbins];
tcache_bin_t *tbin = &tcache->tbins[i];
arena->stats.nrequests_large += tbin->tstats.nrequests;
lstats->nrequests += tbin->tstats.nrequests;
tbin->tstats.nrequests = 0;
}
}
#endif
bool
tcache_boot(void)
{
if (opt_tcache) {
unsigned i;
/*
* If necessary, clamp opt_lg_tcache_max, now that
* small_maxclass and arena_maxclass are known.
*/
if (opt_lg_tcache_max < 0 || (1U <<
opt_lg_tcache_max) < small_maxclass)
tcache_maxclass = small_maxclass;
else if ((1U << opt_lg_tcache_max) > arena_maxclass)
tcache_maxclass = arena_maxclass;
else
tcache_maxclass = (1U << opt_lg_tcache_max);
nhbins = nbins + (tcache_maxclass >> PAGE_SHIFT);
/* Initialize tcache_bin_info. */
tcache_bin_info = (tcache_bin_info_t *)base_alloc(nhbins *
sizeof(tcache_bin_info_t));
if (tcache_bin_info == NULL)
return (true);
stack_nelms = 0;
for (i = 0; i < nbins; i++) {
if ((arena_bin_info[i].nregs << 1) <=
TCACHE_NSLOTS_SMALL_MAX) {
tcache_bin_info[i].ncached_max =
(arena_bin_info[i].nregs << 1);
} else {
tcache_bin_info[i].ncached_max =
TCACHE_NSLOTS_SMALL_MAX;
}
stack_nelms += tcache_bin_info[i].ncached_max;
}
for (; i < nhbins; i++) {
tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_LARGE;
stack_nelms += tcache_bin_info[i].ncached_max;
}
/* Compute incremental GC event threshold. */
if (opt_lg_tcache_gc_sweep >= 0) {
tcache_gc_incr = ((1U << opt_lg_tcache_gc_sweep) /
nbins) + (((1U << opt_lg_tcache_gc_sweep) % nbins ==
0) ? 0 : 1);
} else
tcache_gc_incr = 0;
if (pthread_key_create(&tcache_tsd, tcache_thread_cleanup) !=
0) {
malloc_write(
"<jemalloc>: Error in pthread_key_create()\n");
abort();
}
}
return (false);
}
/******************************************************************************/
#endif /* JEMALLOC_TCACHE */
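
As an aside, the size rounding in tcache_create() above, (size + CACHELINE_MASK) & (-CACHELINE), is the standard power-of-two ceiling trick; for a power-of-two CACHELINE, & (-CACHELINE) and & ~CACHELINE_MASK clear the same low bits. A self-contained sketch, assuming a 64-byte cacheline rather than the configure-detected CACHELINE (cacheline_ceiling is a made-up helper name):

#include <assert.h>
#include <stddef.h>

/* Round sz up to the next multiple of the (assumed) 64-byte cacheline. */
static size_t
cacheline_ceiling(size_t sz)
{
    const size_t cacheline = 64;
    const size_t cacheline_mask = cacheline - 1;

    /* Adding the mask, then clearing the low bits, rounds up, never down. */
    return ((sz + cacheline_mask) & ~cacheline_mask);
}

int
main(void)
{
    assert(cacheline_ceiling(1) == 64);
    assert(cacheline_ceiling(64) == 64);
    assert(cacheline_ceiling(65) == 128);
    return (0);
}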

354
deps/jemalloc.orig/src/zone.c vendored Normal file

@@ -0,0 +1,354 @@
#include "jemalloc/internal/jemalloc_internal.h"
#ifndef JEMALLOC_ZONE
# error "This source file is for zones on Darwin (OS X)."
#endif
/******************************************************************************/
/* Data. */
static malloc_zone_t zone, szone;
static struct malloc_introspection_t zone_introspect, ozone_introspect;
/******************************************************************************/
/* Function prototypes for non-inline static functions. */
static size_t zone_size(malloc_zone_t *zone, void *ptr);
static void *zone_malloc(malloc_zone_t *zone, size_t size);
static void *zone_calloc(malloc_zone_t *zone, size_t num, size_t size);
static void *zone_valloc(malloc_zone_t *zone, size_t size);
static void zone_free(malloc_zone_t *zone, void *ptr);
static void *zone_realloc(malloc_zone_t *zone, void *ptr, size_t size);
#if (JEMALLOC_ZONE_VERSION >= 6)
static void *zone_memalign(malloc_zone_t *zone, size_t alignment,
size_t size);
static void zone_free_definite_size(malloc_zone_t *zone, void *ptr,
size_t size);
#endif
static void *zone_destroy(malloc_zone_t *zone);
static size_t zone_good_size(malloc_zone_t *zone, size_t size);
static void zone_force_lock(malloc_zone_t *zone);
static void zone_force_unlock(malloc_zone_t *zone);
static size_t ozone_size(malloc_zone_t *zone, void *ptr);
static void ozone_free(malloc_zone_t *zone, void *ptr);
static void *ozone_realloc(malloc_zone_t *zone, void *ptr, size_t size);
static unsigned ozone_batch_malloc(malloc_zone_t *zone, size_t size,
void **results, unsigned num_requested);
static void ozone_batch_free(malloc_zone_t *zone, void **to_be_freed,
unsigned num);
#if (JEMALLOC_ZONE_VERSION >= 6)
static void ozone_free_definite_size(malloc_zone_t *zone, void *ptr,
size_t size);
#endif
static void ozone_force_lock(malloc_zone_t *zone);
static void ozone_force_unlock(malloc_zone_t *zone);
/******************************************************************************/
/*
* Functions.
*/
static size_t
zone_size(malloc_zone_t *zone, void *ptr)
{
/*
* There appear to be places within Darwin (such as setenv(3)) that
* cause calls to this function with pointers that *no* zone owns. If
* we knew that all pointers were owned by *some* zone, we could split
* our zone into two parts, and use one as the default allocator and
* the other as the default deallocator/reallocator. Since that will
* not work in practice, we must check all pointers to assure that they
* reside within a mapped chunk before determining size.
*/
return (ivsalloc(ptr));
}
static void *
zone_malloc(malloc_zone_t *zone, size_t size)
{
return (JEMALLOC_P(malloc)(size));
}
static void *
zone_calloc(malloc_zone_t *zone, size_t num, size_t size)
{
return (JEMALLOC_P(calloc)(num, size));
}
static void *
zone_valloc(malloc_zone_t *zone, size_t size)
{
void *ret = NULL; /* Assignment avoids useless compiler warning. */
JEMALLOC_P(posix_memalign)(&ret, PAGE_SIZE, size);
return (ret);
}
static void
zone_free(malloc_zone_t *zone, void *ptr)
{
JEMALLOC_P(free)(ptr);
}
static void *
zone_realloc(malloc_zone_t *zone, void *ptr, size_t size)
{
return (JEMALLOC_P(realloc)(ptr, size));
}
#if (JEMALLOC_ZONE_VERSION >= 6)
static void *
zone_memalign(malloc_zone_t *zone, size_t alignment, size_t size)
{
void *ret = NULL; /* Assignment avoids useless compiler warning. */
JEMALLOC_P(posix_memalign)(&ret, alignment, size);
return (ret);
}
static void
zone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size)
{
assert(ivsalloc(ptr) == size);
JEMALLOC_P(free)(ptr);
}
#endif
static void *
zone_destroy(malloc_zone_t *zone)
{
/* This function should never be called. */
assert(false);
return (NULL);
}
static size_t
zone_good_size(malloc_zone_t *zone, size_t size)
{
size_t ret;
void *p;
/*
* Actually create an object of the appropriate size, then find out
* how large it could have been without moving up to the next size
* class.
*/
p = JEMALLOC_P(malloc)(size);
if (p != NULL) {
ret = isalloc(p);
JEMALLOC_P(free)(p);
} else
ret = size;
return (ret);
}
static void
zone_force_lock(malloc_zone_t *zone)
{
if (isthreaded)
jemalloc_prefork();
}
static void
zone_force_unlock(malloc_zone_t *zone)
{
if (isthreaded)
jemalloc_postfork();
}
malloc_zone_t *
create_zone(void)
{
zone.size = (void *)zone_size;
zone.malloc = (void *)zone_malloc;
zone.calloc = (void *)zone_calloc;
zone.valloc = (void *)zone_valloc;
zone.free = (void *)zone_free;
zone.realloc = (void *)zone_realloc;
zone.destroy = (void *)zone_destroy;
zone.zone_name = "jemalloc_zone";
zone.batch_malloc = NULL;
zone.batch_free = NULL;
zone.introspect = &zone_introspect;
zone.version = JEMALLOC_ZONE_VERSION;
#if (JEMALLOC_ZONE_VERSION >= 6)
zone.memalign = zone_memalign;
zone.free_definite_size = zone_free_definite_size;
#endif
zone_introspect.enumerator = NULL;
zone_introspect.good_size = (void *)zone_good_size;
zone_introspect.check = NULL;
zone_introspect.print = NULL;
zone_introspect.log = NULL;
zone_introspect.force_lock = (void *)zone_force_lock;
zone_introspect.force_unlock = (void *)zone_force_unlock;
zone_introspect.statistics = NULL;
#if (JEMALLOC_ZONE_VERSION >= 6)
zone_introspect.zone_locked = NULL;
#endif
return (&zone);
}
static size_t
ozone_size(malloc_zone_t *zone, void *ptr)
{
size_t ret;
ret = ivsalloc(ptr);
if (ret == 0)
ret = szone.size(zone, ptr);
return (ret);
}
static void
ozone_free(malloc_zone_t *zone, void *ptr)
{
if (ivsalloc(ptr) != 0)
JEMALLOC_P(free)(ptr);
else {
size_t size = szone.size(zone, ptr);
if (size != 0)
(szone.free)(zone, ptr);
}
}
static void *
ozone_realloc(malloc_zone_t *zone, void *ptr, size_t size)
{
size_t oldsize;
if (ptr == NULL)
return (JEMALLOC_P(malloc)(size));
oldsize = ivsalloc(ptr);
if (oldsize != 0)
return (JEMALLOC_P(realloc)(ptr, size));
else {
oldsize = szone.size(zone, ptr);
if (oldsize == 0)
return (JEMALLOC_P(malloc)(size));
else {
void *ret = JEMALLOC_P(malloc)(size);
if (ret != NULL) {
memcpy(ret, ptr, (oldsize < size) ? oldsize :
size);
(szone.free)(zone, ptr);
}
return (ret);
}
}
}
static unsigned
ozone_batch_malloc(malloc_zone_t *zone, size_t size, void **results,
unsigned num_requested)
{
/* Don't bother implementing this interface, since it isn't required. */
return (0);
}
static void
ozone_batch_free(malloc_zone_t *zone, void **to_be_freed, unsigned num)
{
unsigned i;
for (i = 0; i < num; i++)
ozone_free(zone, to_be_freed[i]);
}
#if (JEMALLOC_ZONE_VERSION >= 6)
static void
ozone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size)
{
if (ivsalloc(ptr) != 0) {
assert(ivsalloc(ptr) == size);
JEMALLOC_P(free)(ptr);
} else {
assert(size == szone.size(zone, ptr));
szone.free_definite_size(zone, ptr, size);
}
}
#endif
static void
ozone_force_lock(malloc_zone_t *zone)
{
/* jemalloc locking is taken care of by the normal jemalloc zone. */
szone.introspect->force_lock(zone);
}
static void
ozone_force_unlock(malloc_zone_t *zone)
{
/* jemalloc locking is taken care of by the normal jemalloc zone. */
szone.introspect->force_unlock(zone);
}
/*
* Overlay the default scalable zone (szone) such that existing allocations are
* drained, and further allocations come from jemalloc. This is necessary
* because Core Foundation directly accesses and uses the szone before the
* jemalloc library is even loaded.
*/
void
szone2ozone(malloc_zone_t *zone)
{
/*
* Stash a copy of the original szone so that we can call its
* functions as needed. Note that internally, the szone stores its
* bookkeeping data structures immediately following the malloc_zone_t
* header, so when calling szone functions, we need to pass a pointer
* to the original zone structure.
*/
memcpy(&szone, zone, sizeof(malloc_zone_t));
zone->size = (void *)ozone_size;
zone->malloc = (void *)zone_malloc;
zone->calloc = (void *)zone_calloc;
zone->valloc = (void *)zone_valloc;
zone->free = (void *)ozone_free;
zone->realloc = (void *)ozone_realloc;
zone->destroy = (void *)zone_destroy;
zone->zone_name = "jemalloc_ozone";
zone->batch_malloc = ozone_batch_malloc;
zone->batch_free = ozone_batch_free;
zone->introspect = &ozone_introspect;
zone->version = JEMALLOC_ZONE_VERSION;
#if (JEMALLOC_ZONE_VERSION >= 6)
zone->memalign = zone_memalign;
zone->free_definite_size = ozone_free_definite_size;
#endif
ozone_introspect.enumerator = NULL;
ozone_introspect.good_size = (void *)zone_good_size;
ozone_introspect.check = NULL;
ozone_introspect.print = NULL;
ozone_introspect.log = NULL;
ozone_introspect.force_lock = (void *)ozone_force_lock;
ozone_introspect.force_unlock = (void *)ozone_force_unlock;
ozone_introspect.statistics = NULL;
#if (JEMALLOC_ZONE_VERSION >= 6)
ozone_introspect.zone_locked = NULL;
#endif
}
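
For context, create_zone() and szone2ozone() are invoked from jemalloc's Darwin initialization code, which is not part of this file; the sketch below is a rough reconstruction of that hookup, not the shipped code. register_jemalloc_zones is a made-up name; malloc_zone_register() and malloc_default_zone() are Darwin APIs from <malloc/malloc.h>.

#include <malloc/malloc.h>

malloc_zone_t *create_zone(void);
void szone2ozone(malloc_zone_t *zone);

void
register_jemalloc_zones(void)
{
    /* Expose the jemalloc zone alongside the default scalable zone. */
    malloc_zone_register(create_zone());

    /*
     * Convert the default szone into the overlay zone defined above, so
     * that pre-existing szone allocations remain freeable while new
     * requests are serviced by jemalloc.
     */
    szone2ozone(malloc_default_zone());
}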

142
deps/jemalloc.orig/test/allocated.c vendored Normal file

@@ -0,0 +1,142 @@
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdbool.h>
#include <pthread.h>
#include <assert.h>
#include <errno.h>
#include <string.h>
#define JEMALLOC_MANGLE
#include "jemalloc_test.h"
void *
thread_start(void *arg)
{
int err;
void *p;
uint64_t a0, a1, d0, d1;
uint64_t *ap0, *ap1, *dp0, *dp1;
size_t sz, usize;
sz = sizeof(a0);
if ((err = JEMALLOC_P(mallctl)("thread.allocated", &a0, &sz, NULL,
0))) {
if (err == ENOENT) {
#ifdef JEMALLOC_STATS
assert(false);
#endif
goto RETURN;
}
fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__,
strerror(err));
exit(1);
}
sz = sizeof(ap0);
if ((err = JEMALLOC_P(mallctl)("thread.allocatedp", &ap0, &sz, NULL,
0))) {
if (err == ENOENT) {
#ifdef JEMALLOC_STATS
assert(false);
#endif
goto RETURN;
}
fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__,
strerror(err));
exit(1);
}
assert(*ap0 == a0);
sz = sizeof(d0);
if ((err = JEMALLOC_P(mallctl)("thread.deallocated", &d0, &sz, NULL,
0))) {
if (err == ENOENT) {
#ifdef JEMALLOC_STATS
assert(false);
#endif
goto RETURN;
}
fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__,
strerror(err));
exit(1);
}
sz = sizeof(dp0);
if ((err = JEMALLOC_P(mallctl)("thread.deallocatedp", &dp0, &sz, NULL,
0))) {
if (err == ENOENT) {
#ifdef JEMALLOC_STATS
assert(false);
#endif
goto RETURN;
}
fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__,
strerror(err));
exit(1);
}
assert(*dp0 == d0);
p = JEMALLOC_P(malloc)(1);
if (p == NULL) {
fprintf(stderr, "%s(): Error in malloc()\n", __func__);
exit(1);
}
sz = sizeof(a1);
JEMALLOC_P(mallctl)("thread.allocated", &a1, &sz, NULL, 0);
sz = sizeof(ap1);
JEMALLOC_P(mallctl)("thread.allocatedp", &ap1, &sz, NULL, 0);
assert(*ap1 == a1);
assert(ap0 == ap1);
usize = JEMALLOC_P(malloc_usable_size)(p);
assert(a0 + usize <= a1);
JEMALLOC_P(free)(p);
sz = sizeof(d1);
JEMALLOC_P(mallctl)("thread.deallocated", &d1, &sz, NULL, 0);
sz = sizeof(dp1);
JEMALLOC_P(mallctl)("thread.deallocatedp", &dp1, &sz, NULL, 0);
assert(*dp1 == d1);
assert(dp0 == dp1);
assert(d0 + usize <= d1);
RETURN:
return (NULL);
}
int
main(void)
{
int ret = 0;
pthread_t thread;
fprintf(stderr, "Test begin\n");
thread_start(NULL);
if (pthread_create(&thread, NULL, thread_start, NULL)
!= 0) {
fprintf(stderr, "%s(): Error in pthread_create()\n", __func__);
ret = 1;
goto RETURN;
}
pthread_join(thread, (void *)&ret);
thread_start(NULL);
if (pthread_create(&thread, NULL, thread_start, NULL)
!= 0) {
fprintf(stderr, "%s(): Error in pthread_create()\n", __func__);
ret = 1;
goto RETURN;
}
pthread_join(thread, (void *)&ret);
thread_start(NULL);
RETURN:
fprintf(stderr, "Test end\n");
return (ret);
}
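
The *allocatedp variants exercised above exist so the per-thread counters can be read without paying for a mallctl() call each time. A short sketch in the same style; it assumes a --enable-stats build (with stats disabled the mallctl returns ENOENT, as the test handles):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#define JEMALLOC_MANGLE
#include "jemalloc_test.h"

int
main(void)
{
    uint64_t *allocatedp;
    size_t sz = sizeof(allocatedp);

    /*
     * Fetch the pointer once; the test above asserts that repeated queries
     * return the same pointer, so it can be cached and read directly.
     */
    if (JEMALLOC_P(mallctl)("thread.allocatedp", &allocatedp, &sz, NULL,
        0) != 0)
        return (1);

    JEMALLOC_P(free)(JEMALLOC_P(malloc)(64));
    printf("bytes allocated by this thread so far: %"PRIu64"\n",
        *allocatedp);
    return (0);
}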

2
deps/jemalloc.orig/test/allocated.exp vendored Normal file

@@ -0,0 +1,2 @@
Test begin
Test end

133
deps/jemalloc.orig/test/allocm.c vendored Normal file

@@ -0,0 +1,133 @@
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#define JEMALLOC_MANGLE
#include "jemalloc_test.h"
#define CHUNK 0x400000
/* #define MAXALIGN ((size_t)0x80000000000LLU) */
#define MAXALIGN ((size_t)0x2000000LLU)
#define NITER 4
int
main(void)
{
int r;
void *p;
size_t sz, alignment, total, tsz;
unsigned i;
void *ps[NITER];
fprintf(stderr, "Test begin\n");
sz = 0;
r = JEMALLOC_P(allocm)(&p, &sz, 42, 0);
if (r != ALLOCM_SUCCESS) {
fprintf(stderr, "Unexpected allocm() error\n");
abort();
}
if (sz < 42)
fprintf(stderr, "Real size smaller than expected\n");
if (JEMALLOC_P(dallocm)(p, 0) != ALLOCM_SUCCESS)
fprintf(stderr, "Unexpected dallocm() error\n");
r = JEMALLOC_P(allocm)(&p, NULL, 42, 0);
if (r != ALLOCM_SUCCESS) {
fprintf(stderr, "Unexpected allocm() error\n");
abort();
}
if (JEMALLOC_P(dallocm)(p, 0) != ALLOCM_SUCCESS)
fprintf(stderr, "Unexpected dallocm() error\n");
r = JEMALLOC_P(allocm)(&p, NULL, 42, ALLOCM_ZERO);
if (r != ALLOCM_SUCCESS) {
fprintf(stderr, "Unexpected allocm() error\n");
abort();
}
if (JEMALLOC_P(dallocm)(p, 0) != ALLOCM_SUCCESS)
fprintf(stderr, "Unexpected dallocm() error\n");
#if LG_SIZEOF_PTR == 3
alignment = 0x8000000000000000LLU;
sz = 0x8000000000000000LLU;
#else
alignment = 0x80000000LU;
sz = 0x80000000LU;
#endif
r = JEMALLOC_P(allocm)(&p, NULL, sz, ALLOCM_ALIGN(alignment));
if (r == ALLOCM_SUCCESS) {
fprintf(stderr,
"Expected error for allocm(&p, %zu, 0x%x)\n",
sz, ALLOCM_ALIGN(alignment));
}
#if LG_SIZEOF_PTR == 3
alignment = 0x4000000000000000LLU;
sz = 0x8400000000000001LLU;
#else
alignment = 0x40000000LU;
sz = 0x84000001LU;
#endif
r = JEMALLOC_P(allocm)(&p, NULL, sz, ALLOCM_ALIGN(alignment));
if (r == ALLOCM_SUCCESS) {
fprintf(stderr,
"Expected error for allocm(&p, %zu, 0x%x)\n",
sz, ALLOCM_ALIGN(alignment));
}
alignment = 0x10LLU;
#if LG_SIZEOF_PTR == 3
sz = 0xfffffffffffffff0LLU;
#else
sz = 0xfffffff0LU;
#endif
r = JEMALLOC_P(allocm)(&p, NULL, sz, ALLOCM_ALIGN(alignment));
if (r == ALLOCM_SUCCESS) {
fprintf(stderr,
"Expected error for allocm(&p, %zu, 0x%x)\n",
sz, ALLOCM_ALIGN(alignment));
}
for (i = 0; i < NITER; i++)
ps[i] = NULL;
for (alignment = 8;
alignment <= MAXALIGN;
alignment <<= 1) {
total = 0;
fprintf(stderr, "Alignment: %zu\n", alignment);
for (sz = 1;
sz < 3 * alignment && sz < (1U << 31);
sz += (alignment >> (LG_SIZEOF_PTR-1)) - 1) {
for (i = 0; i < NITER; i++) {
r = JEMALLOC_P(allocm)(&ps[i], NULL, sz,
ALLOCM_ALIGN(alignment) | ALLOCM_ZERO);
if (r != ALLOCM_SUCCESS) {
fprintf(stderr,
"Error for size %zu (0x%zx): %d\n",
sz, sz, r);
exit(1);
}
if ((uintptr_t)ps[i] & (alignment-1)) {
fprintf(stderr,
"%p inadequately aligned for"
" alignment: %zu\n", ps[i], alignment);
}
JEMALLOC_P(sallocm)(ps[i], &tsz, 0);
total += tsz;
if (total >= (MAXALIGN << 1))
break;
}
for (i = 0; i < NITER; i++) {
if (ps[i] != NULL) {
JEMALLOC_P(dallocm)(ps[i], 0);
ps[i] = NULL;
}
}
}
}
fprintf(stderr, "Test end\n");
return (0);
}
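
Condensed from the loop above, a minimal aligned-allocation round trip with the experimental API; this is a sketch, with ALLOCM_ALIGN(), ALLOCM_ZERO, allocm(), and dallocm() taken from the same jemalloc.h declarations this test exercises.

#include <stdint.h>
#include <stdio.h>
#define JEMALLOC_MANGLE
#include "jemalloc_test.h"

int
main(void)
{
    void *p;
    size_t rsize;

    /* 100 bytes, 4 KiB aligned, zero filled. */
    if (JEMALLOC_P(allocm)(&p, &rsize, 100, ALLOCM_ALIGN(4096) |
        ALLOCM_ZERO) != ALLOCM_SUCCESS)
        return (1);
    printf("usable size %zu, aligned: %s\n", rsize,
        ((uintptr_t)p & 4095) == 0 ? "yes" : "no");
    return (JEMALLOC_P(dallocm)(p, 0) == ALLOCM_SUCCESS ? 0 : 1);
}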

25
deps/jemalloc.orig/test/allocm.exp vendored Normal file

@@ -0,0 +1,25 @@
Test begin
Alignment: 8
Alignment: 16
Alignment: 32
Alignment: 64
Alignment: 128
Alignment: 256
Alignment: 512
Alignment: 1024
Alignment: 2048
Alignment: 4096
Alignment: 8192
Alignment: 16384
Alignment: 32768
Alignment: 65536
Alignment: 131072
Alignment: 262144
Alignment: 524288
Alignment: 1048576
Alignment: 2097152
Alignment: 4194304
Alignment: 8388608
Alignment: 16777216
Alignment: 33554432
Test end

157
deps/jemalloc.orig/test/bitmap.c vendored Normal file

@@ -0,0 +1,157 @@
#define JEMALLOC_MANGLE
#include "jemalloc_test.h"
/*
* Avoid using the assert() from jemalloc_internal.h, since it requires
* internal libjemalloc functionality.
* */
#include <assert.h>
/*
* Directly include the bitmap code, since it isn't exposed outside
* libjemalloc.
*/
#include "../src/bitmap.c"
#if (LG_BITMAP_MAXBITS > 12)
# define MAXBITS 4500
#else
# define MAXBITS (1U << LG_BITMAP_MAXBITS)
#endif
static void
test_bitmap_size(void)
{
size_t i, prev_size;
prev_size = 0;
for (i = 1; i <= MAXBITS; i++) {
size_t size = bitmap_size(i);
assert(size >= prev_size);
prev_size = size;
}
}
static void
test_bitmap_init(void)
{
size_t i;
for (i = 1; i <= MAXBITS; i++) {
bitmap_info_t binfo;
bitmap_info_init(&binfo, i);
{
size_t j;
bitmap_t bitmap[bitmap_info_ngroups(&binfo)];
bitmap_init(bitmap, &binfo);
for (j = 0; j < i; j++)
assert(bitmap_get(bitmap, &binfo, j) == false);
}
}
}
static void
test_bitmap_set(void)
{
size_t i;
for (i = 1; i <= MAXBITS; i++) {
bitmap_info_t binfo;
bitmap_info_init(&binfo, i);
{
size_t j;
bitmap_t bitmap[bitmap_info_ngroups(&binfo)];
bitmap_init(bitmap, &binfo);
for (j = 0; j < i; j++)
bitmap_set(bitmap, &binfo, j);
assert(bitmap_full(bitmap, &binfo));
}
}
}
static void
test_bitmap_unset(void)
{
size_t i;
for (i = 1; i <= MAXBITS; i++) {
bitmap_info_t binfo;
bitmap_info_init(&binfo, i);
{
size_t j;
bitmap_t bitmap[bitmap_info_ngroups(&binfo)];
bitmap_init(bitmap, &binfo);
for (j = 0; j < i; j++)
bitmap_set(bitmap, &binfo, j);
assert(bitmap_full(bitmap, &binfo));
for (j = 0; j < i; j++)
bitmap_unset(bitmap, &binfo, j);
for (j = 0; j < i; j++)
bitmap_set(bitmap, &binfo, j);
assert(bitmap_full(bitmap, &binfo));
}
}
}
static void
test_bitmap_sfu(void)
{
size_t i;
for (i = 1; i <= MAXBITS; i++) {
bitmap_info_t binfo;
bitmap_info_init(&binfo, i);
{
ssize_t j;
bitmap_t bitmap[bitmap_info_ngroups(&binfo)];
bitmap_init(bitmap, &binfo);
/* Iteratively set bits starting at the beginning. */
for (j = 0; j < i; j++)
assert(bitmap_sfu(bitmap, &binfo) == j);
assert(bitmap_full(bitmap, &binfo));
/*
* Iteratively unset bits starting at the end, and
* verify that bitmap_sfu() reaches the unset bits.
*/
for (j = i - 1; j >= 0; j--) {
bitmap_unset(bitmap, &binfo, j);
assert(bitmap_sfu(bitmap, &binfo) == j);
bitmap_unset(bitmap, &binfo, j);
}
assert(bitmap_get(bitmap, &binfo, 0) == false);
/*
* Iteratively set bits starting at the beginning, and
* verify that bitmap_sfu() looks past them.
*/
for (j = 1; j < i; j++) {
bitmap_set(bitmap, &binfo, j - 1);
assert(bitmap_sfu(bitmap, &binfo) == j);
bitmap_unset(bitmap, &binfo, j);
}
assert(bitmap_sfu(bitmap, &binfo) == i - 1);
assert(bitmap_full(bitmap, &binfo));
}
}
}
int
main(void)
{
fprintf(stderr, "Test begin\n");
test_bitmap_size();
test_bitmap_init();
test_bitmap_set();
test_bitmap_unset();
test_bitmap_sfu();
fprintf(stderr, "Test end\n");
return (0);
}

2
deps/jemalloc.orig/test/bitmap.exp vendored Normal file

@@ -0,0 +1,2 @@
Test begin
Test end

67
deps/jemalloc.orig/test/mremap.c vendored Normal file

@@ -0,0 +1,67 @@
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <errno.h>
#include <string.h>
#define JEMALLOC_MANGLE
#include "jemalloc_test.h"
int
main(void)
{
int ret, err;
size_t sz, lg_chunk, chunksize, i;
char *p, *q;
fprintf(stderr, "Test begin\n");
sz = sizeof(lg_chunk);
if ((err = JEMALLOC_P(mallctl)("opt.lg_chunk", &lg_chunk, &sz, NULL,
0))) {
assert(err != ENOENT);
fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__,
strerror(err));
ret = 1;
goto RETURN;
}
chunksize = ((size_t)1U) << lg_chunk;
p = (char *)malloc(chunksize);
if (p == NULL) {
fprintf(stderr, "malloc(%zu) --> %p\n", chunksize, p);
ret = 1;
goto RETURN;
}
memset(p, 'a', chunksize);
q = (char *)realloc(p, chunksize * 2);
if (q == NULL) {
fprintf(stderr, "realloc(%p, %zu) --> %p\n", p, chunksize * 2,
q);
ret = 1;
goto RETURN;
}
for (i = 0; i < chunksize; i++) {
assert(q[i] == 'a');
}
p = q;
q = (char *)realloc(p, chunksize);
if (q == NULL) {
fprintf(stderr, "realloc(%p, %zu) --> %p\n", p, chunksize, q);
ret = 1;
goto RETURN;
}
for (i = 0; i < chunksize; i++) {
assert(q[i] == 'a');
}
free(q);
ret = 0;
RETURN:
fprintf(stderr, "Test end\n");
return (ret);
}

2
deps/jemalloc.orig/test/mremap.exp vendored Normal file

@@ -0,0 +1,2 @@
Test begin
Test end

121
deps/jemalloc.orig/test/posix_memalign.c vendored Normal file

@@ -0,0 +1,121 @@
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#define JEMALLOC_MANGLE
#include "jemalloc_test.h"
#define CHUNK 0x400000
/* #define MAXALIGN ((size_t)0x80000000000LLU) */
#define MAXALIGN ((size_t)0x2000000LLU)
#define NITER 4
int
main(void)
{
size_t alignment, size, total;
unsigned i;
int err;
void *p, *ps[NITER];
fprintf(stderr, "Test begin\n");
/* Test error conditions. */
for (alignment = 0; alignment < sizeof(void *); alignment++) {
err = JEMALLOC_P(posix_memalign)(&p, alignment, 1);
if (err != EINVAL) {
fprintf(stderr,
"Expected error for invalid alignment %zu\n",
alignment);
}
}
for (alignment = sizeof(size_t); alignment < MAXALIGN;
alignment <<= 1) {
err = JEMALLOC_P(posix_memalign)(&p, alignment + 1, 1);
if (err == 0) {
fprintf(stderr,
"Expected error for invalid alignment %zu\n",
alignment + 1);
}
}
#if LG_SIZEOF_PTR == 3
alignment = 0x8000000000000000LLU;
size = 0x8000000000000000LLU;
#else
alignment = 0x80000000LU;
size = 0x80000000LU;
#endif
err = JEMALLOC_P(posix_memalign)(&p, alignment, size);
if (err == 0) {
fprintf(stderr,
"Expected error for posix_memalign(&p, %zu, %zu)\n",
alignment, size);
}
#if LG_SIZEOF_PTR == 3
alignment = 0x4000000000000000LLU;
size = 0x8400000000000001LLU;
#else
alignment = 0x40000000LU;
size = 0x84000001LU;
#endif
err = JEMALLOC_P(posix_memalign)(&p, alignment, size);
if (err == 0) {
fprintf(stderr,
"Expected error for posix_memalign(&p, %zu, %zu)\n",
alignment, size);
}
alignment = 0x10LLU;
#if LG_SIZEOF_PTR == 3
size = 0xfffffffffffffff0LLU;
#else
size = 0xfffffff0LU;
#endif
err = JEMALLOC_P(posix_memalign)(&p, alignment, size);
if (err == 0) {
fprintf(stderr,
"Expected error for posix_memalign(&p, %zu, %zu)\n",
alignment, size);
}
for (i = 0; i < NITER; i++)
ps[i] = NULL;
for (alignment = 8;
alignment <= MAXALIGN;
alignment <<= 1) {
total = 0;
fprintf(stderr, "Alignment: %zu\n", alignment);
for (size = 1;
size < 3 * alignment && size < (1U << 31);
size += (alignment >> (LG_SIZEOF_PTR-1)) - 1) {
for (i = 0; i < NITER; i++) {
err = JEMALLOC_P(posix_memalign)(&ps[i],
alignment, size);
if (err) {
fprintf(stderr,
"Error for size %zu (0x%zx): %s\n",
size, size, strerror(err));
exit(1);
}
total += JEMALLOC_P(malloc_usable_size)(ps[i]);
if (total >= (MAXALIGN << 1))
break;
}
for (i = 0; i < NITER; i++) {
if (ps[i] != NULL) {
JEMALLOC_P(free)(ps[i]);
ps[i] = NULL;
}
}
}
}
fprintf(stderr, "Test end\n");
return (0);
}


@@ -0,0 +1,25 @@
Test begin
Alignment: 8
Alignment: 16
Alignment: 32
Alignment: 64
Alignment: 128
Alignment: 256
Alignment: 512
Alignment: 1024
Alignment: 2048
Alignment: 4096
Alignment: 8192
Alignment: 16384
Alignment: 32768
Alignment: 65536
Alignment: 131072
Alignment: 262144
Alignment: 524288
Alignment: 1048576
Alignment: 2097152
Alignment: 4194304
Alignment: 8388608
Alignment: 16777216
Alignment: 33554432
Test end

127
deps/jemalloc.orig/test/rallocm.c vendored Normal file

@@ -0,0 +1,127 @@
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <assert.h>
#define JEMALLOC_MANGLE
#include "jemalloc_test.h"
int
main(void)
{
size_t pagesize;
void *p, *q;
size_t sz, tsz;
int r;
fprintf(stderr, "Test begin\n");
/* Get page size. */
{
long result = sysconf(_SC_PAGESIZE);
assert(result != -1);
pagesize = (size_t)result;
}
r = JEMALLOC_P(allocm)(&p, &sz, 42, 0);
if (r != ALLOCM_SUCCESS) {
fprintf(stderr, "Unexpected allocm() error\n");
abort();
}
q = p;
r = JEMALLOC_P(rallocm)(&q, &tsz, sz, 0, ALLOCM_NO_MOVE);
if (r != ALLOCM_SUCCESS)
fprintf(stderr, "Unexpected rallocm() error\n");
if (q != p)
fprintf(stderr, "Unexpected object move\n");
if (tsz != sz) {
fprintf(stderr, "Unexpected size change: %zu --> %zu\n",
sz, tsz);
}
q = p;
r = JEMALLOC_P(rallocm)(&q, &tsz, sz, 5, ALLOCM_NO_MOVE);
if (r != ALLOCM_SUCCESS)
fprintf(stderr, "Unexpected rallocm() error\n");
if (q != p)
fprintf(stderr, "Unexpected object move\n");
if (tsz != sz) {
fprintf(stderr, "Unexpected size change: %zu --> %zu\n",
sz, tsz);
}
q = p;
r = JEMALLOC_P(rallocm)(&q, &tsz, sz + 5, 0, ALLOCM_NO_MOVE);
if (r != ALLOCM_ERR_NOT_MOVED)
fprintf(stderr, "Unexpected rallocm() result\n");
if (q != p)
fprintf(stderr, "Unexpected object move\n");
if (tsz != sz) {
fprintf(stderr, "Unexpected size change: %zu --> %zu\n",
sz, tsz);
}
q = p;
r = JEMALLOC_P(rallocm)(&q, &tsz, sz + 5, 0, 0);
if (r != ALLOCM_SUCCESS)
fprintf(stderr, "Unexpected rallocm() error\n");
if (q == p)
fprintf(stderr, "Expected object move\n");
if (tsz == sz) {
fprintf(stderr, "Expected size change: %zu --> %zu\n",
sz, tsz);
}
p = q;
sz = tsz;
r = JEMALLOC_P(rallocm)(&q, &tsz, pagesize*2, 0, 0);
if (r != ALLOCM_SUCCESS)
fprintf(stderr, "Unexpected rallocm() error\n");
if (q == p)
fprintf(stderr, "Expected object move\n");
if (tsz == sz) {
fprintf(stderr, "Expected size change: %zu --> %zu\n",
sz, tsz);
}
p = q;
sz = tsz;
r = JEMALLOC_P(rallocm)(&q, &tsz, pagesize*4, 0, 0);
if (r != ALLOCM_SUCCESS)
fprintf(stderr, "Unexpected rallocm() error\n");
if (tsz == sz) {
fprintf(stderr, "Expected size change: %zu --> %zu\n",
sz, tsz);
}
p = q;
sz = tsz;
r = JEMALLOC_P(rallocm)(&q, &tsz, pagesize*2, 0, ALLOCM_NO_MOVE);
if (r != ALLOCM_SUCCESS)
fprintf(stderr, "Unexpected rallocm() error\n");
if (q != p)
fprintf(stderr, "Unexpected object move\n");
if (tsz == sz) {
fprintf(stderr, "Expected size change: %zu --> %zu\n",
sz, tsz);
}
sz = tsz;
r = JEMALLOC_P(rallocm)(&q, &tsz, pagesize*4, 0, ALLOCM_NO_MOVE);
if (r != ALLOCM_SUCCESS)
fprintf(stderr, "Unexpected rallocm() error\n");
if (q != p)
fprintf(stderr, "Unexpected object move\n");
if (tsz == sz) {
fprintf(stderr, "Expected size change: %zu --> %zu\n",
sz, tsz);
}
sz = tsz;
JEMALLOC_P(dallocm)(p, 0);
fprintf(stderr, "Test end\n");
return (0);
}

2
deps/jemalloc.orig/test/rallocm.exp vendored Normal file

@@ -0,0 +1,2 @@
Test begin
Test end

92
deps/jemalloc.orig/test/thread_arena.c vendored Normal file

@@ -0,0 +1,92 @@
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#include <string.h>
#include <assert.h>
#define JEMALLOC_MANGLE
#include "jemalloc_test.h"
#define NTHREADS 10
void *
thread_start(void *arg)
{
unsigned main_arena_ind = *(unsigned *)arg;
void *p;
unsigned arena_ind;
size_t size;
int err;
p = JEMALLOC_P(malloc)(1);
if (p == NULL) {
fprintf(stderr, "%s(): Error in malloc()\n", __func__);
return (void *)1;
}
size = sizeof(arena_ind);
if ((err = JEMALLOC_P(mallctl)("thread.arena", &arena_ind, &size,
&main_arena_ind, sizeof(main_arena_ind)))) {
fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__,
strerror(err));
return (void *)1;
}
size = sizeof(arena_ind);
if ((err = JEMALLOC_P(mallctl)("thread.arena", &arena_ind, &size, NULL,
0))) {
fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__,
strerror(err));
return (void *)1;
}
assert(arena_ind == main_arena_ind);
return (NULL);
}
int
main(void)
{
int ret = 0;
void *p;
unsigned arena_ind;
size_t size;
int err;
pthread_t threads[NTHREADS];
unsigned i;
fprintf(stderr, "Test begin\n");
p = JEMALLOC_P(malloc)(1);
if (p == NULL) {
fprintf(stderr, "%s(): Error in malloc()\n", __func__);
ret = 1;
goto RETURN;
}
size = sizeof(arena_ind);
if ((err = JEMALLOC_P(mallctl)("thread.arena", &arena_ind, &size, NULL,
0))) {
fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__,
strerror(err));
ret = 1;
goto RETURN;
}
for (i = 0; i < NTHREADS; i++) {
if (pthread_create(&threads[i], NULL, thread_start,
(void *)&arena_ind) != 0) {
fprintf(stderr, "%s(): Error in pthread_create()\n",
__func__);
ret = 1;
goto RETURN;
}
}
for (i = 0; i < NTHREADS; i++)
pthread_join(threads[i], (void *)&ret);
RETURN:
fprintf(stderr, "Test end\n");
return (ret);
}


@@ -0,0 +1,2 @@
Test begin
Test end


@@ -11,6 +11,7 @@
/lib/
/Makefile
/include/jemalloc/internal/jemalloc_internal\.h
/include/jemalloc/internal/size_classes\.h
/include/jemalloc/jemalloc\.h
/include/jemalloc/jemalloc_defs\.h
/test/jemalloc_test\.h
@@ -21,3 +22,4 @@
!test/*.c
!test/*.exp
/VERSION
/bin/jemalloc.sh

32
deps/jemalloc/COPYING vendored

@@ -1,9 +1,10 @@
Unless otherwise specified, files in the jemalloc source distribution are
subject to the following licenses:
subject to the following license:
--------------------------------------------------------------------------------
Copyright (C) 2002-2010 Jason Evans <jasone@canonware.com>.
Copyright (C) 2002-2012 Jason Evans <jasone@canonware.com>.
All rights reserved.
Copyright (C) 2007-2010 Mozilla Foundation. All rights reserved.
Copyright (C) 2007-2012 Mozilla Foundation. All rights reserved.
Copyright (C) 2009-2012 Facebook, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
@@ -24,28 +25,3 @@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--------------------------------------------------------------------------------
Copyright (C) 2009-2010 Facebook, Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice, this
list of conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.
* Neither the name of Facebook, Inc. nor the names of its contributors may be
used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--------------------------------------------------------------------------------


@@ -6,6 +6,95 @@ found in the git revision history:
http://www.canonware.com/cgi-bin/gitweb.cgi?p=jemalloc.git
git://canonware.com/jemalloc.git
* 3.0.0 (May 11, 2012)
Although this version adds some major new features, the primary focus is on
internal code cleanup that facilitates maintainability and portability, most
of which is not reflected in the ChangeLog. This is the first release to
incorporate substantial contributions from numerous other developers, and the
result is a more broadly useful allocator (see the git revision history for
contribution details). Note that the license has been unified, thanks to
Facebook granting a license under the same terms as the other copyright
holders (see COPYING).
New features:
- Implement Valgrind support, redzones, and quarantine.
- Add support for additional platforms:
+ FreeBSD
+ Mac OS X Lion
+ MinGW
+ Windows (no support yet for replacing the system malloc)
- Add support for additional architectures:
+ MIPS
+ SH4
+ Tilera
- Add support for cross compiling.
- Add nallocm(), which rounds a request size up to the nearest size class
without actually allocating.
- Implement aligned_alloc() (blame C11).
- Add the "thread.tcache.enabled" mallctl.
- Add the "opt.prof_final" mallctl.
- Update pprof (from gperftools 2.0).
- Add the --with-mangling option.
- Add the --disable-experimental option.
- Add the --disable-munmap option, and make it the default on Linux.
- Add the --enable-mremap option, which disables use of mremap(2) by default.
Incompatible changes:
- Enable stats by default.
- Enable fill by default.
- Disable lazy locking by default.
- Rename the "tcache.flush" mallctl to "thread.tcache.flush".
- Rename the "arenas.pagesize" mallctl to "arenas.page".
- Change the "opt.lg_prof_sample" default from 0 to 19 (1 B to 512 KiB).
- Change the "opt.prof_accum" default from true to false.
Removed features:
- Remove the swap feature, including the "config.swap", "swap.avail",
"swap.prezeroed", "swap.nfds", and "swap.fds" mallctls.
- Remove highruns statistics, including the
"stats.arenas.<i>.bins.<j>.highruns" and
"stats.arenas.<i>.lruns.<j>.highruns" mallctls.
- As part of small size class refactoring, remove the "opt.lg_[qc]space_max",
"arenas.cacheline", "arenas.subpage", "arenas.[tqcs]space_{min,max}", and
"arenas.[tqcs]bins" mallctls.
- Remove the "arenas.chunksize" mallctl.
- Remove the "opt.lg_prof_tcmax" option.
- Remove the "opt.lg_prof_bt_max" option.
- Remove the "opt.lg_tcache_gc_sweep" option.
- Remove the --disable-tiny option, including the "config.tiny" mallctl.
- Remove the --enable-dynamic-page-shift configure option.
- Remove the --enable-sysv configure option.
Bug fixes:
- Fix a statistics-related bug in the "thread.arena" mallctl that could cause
invalid statistics and crashes.
- Work around TLS deallocation via free() on Linux. This bug could cause
write-after-free memory corruption.
- Fix a potential deadlock that could occur during interval- and
growth-triggered heap profile dumps.
- Fix large calloc() zeroing bugs due to dropping chunk map unzeroed flags.
- Fix chunk_alloc_dss() to stop claiming memory is zeroed. This bug could
cause memory corruption and crashes with --enable-dss specified.
- Fix fork-related bugs that could cause deadlock in children between fork
and exec.
- Fix malloc_stats_print() to honor 'b' and 'l' in the opts parameter.
- Fix realloc(p, 0) to act like free(p).
- Do not enforce minimum alignment in memalign().
- Check for NULL pointer in malloc_usable_size().
- Fix an off-by-one heap profile statistics bug that could be observed in
interval- and growth-triggered heap profiles.
- Fix the "epoch" mallctl to update cached stats even if the passed in epoch
is 0.
- Fix bin->runcur management to fix a layout policy bug. This bug did not
affect correctness.
- Fix a bug in choose_arena_hard() that potentially caused more arenas to be
initialized than necessary.
- Add missing "opt.lg_tcache_max" mallctl implementation.
- Use glibc allocator hooks to make mixed allocator usage less likely.
- Fix build issues for --disable-tcache.
- Don't mangle pthread_create() when --with-private-namespace is specified.
* 2.2.5 (November 14, 2011)
Bug fixes:
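
The nallocm() entry in the 3.0.0 ChangeLog above is easy to gloss over; a small sketch of what it does, against the 3.0.0 experimental API (unprefixed names assumed; a --with-jemalloc-prefix build would mangle them, e.g. je_nallocm):

#include <stdio.h>
#include <jemalloc/jemalloc.h>

int
main(void)
{
    size_t rsize;

    /* Ask which size class a 100-byte request would land in, without allocating. */
    if (nallocm(&rsize, 100, 0) != ALLOCM_SUCCESS)
        return (1);
    printf("a 100 byte request rounds up to %zu bytes\n", rsize);
    return (0);
}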

93
deps/jemalloc/INSTALL vendored

@@ -26,6 +26,19 @@ any of the following arguments (not a definitive list) to 'configure':
Embed one or more library paths, so that libjemalloc can find the libraries
it is linked to. This works only on ELF-based systems.
--with-mangling=<map>
Mangle public symbols specified in <map> which is a comma-separated list of
name:mangled pairs.
For example, to use ld's --wrap option as an alternative method for
overriding libc's malloc implementation, specify something like:
--with-mangling=malloc:__wrap_malloc,free:__wrap_free[...]
Note that mangling happens prior to application of the prefix specified by
--with-jemalloc-prefix, and mangled symbols are then ignored when applying
the prefix.
--with-jemalloc-prefix=<prefix>
Prefix all public APIs with <prefix>. For example, if <prefix> is
"prefix_", API changes like the following occur:
@@ -62,8 +75,8 @@ any of the following arguments (not a definitive list) to 'configure':
Enable assertions and validation code. This incurs a substantial
performance hit, but is very useful during application development.
--enable-stats
Enable statistics gathering functionality. See the "opt.stats_print"
--disable-stats
Disable statistics gathering functionality. See the "opt.stats_print"
option documentation for usage details.
--enable-prof
@@ -90,51 +103,50 @@ any of the following arguments (not a definitive list) to 'configure':
Statically link against the specified libunwind.a rather than dynamically
linking with -lunwind.
--disable-tiny
Disable tiny (sub-quantum-sized) object support. Technically it is not
legal for a malloc implementation to allocate objects with less than
quantum alignment (8 or 16 bytes, depending on architecture), but in
practice it never causes any problems if, for example, 4-byte allocations
are 4-byte-aligned.
--disable-tcache
Disable thread-specific caches for small objects. Objects are cached and
released in bulk, thus reducing the total number of mutex operations. See
the "opt.tcache" option for usage details.
--enable-swap
Enable mmap()ed swap file support. When this feature is built in, it is
possible to specify one or more files that act as backing store. This
effectively allows for per application swap files.
--enable-mremap
Enable huge realloc() via mremap(2). mremap() is disabled by default
because the flavor used is specific to Linux, which has a quirk in its
virtual memory allocation algorithm that causes semi-permanent VM map holes
under normal jemalloc operation.
--disable-munmap
Disable virtual memory deallocation via munmap(2); instead keep track of
the virtual memory for later use. munmap() is disabled by default (i.e.
--disable-munmap is implied) on Linux, which has a quirk in its virtual
memory allocation algorithm that causes semi-permanent VM map holes under
normal jemalloc operation.
--enable-dss
Enable support for page allocation/deallocation via sbrk(2), in addition to
mmap(2).
--enable-fill
Enable support for junk/zero filling of memory. See the "opt.junk"/
"opt.zero" option documentation for usage details.
--disable-fill
Disable support for junk/zero filling of memory, quarantine, and redzones.
See the "opt.junk", "opt.zero", "opt.quarantine", and "opt.redzone" option
documentation for usage details.
--disable-valgrind
Disable support for Valgrind.
--disable-experimental
Disable support for the experimental API (*allocm()).
--enable-utrace
Enable utrace(2)-based allocation tracing. This feature is not broadly
portable (FreeBSD has it, but Linux and OS X do not).
--enable-xmalloc
Enable support for optional immediate termination due to out-of-memory
errors, as is commonly implemented by "xmalloc" wrapper function for malloc.
See the "opt.xmalloc" option documentation for usage details.
--enable-sysv
Enable support for System V semantics, wherein malloc(0) returns NULL
rather than a minimal allocation. See the "opt.sysv" option documentation
for usage details.
--enable-dynamic-page-shift
Under most conditions, the system page size never changes (usually 4KiB or
8KiB, depending on architecture and configuration), and unless this option
is enabled, jemalloc assumes that page size can safely be determined during
configuration and hard-coded. Enabling dynamic page size determination has
a measurable impact on performance, since the compiler is forced to load
the page size from memory rather than embedding immediate values.
--disable-lazy-lock
Disable code that wraps pthread_create() to detect when an application
--enable-lazy-lock
Enable code that wraps pthread_create() to detect when an application
switches from single-threaded to multi-threaded mode, so that it can avoid
mutex locking/unlocking operations while in single-threaded mode. In
practice, this feature usually has little impact on performance unless
@@ -181,11 +193,24 @@ PATH="?"
=== Advanced compilation =======================================================
To build only parts of jemalloc, use the following targets:
build_lib_shared
build_lib_static
build_lib
build_doc_html
build_doc_man
build_doc
To install only parts of jemalloc, use the following targets:
install_bin
install_include
install_lib_shared
install_lib_static
install_lib
install_doc_html
install_doc_man
install_doc
To clean up build results to varying degrees, use the following make targets:
@@ -248,10 +273,6 @@ directory, issue configuration and build commands:
The manual page is generated in both html and roff formats. Any web browser
can be used to view the html manual. The roff manual page can be formatted
prior to installation via any of the following commands:
prior to installation via the following command:
nroff -man -t doc/jemalloc.3
groff -man -t -Tps doc/jemalloc.3 | ps2pdf - doc/jemalloc.3.pdf
(cd doc; groff -man -man-ext -t -Thtml jemalloc.3 > jemalloc.3.html)
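The configure switches documented earlier in this INSTALL diff are surfaced at run time as read-only "config.*" mallctl entries (see the jemalloc.3 changes further below). As a minimal sketch, assuming a default build with no symbol prefix (i.e. --with-jemalloc-prefix was not used), a program can check which of those options the installed library was built with:

    #include <stdio.h>
    #include <stdbool.h>
    #include <jemalloc/jemalloc.h>

    int main(void) {
        /* Each entry mirrors a configure flag: --enable-fill -> "config.fill", etc. */
        const char *names[] = {"config.fill", "config.mremap", "config.munmap",
                               "config.tcache", "config.utrace", "config.valgrind"};
        for (size_t i = 0; i < sizeof(names) / sizeof(names[0]); i++) {
            bool on;
            size_t sz = sizeof(on);
            /* mallctl() copies the boolean into 'on'; a non-zero return means the
               entry is unknown to this build or the read failed. */
            if (mallctl(names[i], &on, &sz, NULL, 0) == 0)
                printf("%s: %s\n", names[i], on ? "true" : "false");
        }
        return 0;
    }

Build it against the installed library with something like "cc check.c -ljemalloc"; if a prefix was configured, the entry point is the prefixed name (e.g. je_mallctl) instead.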

deps/jemalloc/Makefile.in vendored
View File

@ -17,130 +17,185 @@ INCLUDEDIR := $(DESTDIR)@INCLUDEDIR@
LIBDIR := $(DESTDIR)@LIBDIR@
DATADIR := $(DESTDIR)@DATADIR@
MANDIR := $(DESTDIR)@MANDIR@
srcroot := @srcroot@
objroot := @objroot@
abs_srcroot := @abs_srcroot@
abs_objroot := @abs_objroot@
# Build parameters.
CPPFLAGS := @CPPFLAGS@ -I@srcroot@include -I@objroot@include
CPPFLAGS := @CPPFLAGS@ -I$(srcroot)include -I$(objroot)include
CFLAGS := @CFLAGS@
ifeq (macho, @abi@)
CFLAGS += -dynamic
endif
LDFLAGS := @LDFLAGS@
EXTRA_LDFLAGS := @EXTRA_LDFLAGS@
LIBS := @LIBS@
RPATH_EXTRA := @RPATH_EXTRA@
ifeq (macho, @abi@)
SO := dylib
WL_SONAME := dylib_install_name
SO := @so@
IMPORTLIB := @importlib@
O := @o@
A := @a@
EXE := @exe@
LIBPREFIX := @libprefix@
REV := @rev@
install_suffix := @install_suffix@
ABI := @abi@
XSLTPROC := @XSLTPROC@
AUTOCONF := @AUTOCONF@
_RPATH = @RPATH@
RPATH = $(if $(1),$(call _RPATH,$(1)))
cfghdrs_in := @cfghdrs_in@
cfghdrs_out := @cfghdrs_out@
cfgoutputs_in := @cfgoutputs_in@
cfgoutputs_out := @cfgoutputs_out@
enable_autogen := @enable_autogen@
enable_experimental := @enable_experimental@
DSO_LDFLAGS = @DSO_LDFLAGS@
SOREV = @SOREV@
PIC_CFLAGS = @PIC_CFLAGS@
CTARGET = @CTARGET@
LDTARGET = @LDTARGET@
MKLIB = @MKLIB@
CC_MM = @CC_MM@
ifeq (macho, $(ABI))
TEST_LIBRARY_PATH := DYLD_FALLBACK_LIBRARY_PATH="$(objroot)lib"
else
SO := so
WL_SONAME := soname
endif
REV := 1
ifeq (macho, @abi@)
TEST_LIBRARY_PATH := DYLD_FALLBACK_LIBRARY_PATH=@objroot@lib
ifeq (pecoff, $(ABI))
TEST_LIBRARY_PATH := PATH="$(PATH):$(objroot)lib"
else
TEST_LIBRARY_PATH :=
endif
endif
LIBJEMALLOC := $(LIBPREFIX)jemalloc$(install_suffix)
# Lists of files.
BINS := @srcroot@bin/pprof
CHDRS := @objroot@include/jemalloc/jemalloc@install_suffix@.h \
@objroot@include/jemalloc/jemalloc_defs@install_suffix@.h
CSRCS := @srcroot@src/jemalloc.c @srcroot@src/arena.c @srcroot@src/atomic.c \
@srcroot@src/base.c @srcroot@src/bitmap.c @srcroot@src/chunk.c \
@srcroot@src/chunk_dss.c @srcroot@src/chunk_mmap.c \
@srcroot@src/chunk_swap.c @srcroot@src/ckh.c @srcroot@src/ctl.c \
@srcroot@src/extent.c @srcroot@src/hash.c @srcroot@src/huge.c \
@srcroot@src/mb.c @srcroot@src/mutex.c @srcroot@src/prof.c \
@srcroot@src/rtree.c @srcroot@src/stats.c @srcroot@src/tcache.c
ifeq (macho, @abi@)
CSRCS += @srcroot@src/zone.c
BINS := $(srcroot)bin/pprof $(objroot)bin/jemalloc.sh
CHDRS := $(objroot)include/jemalloc/jemalloc$(install_suffix).h \
$(objroot)include/jemalloc/jemalloc_defs$(install_suffix).h
CSRCS := $(srcroot)src/jemalloc.c $(srcroot)src/arena.c $(srcroot)src/atomic.c \
$(srcroot)src/base.c $(srcroot)src/bitmap.c $(srcroot)src/chunk.c \
$(srcroot)src/chunk_dss.c $(srcroot)src/chunk_mmap.c \
$(srcroot)src/ckh.c $(srcroot)src/ctl.c $(srcroot)src/extent.c \
$(srcroot)src/hash.c $(srcroot)src/huge.c $(srcroot)src/mb.c \
$(srcroot)src/mutex.c $(srcroot)src/prof.c $(srcroot)src/quarantine.c \
$(srcroot)src/rtree.c $(srcroot)src/stats.c $(srcroot)src/tcache.c \
$(srcroot)src/util.c $(srcroot)src/tsd.c
ifeq (macho, $(ABI))
CSRCS += $(srcroot)src/zone.c
endif
STATIC_LIBS := @objroot@lib/libjemalloc@install_suffix@.a
DSOS := @objroot@lib/libjemalloc@install_suffix@.$(SO).$(REV) \
@objroot@lib/libjemalloc@install_suffix@.$(SO) \
@objroot@lib/libjemalloc@install_suffix@_pic.a
MAN3 := @objroot@doc/jemalloc@install_suffix@.3
DOCS_XML := @objroot@doc/jemalloc@install_suffix@.xml
DOCS_HTML := $(DOCS_XML:@objroot@%.xml=@srcroot@%.html)
DOCS_MAN3 := $(DOCS_XML:@objroot@%.xml=@srcroot@%.3)
ifeq ($(IMPORTLIB),$(SO))
STATIC_LIBS := $(objroot)lib/$(LIBJEMALLOC).$(A)
endif
ifdef PIC_CFLAGS
STATIC_LIBS += $(objroot)lib/$(LIBJEMALLOC)_pic.$(A)
else
STATIC_LIBS += $(objroot)lib/$(LIBJEMALLOC)_s.$(A)
endif
DSOS := $(objroot)lib/$(LIBJEMALLOC).$(SOREV)
ifneq ($(SOREV),$(SO))
DSOS += $(objroot)lib/$(LIBJEMALLOC).$(SO)
endif
MAN3 := $(objroot)doc/jemalloc$(install_suffix).3
DOCS_XML := $(objroot)doc/jemalloc$(install_suffix).xml
DOCS_HTML := $(DOCS_XML:$(objroot)%.xml=$(srcroot)%.html)
DOCS_MAN3 := $(DOCS_XML:$(objroot)%.xml=$(srcroot)%.3)
DOCS := $(DOCS_HTML) $(DOCS_MAN3)
CTESTS := @srcroot@test/allocated.c @srcroot@test/allocm.c \
@srcroot@test/bitmap.c @srcroot@test/mremap.c \
@srcroot@test/posix_memalign.c @srcroot@test/rallocm.c \
@srcroot@test/thread_arena.c
CTESTS := $(srcroot)test/aligned_alloc.c $(srcroot)test/allocated.c \
$(srcroot)test/bitmap.c $(srcroot)test/mremap.c \
$(srcroot)test/posix_memalign.c $(srcroot)test/thread_arena.c \
$(srcroot)test/thread_tcache_enabled.c
ifeq ($(enable_experimental), 1)
CTESTS += $(srcroot)test/allocm.c $(srcroot)test/rallocm.c
endif
COBJS := $(CSRCS:$(srcroot)%.c=$(objroot)%.$(O))
CPICOBJS := $(CSRCS:$(srcroot)%.c=$(objroot)%.pic.$(O))
CTESTOBJS := $(CTESTS:$(srcroot)%.c=$(objroot)%.$(O))
.PHONY: all dist doc_html doc_man doc
.PHONY: install_bin install_include install_lib
.PHONY: install_html install_man install_doc install
.PHONY: tests check clean distclean relclean
.SECONDARY : $(CTESTS:@srcroot@%.c=@objroot@%.o)
.SECONDARY : $(CTESTOBJS)
# Default target.
all: $(DSOS) $(STATIC_LIBS)
all: build
dist: doc
dist: build_doc
@srcroot@doc/%.html : @objroot@doc/%.xml @srcroot@doc/stylesheet.xsl @objroot@doc/html.xsl
@XSLTPROC@ -o $@ @objroot@doc/html.xsl $<
$(srcroot)doc/%.html : $(objroot)doc/%.xml $(srcroot)doc/stylesheet.xsl $(objroot)doc/html.xsl
$(XSLTPROC) -o $@ $(objroot)doc/html.xsl $<
@srcroot@doc/%.3 : @objroot@doc/%.xml @srcroot@doc/stylesheet.xsl @objroot@doc/manpages.xsl
@XSLTPROC@ -o $@ @objroot@doc/manpages.xsl $<
$(srcroot)doc/%.3 : $(objroot)doc/%.xml $(srcroot)doc/stylesheet.xsl $(objroot)doc/manpages.xsl
$(XSLTPROC) -o $@ $(objroot)doc/manpages.xsl $<
doc_html: $(DOCS_HTML)
doc_man: $(DOCS_MAN3)
doc: $(DOCS)
build_doc_html: $(DOCS_HTML)
build_doc_man: $(DOCS_MAN3)
build_doc: $(DOCS)
#
# Include generated dependency files.
#
-include $(CSRCS:@srcroot@%.c=@objroot@%.d)
-include $(CSRCS:@srcroot@%.c=@objroot@%.pic.d)
-include $(CTESTS:@srcroot@%.c=@objroot@%.d)
ifdef CC_MM
-include $(COBJS:%.$(O)=%.d)
-include $(CPICOBJS:%.$(O)=%.d)
-include $(CTESTOBJS:%.$(O)=%.d)
endif
@objroot@src/%.o: @srcroot@src/%.c
$(COBJS): $(objroot)src/%.$(O): $(srcroot)src/%.c
$(CPICOBJS): $(objroot)src/%.pic.$(O): $(srcroot)src/%.c
$(CPICOBJS): CFLAGS += $(PIC_CFLAGS)
$(CTESTOBJS): $(objroot)test/%.$(O): $(srcroot)test/%.c
$(CTESTOBJS): CPPFLAGS += -I$(objroot)test
ifneq ($(IMPORTLIB),$(SO))
$(COBJS): CPPFLAGS += -DDLLEXPORT
endif
ifndef CC_MM
# Dependencies
HEADER_DIRS = $(srcroot)include/jemalloc/internal \
$(objroot)include/jemalloc $(objroot)include/jemalloc/internal
HEADERS = $(wildcard $(foreach dir,$(HEADER_DIRS),$(dir)/*.h))
$(COBJS) $(CPICOBJS) $(CTESTOBJS): $(HEADERS)
$(CTESTOBJS): $(objroot)test/jemalloc_test.h
endif
$(COBJS) $(CPICOBJS) $(CTESTOBJS): %.$(O):
@mkdir -p $(@D)
$(CC) $(CFLAGS) -c $(CPPFLAGS) -o $@ $<
@$(SHELL) -ec "$(CC) -MM $(CPPFLAGS) $< | sed \"s/\($(subst /,\/,$(notdir $(basename $@)))\)\.o\([ :]*\)/$(subst /,\/,$(strip $(dir $@)))\1.o \2/g\" > $(@:%.o=%.d)"
$(CC) $(CFLAGS) -c $(CPPFLAGS) $(CTARGET) $<
ifdef CC_MM
@$(CC) -MM $(CPPFLAGS) -MT $@ -o $(@:%.$(O)=%.d) $<
endif
@objroot@src/%.pic.o: @srcroot@src/%.c
@mkdir -p $(@D)
$(CC) $(CFLAGS) -fPIC -DPIC -c $(CPPFLAGS) -o $@ $<
@$(SHELL) -ec "$(CC) -MM $(CPPFLAGS) $< | sed \"s/\($(subst /,\/,$(notdir $(basename $(basename $@))))\)\.o\([ :]*\)/$(subst /,\/,$(strip $(dir $@)))\1.pic.o \2/g\" > $(@:%.o=%.d)"
%.$(SO) : %.$(SO).$(REV)
ifneq ($(SOREV),$(SO))
%.$(SO) : %.$(SOREV)
@mkdir -p $(@D)
ln -sf $(<F) $@
@objroot@lib/libjemalloc@install_suffix@.$(SO).$(REV) : $(CSRCS:@srcroot@%.c=@objroot@%.pic.o)
@mkdir -p $(@D)
$(CC) -shared -Wl,-$(WL_SONAME),$(@F) $(RPATH_EXTRA:%=@RPATH@%) -o $@ $+ $(LDFLAGS) $(LIBS)
@objroot@lib/libjemalloc@install_suffix@_pic.a : $(CSRCS:@srcroot@%.c=@objroot@%.pic.o)
@mkdir -p $(@D)
ar crus $@ $+
@objroot@lib/libjemalloc@install_suffix@.a : $(CSRCS:@srcroot@%.c=@objroot@%.o)
@mkdir -p $(@D)
ar crus $@ $+
@objroot@test/%.o: @srcroot@test/%.c
@mkdir -p $(@D)
$(CC) $(CFLAGS) -c $(CPPFLAGS) -I@objroot@test -o $@ $<
@$(SHELL) -ec "$(CC) -MM $(CPPFLAGS) -I@objroot@test $< | sed \"s/\($(subst /,\/,$(notdir $(basename $@)))\)\.o\([ :]*\)/$(subst /,\/,$(strip $(dir $@)))\1.o \2/g\" > $(@:%.o=%.d)"
# Automatic dependency generation misses #include "*.c".
@objroot@test/bitmap.o : @objroot@src/bitmap.o
@objroot@test/%: @objroot@test/%.o \
@objroot@lib/libjemalloc@install_suffix@.$(SO)
@mkdir -p $(@D)
ifneq (@RPATH@, )
$(CC) -o $@ $< @RPATH@@objroot@lib -L@objroot@lib -ljemalloc@install_suffix@ -lpthread
else
$(CC) -o $@ $< -L@objroot@lib -ljemalloc@install_suffix@ -lpthread
endif
$(objroot)lib/$(LIBJEMALLOC).$(SOREV) : $(if $(PIC_CFLAGS),$(CPICOBJS),$(COBJS))
@mkdir -p $(@D)
$(CC) $(DSO_LDFLAGS) $(call RPATH,$(RPATH_EXTRA)) $(LDTARGET) $+ $(LDFLAGS) $(LIBS) $(EXTRA_LDFLAGS)
$(objroot)lib/$(LIBJEMALLOC)_pic.$(A) : $(CPICOBJS)
$(objroot)lib/$(LIBJEMALLOC).$(A) : $(COBJS)
$(objroot)lib/$(LIBJEMALLOC)_s.$(A) : $(COBJS)
$(STATIC_LIBS):
@mkdir -p $(@D)
$(MKLIB) $+
$(objroot)test/bitmap$(EXE): $(objroot)src/bitmap.$(O)
$(objroot)test/%$(EXE): $(objroot)test/%.$(O) $(objroot)src/util.$(O) $(DSOS)
@mkdir -p $(@D)
$(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(filter -lpthread,$(LIBS)) $(EXTRA_LDFLAGS)
build_lib_shared: $(DSOS)
build_lib_static: $(STATIC_LIBS)
build: build_lib_shared build_lib_static
install_bin:
install -d $(BINDIR)
@for b in $(BINS); do \
@ -155,46 +210,55 @@ install_include:
install -m 644 $$h $(INCLUDEDIR)/jemalloc; \
done
install_lib: $(DSOS) $(STATIC_LIBS)
install_lib_shared: $(DSOS)
install -d $(LIBDIR)
install -m 755 @objroot@lib/libjemalloc@install_suffix@.$(SO).$(REV) $(LIBDIR)
ln -sf libjemalloc@install_suffix@.$(SO).$(REV) $(LIBDIR)/libjemalloc@install_suffix@.$(SO)
install -m 755 @objroot@lib/libjemalloc@install_suffix@_pic.a $(LIBDIR)
install -m 755 @objroot@lib/libjemalloc@install_suffix@.a $(LIBDIR)
install -m 755 $(objroot)lib/$(LIBJEMALLOC).$(SOREV) $(LIBDIR)
ifneq ($(SOREV),$(SO))
ln -sf $(LIBJEMALLOC).$(SOREV) $(LIBDIR)/$(LIBJEMALLOC).$(SO)
endif
install_html:
install -d $(DATADIR)/doc/jemalloc@install_suffix@
@for d in $(DOCS_HTML); do \
echo "install -m 644 $$d $(DATADIR)/doc/jemalloc@install_suffix@"; \
install -m 644 $$d $(DATADIR)/doc/jemalloc@install_suffix@; \
install_lib_static: $(STATIC_LIBS)
install -d $(LIBDIR)
@for l in $(STATIC_LIBS); do \
echo "install -m 755 $$l $(LIBDIR)"; \
install -m 755 $$l $(LIBDIR); \
done
install_man:
install_lib: install_lib_shared install_lib_static
install_doc_html:
install -d $(DATADIR)/doc/jemalloc$(install_suffix)
@for d in $(DOCS_HTML); do \
echo "install -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix)"; \
install -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix); \
done
install_doc_man:
install -d $(MANDIR)/man3
@for d in $(DOCS_MAN3); do \
echo "install -m 644 $$d $(MANDIR)/man3"; \
install -m 644 $$d $(MANDIR)/man3; \
done
install_doc: install_html install_man
install_doc: install_doc_html install_doc_man
install: install_bin install_include install_lib install_doc
tests: $(CTESTS:@srcroot@%.c=@objroot@%)
tests: $(CTESTS:$(srcroot)%.c=$(objroot)%$(EXE))
check: tests
@mkdir -p @objroot@test
@mkdir -p $(objroot)test
@$(SHELL) -c 'total=0; \
failures=0; \
echo "========================================="; \
for t in $(CTESTS:@srcroot@%.c=@objroot@%); do \
for t in $(CTESTS:$(srcroot)%.c=$(objroot)%); do \
total=`expr $$total + 1`; \
/bin/echo -n "$${t} ... "; \
$(TEST_LIBRARY_PATH) $${t} @abs_srcroot@ @abs_objroot@ \
> @objroot@$${t}.out 2>&1; \
if test -e "@srcroot@$${t}.exp"; then \
diff -u @srcroot@$${t}.exp \
@objroot@$${t}.out >/dev/null 2>&1; \
$(TEST_LIBRARY_PATH) $${t}$(EXE) $(abs_srcroot) \
$(abs_objroot) > $(objroot)$${t}.out 2>&1; \
if test -e "$(srcroot)$${t}.exp"; then \
diff -w -u $(srcroot)$${t}.exp \
$(objroot)$${t}.out >/dev/null 2>&1; \
fail=$$?; \
if test "$${fail}" -eq "1" ; then \
failures=`expr $${failures} + 1`; \
@ -211,49 +275,49 @@ check: tests
echo "Failures: $${failures}/$${total}"'
clean:
rm -f $(CSRCS:@srcroot@%.c=@objroot@%.o)
rm -f $(CSRCS:@srcroot@%.c=@objroot@%.pic.o)
rm -f $(CSRCS:@srcroot@%.c=@objroot@%.d)
rm -f $(CSRCS:@srcroot@%.c=@objroot@%.pic.d)
rm -f $(CTESTS:@srcroot@%.c=@objroot@%)
rm -f $(CTESTS:@srcroot@%.c=@objroot@%.o)
rm -f $(CTESTS:@srcroot@%.c=@objroot@%.d)
rm -f $(CTESTS:@srcroot@%.c=@objroot@%.out)
rm -f $(COBJS)
rm -f $(CPICOBJS)
rm -f $(COBJS:%.$(O)=%.d)
rm -f $(CPICOBJS:%.$(O)=%.d)
rm -f $(CTESTOBJS:%.$(O)=%$(EXE))
rm -f $(CTESTOBJS)
rm -f $(CTESTOBJS:%.$(O)=%.d)
rm -f $(CTESTOBJS:%.$(O)=%.out)
rm -f $(DSOS) $(STATIC_LIBS)
distclean: clean
rm -rf @objroot@autom4te.cache
rm -f @objroot@config.log
rm -f @objroot@config.status
rm -f @objroot@config.stamp
rm -f @cfghdrs_out@
rm -f @cfgoutputs_out@
rm -rf $(objroot)autom4te.cache
rm -f $(objroot)config.log
rm -f $(objroot)config.status
rm -f $(objroot)config.stamp
rm -f $(cfghdrs_out)
rm -f $(cfgoutputs_out)
relclean: distclean
rm -f @objroot@configure
rm -f @srcroot@VERSION
rm -f $(objroot)configure
rm -f $(srcroot)VERSION
rm -f $(DOCS_HTML)
rm -f $(DOCS_MAN3)
#===============================================================================
# Re-configuration rules.
ifeq (@enable_autogen@, 1)
@srcroot@configure : @srcroot@configure.ac
cd ./@srcroot@ && @AUTOCONF@
ifeq ($(enable_autogen), 1)
$(srcroot)configure : $(srcroot)configure.ac
cd ./$(srcroot) && $(AUTOCONF)
@objroot@config.status : @srcroot@configure
./@objroot@config.status --recheck
$(objroot)config.status : $(srcroot)configure
./$(objroot)config.status --recheck
@srcroot@config.stamp.in : @srcroot@configure.ac
echo stamp > @srcroot@config.stamp.in
$(srcroot)config.stamp.in : $(srcroot)configure.ac
echo stamp > $(srcroot)config.stamp.in
@objroot@config.stamp : @cfgoutputs_in@ @cfghdrs_in@ @srcroot@configure
./@objroot@config.status
$(objroot)config.stamp : $(cfgoutputs_in) $(cfghdrs_in) $(srcroot)configure
./$(objroot)config.status
@touch $@
# There must be some action in order for make to re-read Makefile when it is
# out of date.
@cfgoutputs_out@ @cfghdrs_out@ : @objroot@config.stamp
$(cfgoutputs_out) $(cfghdrs_out) : $(objroot)config.stamp
@true
endif

12
deps/jemalloc/README vendored
View File

@ -1,10 +1,10 @@
jemalloc is a general-purpose scalable concurrent malloc(3) implementation.
This distribution is a stand-alone "portable" implementation that currently
targets Linux and Apple OS X. jemalloc is included as the default allocator in
the FreeBSD and NetBSD operating systems, and it is used by the Mozilla Firefox
web browser on Microsoft Windows-related platforms. Depending on your needs,
one of the other divergent versions may suit your needs better than this
distribution.
This distribution is a "portable" implementation that currently targets
FreeBSD, Linux, Apple OS X, and MinGW. jemalloc is included as the default
allocator in the FreeBSD and NetBSD operating systems, and it is used by the
Mozilla Firefox web browser on Microsoft Windows-related platforms. Depending
on your needs, one of the other divergent versions may suit your needs better
than this distribution.
The COPYING file contains copyright and licensing information.

deps/jemalloc/VERSION vendored
View File

@ -1 +1 @@
2.2.5-0-gfc1bb70e5f0d9a58b39efa39cc549b5af5104760
3.0.0-0-gfc9b1dbf69f59d7ecfc4ac68da9847e017e1d046

9
deps/jemalloc/bin/jemalloc.sh.in vendored Normal file
View File

@ -0,0 +1,9 @@
#!/bin/sh
prefix=@prefix@
exec_prefix=@exec_prefix@
libdir=@libdir@
@LD_PRELOAD_VAR@=${libdir}/libjemalloc.@SOREV@
export @LD_PRELOAD_VAR@
exec "$@"

File diff suppressed because it is too large

File diff suppressed because it is too large

deps/jemalloc/config.sub vendored
View File

@ -1,9 +1,10 @@
#! /bin/sh
# Configuration validation subroutine script.
# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
# 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
# 2011, 2012 Free Software Foundation, Inc.
timestamp='2004-02-23'
timestamp='2012-02-10'
# This file is (in principle) common to ALL GNU software.
# The presence of a machine in this file suggests that SOME GNU software
@ -20,23 +21,25 @@ timestamp='2004-02-23'
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330,
# Boston, MA 02111-1307, USA.
# along with this program; if not, see <http://www.gnu.org/licenses/>.
#
# As a special exception to the GNU General Public License, if you
# distribute this file as part of a program that contains a
# configuration script generated by Autoconf, you may include it under
# the same distribution terms that you use for the rest of that program.
# Please send patches to <config-patches@gnu.org>. Submit a context
# diff and a properly formatted ChangeLog entry.
# diff and a properly formatted GNU ChangeLog entry.
#
# Configuration subroutine to validate and canonicalize a configuration type.
# Supply the specified configuration type as an argument.
# If it is invalid, we print an error message on stderr and exit with code 1.
# Otherwise, we print the canonical config type on stdout and succeed.
# You can get the latest version of this script from:
# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD
# This file is supposed to be the same for all GNU packages
# and recognize all the CPU types, system types and aliases
# that are meaningful with *any* GNU software.
@ -70,7 +73,8 @@ Report bugs and patches to <config-patches@gnu.org>."
version="\
GNU config.sub ($timestamp)
Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001
Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
@ -83,11 +87,11 @@ Try \`$me --help' for more information."
while test $# -gt 0 ; do
case $1 in
--time-stamp | --time* | -t )
echo "$timestamp" ; exit 0 ;;
echo "$timestamp" ; exit ;;
--version | -v )
echo "$version" ; exit 0 ;;
echo "$version" ; exit ;;
--help | --h* | -h )
echo "$usage"; exit 0 ;;
echo "$usage"; exit ;;
-- ) # Stop option processing
shift; break ;;
- ) # Use stdin as input.
@ -99,7 +103,7 @@ while test $# -gt 0 ; do
*local*)
# First pass through any local machine types.
echo $1
exit 0;;
exit ;;
* )
break ;;
@ -118,11 +122,18 @@ esac
# Here we must recognize all the valid KERNEL-OS combinations.
maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'`
case $maybe_os in
nto-qnx* | linux-gnu* | linux-dietlibc | linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | \
kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* | storm-chaos* | os2-emx* | rtmk-nova*)
nto-qnx* | linux-gnu* | linux-android* | linux-dietlibc | linux-newlib* | \
linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \
knetbsd*-gnu* | netbsd*-gnu* | \
kopensolaris*-gnu* | \
storm-chaos* | os2-emx* | rtmk-nova*)
os=-$maybe_os
basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`
;;
android-linux)
os=-linux-android
basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`-unknown
;;
*)
basic_machine=`echo $1 | sed 's/-[^-]*$//'`
if [ $basic_machine != $1 ]
@ -145,10 +156,13 @@ case $os in
-convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\
-c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \
-harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \
-apple | -axis)
-apple | -axis | -knuth | -cray | -microblaze)
os=
basic_machine=$1
;;
-bluegene*)
os=-cnk
;;
-sim | -cisco | -oki | -wec | -winbond)
os=
basic_machine=$1
@ -170,6 +184,10 @@ case $os in
-hiux*)
os=-hiuxwe2
;;
-sco6)
os=-sco5v6
basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
;;
-sco5)
os=-sco3.2v5
basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
@ -186,6 +204,10 @@ case $os in
# Don't forget version if it is 3.2v4 or newer.
basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
;;
-sco5v6*)
# Don't forget version if it is 3.2v4 or newer.
basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
;;
-sco*)
os=-sco3.2v2
basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
@ -227,25 +249,36 @@ case $basic_machine in
# Some are omitted here because they have special meanings below.
1750a | 580 \
| a29k \
| aarch64 | aarch64_be \
| alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \
| alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \
| am33_2.0 \
| arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr \
| arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr | avr32 \
| be32 | be64 \
| bfin \
| c4x | clipper \
| d10v | d30v | dlx | dsp16xx \
| fr30 | frv \
| epiphany \
| fido | fr30 | frv \
| h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \
| hexagon \
| i370 | i860 | i960 | ia64 \
| ip2k | iq2000 \
| m32r | m68000 | m68k | m88k | mcore \
| le32 | le64 \
| lm32 \
| m32c | m32r | m32rle | m68000 | m68k | m88k \
| maxq | mb | microblaze | mcore | mep | metag \
| mips | mipsbe | mipseb | mipsel | mipsle \
| mips16 \
| mips64 | mips64el \
| mips64vr | mips64vrel \
| mips64octeon | mips64octeonel \
| mips64orion | mips64orionel \
| mips64r5900 | mips64r5900el \
| mips64vr | mips64vrel \
| mips64vr4100 | mips64vr4100el \
| mips64vr4300 | mips64vr4300el \
| mips64vr5000 | mips64vr5000el \
| mips64vr5900 | mips64vr5900el \
| mipsisa32 | mipsisa32el \
| mipsisa32r2 | mipsisa32r2el \
| mipsisa64 | mipsisa64el \
@ -254,30 +287,65 @@ case $basic_machine in
| mipsisa64sr71k | mipsisa64sr71kel \
| mipstx39 | mipstx39el \
| mn10200 | mn10300 \
| moxie \
| mt \
| msp430 \
| nds32 | nds32le | nds32be \
| nios | nios2 \
| ns16k | ns32k \
| openrisc | or32 \
| open8 \
| or32 \
| pdp10 | pdp11 | pj | pjl \
| powerpc | powerpc64 | powerpc64le | powerpcle | ppcbe \
| powerpc | powerpc64 | powerpc64le | powerpcle \
| pyramid \
| sh | sh[1234] | sh[23]e | sh[34]eb | shbe | shle | sh[1234]le | sh3ele \
| rl78 | rx \
| score \
| sh | sh[1234] | sh[24]a | sh[24]aeb | sh[23]e | sh[34]eb | sheb | shbe | shle | sh[1234]le | sh3ele \
| sh64 | sh64le \
| sparc | sparc64 | sparc86x | sparclet | sparclite | sparcv9 | sparcv9b \
| strongarm \
| tahoe | thumb | tic4x | tic80 | tron \
| v850 | v850e \
| sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet | sparclite \
| sparcv8 | sparcv9 | sparcv9b | sparcv9v \
| spu \
| tahoe | tic4x | tic54x | tic55x | tic6x | tic80 | tron \
| ubicom32 \
| v850 | v850e | v850e1 | v850e2 | v850es | v850e2v3 \
| we32k \
| x86 | xscale | xstormy16 | xtensa \
| z8k)
| x86 | xc16x | xstormy16 | xtensa \
| z8k | z80)
basic_machine=$basic_machine-unknown
;;
m6811 | m68hc11 | m6812 | m68hc12)
# Motorola 68HC11/12.
c54x)
basic_machine=tic54x-unknown
;;
c55x)
basic_machine=tic55x-unknown
;;
c6x)
basic_machine=tic6x-unknown
;;
m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | picochip)
basic_machine=$basic_machine-unknown
os=-none
;;
m88110 | m680[12346]0 | m683?2 | m68360 | m5200 | v70 | w65 | z8k)
;;
ms1)
basic_machine=mt-unknown
;;
strongarm | thumb | xscale)
basic_machine=arm-unknown
;;
xgate)
basic_machine=$basic_machine-unknown
os=-none
;;
xscaleeb)
basic_machine=armeb-unknown
;;
xscaleel)
basic_machine=armel-unknown
;;
# We use `pc' rather than `unknown'
# because (1) that's what they normally are, and
@ -293,32 +361,40 @@ case $basic_machine in
# Recognize the basic CPU types with company name.
580-* \
| a29k-* \
| aarch64-* | aarch64_be-* \
| alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \
| alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \
| alphapca5[67]-* | alpha64pca5[67]-* | arc-* \
| arm-* | armbe-* | armle-* | armeb-* | armv*-* \
| avr-* \
| bs2000-* \
| c[123]* | c30-* | [cjt]90-* | c4x-* | c54x-* | c55x-* | c6x-* \
| clipper-* | cydra-* \
| avr-* | avr32-* \
| be32-* | be64-* \
| bfin-* | bs2000-* \
| c[123]* | c30-* | [cjt]90-* | c4x-* \
| clipper-* | craynv-* | cydra-* \
| d10v-* | d30v-* | dlx-* \
| elxsi-* \
| f30[01]-* | f700-* | fr30-* | frv-* | fx80-* \
| f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \
| h8300-* | h8500-* \
| hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \
| hexagon-* \
| i*86-* | i860-* | i960-* | ia64-* \
| ip2k-* | iq2000-* \
| m32r-* \
| le32-* | le64-* \
| lm32-* \
| m32c-* | m32r-* | m32rle-* \
| m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \
| m88110-* | m88k-* | mcore-* \
| m88110-* | m88k-* | maxq-* | mcore-* | metag-* | microblaze-* \
| mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \
| mips16-* \
| mips64-* | mips64el-* \
| mips64vr-* | mips64vrel-* \
| mips64octeon-* | mips64octeonel-* \
| mips64orion-* | mips64orionel-* \
| mips64r5900-* | mips64r5900el-* \
| mips64vr-* | mips64vrel-* \
| mips64vr4100-* | mips64vr4100el-* \
| mips64vr4300-* | mips64vr4300el-* \
| mips64vr5000-* | mips64vr5000el-* \
| mips64vr5900-* | mips64vr5900el-* \
| mipsisa32-* | mipsisa32el-* \
| mipsisa32r2-* | mipsisa32r2el-* \
| mipsisa64-* | mipsisa64el-* \
@ -326,26 +402,39 @@ case $basic_machine in
| mipsisa64sb1-* | mipsisa64sb1el-* \
| mipsisa64sr71k-* | mipsisa64sr71kel-* \
| mipstx39-* | mipstx39el-* \
| mmix-* \
| mt-* \
| msp430-* \
| none-* | np1-* | nv1-* | ns16k-* | ns32k-* \
| nds32-* | nds32le-* | nds32be-* \
| nios-* | nios2-* \
| none-* | np1-* | ns16k-* | ns32k-* \
| open8-* \
| orion-* \
| pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \
| powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* | ppcbe-* \
| powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* \
| pyramid-* \
| romp-* | rs6000-* \
| sh-* | sh[1234]-* | sh[23]e-* | sh[34]eb-* | shbe-* \
| rl78-* | romp-* | rs6000-* | rx-* \
| sh-* | sh[1234]-* | sh[24]a-* | sh[24]aeb-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \
| shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \
| sparc-* | sparc64-* | sparc86x-* | sparclet-* | sparclite-* \
| sparcv9-* | sparcv9b-* | strongarm-* | sv1-* | sx?-* \
| tahoe-* | thumb-* \
| sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \
| sparclite-* \
| sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | sv1-* | sx?-* \
| tahoe-* \
| tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \
| tile*-* \
| tron-* \
| v850-* | v850e-* | vax-* \
| ubicom32-* \
| v850-* | v850e-* | v850e1-* | v850es-* | v850e2-* | v850e2v3-* \
| vax-* \
| we32k-* \
| x86-* | x86_64-* | xps100-* | xscale-* | xstormy16-* \
| xtensa-* \
| x86-* | x86_64-* | xc16x-* | xps100-* \
| xstormy16-* | xtensa*-* \
| ymp-* \
| z8k-*)
| z8k-* | z80-*)
;;
# Recognize the basic CPU types without company name, with glob match.
xtensa*)
basic_machine=$basic_machine-unknown
;;
# Recognize the various machine names and aliases which stand
# for a CPU type and a company and sometimes even an OS.
@ -409,6 +498,10 @@ case $basic_machine in
basic_machine=m68k-apollo
os=-bsd
;;
aros)
basic_machine=i386-pc
os=-aros
;;
aux)
basic_machine=m68k-apple
os=-aux
@ -417,10 +510,35 @@ case $basic_machine in
basic_machine=ns32k-sequent
os=-dynix
;;
blackfin)
basic_machine=bfin-unknown
os=-linux
;;
blackfin-*)
basic_machine=bfin-`echo $basic_machine | sed 's/^[^-]*-//'`
os=-linux
;;
bluegene*)
basic_machine=powerpc-ibm
os=-cnk
;;
c54x-*)
basic_machine=tic54x-`echo $basic_machine | sed 's/^[^-]*-//'`
;;
c55x-*)
basic_machine=tic55x-`echo $basic_machine | sed 's/^[^-]*-//'`
;;
c6x-*)
basic_machine=tic6x-`echo $basic_machine | sed 's/^[^-]*-//'`
;;
c90)
basic_machine=c90-cray
os=-unicos
;;
cegcc)
basic_machine=arm-unknown
os=-cegcc
;;
convex-c1)
basic_machine=c1-convex
os=-bsd
@ -445,13 +563,20 @@ case $basic_machine in
basic_machine=j90-cray
os=-unicos
;;
cr16c)
basic_machine=cr16c-unknown
craynv)
basic_machine=craynv-cray
os=-unicosmp
;;
cr16 | cr16-*)
basic_machine=cr16-unknown
os=-elf
;;
crds | unos)
basic_machine=m68k-crds
;;
crisv32 | crisv32-* | etraxfs*)
basic_machine=crisv32-axis
;;
cris | cris-* | etrax*)
basic_machine=cris-axis
;;
@ -481,6 +606,14 @@ case $basic_machine in
basic_machine=m88k-motorola
os=-sysv3
;;
dicos)
basic_machine=i686-pc
os=-dicos
;;
djgpp)
basic_machine=i586-pc
os=-msdosdjgpp
;;
dpx20 | dpx20-*)
basic_machine=rs6000-bull
os=-bosx
@ -592,7 +725,6 @@ case $basic_machine in
i370-ibm* | ibm*)
basic_machine=i370-ibm
;;
# I'm not sure what "Sysv32" means. Should this be sysv3.2?
i*86v32)
basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
os=-sysv32
@ -631,6 +763,14 @@ case $basic_machine in
basic_machine=m68k-isi
os=-sysv
;;
m68knommu)
basic_machine=m68k-unknown
os=-linux
;;
m68knommu-*)
basic_machine=m68k-`echo $basic_machine | sed 's/^[^-]*-//'`
os=-linux
;;
m88k-omron*)
basic_machine=m88k-omron
;;
@ -642,10 +782,17 @@ case $basic_machine in
basic_machine=ns32k-utek
os=-sysv
;;
microblaze)
basic_machine=microblaze-xilinx
;;
mingw32)
basic_machine=i386-pc
os=-mingw32
;;
mingw32ce)
basic_machine=arm-unknown
os=-mingw32ce
;;
miniframe)
basic_machine=m68000-convergent
;;
@ -659,10 +806,6 @@ case $basic_machine in
mips3*)
basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`-unknown
;;
mmix*)
basic_machine=mmix-knuth
os=-mmixware
;;
monitor)
basic_machine=m68k-rom68k
os=-coff
@ -675,10 +818,21 @@ case $basic_machine in
basic_machine=i386-pc
os=-msdos
;;
ms1-*)
basic_machine=`echo $basic_machine | sed -e 's/ms1-/mt-/'`
;;
msys)
basic_machine=i386-pc
os=-msys
;;
mvs)
basic_machine=i370-ibm
os=-mvs
;;
nacl)
basic_machine=le32-unknown
os=-nacl
;;
ncr3000)
basic_machine=i486-ncr
os=-sysv4
@ -743,9 +897,11 @@ case $basic_machine in
np1)
basic_machine=np1-gould
;;
nv1)
basic_machine=nv1-cray
os=-unicosmp
neo-tandem)
basic_machine=neo-tandem
;;
nse-tandem)
basic_machine=nse-tandem
;;
nsr-tandem)
basic_machine=nsr-tandem
@ -754,9 +910,8 @@ case $basic_machine in
basic_machine=hppa1.1-oki
os=-proelf
;;
or32 | or32-*)
openrisc | openrisc-*)
basic_machine=or32-unknown
os=-coff
;;
os400)
basic_machine=powerpc-ibm
@ -778,6 +933,14 @@ case $basic_machine in
basic_machine=i860-intel
os=-osf
;;
parisc)
basic_machine=hppa-unknown
os=-linux
;;
parisc-*)
basic_machine=hppa-`echo $basic_machine | sed 's/^[^-]*-//'`
os=-linux
;;
pbd)
basic_machine=sparc-tti
;;
@ -787,6 +950,12 @@ case $basic_machine in
pc532 | pc532-*)
basic_machine=ns32k-pc532
;;
pc98)
basic_machine=i386-pc
;;
pc98-*)
basic_machine=i386-`echo $basic_machine | sed 's/^[^-]*-//'`
;;
pentium | p5 | k5 | k6 | nexgen | viac3)
basic_machine=i586-pc
;;
@ -816,9 +985,10 @@ case $basic_machine in
;;
power) basic_machine=power-ibm
;;
ppc) basic_machine=powerpc-unknown
ppc | ppcbe) basic_machine=powerpc-unknown
;;
ppc-*) basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'`
ppc-* | ppcbe-*)
basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'`
;;
ppcle | powerpclittle | ppc-le | powerpc-little)
basic_machine=powerpcle-unknown
@ -843,6 +1013,10 @@ case $basic_machine in
basic_machine=i586-unknown
os=-pw32
;;
rdos)
basic_machine=i386-pc
os=-rdos
;;
rom68k)
basic_machine=m68k-rom68k
os=-coff
@ -869,6 +1043,10 @@ case $basic_machine in
sb1el)
basic_machine=mipsisa64sb1el-unknown
;;
sde)
basic_machine=mipsisa32-sde
os=-elf
;;
sei)
basic_machine=mips-sei
os=-seiux
@ -880,6 +1058,9 @@ case $basic_machine in
basic_machine=sh-hitachi
os=-hms
;;
sh5el)
basic_machine=sh5le-unknown
;;
sh64)
basic_machine=sh64-unknown
;;
@ -901,6 +1082,9 @@ case $basic_machine in
basic_machine=i860-stratus
os=-sysv4
;;
strongarm-* | thumb-*)
basic_machine=arm-`echo $basic_machine | sed 's/^[^-]*-//'`
;;
sun2)
basic_machine=m68000-sun
;;
@ -957,17 +1141,9 @@ case $basic_machine in
basic_machine=t90-cray
os=-unicos
;;
tic54x | c54x*)
basic_machine=tic54x-unknown
os=-coff
;;
tic55x | c55x*)
basic_machine=tic55x-unknown
os=-coff
;;
tic6x | c6x*)
basic_machine=tic6x-unknown
os=-coff
tile*)
basic_machine=$basic_machine-unknown
os=-linux-gnu
;;
tx39)
basic_machine=mipstx39-unknown
@ -1029,9 +1205,16 @@ case $basic_machine in
basic_machine=hppa1.1-winbond
os=-proelf
;;
xbox)
basic_machine=i686-pc
os=-mingw32
;;
xps | xps100)
basic_machine=xps100-honeywell
;;
xscale-* | xscalee[bl]-*)
basic_machine=`echo $basic_machine | sed 's/^xscale/arm/'`
;;
ymp)
basic_machine=ymp-cray
os=-unicos
@ -1040,6 +1223,10 @@ case $basic_machine in
basic_machine=z8k-unknown
os=-sim
;;
z80-*-coff)
basic_machine=z80-unknown
os=-sim
;;
none)
basic_machine=none-none
os=-none
@ -1059,6 +1246,9 @@ case $basic_machine in
romp)
basic_machine=romp-ibm
;;
mmix)
basic_machine=mmix-knuth
;;
rs6000)
basic_machine=rs6000-ibm
;;
@ -1075,13 +1265,10 @@ case $basic_machine in
we32k)
basic_machine=we32k-att
;;
sh3 | sh4 | sh[34]eb | sh[1234]le | sh[23]ele)
sh[1234] | sh[24]a | sh[24]aeb | sh[34]eb | sh[1234]le | sh[23]ele)
basic_machine=sh-unknown
;;
sh64)
basic_machine=sh64-unknown
;;
sparc | sparcv9 | sparcv9b)
sparc | sparcv8 | sparcv9 | sparcv9b | sparcv9v)
basic_machine=sparc-sun
;;
cydra)
@ -1128,6 +1315,9 @@ case $os in
# First match some system type aliases
# that might get confused with valid system types.
# -solaris* is a basic system type, with this one exception.
-auroraux)
os=-auroraux
;;
-solaris1 | -solaris1.*)
os=`echo $os | sed -e 's|solaris1|sunos4|'`
;;
@ -1148,26 +1338,31 @@ case $os in
# Each alternative MUST END IN A *, to match a version number.
# -sysv* is not here because it comes later, after sysvr4.
-gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \
| -*vms* | -sco* | -esix* | -isc* | -aix* | -sunos | -sunos[34]*\
| -hpux* | -unos* | -osf* | -luna* | -dgux* | -solaris* | -sym* \
| -*vms* | -sco* | -esix* | -isc* | -aix* | -cnk* | -sunos | -sunos[34]*\
| -hpux* | -unos* | -osf* | -luna* | -dgux* | -auroraux* | -solaris* \
| -sym* | -kopensolaris* \
| -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \
| -aos* \
| -aos* | -aros* \
| -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \
| -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \
| -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* | -openbsd* \
| -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* \
| -openbsd* | -solidbsd* \
| -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \
| -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \
| -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \
| -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \
| -chorusos* | -chorusrdb* \
| -cygwin* | -pe* | -psos* | -moss* | -proelf* | -rtems* \
| -mingw32* | -linux-gnu* | -linux-uclibc* | -uxpv* | -beos* | -mpeix* | -udk* \
| -chorusos* | -chorusrdb* | -cegcc* \
| -cygwin* | -msys* | -pe* | -psos* | -moss* | -proelf* | -rtems* \
| -mingw32* | -linux-gnu* | -linux-android* \
| -linux-newlib* | -linux-uclibc* \
| -uxpv* | -beos* | -mpeix* | -udk* \
| -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \
| -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \
| -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \
| -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \
| -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \
| -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly*)
| -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \
| -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es*)
# Remember, each alternative MUST END IN *, to match a version number.
;;
-qnx*)
@ -1185,7 +1380,7 @@ case $os in
os=`echo $os | sed -e 's|nto|nto-qnx|'`
;;
-sim | -es1800* | -hms* | -xray | -os68k* | -none* | -v88r* \
| -windows* | -osx | -abug | -netware* | -os9* | -beos* \
| -windows* | -osx | -abug | -netware* | -os9* | -beos* | -haiku* \
| -macos* | -mpw* | -magic* | -mmixware* | -mon960* | -lnews*)
;;
-mac*)
@ -1294,6 +1489,14 @@ case $os in
-kaos*)
os=-kaos
;;
-zvmoe)
os=-zvmoe
;;
-dicos*)
os=-dicos
;;
-nacl*)
;;
-none)
;;
*)
@ -1316,6 +1519,12 @@ else
# system, and we'll never get to this point.
case $basic_machine in
score-*)
os=-elf
;;
spu-*)
os=-elf
;;
*-acorn)
os=-riscix1.2
;;
@ -1328,6 +1537,15 @@ case $basic_machine in
c4x-* | tic4x-*)
os=-coff
;;
tic54x-*)
os=-coff
;;
tic55x-*)
os=-coff
;;
tic6x-*)
os=-coff
;;
# This must come before the *-dec entry.
pdp10-*)
os=-tops20
@ -1346,13 +1564,13 @@ case $basic_machine in
;;
m68000-sun)
os=-sunos3
# This also exists in the configure program, but was not the
# default.
# os=-sunos4
;;
m68*-cisco)
os=-aout
;;
mep-*)
os=-elf
;;
mips*-cisco)
os=-elf
;;
@ -1371,9 +1589,15 @@ case $basic_machine in
*-be)
os=-beos
;;
*-haiku)
os=-haiku
;;
*-ibm)
os=-aix
;;
*-knuth)
os=-mmixware
;;
*-wec)
os=-proelf
;;
@ -1476,7 +1700,7 @@ case $basic_machine in
-sunos*)
vendor=sun
;;
-aix*)
-cnk*|-aix*)
vendor=ibm
;;
-beos*)
@ -1539,7 +1763,7 @@ case $basic_machine in
esac
echo $basic_machine$os
exit 0
exit
# Local variables:
# eval: (add-hook 'write-file-hooks 'time-stamp)

2458
deps/jemalloc/configure vendored

File diff suppressed because it is too large

File diff suppressed because it is too large

deps/jemalloc/doc/jemalloc.3 vendored
View File

@ -1,13 +1,13 @@
'\" t
.\" Title: JEMALLOC
.\" Author: Jason Evans
.\" Generator: DocBook XSL Stylesheets v1.75.2 <http://docbook.sf.net/>
.\" Date: 11/14/2011
.\" Generator: DocBook XSL Stylesheets v1.76.1 <http://docbook.sf.net/>
.\" Date: 05/11/2012
.\" Manual: User Manual
.\" Source: jemalloc 2.2.5-0-gfc1bb70e5f0d9a58b39efa39cc549b5af5104760
.\" Source: jemalloc 3.0.0-0-gfc9b1dbf69f59d7ecfc4ac68da9847e017e1d046
.\" Language: English
.\"
.TH "JEMALLOC" "3" "11/14/2011" "jemalloc 2.2.5-0-gfc1bb70e5f0d" "User Manual"
.TH "JEMALLOC" "3" "05/11/2012" "jemalloc 3.0.0-0-gfc9b1dbf69f5" "User Manual"
.\" -----------------------------------------------------------------
.\" * Define some portability stuff
.\" -----------------------------------------------------------------
@ -31,7 +31,7 @@
jemalloc \- general purpose memory allocation functions
.SH "LIBRARY"
.PP
This manual describes jemalloc 2\&.2\&.5\-0\-gfc1bb70e5f0d9a58b39efa39cc549b5af5104760\&. More information can be found at the
This manual describes jemalloc 3\&.0\&.0\-0\-gfc9b1dbf69f59d7ecfc4ac68da9847e017e1d046\&. More information can be found at the
\m[blue]\fBjemalloc website\fR\m[]\&\s-2\u[1]\d\s+2\&.
.SH "SYNOPSIS"
.sp
@ -48,6 +48,8 @@ This manual describes jemalloc 2\&.2\&.5\-0\-gfc1bb70e5f0d9a58b39efa39cc549b5af5
.BI "void *calloc(size_t\ " "number" ", size_t\ " "size" ");"
.HP \w'int\ posix_memalign('u
.BI "int posix_memalign(void\ **" "ptr" ", size_t\ " "alignment" ", size_t\ " "size" ");"
.HP \w'void\ *aligned_alloc('u
.BI "void *aligned_alloc(size_t\ " "alignment" ", size_t\ " "size" ");"
.HP \w'void\ *realloc('u
.BI "void *realloc(void\ *" "ptr" ", size_t\ " "size" ");"
.HP \w'void\ free('u
@ -76,6 +78,8 @@ const char *\fImalloc_conf\fR;
.BI "int sallocm(const\ void\ *" "ptr" ", size_t\ *" "rsize" ", int\ " "flags" ");"
.HP \w'int\ dallocm('u
.BI "int dallocm(void\ *" "ptr" ", int\ " "flags" ");"
.HP \w'int\ nallocm('u
.BI "int nallocm(size_t\ *" "rsize" ", size_t\ " "size" ", int\ " "flags" ");"
.SH "DESCRIPTION"
.SS "Standard API"
.PP
@ -110,6 +114,18 @@ must be a power of 2 at least as large as
sizeof(\fBvoid *\fR)\&.
.PP
The
\fBaligned_alloc\fR\fB\fR
function allocates
\fIsize\fR
bytes of memory such that the allocation\*(Aqs base address is an even multiple of
\fIalignment\fR\&. The requested
\fIalignment\fR
must be a power of 2\&. Behavior is undefined if
\fIsize\fR
is not an integral multiple of
\fIalignment\fR\&.
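As a brief illustration of the constraint just described (a plain C sketch, not part of the manual source): the size passed to aligned_alloc() must be an integral multiple of the requested power-of-two alignment.

    #include <stdio.h>
    #include <stdlib.h>

    int main(void) {
        /* 4096 is an integral multiple of the 64-byte alignment, so this is well
           defined; aligned_alloc(64, 100) would be undefined per the text above. */
        void *p = aligned_alloc(64, 4096);
        if (p == NULL)
            return 1;
        printf("base address %p is a multiple of 64\n", p);
        free(p);
        return 0;
    }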
.PP
The
\fBrealloc\fR\fB\fR
function changes the size of the previously allocated memory referenced by
\fIptr\fR
@ -236,13 +252,16 @@ for (i = 0; i < nbins; i++) {
.\}
.SS "Experimental API"
.PP
The experimental API is subject to change or removal without regard for backward compatibility\&.
The experimental API is subject to change or removal without regard for backward compatibility\&. If
\fB\-\-disable\-experimental\fR
is specified during configuration, the experimental API is omitted\&.
.PP
The
\fBallocm\fR\fB\fR,
\fBrallocm\fR\fB\fR,
\fBsallocm\fR\fB\fR, and
\fBdallocm\fR\fB\fR
\fBsallocm\fR\fB\fR,
\fBdallocm\fR\fB\fR, and
\fBnallocm\fR\fB\fR
functions all have a
\fIflags\fR
argument that can be used to specify options\&. The functions only check the options that are contextually relevant\&. Use bitwise or (|) operations to specify one or more of the following:
@ -286,7 +305,10 @@ to the base address of the allocation, and sets
to the real size of the allocation if
\fIrsize\fR
is not
\fBNULL\fR\&.
\fBNULL\fR\&. Behavior is undefined if
\fIsize\fR
is
\fB0\fR\&.
.PP
The
\fBrallocm\fR\fB\fR
@ -306,6 +328,9 @@ is not
is non\-zero, an attempt is made to resize the allocation to be at least
\fIsize\fR + \fIextra\fR)
bytes, though inability to allocate the extra byte(s) will not by itself result in failure\&. Behavior is undefined if
\fIsize\fR
is
\fB0\fR, or if
(\fIsize\fR + \fIextra\fR > \fBSIZE_T_MAX\fR)\&.
.PP
The
@ -319,6 +344,23 @@ The
function causes the memory referenced by
\fIptr\fR
to be made available for future allocations\&.
.PP
The
\fBnallocm\fR\fB\fR
function allocates no memory, but it performs the same size computation as the
\fBallocm\fR\fB\fR
function, and if
\fIrsize\fR
is not
\fBNULL\fR
it sets
\fI*rsize\fR
to the real size of the allocation that would result from the equivalent
\fBallocm\fR\fB\fR
function call\&. Behavior is undefined if
\fIsize\fR
is
\fB0\fR\&.
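A minimal sketch of the experimental calls described above, assuming a build where --disable-experimental was not given and no symbol prefix is in use: nallocm() precomputes the usable size that the equivalent allocm() call would actually return.

    #include <stdio.h>
    #include <jemalloc/jemalloc.h>

    int main(void) {
        size_t rsize;
        void *p;

        /* Size computation only; no memory is allocated. */
        if (nallocm(&rsize, 100, 0) != ALLOCM_SUCCESS)
            return 1;
        printf("a 100-byte request would occupy %zu usable bytes\n", rsize);

        /* Perform the allocation, zero-filled, and report the real size. */
        if (allocm(&p, &rsize, 100, ALLOCM_ZERO) != ALLOCM_SUCCESS)
            return 1;
        printf("allocated %zu usable bytes at %p\n", rsize, p);

        dallocm(p, 0);
        return 0;
    }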
.SH "TUNING"
.PP
Once, when the first call is made to one of the memory allocation routines, the allocator initializes its internals based in part on various options that can be specified at compile\- or run\-time\&.
@ -346,9 +388,9 @@ Traditionally, allocators have used
to obtain memory, which is suboptimal for several reasons, including race conditions, increased fragmentation, and artificial limitations on maximum usable memory\&. If
\fB\-\-enable\-dss\fR
is specified during configuration, this allocator uses both
\fBsbrk\fR(2)
\fBmmap\fR(2)
and
\fBmmap\fR(2), in that order of preference; otherwise only
\fBsbrk\fR(2), in that order of preference; otherwise only
\fBmmap\fR(2)
is used\&.
.PP
@ -364,14 +406,8 @@ User objects are broken into three categories according to size: small, large, a
.PP
Each chunk that is managed by an arena tracks its contents as runs of contiguous pages (unused, backing a set of small objects, or backing one large object)\&. The combination of chunk alignment and chunk page maps makes it possible to determine all metadata regarding small and large allocations in constant time\&.
.PP
Small objects are managed in groups by page runs\&. Each run maintains a frontier and free list to track which regions are in use\&. Unless
\fB\-\-disable\-tiny\fR
is specified during configuration, allocation requests that are no more than half the quantum (8 or 16, depending on architecture) are rounded up to the nearest power of two that is at least
sizeof(\fBvoid *\fR)\&. Allocation requests that are more than half the quantum, but no more than the minimum cacheline\-multiple size class (see the
"opt\&.lg_qspace_max"
option) are rounded up to the nearest multiple of the quantum\&. Allocation requests that are more than the minimum cacheline\-multiple size class, but no more than the minimum subpage\-multiple size class (see the
"opt\&.lg_cspace_max"
option) are rounded up to the nearest multiple of the cacheline size (64)\&. Allocation requests that are more than the minimum subpage\-multiple size class, but no more than the maximum subpage\-multiple size class are rounded up to the nearest multiple of the subpage size (256)\&. Allocation requests that are more than the maximum subpage\-multiple size class, but small enough to fit in an arena\-managed chunk (see the
Small objects are managed in groups by page runs\&. Each run maintains a frontier and free list to track which regions are in use\&. Allocation requests that are no more than half the quantum (8 or 16, depending on architecture) are rounded up to the nearest power of two that is at least
sizeof(\fBdouble\fR)\&. All other small object size classes are multiples of the quantum, spaced such that internal fragmentation is limited to approximately 25% for all but the smallest size classes\&. Allocation requests that are larger than the maximum small size class, but small enough to fit in an arena\-managed chunk (see the
"opt\&.lg_chunk"
option), are rounded up to the nearest run size\&. Allocation requests that are too large to fit in an arena\-managed chunk are rounded up to the nearest multiple of the chunk size\&.
.PP
@ -387,51 +423,73 @@ Table 1\&.
.B Table\ \&1.\ \&Size classes
.TS
allbox tab(:);
lB lB lB.
lB rB lB.
T{
Category
T}:T{
Subcategory
Spacing
T}:T{
Size
T}
.T&
l l l
^ l l
^ l l
^ l l
l s l
l s l.
l r l
^ r l
^ r l
^ r l
^ r l
^ r l
^ r l
l r l
l r l.
T{
Small
T}:T{
Tiny
lg
T}:T{
[8]
T}
:T{
Quantum\-spaced
16
T}:T{
[16, 32, 48, \&.\&.\&., 128]
T}
:T{
Cacheline\-spaced
32
T}:T{
[192, 256, 320, \&.\&.\&., 512]
[160, 192, 224, 256]
T}
:T{
Subpage\-spaced
64
T}:T{
[768, 1024, 1280, \&.\&.\&., 3840]
[320, 384, 448, 512]
T}
:T{
128
T}:T{
[640, 768, 896, 1024]
T}
:T{
256
T}:T{
[1280, 1536, 1792, 2048]
T}
:T{
512
T}:T{
[2560, 3072, 3584]
T}
T{
Large
T}:T{
4 KiB
T}:T{
[4 KiB, 8 KiB, 12 KiB, \&.\&.\&., 4072 KiB]
T}
T{
Huge
T}:T{
4 MiB
T}:T{
[4 MiB, 8 MiB, 12 MiB, \&.\&.\&.]
T}
.TE
@ -481,12 +539,6 @@ was specified during build configuration\&.
was specified during build configuration\&.
.RE
.PP
"config\&.dynamic_page_shift" (\fBbool\fR) r\-
.RS 4
\fB\-\-enable\-dynamic\-page\-shift\fR
was specified during build configuration\&.
.RE
.PP
"config\&.fill" (\fBbool\fR) r\-
.RS 4
\fB\-\-enable\-fill\fR
@ -499,6 +551,18 @@ was specified during build configuration\&.
was specified during build configuration\&.
.RE
.PP
"config\&.mremap" (\fBbool\fR) r\-
.RS 4
\fB\-\-enable\-mremap\fR
was specified during build configuration\&.
.RE
.PP
"config\&.munmap" (\fBbool\fR) r\-
.RS 4
\fB\-\-enable\-munmap\fR
was specified during build configuration\&.
.RE
.PP
"config\&.prof" (\fBbool\fR) r\-
.RS 4
\fB\-\-enable\-prof\fR
@ -523,36 +587,30 @@ was specified during build configuration\&.
was specified during build configuration\&.
.RE
.PP
"config\&.swap" (\fBbool\fR) r\-
.RS 4
\fB\-\-enable\-swap\fR
was specified during build configuration\&.
.RE
.PP
"config\&.sysv" (\fBbool\fR) r\-
.RS 4
\fB\-\-enable\-sysv\fR
was specified during build configuration\&.
.RE
.PP
"config\&.tcache" (\fBbool\fR) r\-
.RS 4
\fB\-\-disable\-tcache\fR
was not specified during build configuration\&.
.RE
.PP
"config\&.tiny" (\fBbool\fR) r\-
.RS 4
\fB\-\-disable\-tiny\fR
was not specified during build configuration\&.
.RE
.PP
"config\&.tls" (\fBbool\fR) r\-
.RS 4
\fB\-\-disable\-tls\fR
was not specified during build configuration\&.
.RE
.PP
"config\&.utrace" (\fBbool\fR) r\-
.RS 4
\fB\-\-enable\-utrace\fR
was specified during build configuration\&.
.RE
.PP
"config\&.valgrind" (\fBbool\fR) r\-
.RS 4
\fB\-\-enable\-valgrind\fR
was specified during build configuration\&.
.RE
.PP
"config\&.xmalloc" (\fBbool\fR) r\-
.RS 4
\fB\-\-enable\-xmalloc\fR
@ -568,16 +626,6 @@ in these cases\&. This option is disabled by default unless
is specified during configuration, in which case it is enabled by default\&.
.RE
.PP
"opt\&.lg_qspace_max" (\fBsize_t\fR) r\-
.RS 4
Size (log base 2) of the maximum size class that is a multiple of the quantum (8 or 16 bytes, depending on architecture)\&. Above this size, cacheline spacing is used for size classes\&. The default value is 128 bytes (2^7)\&.
.RE
.PP
"opt\&.lg_cspace_max" (\fBsize_t\fR) r\-
.RS 4
Size (log base 2) of the maximum size class that is a multiple of the cacheline size (64)\&. Above this size, subpage spacing (256 bytes) is used for size classes\&. The default value is 512 bytes (2^9)\&.
.RE
.PP
"opt\&.lg_chunk" (\fBsize_t\fR) r\-
.RS 4
Virtual memory chunk size (log base 2)\&. The default chunk size is 4 MiB (2^22)\&.
@ -615,6 +663,22 @@ Junk filling enabled/disabled\&. If enabled, each byte of uninitialized allocate
is specified during configuration, in which case it is enabled by default\&.
.RE
.PP
"opt\&.quarantine" (\fBsize_t\fR) r\- [\fB\-\-enable\-fill\fR]
.RS 4
Per thread quarantine size in bytes\&. If non\-zero, each thread maintains a FIFO object quarantine that stores up to the specified number of bytes of memory\&. The quarantined memory is not freed until it is released from quarantine, though it is immediately junk\-filled if the
"opt\&.junk"
option is enabled\&. This feature is of particular use in combination with
\m[blue]\fBValgrind\fR\m[]\&\s-2\u[2]\d\s+2, which can detect attempts to access quarantined objects\&. This is intended for debugging and will impact performance negatively\&. The default quarantine size is 0\&.
.RE
.PP
"opt\&.redzone" (\fBbool\fR) r\- [\fB\-\-enable\-fill\fR]
.RS 4
Redzones enabled/disabled\&. If enabled, small allocations have redzones before and after them\&. Furthermore, if the
"opt\&.junk"
option is enabled, the redzones are checked for corruption during deallocation\&. However, the primary intended purpose of this feature is to be used in combination with
\m[blue]\fBValgrind\fR\m[]\&\s-2\u[2]\d\s+2, which needs redzones in order to do effective buffer overflow/underflow detection\&. This option is intended for debugging and will impact performance negatively\&. This option is disabled by default\&.
.RE
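As a hedged usage note for the fill/quarantine/redzone debugging aids above (assuming a library built with --enable-fill): the options can be set from application code through the malloc_conf symbol shown in the SYNOPSIS, using the "opt." names with the prefix dropped, or equivalently through the MALLOC_CONF environment variable.

    /* Option string read by jemalloc when it initializes: junk-fill new and
       freed memory, add redzones, and quarantine up to 16 MiB of freed memory
       per thread -- useful in combination with Valgrind. */
    const char *malloc_conf = "junk:true,redzone:true,quarantine:16777216";

When the library is also built with --enable-valgrind, setting "valgrind:true" instead adjusts these options automatically, as described under "opt.valgrind" below.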
.PP
"opt\&.zero" (\fBbool\fR) r\- [\fB\-\-enable\-fill\fR]
.RS 4
Zero filling enabled/disabled\&. If enabled, each byte of uninitialized allocated memory will be initialized to 0\&. Note that this initialization only happens once for each byte, so
@ -624,13 +688,25 @@ and
calls do not zero memory that was previously allocated\&. This is intended for debugging and will impact performance negatively\&. This option is disabled by default\&.
.RE
.PP
"opt\&.sysv" (\fBbool\fR) r\- [\fB\-\-enable\-sysv\fR]
"opt\&.utrace" (\fBbool\fR) r\- [\fB\-\-enable\-utrace\fR]
.RS 4
If enabled, attempting to allocate zero bytes will return a
\fBNULL\fR
pointer instead of a valid pointer\&. (The default behavior is to make a minimal allocation and return a pointer to it\&.) This option is provided for System V compatibility\&. This option is incompatible with the
"opt\&.xmalloc"
option\&. This option is disabled by default\&.
Allocation tracing based on
\fButrace\fR(2)
enabled/disabled\&. This option is disabled by default\&.
.RE
.PP
"opt\&.valgrind" (\fBbool\fR) r\- [\fB\-\-enable\-valgrind\fR]
.RS 4
\m[blue]\fBValgrind\fR\m[]\&\s-2\u[2]\d\s+2
support enabled/disabled\&. If enabled, several other options are automatically modified during options processing to work well with Valgrind:
"opt\&.junk"
and
"opt\&.zero"
are set to false,
"opt\&.quarantine"
is set to 16 MiB, and
"opt\&.redzone"
is set to true\&. This option is disabled by default\&.
.RE
.PP
"opt\&.xmalloc" (\fBbool\fR) r\- [\fB\-\-enable\-xmalloc\fR]
@ -656,15 +732,8 @@ This option is disabled by default\&.
"opt\&.tcache" (\fBbool\fR) r\- [\fB\-\-enable\-tcache\fR]
.RS 4
Thread\-specific caching enabled/disabled\&. When there are multiple threads, each thread uses a thread\-specific cache for objects up to a certain size\&. Thread\-specific caching allows many allocations to be satisfied without performing any thread synchronization, at the cost of increased memory use\&. See the
"opt\&.lg_tcache_gc_sweep"
and
"opt\&.lg_tcache_max"
options for related tuning information\&. This option is enabled by default\&.
.RE
.PP
"opt\&.lg_tcache_gc_sweep" (\fBssize_t\fR) r\- [\fB\-\-enable\-tcache\fR]
.RS 4
Approximate interval (log base 2) between full thread\-specific cache garbage collection sweeps, counted in terms of thread\-specific cache allocation/deallocation events\&. Garbage collection is actually performed incrementally, one size class at a time, in order to avoid large collection pauses\&. The default sweep interval is 8192 (2^13); setting this option to \-1 will disable garbage collection\&.
option for related tuning information\&. This option is enabled by default\&.
.RE
.PP
"opt\&.lg_tcache_max" (\fBsize_t\fR) r\- [\fB\-\-enable\-tcache\fR]
@ -674,31 +743,22 @@ Maximum size class (log base 2) to cache in the thread\-specific cache\&. At a m
.PP
"opt\&.prof" (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR]
.RS 4
Memory profiling enabled/disabled\&. If enabled, profile memory allocation activity, and use an
\fBatexit\fR(3)
function to dump final memory usage to a file named according to the pattern
<prefix>\&.<pid>\&.<seq>\&.f\&.heap, where
<prefix>
is controlled by the
"opt\&.prof_prefix"
option\&. See the
"opt\&.lg_prof_bt_max"
option for backtrace depth control\&. See the
Memory profiling enabled/disabled\&. If enabled, profile memory allocation activity\&. See the
"opt\&.prof_active"
option for on\-the\-fly activation/deactivation\&. See the
"opt\&.lg_prof_sample"
option for probabilistic sampling control\&. See the
"opt\&.prof_accum"
option for control of cumulative sample reporting\&. See the
"opt\&.lg_prof_tcmax"
option for control of per thread backtrace caching\&. See the
"opt\&.lg_prof_interval"
option for information on interval\-triggered profile dumping, and the
option for information on interval\-triggered profile dumping, the
"opt\&.prof_gdump"
option for information on high\-water\-triggered profile dumping\&. Profile output is compatible with the included
option for information on high\-water\-triggered profile dumping, and the
"opt\&.prof_final"
option for final profile dumping\&. Profile output is compatible with the included
\fBpprof\fR
Perl script, which originates from the
\m[blue]\fBgoogle\-perftools package\fR\m[]\&\s-2\u[2]\d\s+2\&.
\m[blue]\fBgperftools package\fR\m[]\&\s-2\u[3]\d\s+2\&.
.RE
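A short, hedged sketch for the profiling controls above (requires a library configured with --enable-prof; names are the "opt." entries with the prefix dropped):

    /* Enable sampling-based heap profiling and dump a final profile at exit;
       dumps are named <prefix>.<pid>.<seq>.f.heap (see "opt.prof_prefix") and
       can be post-processed with the bundled pprof script. */
    const char *malloc_conf = "prof:true,prof_final:true,lg_prof_sample:17";

The same string can be supplied at run time in the MALLOC_CONF environment variable instead of defining the malloc_conf symbol.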
.PP
"opt\&.prof_prefix" (\fBconst char *\fR) r\- [\fB\-\-enable\-prof\fR]
@ -707,11 +767,6 @@ Filename prefix for profile dumps\&. If the prefix is set to the empty string, n
jeprof\&.
.RE
.PP
"opt\&.lg_prof_bt_max" (\fBsize_t\fR) r\- [\fB\-\-enable\-prof\fR]
.RS 4
Maximum backtrace depth (log base 2) when profiling memory allocation activity\&. The default is 128 (2^7)\&.
.RE
.PP
"opt\&.prof_active" (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR]
.RS 4
Profiling activated/deactivated\&. This is a secondary control mechanism that makes it possible to start the application with profiling enabled (see the
@ -723,21 +778,12 @@ mallctl\&. This option is enabled by default\&.
.PP
"opt\&.lg_prof_sample" (\fBssize_t\fR) r\- [\fB\-\-enable\-prof\fR]
.RS 4
Average interval (log base 2) between allocation samples, as measured in bytes of allocation activity\&. Increasing the sampling interval decreases profile fidelity, but also decreases the computational overhead\&. The default sample interval is 1 (2^0) (i\&.e\&. all allocations are sampled)\&.
Average interval (log base 2) between allocation samples, as measured in bytes of allocation activity\&. Increasing the sampling interval decreases profile fidelity, but also decreases the computational overhead\&. The default sample interval is 512 KiB (2^19 B)\&.
.RE
.PP
"opt\&.prof_accum" (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR]
.RS 4
Reporting of cumulative object/byte counts in profile dumps enabled/disabled\&. If this option is enabled, every unique backtrace must be stored for the duration of execution\&. Depending on the application, this can impose a large memory overhead, and the cumulative counts are not always of interest\&. See the
"opt\&.lg_prof_tcmax"
option for control of per thread backtrace caching, which has important interactions\&. This option is enabled by default\&.
.RE
.PP
"opt\&.lg_prof_tcmax" (\fBssize_t\fR) r\- [\fB\-\-enable\-prof\fR]
.RS 4
Maximum per thread backtrace cache (log base 2) used for heap profiling\&. A backtrace can only be discarded if the
"opt\&.prof_accum"
option is disabled, and no thread caches currently refer to the backtrace\&. Therefore, a backtrace cache limit should be imposed if the intention is to limit how much memory is used by backtraces\&. By default, no limit is imposed (encoded as \-1)\&.
Reporting of cumulative object/byte counts in profile dumps enabled/disabled\&. If this option is enabled, every unique backtrace must be stored for the duration of execution\&. Depending on the application, this can impose a large memory overhead, and the cumulative counts are not always of interest\&. This option is disabled by default\&.
.RE
.PP
"opt\&.lg_prof_interval" (\fBssize_t\fR) r\- [\fB\-\-enable\-prof\fR]
@ -760,33 +806,27 @@ is controlled by the
option\&. This option is disabled by default\&.
.RE
.PP
"opt\&.prof_final" (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR]
.RS 4
Use an
\fBatexit\fR(3)
function to dump final memory usage to a file named according to the pattern
<prefix>\&.<pid>\&.<seq>\&.f\&.heap, where
<prefix>
is controlled by the
"opt\&.prof_prefix"
option\&. This option is enabled by default\&.
.RE
.PP
"opt\&.prof_leak" (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR]
.RS 4
Leak reporting enabled/disabled\&. If enabled, use an
\fBatexit\fR(3)
function to report memory leaks detected by allocation sampling\&. See the
"opt\&.lg_prof_bt_max"
option for backtrace depth control\&. See the
"opt\&.prof"
option for information on analyzing heap profile output\&. This option is disabled by default\&.
.RE
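.PP
In addition to the
\fBatexit\fR(3)
behaviors above, a profile dump can be requested explicitly; a minimal sketch (assuming the
prof\&.dump
mallctl, with no filename supplied so the
"opt\&.prof_prefix"
naming pattern is used):
.nf
/* Write a heap profile immediately. */
mallctl("prof.dump", NULL, NULL, NULL, 0);
.fi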
.PP
"opt\&.overcommit" (\fBbool\fR) r\- [\fB\-\-enable\-swap\fR]
.RS 4
Over\-commit enabled/disabled\&. If enabled, over\-commit memory as a side effect of using anonymous
\fBmmap\fR(2)
or
\fBsbrk\fR(2)
for virtual memory allocation\&. In order for overcommit to be disabled, the
"swap\&.fds"
mallctl must have been successfully written to\&. This option is enabled by default\&.
.RE
.PP
"tcache\&.flush" (\fBvoid\fR) \-\- [\fB\-\-enable\-tcache\fR]
.RS 4
Flush calling thread\*(Aqs tcache\&. This interface releases all cached objects and internal data structures associated with the calling thread\*(Aqs thread\-specific cache\&. Ordinarily, this interface need not be called, since automatic periodic incremental garbage collection occurs, and the thread cache is automatically discarded when a thread exits\&. However, garbage collection is triggered by allocation activity, so it is possible for a thread that stops allocating/deallocating to retain its cache indefinitely, in which case the developer may find manual flushing useful\&.
.RE
.PP
"thread\&.arena" (\fBunsigned\fR) rw
.RS 4
Get or set the arena associated with the calling thread\&. The arena index must be less than the maximum number of arenas (see the
@ -824,6 +864,17 @@ mallctl\&. This is useful for avoiding the overhead of repeated
calls\&.
.RE
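.PP
A minimal sketch of reading and then switching the calling thread\*(Aqs arena (the target index is an arbitrary example and must be below
"arenas\&.narenas"):
.nf
unsigned arena, target = 0;
size_t sz = sizeof(arena);

mallctl("thread.arena", &arena, &sz, NULL, 0);                /* read */
mallctl("thread.arena", NULL, NULL, &target, sizeof(target)); /* write */
.fi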
.PP
"thread\&.tcache\&.enabled" (\fBbool\fR) rw [\fB\-\-enable\-tcache\fR]
.RS 4
Enable/disable calling thread\*(Aqs tcache\&. The tcache is implicitly flushed as a side effect of becoming disabled (see
"thread\&.tcache\&.flush")\&.
.RE
.PP
"thread\&.tcache\&.flush" (\fBvoid\fR) \-\- [\fB\-\-enable\-tcache\fR]
.RS 4
Flush calling thread\*(Aqs tcache\&. This interface releases all cached objects and internal data structures associated with the calling thread\*(Aqs thread\-specific cache\&. Ordinarily, this interface need not be called, since automatic periodic incremental garbage collection occurs, and the thread cache is automatically discarded when a thread exits\&. However, garbage collection is triggered by allocation activity, so it is possible for a thread that stops allocating/deallocating to retain its cache indefinitely, in which case the developer may find manual flushing useful\&.
.RE
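.PP
A minimal sketch of using the two interfaces above from a thread that wants to temporarily give up its cache (assuming the declarations from
<jemalloc/jemalloc\&.h>):
.nf
bool off = false, on = true;

mallctl("thread.tcache.enabled", NULL, NULL, &off, sizeof(off)); /* disable (flushes) */
/* ... phase during which cached objects are unwanted ... */
mallctl("thread.tcache.enabled", NULL, NULL, &on, sizeof(on));   /* re-enable */
mallctl("thread.tcache.flush", NULL, NULL, NULL, 0);             /* explicit flush */
.fi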
.PP
"arenas\&.narenas" (\fBunsigned\fR) r\-
.RS 4
Maximum number of arenas\&.
@ -841,94 +892,19 @@ booleans\&. Each boolean indicates whether the corresponding arena is initialize
Quantum size\&.
.RE
.PP
"arenas\&.cacheline" (\fBsize_t\fR) r\-
.RS 4
Assumed cacheline size\&.
.RE
.PP
"arenas\&.subpage" (\fBsize_t\fR) r\-
.RS 4
Subpage size class interval\&.
.RE
.PP
"arenas\&.pagesize" (\fBsize_t\fR) r\-
"arenas\&.page" (\fBsize_t\fR) r\-
.RS 4
Page size\&.
.RE
.PP
"arenas\&.chunksize" (\fBsize_t\fR) r\-
.RS 4
Chunk size\&.
.RE
.PP
"arenas\&.tspace_min" (\fBsize_t\fR) r\-
.RS 4
Minimum tiny size class\&. Tiny size classes are powers of two\&.
.RE
.PP
"arenas\&.tspace_max" (\fBsize_t\fR) r\-
.RS 4
Maximum tiny size class\&. Tiny size classes are powers of two\&.
.RE
.PP
"arenas\&.qspace_min" (\fBsize_t\fR) r\-
.RS 4
Minimum quantum\-spaced size class\&.
.RE
.PP
"arenas\&.qspace_max" (\fBsize_t\fR) r\-
.RS 4
Maximum quantum\-spaced size class\&.
.RE
.PP
"arenas\&.cspace_min" (\fBsize_t\fR) r\-
.RS 4
Minimum cacheline\-spaced size class\&.
.RE
.PP
"arenas\&.cspace_max" (\fBsize_t\fR) r\-
.RS 4
Maximum cacheline\-spaced size class\&.
.RE
.PP
"arenas\&.sspace_min" (\fBsize_t\fR) r\-
.RS 4
Minimum subpage\-spaced size class\&.
.RE
.PP
"arenas\&.sspace_max" (\fBsize_t\fR) r\-
.RS 4
Maximum subpage\-spaced size class\&.
.RE
.PP
"arenas\&.tcache_max" (\fBsize_t\fR) r\- [\fB\-\-enable\-tcache\fR]
.RS 4
Maximum thread\-cached size class\&.
.RE
.PP
"arenas\&.ntbins" (\fBunsigned\fR) r\-
.RS 4
Number of tiny bin size classes\&.
.RE
.PP
"arenas\&.nqbins" (\fBunsigned\fR) r\-
.RS 4
Number of quantum\-spaced bin size classes\&.
.RE
.PP
"arenas\&.ncbins" (\fBunsigned\fR) r\-
.RS 4
Number of cacheline\-spaced bin size classes\&.
.RE
.PP
"arenas\&.nsbins" (\fBunsigned\fR) r\-
.RS 4
Number of subpage\-spaced bin size classes\&.
.RE
.PP
"arenas\&.nbins" (\fBunsigned\fR) r\-
.RS 4
Total number of bin size classes\&.
Number of bin size classes\&.
.RE
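.PP
A minimal sketch of querying the read\-only arena parameters above:
.nf
unsigned narenas, nbins;
size_t page;
size_t usz = sizeof(unsigned), ssz = sizeof(size_t);

mallctl("arenas.narenas", &narenas, &usz, NULL, 0);
mallctl("arenas.nbins", &nbins, &usz, NULL, 0);
mallctl("arenas.page", &page, &ssz, NULL, 0);
.fi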
.PP
"arenas\&.nhbins" (\fBunsigned\fR) r\- [\fB\-\-enable\-tcache\fR]
@ -1011,12 +987,12 @@ Total number of bytes in active pages allocated by the application\&. This is a
"stats\&.mapped" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR]
.RS 4
Total number of bytes in chunks mapped on behalf of the application\&. This is a multiple of the chunk size, and is at least as large as
"stats\&.active"\&. This does not include inactive chunks backed by swap files\&. his does not include inactive chunks embedded in the DSS\&.
"stats\&.active"\&. This does not include inactive chunks\&.
.RE
.PP
"stats\&.chunks\&.current" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR]
.RS 4
Total number of chunks actively mapped on behalf of the application\&. This does not include inactive chunks backed by swap files\&. This does not include inactive chunks embedded in the DSS\&.
Total number of chunks actively mapped on behalf of the application\&. This does not include inactive chunks\&.
.RE
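.PP
A minimal sketch of reading these counters (assuming the
"epoch"
mallctl is written first so that the cached statistics are refreshed):
.nf
uint64_t epoch = 1;
size_t esz = sizeof(epoch), sz = sizeof(size_t);
size_t mapped, curchunks;

mallctl("epoch", &epoch, &esz, &epoch, sizeof(epoch));      /* refresh stats */
mallctl("stats.mapped", &mapped, &sz, NULL, 0);
mallctl("stats.chunks.current", &curchunks, &sz, NULL, 0);
.fi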
.PP
"stats\&.chunks\&.total" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR]
@ -1163,11 +1139,6 @@ Cumulative number of runs created\&.
Cumulative number of times the current run from which to allocate changed\&.
.RE
.PP
"stats\&.arenas\&.<i>\&.bins\&.<j>\&.highruns" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR]
.RS 4
Maximum number of runs at any time thus far\&.
.RE
.PP
"stats\&.arenas\&.<i>\&.bins\&.<j>\&.curruns" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR]
.RS 4
Current number of runs\&.
@ -1188,44 +1159,10 @@ Cumulative number of deallocation requests for this size class served directly b
Cumulative number of allocation requests for this size class\&.
.RE
.PP
"stats\&.arenas\&.<i>\&.lruns\&.<j>\&.highruns" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR]
.RS 4
Maximum number of runs at any time thus far for this size class\&.
.RE
.PP
"stats\&.arenas\&.<i>\&.lruns\&.<j>\&.curruns" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR]
.RS 4
Current number of runs for this size class\&.
.RE
.PP
"swap\&.avail" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats \-\-enable\-swap\fR]
.RS 4
Number of swap file bytes that are currently not associated with any chunk (i\&.e\&. mapped, but otherwise completely unmanaged)\&.
.RE
.PP
"swap\&.prezeroed" (\fBbool\fR) rw [\fB\-\-enable\-swap\fR]
.RS 4
If true, the allocator assumes that the swap file(s) contain nothing but nil bytes\&. If this assumption is violated, allocator behavior is undefined\&. This value becomes read\-only after
"swap\&.fds"
is successfully written to\&.
.RE
.PP
"swap\&.nfds" (\fBsize_t\fR) r\- [\fB\-\-enable\-swap\fR]
.RS 4
Number of file descriptors in use for swap\&.
.RE
.PP
"swap\&.fds" (\fBint *\fR) rw [\fB\-\-enable\-swap\fR]
.RS 4
When written to, the files associated with the specified file descriptors are contiguously mapped via
\fBmmap\fR(2)\&. The resulting virtual memory region is preferred over anonymous
\fBmmap\fR(2)
and
\fBsbrk\fR(2)
memory\&. Note that if a file\*(Aqs size is not a multiple of the page size, it is automatically truncated to the nearest page size multiple\&. See the
"swap\&.prezeroed"
mallctl for specifying that the files are pre\-zeroed\&.
.RE
.SH "DEBUGGING MALLOC PROBLEMS"
.PP
When debugging, it is a good idea to configure/build jemalloc with the
@ -1240,7 +1177,13 @@ option) tends to expose such bugs in the form of obviously incorrect results and
"opt\&.zero"
option) eliminates the symptoms of such bugs\&. Between these two options, it is usually possible to quickly detect, diagnose, and eliminate such bugs\&.
.PP
This implementation does not provide much detail about the problems it detects, because the performance impact for storing such information would be prohibitive\&. There are a number of allocator implementations available on the Internet which focus on detecting and pinpointing problems by trading performance for extra sanity checks and detailed diagnostics\&.
This implementation does not provide much detail about the problems it detects, because the performance impact for storing such information would be prohibitive\&. However, jemalloc does integrate with the most excellent
\m[blue]\fBValgrind\fR\m[]\&\s-2\u[2]\d\s+2
tool if the
\fB\-\-enable\-valgrind\fR
configuration option is enabled and the
"opt\&.valgrind"
option is enabled\&.
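.PP
A minimal sketch of enabling these debugging aids through the compiled\-in
malloc_conf
string (the particular combination is an arbitrary example; "valgrind:true" additionally requires the
\fB\-\-enable\-valgrind\fR
build):
.nf
/* Fill allocated and deallocated memory to expose initialization bugs. */
const char *malloc_conf = "junk:true";

/* Or, for runs under Valgrind with an --enable-valgrind build: */
/* const char *malloc_conf = "valgrind:true"; */
.fi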
.SH "DIAGNOSTIC MESSAGES"
.PP
If any of the memory allocation/deallocation functions detect an error or warning condition, a message will be printed to file descriptor
@ -1296,6 +1239,28 @@ Memory allocation error\&.
.RE
.PP
The
\fBaligned_alloc\fR\fB\fR
function returns a pointer to the allocated memory if successful; otherwise a
\fBNULL\fR
pointer is returned and
\fIerrno\fR
is set\&. The
\fBaligned_alloc\fR\fB\fR
function will fail if:
.PP
EINVAL
.RS 4
The
\fIalignment\fR
parameter is not a power of 2\&.
.RE
.PP
ENOMEM
.RS 4
Memory allocation error\&.
.RE
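.PP
A minimal sketch of distinguishing these two failure modes (the alignment and size values are arbitrary):
.nf
#include <errno.h>
#include <stdlib.h>

static void *
aligned_buffer(void)
{
	void *p = aligned_alloc(64, 4096);  /* alignment must be a power of 2 */

	if (p == NULL && errno == EINVAL) {
		/* Invalid alignment parameter. */
	} else if (p == NULL && errno == ENOMEM) {
		/* Allocation failure. */
	}
	return (p);
}
.fi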
.PP
The
\fBrealloc\fR\fB\fR
function returns a pointer, possibly identical to
\fIptr\fR, to the allocated memory if successful; otherwise a
@ -1370,14 +1335,15 @@ read/write processing\&.
The
\fBallocm\fR\fB\fR,
\fBrallocm\fR\fB\fR,
\fBsallocm\fR\fB\fR, and
\fBdallocm\fR\fB\fR
\fBsallocm\fR\fB\fR,
\fBdallocm\fR\fB\fR, and
\fBnallocm\fR\fB\fR
functions return
\fBALLOCM_SUCCESS\fR
on success; otherwise they return an error value\&. The
\fBallocm\fR\fB\fR
and
\fBrallocm\fR\fB\fR
\fBallocm\fR\fB\fR,
\fBrallocm\fR\fB\fR, and
\fBnallocm\fR\fB\fR
functions will fail if:
.PP
ALLOCM_ERR_OOM
@ -1442,6 +1408,7 @@ malloc_conf = "lg_chunk:24";
\fBmadvise\fR(2),
\fBmmap\fR(2),
\fBsbrk\fR(2),
\fButrace\fR(2),
\fBalloca\fR(3),
\fBatexit\fR(3),
\fBgetpagesize\fR(3)
@ -1469,7 +1436,12 @@ jemalloc website
\%http://www.canonware.com/jemalloc/
.RE
.IP " 2." 4
google-perftools package
Valgrind
.RS 4
\%http://code.google.com/p/google-perftools/
\%http://valgrind.org/
.RE
.IP " 3." 4
gperftools package
.RS 4
\%http://code.google.com/p/gperftools/
.RE

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -11,22 +11,8 @@
#define atomic_read_uint64(p) atomic_add_uint64(p, 0)
#define atomic_read_uint32(p) atomic_add_uint32(p, 0)
#if (LG_SIZEOF_PTR == 3)
# define atomic_read_z(p) \
(size_t)atomic_add_uint64((uint64_t *)p, (uint64_t)0)
# define atomic_add_z(p, x) \
(size_t)atomic_add_uint64((uint64_t *)p, (uint64_t)x)
# define atomic_sub_z(p, x) \
(size_t)atomic_sub_uint64((uint64_t *)p, (uint64_t)x)
#elif (LG_SIZEOF_PTR == 2)
# define atomic_read_z(p) \
(size_t)atomic_add_uint32((uint32_t *)p, (uint32_t)0)
# define atomic_add_z(p, x) \
(size_t)atomic_add_uint32((uint32_t *)p, (uint32_t)x)
# define atomic_sub_z(p, x) \
(size_t)atomic_sub_uint32((uint32_t *)p, (uint32_t)x)
#endif
#define atomic_read_z(p) atomic_add_z(p, 0)
#define atomic_read_u(p) atomic_add_u(p, 0)
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
@ -37,12 +23,17 @@ uint64_t atomic_add_uint64(uint64_t *p, uint64_t x);
uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x);
uint32_t atomic_add_uint32(uint32_t *p, uint32_t x);
uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x);
size_t atomic_add_z(size_t *p, size_t x);
size_t atomic_sub_z(size_t *p, size_t x);
unsigned atomic_add_u(unsigned *p, unsigned x);
unsigned atomic_sub_u(unsigned *p, unsigned x);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ATOMIC_C_))
/******************************************************************************/
/* 64-bit operations. */
#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8
#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3)
# ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8
JEMALLOC_INLINE uint64_t
atomic_add_uint64(uint64_t *p, uint64_t x)
{
@ -56,6 +47,20 @@ atomic_sub_uint64(uint64_t *p, uint64_t x)
return (__sync_sub_and_fetch(p, x));
}
#elif (defined(_MSC_VER))
JEMALLOC_INLINE uint64_t
atomic_add_uint64(uint64_t *p, uint64_t x)
{
return (InterlockedExchangeAdd64(p, x));
}
JEMALLOC_INLINE uint64_t
atomic_sub_uint64(uint64_t *p, uint64_t x)
{
return (InterlockedExchangeAdd64(p, -((int64_t)x)));
}
#elif (defined(JEMALLOC_OSATOMIC))
JEMALLOC_INLINE uint64_t
atomic_add_uint64(uint64_t *p, uint64_t x)
@ -70,7 +75,7 @@ atomic_sub_uint64(uint64_t *p, uint64_t x)
return (OSAtomicAdd64(-((int64_t)x), (int64_t *)p));
}
#elif (defined(__amd64_) || defined(__x86_64__))
# elif (defined(__amd64__) || defined(__x86_64__))
JEMALLOC_INLINE uint64_t
atomic_add_uint64(uint64_t *p, uint64_t x)
{
@ -97,8 +102,43 @@ atomic_sub_uint64(uint64_t *p, uint64_t x)
return (x);
}
#else
# if (LG_SIZEOF_PTR == 3)
# elif (defined(JEMALLOC_ATOMIC9))
JEMALLOC_INLINE uint64_t
atomic_add_uint64(uint64_t *p, uint64_t x)
{
/*
* atomic_fetchadd_64() doesn't exist, but we only ever use this
* function on LP64 systems, so atomic_fetchadd_long() will do.
*/
assert(sizeof(uint64_t) == sizeof(unsigned long));
return (atomic_fetchadd_long(p, (unsigned long)x) + x);
}
JEMALLOC_INLINE uint64_t
atomic_sub_uint64(uint64_t *p, uint64_t x)
{
assert(sizeof(uint64_t) == sizeof(unsigned long));
return (atomic_fetchadd_long(p, (unsigned long)(-(long)x)) - x);
}
# elif (defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8))
JEMALLOC_INLINE uint64_t
atomic_add_uint64(uint64_t *p, uint64_t x)
{
return (__sync_add_and_fetch(p, x));
}
JEMALLOC_INLINE uint64_t
atomic_sub_uint64(uint64_t *p, uint64_t x)
{
return (__sync_sub_and_fetch(p, x));
}
# else
# error "Missing implementation for 64-bit atomic operations"
# endif
#endif
@ -119,6 +159,20 @@ atomic_sub_uint32(uint32_t *p, uint32_t x)
return (__sync_sub_and_fetch(p, x));
}
#elif (defined(_MSC_VER))
JEMALLOC_INLINE uint32_t
atomic_add_uint32(uint32_t *p, uint32_t x)
{
return (InterlockedExchangeAdd(p, x));
}
JEMALLOC_INLINE uint32_t
atomic_sub_uint32(uint32_t *p, uint32_t x)
{
return (InterlockedExchangeAdd(p, -((int32_t)x)));
}
#elif (defined(JEMALLOC_OSATOMIC))
JEMALLOC_INLINE uint32_t
atomic_add_uint32(uint32_t *p, uint32_t x)
@ -133,7 +187,7 @@ atomic_sub_uint32(uint32_t *p, uint32_t x)
return (OSAtomicAdd32(-((int32_t)x), (int32_t *)p));
}
#elif (defined(__i386__) || defined(__amd64_) || defined(__x86_64__))
#elif (defined(__i386__) || defined(__amd64__) || defined(__x86_64__))
JEMALLOC_INLINE uint32_t
atomic_add_uint32(uint32_t *p, uint32_t x)
{
@ -160,9 +214,90 @@ atomic_sub_uint32(uint32_t *p, uint32_t x)
return (x);
}
#elif (defined(JEMALLOC_ATOMIC9))
JEMALLOC_INLINE uint32_t
atomic_add_uint32(uint32_t *p, uint32_t x)
{
return (atomic_fetchadd_32(p, x) + x);
}
JEMALLOC_INLINE uint32_t
atomic_sub_uint32(uint32_t *p, uint32_t x)
{
return (atomic_fetchadd_32(p, (uint32_t)(-(int32_t)x)) - x);
}
#elif (defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_4))
JEMALLOC_INLINE uint32_t
atomic_add_uint32(uint32_t *p, uint32_t x)
{
return (__sync_add_and_fetch(p, x));
}
JEMALLOC_INLINE uint32_t
atomic_sub_uint32(uint32_t *p, uint32_t x)
{
return (__sync_sub_and_fetch(p, x));
}
#else
# error "Missing implementation for 32-bit atomic operations"
#endif
/******************************************************************************/
/* size_t operations. */
JEMALLOC_INLINE size_t
atomic_add_z(size_t *p, size_t x)
{
#if (LG_SIZEOF_PTR == 3)
return ((size_t)atomic_add_uint64((uint64_t *)p, (uint64_t)x));
#elif (LG_SIZEOF_PTR == 2)
return ((size_t)atomic_add_uint32((uint32_t *)p, (uint32_t)x));
#endif
}
JEMALLOC_INLINE size_t
atomic_sub_z(size_t *p, size_t x)
{
#if (LG_SIZEOF_PTR == 3)
return ((size_t)atomic_add_uint64((uint64_t *)p,
(uint64_t)-((int64_t)x)));
#elif (LG_SIZEOF_PTR == 2)
return ((size_t)atomic_add_uint32((uint32_t *)p,
(uint32_t)-((int32_t)x)));
#endif
}
/******************************************************************************/
/* unsigned operations. */
JEMALLOC_INLINE unsigned
atomic_add_u(unsigned *p, unsigned x)
{
#if (LG_SIZEOF_INT == 3)
return ((unsigned)atomic_add_uint64((uint64_t *)p, (uint64_t)x));
#elif (LG_SIZEOF_INT == 2)
return ((unsigned)atomic_add_uint32((uint32_t *)p, (uint32_t)x));
#endif
}
JEMALLOC_INLINE unsigned
atomic_sub_u(unsigned *p, unsigned x)
{
#if (LG_SIZEOF_INT == 3)
return ((unsigned)atomic_add_uint64((uint64_t *)p,
(uint64_t)-((int64_t)x)));
#elif (LG_SIZEOF_INT == 2)
return ((unsigned)atomic_add_uint32((uint32_t *)p,
(uint32_t)-((int32_t)x)));
#endif
}
/******************************************************************************/
#endif
#endif /* JEMALLOC_H_INLINES */
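/*
 * Illustrative usage sketch (hypothetical names, not part of this header):
 * the size_t wrappers above let callers maintain counters without caring
 * about the pointer width selected at configure time.
 */
static size_t	npages_active;

static void
pages_note_alloc(size_t npages)
{
	atomic_add_z(&npages_active, npages);
}

static void
pages_note_dalloc(size_t npages)
{
	atomic_sub_z(&npages_active, npages);
}

static size_t
pages_read_active(void)
{
	return (atomic_read_z(&npages_active));
}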

View File

@ -9,12 +9,14 @@
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
extern malloc_mutex_t base_mtx;
void *base_alloc(size_t size);
void *base_calloc(size_t number, size_t size);
extent_node_t *base_node_alloc(void);
void base_node_dealloc(extent_node_t *node);
bool base_boot(void);
void base_prefork(void);
void base_postfork_parent(void);
void base_postfork_child(void);
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
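/*
 * Illustrative note (not part of this header): the prefork/postfork
 * prototypes added above are presumably paired around fork(2), e.g. via
 * pthread_atfork(3) handlers registered by the allocator:
 *
 *	pthread_atfork(base_prefork, base_postfork_parent, base_postfork_child);
 */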

View File

@ -28,20 +28,13 @@
#ifdef JEMALLOC_H_EXTERNS
extern size_t opt_lg_chunk;
#ifdef JEMALLOC_SWAP
extern bool opt_overcommit;
#endif
#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
/* Protects stats_chunks; currently not used for any other purpose. */
extern malloc_mutex_t chunks_mtx;
/* Chunk statistics. */
extern chunk_stats_t stats_chunks;
#endif
#ifdef JEMALLOC_IVSALLOC
extern rtree_t *chunks_rtree;
#endif
extern size_t chunksize;
extern size_t chunksize_mask; /* (chunksize - 1). */
@ -49,7 +42,7 @@ extern size_t chunk_npages;
extern size_t map_bias; /* Number of arena chunk header pages. */
extern size_t arena_maxclass; /* Max size class for arenas. */
void *chunk_alloc(size_t size, bool base, bool *zero);
void *chunk_alloc(size_t size, size_t alignment, bool base, bool *zero);
void chunk_dealloc(void *chunk, size_t size, bool unmap);
bool chunk_boot(void);
@ -60,6 +53,5 @@ bool chunk_boot(void);
#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/
#include "jemalloc/internal/chunk_swap.h"
#include "jemalloc/internal/chunk_dss.h"
#include "jemalloc/internal/chunk_mmap.h"

Some files were not shown because too many files have changed in this diff