diff --git a/deps/concurrentqueue/blockingconcurrentqueue.h b/deps/concurrentqueue/blockingconcurrentqueue.h
new file mode 100644
index 000000000..6002cd82f
--- /dev/null
+++ b/deps/concurrentqueue/blockingconcurrentqueue.h
@@ -0,0 +1,583 @@
+// Provides an efficient blocking version of moodycamel::ConcurrentQueue.
+// ©2015-2020 Cameron Desrochers. Distributed under the terms of the simplified
+// BSD license, available at the top of concurrentqueue.h.
+// Also dual-licensed under the Boost Software License (see LICENSE.md)
+// Uses Jeff Preshing's semaphore implementation (under the terms of its
+// separate zlib license, see lightweightsemaphore.h).
+
+#pragma once
+
+#include "concurrentqueue.h"
+#include "lightweightsemaphore.h"
+
+#include <type_traits>
+#include <cerrno>
+#include <memory>
+#include <chrono>
+#include <ctime>
+
+namespace moodycamel
+{
+// This is a blocking version of the queue. It has an almost identical interface to
+// the normal non-blocking version, with the addition of various wait_dequeue() methods
+// and the removal of producer-specific dequeue methods.
+template<typename T, typename Traits = ConcurrentQueueDefaultTraits>
+class BlockingConcurrentQueue
+{
+private:
+	typedef ::moodycamel::ConcurrentQueue<T, Traits> ConcurrentQueue;
+	typedef ::moodycamel::LightweightSemaphore LightweightSemaphore;
+
+public:
+	typedef typename ConcurrentQueue::producer_token_t producer_token_t;
+	typedef typename ConcurrentQueue::consumer_token_t consumer_token_t;
+
+	typedef typename ConcurrentQueue::index_t index_t;
+	typedef typename ConcurrentQueue::size_t size_t;
+	typedef typename std::make_signed<size_t>::type ssize_t;
+
+	static const size_t BLOCK_SIZE = ConcurrentQueue::BLOCK_SIZE;
+	static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = ConcurrentQueue::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD;
+	static const size_t EXPLICIT_INITIAL_INDEX_SIZE = ConcurrentQueue::EXPLICIT_INITIAL_INDEX_SIZE;
+	static const size_t IMPLICIT_INITIAL_INDEX_SIZE = ConcurrentQueue::IMPLICIT_INITIAL_INDEX_SIZE;
+	static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = ConcurrentQueue::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE;
+	static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE = ConcurrentQueue::EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE;
+	static const size_t MAX_SUBQUEUE_SIZE = ConcurrentQueue::MAX_SUBQUEUE_SIZE;
+
+public:
+	// Creates a queue with at least `capacity` element slots; note that the
+	// actual number of elements that can be inserted without additional memory
+	// allocation depends on the number of producers and the block size (e.g. if
+	// the block size is equal to `capacity`, only a single block will be allocated
+	// up-front, which means only a single producer will be able to enqueue elements
+	// without an extra allocation -- blocks aren't shared between producers).
+	// This method is not thread safe -- it is up to the user to ensure that the
+	// queue is fully constructed before it starts being used by other threads (this
+	// includes making the memory effects of construction visible, possibly with a
+	// memory barrier).
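A minimal construction sketch of the capacity semantics described above, assuming the public moodycamel API; the values are illustrative only.

#include "blockingconcurrentqueue.h"

int main()
{
	// About 1024 element slots up-front; how many fit without a further
	// allocation depends on how many producers share the blocks.
	moodycamel::BlockingConcurrentQueue<int> q(1024);

	// At least 1024 slots available at any time, sized for at most
	// 2 explicit and 8 implicit producers.
	moodycamel::BlockingConcurrentQueue<int> sized(1024, 2, 8);

	q.enqueue(42);
	int item;
	q.wait_dequeue(item);   // blocks until the 42 is available
	(void)sized;
}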
+ explicit BlockingConcurrentQueue(size_t capacity = 6 * BLOCK_SIZE) + : inner(capacity), sema(create(0, (int)Traits::MAX_SEMA_SPINS), &BlockingConcurrentQueue::template destroy) + { + assert(reinterpret_cast((BlockingConcurrentQueue*)1) == &((BlockingConcurrentQueue*)1)->inner && "BlockingConcurrentQueue must have ConcurrentQueue as its first member"); + if (!sema) { + MOODYCAMEL_THROW(std::bad_alloc()); + } + } + + BlockingConcurrentQueue(size_t minCapacity, size_t maxExplicitProducers, size_t maxImplicitProducers) + : inner(minCapacity, maxExplicitProducers, maxImplicitProducers), sema(create(0, (int)Traits::MAX_SEMA_SPINS), &BlockingConcurrentQueue::template destroy) + { + assert(reinterpret_cast((BlockingConcurrentQueue*)1) == &((BlockingConcurrentQueue*)1)->inner && "BlockingConcurrentQueue must have ConcurrentQueue as its first member"); + if (!sema) { + MOODYCAMEL_THROW(std::bad_alloc()); + } + } + + // Disable copying and copy assignment + BlockingConcurrentQueue(BlockingConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION; + BlockingConcurrentQueue& operator=(BlockingConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION; + + // Moving is supported, but note that it is *not* a thread-safe operation. + // Nobody can use the queue while it's being moved, and the memory effects + // of that move must be propagated to other threads before they can use it. + // Note: When a queue is moved, its tokens are still valid but can only be + // used with the destination queue (i.e. semantically they are moved along + // with the queue itself). + BlockingConcurrentQueue(BlockingConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT + : inner(std::move(other.inner)), sema(std::move(other.sema)) + { } + + inline BlockingConcurrentQueue& operator=(BlockingConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT + { + return swap_internal(other); + } + + // Swaps this queue's state with the other's. Not thread-safe. + // Swapping two queues does not invalidate their tokens, however + // the tokens that were created for one queue must be used with + // only the swapped queue (i.e. the tokens are tied to the + // queue's movable state, not the object itself). + inline void swap(BlockingConcurrentQueue& other) MOODYCAMEL_NOEXCEPT + { + swap_internal(other); + } + +private: + BlockingConcurrentQueue& swap_internal(BlockingConcurrentQueue& other) + { + if (this == &other) { + return *this; + } + + inner.swap(other.inner); + sema.swap(other.sema); + return *this; + } + +public: + // Enqueues a single item (by copying it). + // Allocates memory if required. Only fails if memory allocation fails (or implicit + // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0, + // or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. + inline bool enqueue(T const& item) + { + if ((details::likely)(inner.enqueue(item))) { + sema->signal(); + return true; + } + return false; + } + + // Enqueues a single item (by moving it, if possible). + // Allocates memory if required. Only fails if memory allocation fails (or implicit + // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0, + // or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. + inline bool enqueue(T&& item) + { + if ((details::likely)(inner.enqueue(std::move(item)))) { + sema->signal(); + return true; + } + return false; + } + + // Enqueues a single item (by copying it) using an explicit producer token. + // Allocates memory if required. 
Only fails if memory allocation fails (or + // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. + inline bool enqueue(producer_token_t const& token, T const& item) + { + if ((details::likely)(inner.enqueue(token, item))) { + sema->signal(); + return true; + } + return false; + } + + // Enqueues a single item (by moving it, if possible) using an explicit producer token. + // Allocates memory if required. Only fails if memory allocation fails (or + // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. + inline bool enqueue(producer_token_t const& token, T&& item) + { + if ((details::likely)(inner.enqueue(token, std::move(item)))) { + sema->signal(); + return true; + } + return false; + } + + // Enqueues several items. + // Allocates memory if required. Only fails if memory allocation fails (or + // implicit production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE + // is 0, or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Note: Use std::make_move_iterator if the elements should be moved instead of copied. + // Thread-safe. + template + inline bool enqueue_bulk(It itemFirst, size_t count) + { + if ((details::likely)(inner.enqueue_bulk(std::forward(itemFirst), count))) { + sema->signal((LightweightSemaphore::ssize_t)(ssize_t)count); + return true; + } + return false; + } + + // Enqueues several items using an explicit producer token. + // Allocates memory if required. Only fails if memory allocation fails + // (or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Note: Use std::make_move_iterator if the elements should be moved + // instead of copied. + // Thread-safe. + template + inline bool enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count) + { + if ((details::likely)(inner.enqueue_bulk(token, std::forward(itemFirst), count))) { + sema->signal((LightweightSemaphore::ssize_t)(ssize_t)count); + return true; + } + return false; + } + + // Enqueues a single item (by copying it). + // Does not allocate memory. Fails if not enough room to enqueue (or implicit + // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE + // is 0). + // Thread-safe. + inline bool try_enqueue(T const& item) + { + if (inner.try_enqueue(item)) { + sema->signal(); + return true; + } + return false; + } + + // Enqueues a single item (by moving it, if possible). + // Does not allocate memory (except for one-time implicit producer). + // Fails if not enough room to enqueue (or implicit production is + // disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0). + // Thread-safe. + inline bool try_enqueue(T&& item) + { + if (inner.try_enqueue(std::move(item))) { + sema->signal(); + return true; + } + return false; + } + + // Enqueues a single item (by copying it) using an explicit producer token. + // Does not allocate memory. Fails if not enough room to enqueue. + // Thread-safe. + inline bool try_enqueue(producer_token_t const& token, T const& item) + { + if (inner.try_enqueue(token, item)) { + sema->signal(); + return true; + } + return false; + } + + // Enqueues a single item (by moving it, if possible) using an explicit producer token. + // Does not allocate memory. Fails if not enough room to enqueue. + // Thread-safe. + inline bool try_enqueue(producer_token_t const& token, T&& item) + { + if (inner.try_enqueue(token, std::move(item))) { + sema->signal(); + return true; + } + return false; + } + + // Enqueues several items. 
+ // Does not allocate memory (except for one-time implicit producer). + // Fails if not enough room to enqueue (or implicit production is + // disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0). + // Note: Use std::make_move_iterator if the elements should be moved + // instead of copied. + // Thread-safe. + template + inline bool try_enqueue_bulk(It itemFirst, size_t count) + { + if (inner.try_enqueue_bulk(std::forward(itemFirst), count)) { + sema->signal((LightweightSemaphore::ssize_t)(ssize_t)count); + return true; + } + return false; + } + + // Enqueues several items using an explicit producer token. + // Does not allocate memory. Fails if not enough room to enqueue. + // Note: Use std::make_move_iterator if the elements should be moved + // instead of copied. + // Thread-safe. + template + inline bool try_enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count) + { + if (inner.try_enqueue_bulk(token, std::forward(itemFirst), count)) { + sema->signal((LightweightSemaphore::ssize_t)(ssize_t)count); + return true; + } + return false; + } + + + // Attempts to dequeue from the queue. + // Returns false if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. + template + inline bool try_dequeue(U& item) + { + if (sema->tryWait()) { + while (!inner.try_dequeue(item)) { + continue; + } + return true; + } + return false; + } + + // Attempts to dequeue from the queue using an explicit consumer token. + // Returns false if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. + template + inline bool try_dequeue(consumer_token_t& token, U& item) + { + if (sema->tryWait()) { + while (!inner.try_dequeue(token, item)) { + continue; + } + return true; + } + return false; + } + + // Attempts to dequeue several elements from the queue. + // Returns the number of items actually dequeued. + // Returns 0 if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. + template + inline size_t try_dequeue_bulk(It itemFirst, size_t max) + { + size_t count = 0; + max = (size_t)sema->tryWaitMany((LightweightSemaphore::ssize_t)(ssize_t)max); + while (count != max) { + count += inner.template try_dequeue_bulk(itemFirst, max - count); + } + return count; + } + + // Attempts to dequeue several elements from the queue using an explicit consumer token. + // Returns the number of items actually dequeued. + // Returns 0 if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. + template + inline size_t try_dequeue_bulk(consumer_token_t& token, It itemFirst, size_t max) + { + size_t count = 0; + max = (size_t)sema->tryWaitMany((LightweightSemaphore::ssize_t)(ssize_t)max); + while (count != max) { + count += inner.template try_dequeue_bulk(token, itemFirst, max - count); + } + return count; + } + + + + // Blocks the current thread until there's something to dequeue, then + // dequeues it. + // Never allocates. Thread-safe. 
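A minimal sketch of the blocking consumer pattern that wait_dequeue() enables, with one producer thread and one consumer thread; the loop counts are illustrative only.

#include "blockingconcurrentqueue.h"
#include <thread>

int main()
{
	moodycamel::BlockingConcurrentQueue<int> q;

	std::thread producer([&] {
		for (int i = 0; i != 100; ++i)
			q.enqueue(i);              // signals the internal semaphore
	});
	std::thread consumer([&] {
		for (int i = 0; i != 100; ++i) {
			int item;
			q.wait_dequeue(item);      // sleeps until an element arrives
		}
	});

	producer.join();
	consumer.join();
}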
+ template + inline void wait_dequeue(U& item) + { + while (!sema->wait()) { + continue; + } + while (!inner.try_dequeue(item)) { + continue; + } + } + + // Blocks the current thread until either there's something to dequeue + // or the timeout (specified in microseconds) expires. Returns false + // without setting `item` if the timeout expires, otherwise assigns + // to `item` and returns true. + // Using a negative timeout indicates an indefinite timeout, + // and is thus functionally equivalent to calling wait_dequeue. + // Never allocates. Thread-safe. + template + inline bool wait_dequeue_timed(U& item, std::int64_t timeout_usecs) + { + if (!sema->wait(timeout_usecs)) { + return false; + } + while (!inner.try_dequeue(item)) { + continue; + } + return true; + } + + // Blocks the current thread until either there's something to dequeue + // or the timeout expires. Returns false without setting `item` if the + // timeout expires, otherwise assigns to `item` and returns true. + // Never allocates. Thread-safe. + template + inline bool wait_dequeue_timed(U& item, std::chrono::duration const& timeout) + { + return wait_dequeue_timed(item, std::chrono::duration_cast(timeout).count()); + } + + // Blocks the current thread until there's something to dequeue, then + // dequeues it using an explicit consumer token. + // Never allocates. Thread-safe. + template + inline void wait_dequeue(consumer_token_t& token, U& item) + { + while (!sema->wait()) { + continue; + } + while (!inner.try_dequeue(token, item)) { + continue; + } + } + + // Blocks the current thread until either there's something to dequeue + // or the timeout (specified in microseconds) expires. Returns false + // without setting `item` if the timeout expires, otherwise assigns + // to `item` and returns true. + // Using a negative timeout indicates an indefinite timeout, + // and is thus functionally equivalent to calling wait_dequeue. + // Never allocates. Thread-safe. + template + inline bool wait_dequeue_timed(consumer_token_t& token, U& item, std::int64_t timeout_usecs) + { + if (!sema->wait(timeout_usecs)) { + return false; + } + while (!inner.try_dequeue(token, item)) { + continue; + } + return true; + } + + // Blocks the current thread until either there's something to dequeue + // or the timeout expires. Returns false without setting `item` if the + // timeout expires, otherwise assigns to `item` and returns true. + // Never allocates. Thread-safe. + template + inline bool wait_dequeue_timed(consumer_token_t& token, U& item, std::chrono::duration const& timeout) + { + return wait_dequeue_timed(token, item, std::chrono::duration_cast(timeout).count()); + } + + // Attempts to dequeue several elements from the queue. + // Returns the number of items actually dequeued, which will + // always be at least one (this method blocks until the queue + // is non-empty) and at most max. + // Never allocates. Thread-safe. + template + inline size_t wait_dequeue_bulk(It itemFirst, size_t max) + { + size_t count = 0; + max = (size_t)sema->waitMany((LightweightSemaphore::ssize_t)(ssize_t)max); + while (count != max) { + count += inner.template try_dequeue_bulk(itemFirst, max - count); + } + return count; + } + + // Attempts to dequeue several elements from the queue. + // Returns the number of items actually dequeued, which can + // be 0 if the timeout expires while waiting for elements, + // and at most max. + // Using a negative timeout indicates an indefinite timeout, + // and is thus functionally equivalent to calling wait_dequeue_bulk. 
+ // Never allocates. Thread-safe. + template + inline size_t wait_dequeue_bulk_timed(It itemFirst, size_t max, std::int64_t timeout_usecs) + { + size_t count = 0; + max = (size_t)sema->waitMany((LightweightSemaphore::ssize_t)(ssize_t)max, timeout_usecs); + while (count != max) { + count += inner.template try_dequeue_bulk(itemFirst, max - count); + } + return count; + } + + // Attempts to dequeue several elements from the queue. + // Returns the number of items actually dequeued, which can + // be 0 if the timeout expires while waiting for elements, + // and at most max. + // Never allocates. Thread-safe. + template + inline size_t wait_dequeue_bulk_timed(It itemFirst, size_t max, std::chrono::duration const& timeout) + { + return wait_dequeue_bulk_timed(itemFirst, max, std::chrono::duration_cast(timeout).count()); + } + + // Attempts to dequeue several elements from the queue using an explicit consumer token. + // Returns the number of items actually dequeued, which will + // always be at least one (this method blocks until the queue + // is non-empty) and at most max. + // Never allocates. Thread-safe. + template + inline size_t wait_dequeue_bulk(consumer_token_t& token, It itemFirst, size_t max) + { + size_t count = 0; + max = (size_t)sema->waitMany((LightweightSemaphore::ssize_t)(ssize_t)max); + while (count != max) { + count += inner.template try_dequeue_bulk(token, itemFirst, max - count); + } + return count; + } + + // Attempts to dequeue several elements from the queue using an explicit consumer token. + // Returns the number of items actually dequeued, which can + // be 0 if the timeout expires while waiting for elements, + // and at most max. + // Using a negative timeout indicates an indefinite timeout, + // and is thus functionally equivalent to calling wait_dequeue_bulk. + // Never allocates. Thread-safe. + template + inline size_t wait_dequeue_bulk_timed(consumer_token_t& token, It itemFirst, size_t max, std::int64_t timeout_usecs) + { + size_t count = 0; + max = (size_t)sema->waitMany((LightweightSemaphore::ssize_t)(ssize_t)max, timeout_usecs); + while (count != max) { + count += inner.template try_dequeue_bulk(token, itemFirst, max - count); + } + return count; + } + + // Attempts to dequeue several elements from the queue using an explicit consumer token. + // Returns the number of items actually dequeued, which can + // be 0 if the timeout expires while waiting for elements, + // and at most max. + // Never allocates. Thread-safe. + template + inline size_t wait_dequeue_bulk_timed(consumer_token_t& token, It itemFirst, size_t max, std::chrono::duration const& timeout) + { + return wait_dequeue_bulk_timed(token, itemFirst, max, std::chrono::duration_cast(timeout).count()); + } + + + // Returns an estimate of the total number of elements currently in the queue. This + // estimate is only accurate if the queue has completely stabilized before it is called + // (i.e. all enqueue and dequeue operations have completed and their memory effects are + // visible on the calling thread, and no further operations start while this method is + // being called). + // Thread-safe. + inline size_t size_approx() const + { + return (size_t)sema->availableApprox(); + } + + + // Returns true if the underlying atomic variables used by + // the queue are lock-free (they should be on most platforms). + // Thread-safe. 
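A sketch of the timed and bulk dequeue variants documented above; the timeout values and array size are arbitrary.

#include "blockingconcurrentqueue.h"
#include <chrono>
#include <cstddef>

void drain(moodycamel::BlockingConcurrentQueue<int>& q)
{
	int item;
	// Give up after 100 ms; a negative timeout would wait indefinitely.
	if (q.wait_dequeue_timed(item, std::chrono::milliseconds(100))) {
		// got one element
	}

	// Dequeue up to 32 elements, waiting at most 5 ms for elements to
	// appear; returns how many were actually dequeued (0 on timeout).
	int items[32];
	std::size_t n = q.wait_dequeue_bulk_timed(items, 32, std::chrono::milliseconds(5));
	(void)n;
}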
+ static bool is_lock_free() + { + return ConcurrentQueue::is_lock_free(); + } + + +private: + template + static inline U* create(A1&& a1, A2&& a2) + { + void* p = (Traits::malloc)(sizeof(U)); + return p != nullptr ? new (p) U(std::forward(a1), std::forward(a2)) : nullptr; + } + + template + static inline void destroy(U* p) + { + if (p != nullptr) { + p->~U(); + } + (Traits::free)(p); + } + +private: + ConcurrentQueue inner; + std::unique_ptr sema; +}; + + +template +inline void swap(BlockingConcurrentQueue& a, BlockingConcurrentQueue& b) MOODYCAMEL_NOEXCEPT +{ + a.swap(b); +} + +} // end namespace moodycamel + diff --git a/deps/concurrentqueue/concurrentqueue.h b/deps/concurrentqueue/concurrentqueue.h new file mode 100644 index 000000000..121383eab --- /dev/null +++ b/deps/concurrentqueue/concurrentqueue.h @@ -0,0 +1,3743 @@ +// Provides a C++11 implementation of a multi-producer, multi-consumer lock-free queue. +// An overview, including benchmark results, is provided here: +// http://moodycamel.com/blog/2014/a-fast-general-purpose-lock-free-queue-for-c++ +// The full design is also described in excruciating detail at: +// http://moodycamel.com/blog/2014/detailed-design-of-a-lock-free-queue + +// Simplified BSD license: +// Copyright (c) 2013-2020, Cameron Desrochers. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, this list of +// conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, this list of +// conditions and the following disclaimer in the documentation and/or other materials +// provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL +// THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +// OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR +// TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +// Also dual-licensed under the Boost Software License (see LICENSE.md) + +#pragma once + +#if defined(__GNUC__) && !defined(__INTEL_COMPILER) +// Disable -Wconversion warnings (spuriously triggered when Traits::size_t and +// Traits::index_t are set to < 32 bits, causing integer promotion, causing warnings +// upon assigning any computed values) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion" + +#ifdef MCDBGQ_USE_RELACY +#pragma GCC diagnostic ignored "-Wint-to-pointer-cast" +#endif +#endif + +#if defined(_MSC_VER) && (!defined(_HAS_CXX17) || !_HAS_CXX17) +// VS2019 with /W4 warns about constant conditional expressions but unless /std=c++17 or higher +// does not support `if constexpr`, so we have no choice but to simply disable the warning +#pragma warning(push) +#pragma warning(disable: 4127) // conditional expression is constant +#endif + +#if defined(__APPLE__) +#include "TargetConditionals.h" +#endif + +#ifdef MCDBGQ_USE_RELACY +#include "relacy/relacy_std.hpp" +#include "relacy_shims.h" +// We only use malloc/free anyway, and the delete macro messes up `= delete` method declarations. +// We'll override the default trait malloc ourselves without a macro. +#undef new +#undef delete +#undef malloc +#undef free +#else +#include // Requires C++11. Sorry VS2010. +#include +#endif +#include // for max_align_t +#include +#include +#include +#include +#include +#include +#include // for CHAR_BIT +#include +#include // partly for __WINPTHREADS_VERSION if on MinGW-w64 w/ POSIX threading + +// Platform-specific definitions of a numeric thread ID type and an invalid value +namespace moodycamel { namespace details { + template struct thread_id_converter { + typedef thread_id_t thread_id_numeric_size_t; + typedef thread_id_t thread_id_hash_t; + static thread_id_hash_t prehash(thread_id_t const& x) { return x; } + }; +} } +#if defined(MCDBGQ_USE_RELACY) +namespace moodycamel { namespace details { + typedef std::uint32_t thread_id_t; + static const thread_id_t invalid_thread_id = 0xFFFFFFFFU; + static const thread_id_t invalid_thread_id2 = 0xFFFFFFFEU; + static inline thread_id_t thread_id() { return rl::thread_index(); } +} } +#elif defined(_WIN32) || defined(__WINDOWS__) || defined(__WIN32__) +// No sense pulling in windows.h in a header, we'll manually declare the function +// we use and rely on backwards-compatibility for this not to break +extern "C" __declspec(dllimport) unsigned long __stdcall GetCurrentThreadId(void); +namespace moodycamel { namespace details { + static_assert(sizeof(unsigned long) == sizeof(std::uint32_t), "Expected size of unsigned long to be 32 bits on Windows"); + typedef std::uint32_t thread_id_t; + static const thread_id_t invalid_thread_id = 0; // See http://blogs.msdn.com/b/oldnewthing/archive/2004/02/23/78395.aspx + static const thread_id_t invalid_thread_id2 = 0xFFFFFFFFU; // Not technically guaranteed to be invalid, but is never used in practice. Note that all Win32 thread IDs are presently multiples of 4. 
+ static inline thread_id_t thread_id() { return static_cast(::GetCurrentThreadId()); } +} } +#elif defined(__arm__) || defined(_M_ARM) || defined(__aarch64__) || (defined(__APPLE__) && TARGET_OS_IPHONE) || defined(MOODYCAMEL_NO_THREAD_LOCAL) +namespace moodycamel { namespace details { + static_assert(sizeof(std::thread::id) == 4 || sizeof(std::thread::id) == 8, "std::thread::id is expected to be either 4 or 8 bytes"); + + typedef std::thread::id thread_id_t; + static const thread_id_t invalid_thread_id; // Default ctor creates invalid ID + + // Note we don't define a invalid_thread_id2 since std::thread::id doesn't have one; it's + // only used if MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED is defined anyway, which it won't + // be. + static inline thread_id_t thread_id() { return std::this_thread::get_id(); } + + template struct thread_id_size { }; + template<> struct thread_id_size<4> { typedef std::uint32_t numeric_t; }; + template<> struct thread_id_size<8> { typedef std::uint64_t numeric_t; }; + + template<> struct thread_id_converter { + typedef thread_id_size::numeric_t thread_id_numeric_size_t; +#ifndef __APPLE__ + typedef std::size_t thread_id_hash_t; +#else + typedef thread_id_numeric_size_t thread_id_hash_t; +#endif + + static thread_id_hash_t prehash(thread_id_t const& x) + { +#ifndef __APPLE__ + return std::hash()(x); +#else + return *reinterpret_cast(&x); +#endif + } + }; +} } +#else +// Use a nice trick from this answer: http://stackoverflow.com/a/8438730/21475 +// In order to get a numeric thread ID in a platform-independent way, we use a thread-local +// static variable's address as a thread identifier :-) +#if defined(__GNUC__) || defined(__INTEL_COMPILER) +#define MOODYCAMEL_THREADLOCAL __thread +#elif defined(_MSC_VER) +#define MOODYCAMEL_THREADLOCAL __declspec(thread) +#else +// Assume C++11 compliant compiler +#define MOODYCAMEL_THREADLOCAL thread_local +#endif +namespace moodycamel { namespace details { + typedef std::uintptr_t thread_id_t; + static const thread_id_t invalid_thread_id = 0; // Address can't be nullptr + static const thread_id_t invalid_thread_id2 = 1; // Member accesses off a null pointer are also generally invalid. Plus it's not aligned. + inline thread_id_t thread_id() { static MOODYCAMEL_THREADLOCAL int x; return reinterpret_cast(&x); } +} } +#endif + +// Constexpr if +#ifndef MOODYCAMEL_CONSTEXPR_IF +#if (defined(_MSC_VER) && defined(_HAS_CXX17) && _HAS_CXX17) || __cplusplus > 201402L +#define MOODYCAMEL_CONSTEXPR_IF if constexpr +#define MOODYCAMEL_MAYBE_UNUSED [[maybe_unused]] +#else +#define MOODYCAMEL_CONSTEXPR_IF if +#define MOODYCAMEL_MAYBE_UNUSED +#endif +#endif + +// Exceptions +#ifndef MOODYCAMEL_EXCEPTIONS_ENABLED +#if (defined(_MSC_VER) && defined(_CPPUNWIND)) || (defined(__GNUC__) && defined(__EXCEPTIONS)) || (!defined(_MSC_VER) && !defined(__GNUC__)) +#define MOODYCAMEL_EXCEPTIONS_ENABLED +#endif +#endif +#ifdef MOODYCAMEL_EXCEPTIONS_ENABLED +#define MOODYCAMEL_TRY try +#define MOODYCAMEL_CATCH(...) catch(__VA_ARGS__) +#define MOODYCAMEL_RETHROW throw +#define MOODYCAMEL_THROW(expr) throw (expr) +#else +#define MOODYCAMEL_TRY MOODYCAMEL_CONSTEXPR_IF (true) +#define MOODYCAMEL_CATCH(...) 
else MOODYCAMEL_CONSTEXPR_IF (false) +#define MOODYCAMEL_RETHROW +#define MOODYCAMEL_THROW(expr) +#endif + +#ifndef MOODYCAMEL_NOEXCEPT +#if !defined(MOODYCAMEL_EXCEPTIONS_ENABLED) +#define MOODYCAMEL_NOEXCEPT +#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) true +#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) true +#elif defined(_MSC_VER) && defined(_NOEXCEPT) && _MSC_VER < 1800 +// VS2012's std::is_nothrow_[move_]constructible is broken and returns true when it shouldn't :-( +// We have to assume *all* non-trivial constructors may throw on VS2012! +#define MOODYCAMEL_NOEXCEPT _NOEXCEPT +#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) (std::is_rvalue_reference::value && std::is_move_constructible::value ? std::is_trivially_move_constructible::value : std::is_trivially_copy_constructible::value) +#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) ((std::is_rvalue_reference::value && std::is_move_assignable::value ? std::is_trivially_move_assignable::value || std::is_nothrow_move_assignable::value : std::is_trivially_copy_assignable::value || std::is_nothrow_copy_assignable::value) && MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr)) +#elif defined(_MSC_VER) && defined(_NOEXCEPT) && _MSC_VER < 1900 +#define MOODYCAMEL_NOEXCEPT _NOEXCEPT +#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) (std::is_rvalue_reference::value && std::is_move_constructible::value ? std::is_trivially_move_constructible::value || std::is_nothrow_move_constructible::value : std::is_trivially_copy_constructible::value || std::is_nothrow_copy_constructible::value) +#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) ((std::is_rvalue_reference::value && std::is_move_assignable::value ? std::is_trivially_move_assignable::value || std::is_nothrow_move_assignable::value : std::is_trivially_copy_assignable::value || std::is_nothrow_copy_assignable::value) && MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr)) +#else +#define MOODYCAMEL_NOEXCEPT noexcept +#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) noexcept(expr) +#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) noexcept(expr) +#endif +#endif + +#ifndef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED +#ifdef MCDBGQ_USE_RELACY +#define MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED +#else +// VS2013 doesn't support `thread_local`, and MinGW-w64 w/ POSIX threading has a crippling bug: http://sourceforge.net/p/mingw-w64/bugs/445 +// g++ <=4.7 doesn't support thread_local either. +// Finally, iOS/ARM doesn't have support for it either, and g++/ARM allows it to compile but it's unconfirmed to actually work +#if (!defined(_MSC_VER) || _MSC_VER >= 1900) && (!defined(__MINGW32__) && !defined(__MINGW64__) || !defined(__WINPTHREADS_VERSION)) && (!defined(__GNUC__) || __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)) && (!defined(__APPLE__) || !TARGET_OS_IPHONE) && !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) +// Assume `thread_local` is fully supported in all other C++11 compilers/platforms +//#define MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED // always disabled for now since several users report having problems with it on +#endif +#endif +#endif + +// VS2012 doesn't support deleted functions. +// In this case, we declare the function normally but don't define it. A link error will be generated if the function is called. 
+#ifndef MOODYCAMEL_DELETE_FUNCTION +#if defined(_MSC_VER) && _MSC_VER < 1800 +#define MOODYCAMEL_DELETE_FUNCTION +#else +#define MOODYCAMEL_DELETE_FUNCTION = delete +#endif +#endif + +namespace moodycamel { namespace details { +#ifndef MOODYCAMEL_ALIGNAS +// VS2013 doesn't support alignas or alignof, and align() requires a constant literal +#if defined(_MSC_VER) && _MSC_VER <= 1800 +#define MOODYCAMEL_ALIGNAS(alignment) __declspec(align(alignment)) +#define MOODYCAMEL_ALIGNOF(obj) __alignof(obj) +#define MOODYCAMEL_ALIGNED_TYPE_LIKE(T, obj) typename details::Vs2013Aligned::value, T>::type + template struct Vs2013Aligned { }; // default, unsupported alignment + template struct Vs2013Aligned<1, T> { typedef __declspec(align(1)) T type; }; + template struct Vs2013Aligned<2, T> { typedef __declspec(align(2)) T type; }; + template struct Vs2013Aligned<4, T> { typedef __declspec(align(4)) T type; }; + template struct Vs2013Aligned<8, T> { typedef __declspec(align(8)) T type; }; + template struct Vs2013Aligned<16, T> { typedef __declspec(align(16)) T type; }; + template struct Vs2013Aligned<32, T> { typedef __declspec(align(32)) T type; }; + template struct Vs2013Aligned<64, T> { typedef __declspec(align(64)) T type; }; + template struct Vs2013Aligned<128, T> { typedef __declspec(align(128)) T type; }; + template struct Vs2013Aligned<256, T> { typedef __declspec(align(256)) T type; }; +#else + template struct identity { typedef T type; }; +#define MOODYCAMEL_ALIGNAS(alignment) alignas(alignment) +#define MOODYCAMEL_ALIGNOF(obj) alignof(obj) +#define MOODYCAMEL_ALIGNED_TYPE_LIKE(T, obj) alignas(alignof(obj)) typename details::identity::type +#endif +#endif +} } + + +// TSAN can false report races in lock-free code. To enable TSAN to be used from projects that use this one, +// we can apply per-function compile-time suppression. +// See https://clang.llvm.org/docs/ThreadSanitizer.html#has-feature-thread-sanitizer +#define MOODYCAMEL_NO_TSAN +#if defined(__has_feature) + #if __has_feature(thread_sanitizer) + #undef MOODYCAMEL_NO_TSAN + #define MOODYCAMEL_NO_TSAN __attribute__((no_sanitize("thread"))) + #endif // TSAN +#endif // TSAN + +// Compiler-specific likely/unlikely hints +namespace moodycamel { namespace details { +#if defined(__GNUC__) + static inline bool (likely)(bool x) { return __builtin_expect((x), true); } + static inline bool (unlikely)(bool x) { return __builtin_expect((x), false); } +#else + static inline bool (likely)(bool x) { return x; } + static inline bool (unlikely)(bool x) { return x; } +#endif +} } + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG +#include "internal/concurrentqueue_internal_debug.h" +#endif + +namespace moodycamel { +namespace details { + template + struct const_numeric_max { + static_assert(std::is_integral::value, "const_numeric_max can only be used with integers"); + static const T value = std::numeric_limits::is_signed + ? (static_cast(1) << (sizeof(T) * CHAR_BIT - 1)) - static_cast(1) + : static_cast(-1); + }; + +#if defined(__GLIBCXX__) + typedef ::max_align_t std_max_align_t; // libstdc++ forgot to add it to std:: for a while +#else + typedef std::max_align_t std_max_align_t; // Others (e.g. MSVC) insist it can *only* be accessed via std:: +#endif + + // Some platforms have incorrectly set max_align_t to a type with <8 bytes alignment even while supporting + // 8-byte aligned scalar values (*cough* 32-bit iOS). Work around this with our own union. See issue #64. 
+ typedef union { + std_max_align_t x; + long long y; + void* z; + } max_align_t; +} + +// Default traits for the ConcurrentQueue. To change some of the +// traits without re-implementing all of them, inherit from this +// struct and shadow the declarations you wish to be different; +// since the traits are used as a template type parameter, the +// shadowed declarations will be used where defined, and the defaults +// otherwise. +struct ConcurrentQueueDefaultTraits +{ + // General-purpose size type. std::size_t is strongly recommended. + typedef std::size_t size_t; + + // The type used for the enqueue and dequeue indices. Must be at least as + // large as size_t. Should be significantly larger than the number of elements + // you expect to hold at once, especially if you have a high turnover rate; + // for example, on 32-bit x86, if you expect to have over a hundred million + // elements or pump several million elements through your queue in a very + // short space of time, using a 32-bit type *may* trigger a race condition. + // A 64-bit int type is recommended in that case, and in practice will + // prevent a race condition no matter the usage of the queue. Note that + // whether the queue is lock-free with a 64-int type depends on the whether + // std::atomic is lock-free, which is platform-specific. + typedef std::size_t index_t; + + // Internally, all elements are enqueued and dequeued from multi-element + // blocks; this is the smallest controllable unit. If you expect few elements + // but many producers, a smaller block size should be favoured. For few producers + // and/or many elements, a larger block size is preferred. A sane default + // is provided. Must be a power of 2. + static const size_t BLOCK_SIZE = 32; + + // For explicit producers (i.e. when using a producer token), the block is + // checked for being empty by iterating through a list of flags, one per element. + // For large block sizes, this is too inefficient, and switching to an atomic + // counter-based approach is faster. The switch is made for block sizes strictly + // larger than this threshold. + static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = 32; + + // How many full blocks can be expected for a single explicit producer? This should + // reflect that number's maximum for optimal performance. Must be a power of 2. + static const size_t EXPLICIT_INITIAL_INDEX_SIZE = 32; + + // How many full blocks can be expected for a single implicit producer? This should + // reflect that number's maximum for optimal performance. Must be a power of 2. + static const size_t IMPLICIT_INITIAL_INDEX_SIZE = 32; + + // The initial size of the hash table mapping thread IDs to implicit producers. + // Note that the hash is resized every time it becomes half full. + // Must be a power of two, and either 0 or at least 1. If 0, implicit production + // (using the enqueue methods without an explicit producer token) is disabled. + static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = 32; + + // Controls the number of items that an explicit consumer (i.e. one with a token) + // must consume before it causes all consumers to rotate and move on to the next + // internal queue. + static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE = 256; + + // The maximum number of elements (inclusive) that can be enqueued to a sub-queue. + // Enqueue operations that would cause this limit to be surpassed will fail. Note + // that this limit is enforced at the block level (for performance reasons), i.e. 
+ // it's rounded up to the nearest block size. + static const size_t MAX_SUBQUEUE_SIZE = details::const_numeric_max::value; + + // The number of times to spin before sleeping when waiting on a semaphore. + // Recommended values are on the order of 1000-10000 unless the number of + // consumer threads exceeds the number of idle cores (in which case try 0-100). + // Only affects instances of the BlockingConcurrentQueue. + static const int MAX_SEMA_SPINS = 10000; + + +#ifndef MCDBGQ_USE_RELACY + // Memory allocation can be customized if needed. + // malloc should return nullptr on failure, and handle alignment like std::malloc. +#if defined(malloc) || defined(free) + // Gah, this is 2015, stop defining macros that break standard code already! + // Work around malloc/free being special macros: + static inline void* WORKAROUND_malloc(size_t size) { return malloc(size); } + static inline void WORKAROUND_free(void* ptr) { return free(ptr); } + static inline void* (malloc)(size_t size) { return WORKAROUND_malloc(size); } + static inline void (free)(void* ptr) { return WORKAROUND_free(ptr); } +#else + static inline void* malloc(size_t size) { return std::malloc(size); } + static inline void free(void* ptr) { return std::free(ptr); } +#endif +#else + // Debug versions when running under the Relacy race detector (ignore + // these in user code) + static inline void* malloc(size_t size) { return rl::rl_malloc(size, $); } + static inline void free(void* ptr) { return rl::rl_free(ptr, $); } +#endif +}; + + +// When producing or consuming many elements, the most efficient way is to: +// 1) Use one of the bulk-operation methods of the queue with a token +// 2) Failing that, use the bulk-operation methods without a token +// 3) Failing that, create a token and use that with the single-item methods +// 4) Failing that, use the single-parameter methods of the queue +// Having said that, don't create tokens willy-nilly -- ideally there should be +// a maximum of one token per thread (of each kind). 
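An illustrative sketch of both points above: shadowing a few members of ConcurrentQueueDefaultTraits, and preferring token-based bulk operations. The trait values chosen here are arbitrary.

#include "concurrentqueue.h"
#include <cstddef>

struct BigBlockTraits : public moodycamel::ConcurrentQueueDefaultTraits
{
	// Larger blocks suit few producers pushing many elements.
	static const size_t BLOCK_SIZE = 256;
	// Spin less before sleeping (only used by BlockingConcurrentQueue).
	static const int MAX_SEMA_SPINS = 100;
};

int main()
{
	moodycamel::ConcurrentQueue<int, BigBlockTraits> q;

	// At most one token of each kind per thread, reused across calls.
	moodycamel::ProducerToken ptok(q);
	moodycamel::ConsumerToken ctok(q);

	int in[64] = {}, out[64];
	q.enqueue_bulk(ptok, in, 64);                    // preferred: bulk + token
	std::size_t n = q.try_dequeue_bulk(ctok, out, 64);
	(void)n;
}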
+struct ProducerToken; +struct ConsumerToken; + +template class ConcurrentQueue; +template class BlockingConcurrentQueue; +class ConcurrentQueueTests; + + +namespace details +{ + struct ConcurrentQueueProducerTypelessBase + { + ConcurrentQueueProducerTypelessBase* next; + std::atomic inactive; + ProducerToken* token; + + ConcurrentQueueProducerTypelessBase() + : next(nullptr), inactive(false), token(nullptr) + { + } + }; + + template struct _hash_32_or_64 { + static inline std::uint32_t hash(std::uint32_t h) + { + // MurmurHash3 finalizer -- see https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp + // Since the thread ID is already unique, all we really want to do is propagate that + // uniqueness evenly across all the bits, so that we can use a subset of the bits while + // reducing collisions significantly + h ^= h >> 16; + h *= 0x85ebca6b; + h ^= h >> 13; + h *= 0xc2b2ae35; + return h ^ (h >> 16); + } + }; + template<> struct _hash_32_or_64<1> { + static inline std::uint64_t hash(std::uint64_t h) + { + h ^= h >> 33; + h *= 0xff51afd7ed558ccd; + h ^= h >> 33; + h *= 0xc4ceb9fe1a85ec53; + return h ^ (h >> 33); + } + }; + template struct hash_32_or_64 : public _hash_32_or_64<(size > 4)> { }; + + static inline size_t hash_thread_id(thread_id_t id) + { + static_assert(sizeof(thread_id_t) <= 8, "Expected a platform where thread IDs are at most 64-bit values"); + return static_cast(hash_32_or_64::thread_id_hash_t)>::hash( + thread_id_converter::prehash(id))); + } + + template + static inline bool circular_less_than(T a, T b) + { +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable: 4554) +#endif + static_assert(std::is_integral::value && !std::numeric_limits::is_signed, "circular_less_than is intended to be used only with unsigned integer types"); + return static_cast(a - b) > static_cast(static_cast(1) << static_cast(sizeof(T) * CHAR_BIT - 1)); +#ifdef _MSC_VER +#pragma warning(pop) +#endif + } + + template + static inline char* align_for(char* ptr) + { + const std::size_t alignment = std::alignment_of::value; + return ptr + (alignment - (reinterpret_cast(ptr) % alignment)) % alignment; + } + + template + static inline T ceil_to_pow_2(T x) + { + static_assert(std::is_integral::value && !std::numeric_limits::is_signed, "ceil_to_pow_2 is intended to be used only with unsigned integer types"); + + // Adapted from http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 + --x; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + for (std::size_t i = 1; i < sizeof(T); i <<= 1) { + x |= x >> (i << 3); + } + ++x; + return x; + } + + template + static inline void swap_relaxed(std::atomic& left, std::atomic& right) + { + T temp = std::move(left.load(std::memory_order_relaxed)); + left.store(std::move(right.load(std::memory_order_relaxed)), std::memory_order_relaxed); + right.store(std::move(temp), std::memory_order_relaxed); + } + + template + static inline T const& nomove(T const& x) + { + return x; + } + + template + struct nomove_if + { + template + static inline T const& eval(T const& x) + { + return x; + } + }; + + template<> + struct nomove_if + { + template + static inline auto eval(U&& x) + -> decltype(std::forward(x)) + { + return std::forward(x); + } + }; + + template + static inline auto deref_noexcept(It& it) MOODYCAMEL_NOEXCEPT -> decltype(*it) + { + return *it; + } + +#if defined(__clang__) || !defined(__GNUC__) || __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8) + template struct is_trivially_destructible : std::is_trivially_destructible { }; 
+#else + template struct is_trivially_destructible : std::has_trivial_destructor { }; +#endif + +#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED +#ifdef MCDBGQ_USE_RELACY + typedef RelacyThreadExitListener ThreadExitListener; + typedef RelacyThreadExitNotifier ThreadExitNotifier; +#else + struct ThreadExitListener + { + typedef void (*callback_t)(void*); + callback_t callback; + void* userData; + + ThreadExitListener* next; // reserved for use by the ThreadExitNotifier + }; + + + class ThreadExitNotifier + { + public: + static void subscribe(ThreadExitListener* listener) + { + auto& tlsInst = instance(); + listener->next = tlsInst.tail; + tlsInst.tail = listener; + } + + static void unsubscribe(ThreadExitListener* listener) + { + auto& tlsInst = instance(); + ThreadExitListener** prev = &tlsInst.tail; + for (auto ptr = tlsInst.tail; ptr != nullptr; ptr = ptr->next) { + if (ptr == listener) { + *prev = ptr->next; + break; + } + prev = &ptr->next; + } + } + + private: + ThreadExitNotifier() : tail(nullptr) { } + ThreadExitNotifier(ThreadExitNotifier const&) MOODYCAMEL_DELETE_FUNCTION; + ThreadExitNotifier& operator=(ThreadExitNotifier const&) MOODYCAMEL_DELETE_FUNCTION; + + ~ThreadExitNotifier() + { + // This thread is about to exit, let everyone know! + assert(this == &instance() && "If this assert fails, you likely have a buggy compiler! Change the preprocessor conditions such that MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED is no longer defined."); + for (auto ptr = tail; ptr != nullptr; ptr = ptr->next) { + ptr->callback(ptr->userData); + } + } + + // Thread-local + static inline ThreadExitNotifier& instance() + { + static thread_local ThreadExitNotifier notifier; + return notifier; + } + + private: + ThreadExitListener* tail; + }; +#endif +#endif + + template struct static_is_lock_free_num { enum { value = 0 }; }; + template<> struct static_is_lock_free_num { enum { value = ATOMIC_CHAR_LOCK_FREE }; }; + template<> struct static_is_lock_free_num { enum { value = ATOMIC_SHORT_LOCK_FREE }; }; + template<> struct static_is_lock_free_num { enum { value = ATOMIC_INT_LOCK_FREE }; }; + template<> struct static_is_lock_free_num { enum { value = ATOMIC_LONG_LOCK_FREE }; }; + template<> struct static_is_lock_free_num { enum { value = ATOMIC_LLONG_LOCK_FREE }; }; + template struct static_is_lock_free : static_is_lock_free_num::type> { }; + template<> struct static_is_lock_free { enum { value = ATOMIC_BOOL_LOCK_FREE }; }; + template struct static_is_lock_free { enum { value = ATOMIC_POINTER_LOCK_FREE }; }; +} + + +struct ProducerToken +{ + template + explicit ProducerToken(ConcurrentQueue& queue); + + template + explicit ProducerToken(BlockingConcurrentQueue& queue); + + ProducerToken(ProducerToken&& other) MOODYCAMEL_NOEXCEPT + : producer(other.producer) + { + other.producer = nullptr; + if (producer != nullptr) { + producer->token = this; + } + } + + inline ProducerToken& operator=(ProducerToken&& other) MOODYCAMEL_NOEXCEPT + { + swap(other); + return *this; + } + + void swap(ProducerToken& other) MOODYCAMEL_NOEXCEPT + { + std::swap(producer, other.producer); + if (producer != nullptr) { + producer->token = this; + } + if (other.producer != nullptr) { + other.producer->token = &other; + } + } + + // A token is always valid unless: + // 1) Memory allocation failed during construction + // 2) It was moved via the move constructor + // (Note: assignment does a swap, leaving both potentially valid) + // 3) The associated queue was destroyed + // Note that if valid() returns true, that only indicates + // 
that the token is valid for use with a specific queue, + // but not which one; that's up to the user to track. + inline bool valid() const { return producer != nullptr; } + + ~ProducerToken() + { + if (producer != nullptr) { + producer->token = nullptr; + producer->inactive.store(true, std::memory_order_release); + } + } + + // Disable copying and assignment + ProducerToken(ProducerToken const&) MOODYCAMEL_DELETE_FUNCTION; + ProducerToken& operator=(ProducerToken const&) MOODYCAMEL_DELETE_FUNCTION; + +private: + template friend class ConcurrentQueue; + friend class ConcurrentQueueTests; + +protected: + details::ConcurrentQueueProducerTypelessBase* producer; +}; + + +struct ConsumerToken +{ + template + explicit ConsumerToken(ConcurrentQueue& q); + + template + explicit ConsumerToken(BlockingConcurrentQueue& q); + + ConsumerToken(ConsumerToken&& other) MOODYCAMEL_NOEXCEPT + : initialOffset(other.initialOffset), lastKnownGlobalOffset(other.lastKnownGlobalOffset), itemsConsumedFromCurrent(other.itemsConsumedFromCurrent), currentProducer(other.currentProducer), desiredProducer(other.desiredProducer) + { + } + + inline ConsumerToken& operator=(ConsumerToken&& other) MOODYCAMEL_NOEXCEPT + { + swap(other); + return *this; + } + + void swap(ConsumerToken& other) MOODYCAMEL_NOEXCEPT + { + std::swap(initialOffset, other.initialOffset); + std::swap(lastKnownGlobalOffset, other.lastKnownGlobalOffset); + std::swap(itemsConsumedFromCurrent, other.itemsConsumedFromCurrent); + std::swap(currentProducer, other.currentProducer); + std::swap(desiredProducer, other.desiredProducer); + } + + // Disable copying and assignment + ConsumerToken(ConsumerToken const&) MOODYCAMEL_DELETE_FUNCTION; + ConsumerToken& operator=(ConsumerToken const&) MOODYCAMEL_DELETE_FUNCTION; + +private: + template friend class ConcurrentQueue; + friend class ConcurrentQueueTests; + +private: // but shared with ConcurrentQueue + std::uint32_t initialOffset; + std::uint32_t lastKnownGlobalOffset; + std::uint32_t itemsConsumedFromCurrent; + details::ConcurrentQueueProducerTypelessBase* currentProducer; + details::ConcurrentQueueProducerTypelessBase* desiredProducer; +}; + +// Need to forward-declare this swap because it's in a namespace. 
+// See http://stackoverflow.com/questions/4492062/why-does-a-c-friend-class-need-a-forward-declaration-only-in-other-namespaces +template +inline void swap(typename ConcurrentQueue::ImplicitProducerKVP& a, typename ConcurrentQueue::ImplicitProducerKVP& b) MOODYCAMEL_NOEXCEPT; + + +template +class ConcurrentQueue +{ +public: + typedef ::moodycamel::ProducerToken producer_token_t; + typedef ::moodycamel::ConsumerToken consumer_token_t; + + typedef typename Traits::index_t index_t; + typedef typename Traits::size_t size_t; + + static const size_t BLOCK_SIZE = static_cast(Traits::BLOCK_SIZE); + static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = static_cast(Traits::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD); + static const size_t EXPLICIT_INITIAL_INDEX_SIZE = static_cast(Traits::EXPLICIT_INITIAL_INDEX_SIZE); + static const size_t IMPLICIT_INITIAL_INDEX_SIZE = static_cast(Traits::IMPLICIT_INITIAL_INDEX_SIZE); + static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = static_cast(Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE); + static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE = static_cast(Traits::EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE); +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable: 4307) // + integral constant overflow (that's what the ternary expression is for!) +#pragma warning(disable: 4309) // static_cast: Truncation of constant value +#endif + static const size_t MAX_SUBQUEUE_SIZE = (details::const_numeric_max::value - static_cast(Traits::MAX_SUBQUEUE_SIZE) < BLOCK_SIZE) ? details::const_numeric_max::value : ((static_cast(Traits::MAX_SUBQUEUE_SIZE) + (BLOCK_SIZE - 1)) / BLOCK_SIZE * BLOCK_SIZE); +#ifdef _MSC_VER +#pragma warning(pop) +#endif + + static_assert(!std::numeric_limits::is_signed && std::is_integral::value, "Traits::size_t must be an unsigned integral type"); + static_assert(!std::numeric_limits::is_signed && std::is_integral::value, "Traits::index_t must be an unsigned integral type"); + static_assert(sizeof(index_t) >= sizeof(size_t), "Traits::index_t must be at least as wide as Traits::size_t"); + static_assert((BLOCK_SIZE > 1) && !(BLOCK_SIZE & (BLOCK_SIZE - 1)), "Traits::BLOCK_SIZE must be a power of 2 (and at least 2)"); + static_assert((EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD > 1) && !(EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD & (EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD - 1)), "Traits::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD must be a power of 2 (and greater than 1)"); + static_assert((EXPLICIT_INITIAL_INDEX_SIZE > 1) && !(EXPLICIT_INITIAL_INDEX_SIZE & (EXPLICIT_INITIAL_INDEX_SIZE - 1)), "Traits::EXPLICIT_INITIAL_INDEX_SIZE must be a power of 2 (and greater than 1)"); + static_assert((IMPLICIT_INITIAL_INDEX_SIZE > 1) && !(IMPLICIT_INITIAL_INDEX_SIZE & (IMPLICIT_INITIAL_INDEX_SIZE - 1)), "Traits::IMPLICIT_INITIAL_INDEX_SIZE must be a power of 2 (and greater than 1)"); + static_assert((INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) || !(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE & (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE - 1)), "Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE must be a power of 2"); + static_assert(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0 || INITIAL_IMPLICIT_PRODUCER_HASH_SIZE >= 1, "Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE must be at least 1 (or 0 to disable implicit enqueueing)"); + +public: + // Creates a queue with at least `capacity` element slots; note that the + // actual number of elements that can be inserted without additional memory + // allocation depends on the number of producers and the block size 
(e.g. if + // the block size is equal to `capacity`, only a single block will be allocated + // up-front, which means only a single producer will be able to enqueue elements + // without an extra allocation -- blocks aren't shared between producers). + // This method is not thread safe -- it is up to the user to ensure that the + // queue is fully constructed before it starts being used by other threads (this + // includes making the memory effects of construction visible, possibly with a + // memory barrier). + explicit ConcurrentQueue(size_t capacity = 6 * BLOCK_SIZE) + : producerListTail(nullptr), + producerCount(0), + initialBlockPoolIndex(0), + nextExplicitConsumerId(0), + globalExplicitConsumerOffset(0) + { + implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed); + populate_initial_implicit_producer_hash(); + populate_initial_block_list(capacity / BLOCK_SIZE + ((capacity & (BLOCK_SIZE - 1)) == 0 ? 0 : 1)); + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + // Track all the producers using a fully-resolved typed list for + // each kind; this makes it possible to debug them starting from + // the root queue object (otherwise wacky casts are needed that + // don't compile in the debugger's expression evaluator). + explicitProducers.store(nullptr, std::memory_order_relaxed); + implicitProducers.store(nullptr, std::memory_order_relaxed); +#endif + } + + // Computes the correct amount of pre-allocated blocks for you based + // on the minimum number of elements you want available at any given + // time, and the maximum concurrent number of each type of producer. + ConcurrentQueue(size_t minCapacity, size_t maxExplicitProducers, size_t maxImplicitProducers) + : producerListTail(nullptr), + producerCount(0), + initialBlockPoolIndex(0), + nextExplicitConsumerId(0), + globalExplicitConsumerOffset(0) + { + implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed); + populate_initial_implicit_producer_hash(); + size_t blocks = (((minCapacity + BLOCK_SIZE - 1) / BLOCK_SIZE) - 1) * (maxExplicitProducers + 1) + 2 * (maxExplicitProducers + maxImplicitProducers); + populate_initial_block_list(blocks); + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + explicitProducers.store(nullptr, std::memory_order_relaxed); + implicitProducers.store(nullptr, std::memory_order_relaxed); +#endif + } + + // Note: The queue should not be accessed concurrently while it's + // being deleted. It's up to the user to synchronize this. + // This method is not thread safe. 
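To make the three-argument constructor's pre-allocation arithmetic concrete, a worked example with the default BLOCK_SIZE of 32; the numbers are chosen arbitrarily.

#include "concurrentqueue.h"

// For ConcurrentQueue<int> q(1000, 2, 4) with BLOCK_SIZE == 32:
//   blocks = (ceil(1000 / 32) - 1) * (2 + 1) + 2 * (2 + 4)
//          = (32 - 1) * 3 + 12
//          = 105 blocks of 32 slots pre-allocated up-front.
moodycamel::ConcurrentQueue<int> q(1000, 2, 4);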
+ ~ConcurrentQueue() + { + // Destroy producers + auto ptr = producerListTail.load(std::memory_order_relaxed); + while (ptr != nullptr) { + auto next = ptr->next_prod(); + if (ptr->token != nullptr) { + ptr->token->producer = nullptr; + } + destroy(ptr); + ptr = next; + } + + // Destroy implicit producer hash tables + MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE != 0) { + auto hash = implicitProducerHash.load(std::memory_order_relaxed); + while (hash != nullptr) { + auto prev = hash->prev; + if (prev != nullptr) { // The last hash is part of this object and was not allocated dynamically + for (size_t i = 0; i != hash->capacity; ++i) { + hash->entries[i].~ImplicitProducerKVP(); + } + hash->~ImplicitProducerHash(); + (Traits::free)(hash); + } + hash = prev; + } + } + + // Destroy global free list + auto block = freeList.head_unsafe(); + while (block != nullptr) { + auto next = block->freeListNext.load(std::memory_order_relaxed); + if (block->dynamicallyAllocated) { + destroy(block); + } + block = next; + } + + // Destroy initial free list + destroy_array(initialBlockPool, initialBlockPoolSize); + } + + // Disable copying and copy assignment + ConcurrentQueue(ConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION; + ConcurrentQueue& operator=(ConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION; + + // Moving is supported, but note that it is *not* a thread-safe operation. + // Nobody can use the queue while it's being moved, and the memory effects + // of that move must be propagated to other threads before they can use it. + // Note: When a queue is moved, its tokens are still valid but can only be + // used with the destination queue (i.e. semantically they are moved along + // with the queue itself). + ConcurrentQueue(ConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT + : producerListTail(other.producerListTail.load(std::memory_order_relaxed)), + producerCount(other.producerCount.load(std::memory_order_relaxed)), + initialBlockPoolIndex(other.initialBlockPoolIndex.load(std::memory_order_relaxed)), + initialBlockPool(other.initialBlockPool), + initialBlockPoolSize(other.initialBlockPoolSize), + freeList(std::move(other.freeList)), + nextExplicitConsumerId(other.nextExplicitConsumerId.load(std::memory_order_relaxed)), + globalExplicitConsumerOffset(other.globalExplicitConsumerOffset.load(std::memory_order_relaxed)) + { + // Move the other one into this, and leave the other one as an empty queue + implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed); + populate_initial_implicit_producer_hash(); + swap_implicit_producer_hashes(other); + + other.producerListTail.store(nullptr, std::memory_order_relaxed); + other.producerCount.store(0, std::memory_order_relaxed); + other.nextExplicitConsumerId.store(0, std::memory_order_relaxed); + other.globalExplicitConsumerOffset.store(0, std::memory_order_relaxed); + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + explicitProducers.store(other.explicitProducers.load(std::memory_order_relaxed), std::memory_order_relaxed); + other.explicitProducers.store(nullptr, std::memory_order_relaxed); + implicitProducers.store(other.implicitProducers.load(std::memory_order_relaxed), std::memory_order_relaxed); + other.implicitProducers.store(nullptr, std::memory_order_relaxed); +#endif + + other.initialBlockPoolIndex.store(0, std::memory_order_relaxed); + other.initialBlockPoolSize = 0; + other.initialBlockPool = nullptr; + + reown_producers(); + } + + inline ConcurrentQueue& operator=(ConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT + { + return 
swap_internal(other); + } + + // Swaps this queue's state with the other's. Not thread-safe. + // Swapping two queues does not invalidate their tokens, however + // the tokens that were created for one queue must be used with + // only the swapped queue (i.e. the tokens are tied to the + // queue's movable state, not the object itself). + inline void swap(ConcurrentQueue& other) MOODYCAMEL_NOEXCEPT + { + swap_internal(other); + } + +private: + ConcurrentQueue& swap_internal(ConcurrentQueue& other) + { + if (this == &other) { + return *this; + } + + details::swap_relaxed(producerListTail, other.producerListTail); + details::swap_relaxed(producerCount, other.producerCount); + details::swap_relaxed(initialBlockPoolIndex, other.initialBlockPoolIndex); + std::swap(initialBlockPool, other.initialBlockPool); + std::swap(initialBlockPoolSize, other.initialBlockPoolSize); + freeList.swap(other.freeList); + details::swap_relaxed(nextExplicitConsumerId, other.nextExplicitConsumerId); + details::swap_relaxed(globalExplicitConsumerOffset, other.globalExplicitConsumerOffset); + + swap_implicit_producer_hashes(other); + + reown_producers(); + other.reown_producers(); + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + details::swap_relaxed(explicitProducers, other.explicitProducers); + details::swap_relaxed(implicitProducers, other.implicitProducers); +#endif + + return *this; + } + +public: + // Enqueues a single item (by copying it). + // Allocates memory if required. Only fails if memory allocation fails (or implicit + // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0, + // or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. + inline bool enqueue(T const& item) + { + MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; + else return inner_enqueue(item); + } + + // Enqueues a single item (by moving it, if possible). + // Allocates memory if required. Only fails if memory allocation fails (or implicit + // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0, + // or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. + inline bool enqueue(T&& item) + { + MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; + else return inner_enqueue(std::move(item)); + } + + // Enqueues a single item (by copying it) using an explicit producer token. + // Allocates memory if required. Only fails if memory allocation fails (or + // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. + inline bool enqueue(producer_token_t const& token, T const& item) + { + return inner_enqueue(token, item); + } + + // Enqueues a single item (by moving it, if possible) using an explicit producer token. + // Allocates memory if required. Only fails if memory allocation fails (or + // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. + inline bool enqueue(producer_token_t const& token, T&& item) + { + return inner_enqueue(token, std::move(item)); + } + + // Enqueues several items. + // Allocates memory if required. Only fails if memory allocation fails (or + // implicit production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE + // is 0, or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Note: Use std::make_move_iterator if the elements should be moved instead of copied. + // Thread-safe. 
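+	// Illustrative sketch (not from the original header) of bulk-enqueueing with
+	// moved elements; `q` and `items` are hypothetical local variables:
+	//
+	//   moodycamel::ConcurrentQueue<std::string> q;
+	//   std::vector<std::string> items = {"a", "b", "c"};
+	//   q.enqueue_bulk(std::make_move_iterator(items.begin()), items.size());
+	//
+	// After the call, the strings in `items` are left in a moved-from state.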
+ template + bool enqueue_bulk(It itemFirst, size_t count) + { + MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; + else return inner_enqueue_bulk(itemFirst, count); + } + + // Enqueues several items using an explicit producer token. + // Allocates memory if required. Only fails if memory allocation fails + // (or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Note: Use std::make_move_iterator if the elements should be moved + // instead of copied. + // Thread-safe. + template + bool enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count) + { + return inner_enqueue_bulk(token, itemFirst, count); + } + + // Enqueues a single item (by copying it). + // Does not allocate memory. Fails if not enough room to enqueue (or implicit + // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE + // is 0). + // Thread-safe. + inline bool try_enqueue(T const& item) + { + MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; + else return inner_enqueue(item); + } + + // Enqueues a single item (by moving it, if possible). + // Does not allocate memory (except for one-time implicit producer). + // Fails if not enough room to enqueue (or implicit production is + // disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0). + // Thread-safe. + inline bool try_enqueue(T&& item) + { + MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; + else return inner_enqueue(std::move(item)); + } + + // Enqueues a single item (by copying it) using an explicit producer token. + // Does not allocate memory. Fails if not enough room to enqueue. + // Thread-safe. + inline bool try_enqueue(producer_token_t const& token, T const& item) + { + return inner_enqueue(token, item); + } + + // Enqueues a single item (by moving it, if possible) using an explicit producer token. + // Does not allocate memory. Fails if not enough room to enqueue. + // Thread-safe. + inline bool try_enqueue(producer_token_t const& token, T&& item) + { + return inner_enqueue(token, std::move(item)); + } + + // Enqueues several items. + // Does not allocate memory (except for one-time implicit producer). + // Fails if not enough room to enqueue (or implicit production is + // disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0). + // Note: Use std::make_move_iterator if the elements should be moved + // instead of copied. + // Thread-safe. + template + bool try_enqueue_bulk(It itemFirst, size_t count) + { + MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; + else return inner_enqueue_bulk(itemFirst, count); + } + + // Enqueues several items using an explicit producer token. + // Does not allocate memory. Fails if not enough room to enqueue. + // Note: Use std::make_move_iterator if the elements should be moved + // instead of copied. + // Thread-safe. + template + bool try_enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count) + { + return inner_enqueue_bulk(token, itemFirst, count); + } + + + + // Attempts to dequeue from the queue. + // Returns false if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. + template + bool try_dequeue(U& item) + { + // Instead of simply trying each producer in turn (which could cause needless contention on the first + // producer), we score them heuristically. 
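+		// Concretely: scan the producer list, remember the producer with the largest
+		// approximate size among the first few that appear non-empty, try to dequeue
+		// from that one first, and only fall back to trying every other producer if
+		// that first attempt comes up empty.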
+ size_t nonEmptyCount = 0; + ProducerBase* best = nullptr; + size_t bestSize = 0; + for (auto ptr = producerListTail.load(std::memory_order_acquire); nonEmptyCount < 3 && ptr != nullptr; ptr = ptr->next_prod()) { + auto size = ptr->size_approx(); + if (size > 0) { + if (size > bestSize) { + bestSize = size; + best = ptr; + } + ++nonEmptyCount; + } + } + + // If there was at least one non-empty queue but it appears empty at the time + // we try to dequeue from it, we need to make sure every queue's been tried + if (nonEmptyCount > 0) { + if ((details::likely)(best->dequeue(item))) { + return true; + } + for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) { + if (ptr != best && ptr->dequeue(item)) { + return true; + } + } + } + return false; + } + + // Attempts to dequeue from the queue. + // Returns false if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // This differs from the try_dequeue(item) method in that this one does + // not attempt to reduce contention by interleaving the order that producer + // streams are dequeued from. So, using this method can reduce overall throughput + // under contention, but will give more predictable results in single-threaded + // consumer scenarios. This is mostly only useful for internal unit tests. + // Never allocates. Thread-safe. + template + bool try_dequeue_non_interleaved(U& item) + { + for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) { + if (ptr->dequeue(item)) { + return true; + } + } + return false; + } + + // Attempts to dequeue from the queue using an explicit consumer token. + // Returns false if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. 
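+	// Illustrative sketch (not from the original header) of token-based consumption;
+	// `process` is a hypothetical consumer function:
+	//
+	//   moodycamel::ConcurrentQueue<int> q;
+	//   moodycamel::ConsumerToken tok(q);
+	//   int item;
+	//   while (q.try_dequeue(tok, item)) {
+	//       process(item);
+	//   }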
+ template + bool try_dequeue(consumer_token_t& token, U& item) + { + // The idea is roughly as follows: + // Every 256 items from one producer, make everyone rotate (increase the global offset) -> this means the highest efficiency consumer dictates the rotation speed of everyone else, more or less + // If you see that the global offset has changed, you must reset your consumption counter and move to your designated place + // If there's no items where you're supposed to be, keep moving until you find a producer with some items + // If the global offset has not changed but you've run out of items to consume, move over from your current position until you find an producer with something in it + + if (token.desiredProducer == nullptr || token.lastKnownGlobalOffset != globalExplicitConsumerOffset.load(std::memory_order_relaxed)) { + if (!update_current_producer_after_rotation(token)) { + return false; + } + } + + // If there was at least one non-empty queue but it appears empty at the time + // we try to dequeue from it, we need to make sure every queue's been tried + if (static_cast(token.currentProducer)->dequeue(item)) { + if (++token.itemsConsumedFromCurrent == EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE) { + globalExplicitConsumerOffset.fetch_add(1, std::memory_order_relaxed); + } + return true; + } + + auto tail = producerListTail.load(std::memory_order_acquire); + auto ptr = static_cast(token.currentProducer)->next_prod(); + if (ptr == nullptr) { + ptr = tail; + } + while (ptr != static_cast(token.currentProducer)) { + if (ptr->dequeue(item)) { + token.currentProducer = ptr; + token.itemsConsumedFromCurrent = 1; + return true; + } + ptr = ptr->next_prod(); + if (ptr == nullptr) { + ptr = tail; + } + } + return false; + } + + // Attempts to dequeue several elements from the queue. + // Returns the number of items actually dequeued. + // Returns 0 if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. + template + size_t try_dequeue_bulk(It itemFirst, size_t max) + { + size_t count = 0; + for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) { + count += ptr->dequeue_bulk(itemFirst, max - count); + if (count == max) { + break; + } + } + return count; + } + + // Attempts to dequeue several elements from the queue using an explicit consumer token. + // Returns the number of items actually dequeued. + // Returns 0 if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. 
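+	// Illustrative sketch (not from the original header); a plain pointer works as
+	// the output iterator:
+	//
+	//   moodycamel::ConsumerToken tok(q);
+	//   int buffer[64];
+	//   size_t n = q.try_dequeue_bulk(tok, buffer, 64);
+	//   // up to 64 items were written to buffer; `n` reports how many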
+ template + size_t try_dequeue_bulk(consumer_token_t& token, It itemFirst, size_t max) + { + if (token.desiredProducer == nullptr || token.lastKnownGlobalOffset != globalExplicitConsumerOffset.load(std::memory_order_relaxed)) { + if (!update_current_producer_after_rotation(token)) { + return 0; + } + } + + size_t count = static_cast(token.currentProducer)->dequeue_bulk(itemFirst, max); + if (count == max) { + if ((token.itemsConsumedFromCurrent += static_cast(max)) >= EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE) { + globalExplicitConsumerOffset.fetch_add(1, std::memory_order_relaxed); + } + return max; + } + token.itemsConsumedFromCurrent += static_cast(count); + max -= count; + + auto tail = producerListTail.load(std::memory_order_acquire); + auto ptr = static_cast(token.currentProducer)->next_prod(); + if (ptr == nullptr) { + ptr = tail; + } + while (ptr != static_cast(token.currentProducer)) { + auto dequeued = ptr->dequeue_bulk(itemFirst, max); + count += dequeued; + if (dequeued != 0) { + token.currentProducer = ptr; + token.itemsConsumedFromCurrent = static_cast(dequeued); + } + if (dequeued == max) { + break; + } + max -= dequeued; + ptr = ptr->next_prod(); + if (ptr == nullptr) { + ptr = tail; + } + } + return count; + } + + + + // Attempts to dequeue from a specific producer's inner queue. + // If you happen to know which producer you want to dequeue from, this + // is significantly faster than using the general-case try_dequeue methods. + // Returns false if the producer's queue appeared empty at the time it + // was checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. + template + inline bool try_dequeue_from_producer(producer_token_t const& producer, U& item) + { + return static_cast(producer.producer)->dequeue(item); + } + + // Attempts to dequeue several elements from a specific producer's inner queue. + // Returns the number of items actually dequeued. + // If you happen to know which producer you want to dequeue from, this + // is significantly faster than using the general-case try_dequeue methods. + // Returns 0 if the producer's queue appeared empty at the time it + // was checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. + template + inline size_t try_dequeue_bulk_from_producer(producer_token_t const& producer, It itemFirst, size_t max) + { + return static_cast(producer.producer)->dequeue_bulk(itemFirst, max); + } + + + // Returns an estimate of the total number of elements currently in the queue. This + // estimate is only accurate if the queue has completely stabilized before it is called + // (i.e. all enqueue and dequeue operations have completed and their memory effects are + // visible on the calling thread, and no further operations start while this method is + // being called). + // Thread-safe. + size_t size_approx() const + { + size_t size = 0; + for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) { + size += ptr->size_approx(); + } + return size; + } + + + // Returns true if the underlying atomic variables used by + // the queue are lock-free (they should be on most platforms). + // Thread-safe. 
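+	// Illustrative check (not from the original header):
+	//   assert(moodycamel::ConcurrentQueue<int>::is_lock_free());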
+ static bool is_lock_free() + { + return + details::static_is_lock_free::value == 2 && + details::static_is_lock_free::value == 2 && + details::static_is_lock_free::value == 2 && + details::static_is_lock_free::value == 2 && + details::static_is_lock_free::value == 2 && + details::static_is_lock_free::thread_id_numeric_size_t>::value == 2; + } + + +private: + friend struct ProducerToken; + friend struct ConsumerToken; + struct ExplicitProducer; + friend struct ExplicitProducer; + struct ImplicitProducer; + friend struct ImplicitProducer; + friend class ConcurrentQueueTests; + + enum AllocationMode { CanAlloc, CannotAlloc }; + + + /////////////////////////////// + // Queue methods + /////////////////////////////// + + template + inline bool inner_enqueue(producer_token_t const& token, U&& element) + { + return static_cast(token.producer)->ConcurrentQueue::ExplicitProducer::template enqueue(std::forward(element)); + } + + template + inline bool inner_enqueue(U&& element) + { + auto producer = get_or_add_implicit_producer(); + return producer == nullptr ? false : producer->ConcurrentQueue::ImplicitProducer::template enqueue(std::forward(element)); + } + + template + inline bool inner_enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count) + { + return static_cast(token.producer)->ConcurrentQueue::ExplicitProducer::template enqueue_bulk(itemFirst, count); + } + + template + inline bool inner_enqueue_bulk(It itemFirst, size_t count) + { + auto producer = get_or_add_implicit_producer(); + return producer == nullptr ? false : producer->ConcurrentQueue::ImplicitProducer::template enqueue_bulk(itemFirst, count); + } + + inline bool update_current_producer_after_rotation(consumer_token_t& token) + { + // Ah, there's been a rotation, figure out where we should be! + auto tail = producerListTail.load(std::memory_order_acquire); + if (token.desiredProducer == nullptr && tail == nullptr) { + return false; + } + auto prodCount = producerCount.load(std::memory_order_relaxed); + auto globalOffset = globalExplicitConsumerOffset.load(std::memory_order_relaxed); + if ((details::unlikely)(token.desiredProducer == nullptr)) { + // Aha, first time we're dequeueing anything. + // Figure out our local position + // Note: offset is from start, not end, but we're traversing from end -- subtract from count first + std::uint32_t offset = prodCount - 1 - (token.initialOffset % prodCount); + token.desiredProducer = tail; + for (std::uint32_t i = 0; i != offset; ++i) { + token.desiredProducer = static_cast(token.desiredProducer)->next_prod(); + if (token.desiredProducer == nullptr) { + token.desiredProducer = tail; + } + } + } + + std::uint32_t delta = globalOffset - token.lastKnownGlobalOffset; + if (delta >= prodCount) { + delta = delta % prodCount; + } + for (std::uint32_t i = 0; i != delta; ++i) { + token.desiredProducer = static_cast(token.desiredProducer)->next_prod(); + if (token.desiredProducer == nullptr) { + token.desiredProducer = tail; + } + } + + token.lastKnownGlobalOffset = globalOffset; + token.currentProducer = token.desiredProducer; + token.itemsConsumedFromCurrent = 0; + return true; + } + + + /////////////////////////// + // Free list + /////////////////////////// + + template + struct FreeListNode + { + FreeListNode() : freeListRefs(0), freeListNext(nullptr) { } + + std::atomic freeListRefs; + std::atomic freeListNext; + }; + + // A simple CAS-based lock-free free list. 
Not the fastest thing in the world under heavy contention, but + // simple and correct (assuming nodes are never freed until after the free list is destroyed), and fairly + // speedy under low contention. + template // N must inherit FreeListNode or have the same fields (and initialization of them) + struct FreeList + { + FreeList() : freeListHead(nullptr) { } + FreeList(FreeList&& other) : freeListHead(other.freeListHead.load(std::memory_order_relaxed)) { other.freeListHead.store(nullptr, std::memory_order_relaxed); } + void swap(FreeList& other) { details::swap_relaxed(freeListHead, other.freeListHead); } + + FreeList(FreeList const&) MOODYCAMEL_DELETE_FUNCTION; + FreeList& operator=(FreeList const&) MOODYCAMEL_DELETE_FUNCTION; + + inline void add(N* node) + { +#ifdef MCDBGQ_NOLOCKFREE_FREELIST + debug::DebugLock lock(mutex); +#endif + // We know that the should-be-on-freelist bit is 0 at this point, so it's safe to + // set it using a fetch_add + if (node->freeListRefs.fetch_add(SHOULD_BE_ON_FREELIST, std::memory_order_acq_rel) == 0) { + // Oh look! We were the last ones referencing this node, and we know + // we want to add it to the free list, so let's do it! + add_knowing_refcount_is_zero(node); + } + } + + inline N* try_get() + { +#ifdef MCDBGQ_NOLOCKFREE_FREELIST + debug::DebugLock lock(mutex); +#endif + auto head = freeListHead.load(std::memory_order_acquire); + while (head != nullptr) { + auto prevHead = head; + auto refs = head->freeListRefs.load(std::memory_order_relaxed); + if ((refs & REFS_MASK) == 0 || !head->freeListRefs.compare_exchange_strong(refs, refs + 1, std::memory_order_acquire, std::memory_order_relaxed)) { + head = freeListHead.load(std::memory_order_acquire); + continue; + } + + // Good, reference count has been incremented (it wasn't at zero), which means we can read the + // next and not worry about it changing between now and the time we do the CAS + auto next = head->freeListNext.load(std::memory_order_relaxed); + if (freeListHead.compare_exchange_strong(head, next, std::memory_order_acquire, std::memory_order_relaxed)) { + // Yay, got the node. This means it was on the list, which means shouldBeOnFreeList must be false no + // matter the refcount (because nobody else knows it's been taken off yet, it can't have been put back on). + assert((head->freeListRefs.load(std::memory_order_relaxed) & SHOULD_BE_ON_FREELIST) == 0); + + // Decrease refcount twice, once for our ref, and once for the list's ref + head->freeListRefs.fetch_sub(2, std::memory_order_release); + return head; + } + + // OK, the head must have changed on us, but we still need to decrease the refcount we increased. + // Note that we don't need to release any memory effects, but we do need to ensure that the reference + // count decrement happens-after the CAS on the head. + refs = prevHead->freeListRefs.fetch_sub(1, std::memory_order_acq_rel); + if (refs == SHOULD_BE_ON_FREELIST + 1) { + add_knowing_refcount_is_zero(prevHead); + } + } + + return nullptr; + } + + // Useful for traversing the list when there's no contention (e.g. to destroy remaining nodes) + N* head_unsafe() const { return freeListHead.load(std::memory_order_relaxed); } + + private: + inline void add_knowing_refcount_is_zero(N* node) + { + // Since the refcount is zero, and nobody can increase it once it's zero (except us, and we run + // only one copy of this method per node at a time, i.e. 
the single thread case), then we know + // we can safely change the next pointer of the node; however, once the refcount is back above + // zero, then other threads could increase it (happens under heavy contention, when the refcount + // goes to zero in between a load and a refcount increment of a node in try_get, then back up to + // something non-zero, then the refcount increment is done by the other thread) -- so, if the CAS + // to add the node to the actual list fails, decrease the refcount and leave the add operation to + // the next thread who puts the refcount back at zero (which could be us, hence the loop). + auto head = freeListHead.load(std::memory_order_relaxed); + while (true) { + node->freeListNext.store(head, std::memory_order_relaxed); + node->freeListRefs.store(1, std::memory_order_release); + if (!freeListHead.compare_exchange_strong(head, node, std::memory_order_release, std::memory_order_relaxed)) { + // Hmm, the add failed, but we can only try again when the refcount goes back to zero + if (node->freeListRefs.fetch_add(SHOULD_BE_ON_FREELIST - 1, std::memory_order_release) == 1) { + continue; + } + } + return; + } + } + + private: + // Implemented like a stack, but where node order doesn't matter (nodes are inserted out of order under contention) + std::atomic freeListHead; + + static const std::uint32_t REFS_MASK = 0x7FFFFFFF; + static const std::uint32_t SHOULD_BE_ON_FREELIST = 0x80000000; + +#ifdef MCDBGQ_NOLOCKFREE_FREELIST + debug::DebugMutex mutex; +#endif + }; + + + /////////////////////////// + // Block + /////////////////////////// + + enum InnerQueueContext { implicit_context = 0, explicit_context = 1 }; + + struct Block + { + Block() + : next(nullptr), elementsCompletelyDequeued(0), freeListRefs(0), freeListNext(nullptr), shouldBeOnFreeList(false), dynamicallyAllocated(true) + { +#ifdef MCDBGQ_TRACKMEM + owner = nullptr; +#endif + } + + template + inline bool is_empty() const + { + MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { + // Check flags + for (size_t i = 0; i < BLOCK_SIZE; ++i) { + if (!emptyFlags[i].load(std::memory_order_relaxed)) { + return false; + } + } + + // Aha, empty; make sure we have all other memory effects that happened before the empty flags were set + std::atomic_thread_fence(std::memory_order_acquire); + return true; + } + else { + // Check counter + if (elementsCompletelyDequeued.load(std::memory_order_relaxed) == BLOCK_SIZE) { + std::atomic_thread_fence(std::memory_order_acquire); + return true; + } + assert(elementsCompletelyDequeued.load(std::memory_order_relaxed) <= BLOCK_SIZE); + return false; + } + } + + // Returns true if the block is now empty (does not apply in explicit context) + template + inline bool set_empty(MOODYCAMEL_MAYBE_UNUSED index_t i) + { + MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { + // Set flag + assert(!emptyFlags[BLOCK_SIZE - 1 - static_cast(i & static_cast(BLOCK_SIZE - 1))].load(std::memory_order_relaxed)); + emptyFlags[BLOCK_SIZE - 1 - static_cast(i & static_cast(BLOCK_SIZE - 1))].store(true, std::memory_order_release); + return false; + } + else { + // Increment counter + auto prevVal = elementsCompletelyDequeued.fetch_add(1, std::memory_order_release); + assert(prevVal < BLOCK_SIZE); + return prevVal == BLOCK_SIZE - 1; + } + } + + // Sets multiple contiguous item statuses to 'empty' (assumes no wrapping and count > 0). 
+ // Returns true if the block is now empty (does not apply in explicit context). + template + inline bool set_many_empty(MOODYCAMEL_MAYBE_UNUSED index_t i, size_t count) + { + MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { + // Set flags + std::atomic_thread_fence(std::memory_order_release); + i = BLOCK_SIZE - 1 - static_cast(i & static_cast(BLOCK_SIZE - 1)) - count + 1; + for (size_t j = 0; j != count; ++j) { + assert(!emptyFlags[i + j].load(std::memory_order_relaxed)); + emptyFlags[i + j].store(true, std::memory_order_relaxed); + } + return false; + } + else { + // Increment counter + auto prevVal = elementsCompletelyDequeued.fetch_add(count, std::memory_order_release); + assert(prevVal + count <= BLOCK_SIZE); + return prevVal + count == BLOCK_SIZE; + } + } + + template + inline void set_all_empty() + { + MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { + // Set all flags + for (size_t i = 0; i != BLOCK_SIZE; ++i) { + emptyFlags[i].store(true, std::memory_order_relaxed); + } + } + else { + // Reset counter + elementsCompletelyDequeued.store(BLOCK_SIZE, std::memory_order_relaxed); + } + } + + template + inline void reset_empty() + { + MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { + // Reset flags + for (size_t i = 0; i != BLOCK_SIZE; ++i) { + emptyFlags[i].store(false, std::memory_order_relaxed); + } + } + else { + // Reset counter + elementsCompletelyDequeued.store(0, std::memory_order_relaxed); + } + } + + inline T* operator[](index_t idx) MOODYCAMEL_NOEXCEPT { return static_cast(static_cast(elements)) + static_cast(idx & static_cast(BLOCK_SIZE - 1)); } + inline T const* operator[](index_t idx) const MOODYCAMEL_NOEXCEPT { return static_cast(static_cast(elements)) + static_cast(idx & static_cast(BLOCK_SIZE - 1)); } + + private: + static_assert(std::alignment_of::value <= sizeof(T), "The queue does not support types with an alignment greater than their size at this time"); + MOODYCAMEL_ALIGNED_TYPE_LIKE(char[sizeof(T) * BLOCK_SIZE], T) elements; + public: + Block* next; + std::atomic elementsCompletelyDequeued; + std::atomic emptyFlags[BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD ? 
BLOCK_SIZE : 1]; + public: + std::atomic freeListRefs; + std::atomic freeListNext; + std::atomic shouldBeOnFreeList; + bool dynamicallyAllocated; // Perhaps a better name for this would be 'isNotPartOfInitialBlockPool' + +#ifdef MCDBGQ_TRACKMEM + void* owner; +#endif + }; + static_assert(std::alignment_of::value >= std::alignment_of::value, "Internal error: Blocks must be at least as aligned as the type they are wrapping"); + + +#ifdef MCDBGQ_TRACKMEM +public: + struct MemStats; +private: +#endif + + /////////////////////////// + // Producer base + /////////////////////////// + + struct ProducerBase : public details::ConcurrentQueueProducerTypelessBase + { + ProducerBase(ConcurrentQueue* parent_, bool isExplicit_) : + tailIndex(0), + headIndex(0), + dequeueOptimisticCount(0), + dequeueOvercommit(0), + tailBlock(nullptr), + isExplicit(isExplicit_), + parent(parent_) + { + } + + virtual ~ProducerBase() { } + + template + inline bool dequeue(U& element) + { + if (isExplicit) { + return static_cast(this)->dequeue(element); + } + else { + return static_cast(this)->dequeue(element); + } + } + + template + inline size_t dequeue_bulk(It& itemFirst, size_t max) + { + if (isExplicit) { + return static_cast(this)->dequeue_bulk(itemFirst, max); + } + else { + return static_cast(this)->dequeue_bulk(itemFirst, max); + } + } + + inline ProducerBase* next_prod() const { return static_cast(next); } + + inline size_t size_approx() const + { + auto tail = tailIndex.load(std::memory_order_relaxed); + auto head = headIndex.load(std::memory_order_relaxed); + return details::circular_less_than(head, tail) ? static_cast(tail - head) : 0; + } + + inline index_t getTail() const { return tailIndex.load(std::memory_order_relaxed); } + protected: + std::atomic tailIndex; // Where to enqueue to next + std::atomic headIndex; // Where to dequeue from next + + std::atomic dequeueOptimisticCount; + std::atomic dequeueOvercommit; + + Block* tailBlock; + + public: + bool isExplicit; + ConcurrentQueue* parent; + + protected: +#ifdef MCDBGQ_TRACKMEM + friend struct MemStats; +#endif + }; + + + /////////////////////////// + // Explicit queue + /////////////////////////// + + struct ExplicitProducer : public ProducerBase + { + explicit ExplicitProducer(ConcurrentQueue* parent_) : + ProducerBase(parent_, true), + blockIndex(nullptr), + pr_blockIndexSlotsUsed(0), + pr_blockIndexSize(EXPLICIT_INITIAL_INDEX_SIZE >> 1), + pr_blockIndexFront(0), + pr_blockIndexEntries(nullptr), + pr_blockIndexRaw(nullptr) + { + size_t poolBasedIndexSize = details::ceil_to_pow_2(parent_->initialBlockPoolSize) >> 1; + if (poolBasedIndexSize > pr_blockIndexSize) { + pr_blockIndexSize = poolBasedIndexSize; + } + + new_block_index(0); // This creates an index with double the number of current entries, i.e. EXPLICIT_INITIAL_INDEX_SIZE + } + + ~ExplicitProducer() + { + // Destruct any elements not yet dequeued. + // Since we're in the destructor, we can assume all elements + // are either completely dequeued or completely not (no halfways). 
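+			// The cleanup below proceeds in three steps: destruct any elements that were
+			// enqueued but never dequeued, hand each owned block back to the parent's
+			// free list (or destroy it if it was dynamically allocated), and finally
+			// free the chain of block index allocations.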
+ if (this->tailBlock != nullptr) { // Note this means there must be a block index too + // First find the block that's partially dequeued, if any + Block* halfDequeuedBlock = nullptr; + if ((this->headIndex.load(std::memory_order_relaxed) & static_cast(BLOCK_SIZE - 1)) != 0) { + // The head's not on a block boundary, meaning a block somewhere is partially dequeued + // (or the head block is the tail block and was fully dequeued, but the head/tail are still not on a boundary) + size_t i = (pr_blockIndexFront - pr_blockIndexSlotsUsed) & (pr_blockIndexSize - 1); + while (details::circular_less_than(pr_blockIndexEntries[i].base + BLOCK_SIZE, this->headIndex.load(std::memory_order_relaxed))) { + i = (i + 1) & (pr_blockIndexSize - 1); + } + assert(details::circular_less_than(pr_blockIndexEntries[i].base, this->headIndex.load(std::memory_order_relaxed))); + halfDequeuedBlock = pr_blockIndexEntries[i].block; + } + + // Start at the head block (note the first line in the loop gives us the head from the tail on the first iteration) + auto block = this->tailBlock; + do { + block = block->next; + if (block->ConcurrentQueue::Block::template is_empty()) { + continue; + } + + size_t i = 0; // Offset into block + if (block == halfDequeuedBlock) { + i = static_cast(this->headIndex.load(std::memory_order_relaxed) & static_cast(BLOCK_SIZE - 1)); + } + + // Walk through all the items in the block; if this is the tail block, we need to stop when we reach the tail index + auto lastValidIndex = (this->tailIndex.load(std::memory_order_relaxed) & static_cast(BLOCK_SIZE - 1)) == 0 ? BLOCK_SIZE : static_cast(this->tailIndex.load(std::memory_order_relaxed) & static_cast(BLOCK_SIZE - 1)); + while (i != BLOCK_SIZE && (block != this->tailBlock || i != lastValidIndex)) { + (*block)[i++]->~T(); + } + } while (block != this->tailBlock); + } + + // Destroy all blocks that we own + if (this->tailBlock != nullptr) { + auto block = this->tailBlock; + do { + auto nextBlock = block->next; + if (block->dynamicallyAllocated) { + destroy(block); + } + else { + this->parent->add_block_to_free_list(block); + } + block = nextBlock; + } while (block != this->tailBlock); + } + + // Destroy the block indices + auto header = static_cast(pr_blockIndexRaw); + while (header != nullptr) { + auto prev = static_cast(header->prev); + header->~BlockIndexHeader(); + (Traits::free)(header); + header = prev; + } + } + + template + inline bool enqueue(U&& element) + { + index_t currentTailIndex = this->tailIndex.load(std::memory_order_relaxed); + index_t newTailIndex = 1 + currentTailIndex; + if ((currentTailIndex & static_cast(BLOCK_SIZE - 1)) == 0) { + // We reached the end of a block, start a new one + auto startBlock = this->tailBlock; + auto originalBlockIndexSlotsUsed = pr_blockIndexSlotsUsed; + if (this->tailBlock != nullptr && this->tailBlock->next->ConcurrentQueue::Block::template is_empty()) { + // We can re-use the block ahead of us, it's empty! + this->tailBlock = this->tailBlock->next; + this->tailBlock->ConcurrentQueue::Block::template reset_empty(); + + // We'll put the block on the block index (guaranteed to be room since we're conceptually removing the + // last block from it first -- except instead of removing then adding, we can just overwrite). + // Note that there must be a valid block index here, since even if allocation failed in the ctor, + // it would have been re-attempted when adding the first block to the queue; since there is such + // a block, a block index must have been successfully allocated. 
+ } + else { + // Whatever head value we see here is >= the last value we saw here (relatively), + // and <= its current value. Since we have the most recent tail, the head must be + // <= to it. + auto head = this->headIndex.load(std::memory_order_relaxed); + assert(!details::circular_less_than(currentTailIndex, head)); + if (!details::circular_less_than(head, currentTailIndex + BLOCK_SIZE) + || (MAX_SUBQUEUE_SIZE != details::const_numeric_max::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head))) { + // We can't enqueue in another block because there's not enough leeway -- the + // tail could surpass the head by the time the block fills up! (Or we'll exceed + // the size limit, if the second part of the condition was true.) + return false; + } + // We're going to need a new block; check that the block index has room + if (pr_blockIndexRaw == nullptr || pr_blockIndexSlotsUsed == pr_blockIndexSize) { + // Hmm, the circular block index is already full -- we'll need + // to allocate a new index. Note pr_blockIndexRaw can only be nullptr if + // the initial allocation failed in the constructor. + + MOODYCAMEL_CONSTEXPR_IF (allocMode == CannotAlloc) { + return false; + } + else if (!new_block_index(pr_blockIndexSlotsUsed)) { + return false; + } + } + + // Insert a new block in the circular linked list + auto newBlock = this->parent->ConcurrentQueue::template requisition_block(); + if (newBlock == nullptr) { + return false; + } +#ifdef MCDBGQ_TRACKMEM + newBlock->owner = this; +#endif + newBlock->ConcurrentQueue::Block::template reset_empty(); + if (this->tailBlock == nullptr) { + newBlock->next = newBlock; + } + else { + newBlock->next = this->tailBlock->next; + this->tailBlock->next = newBlock; + } + this->tailBlock = newBlock; + ++pr_blockIndexSlotsUsed; + } + + MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast(nullptr)) T(std::forward(element)))) { + // The constructor may throw. We want the element not to appear in the queue in + // that case (without corrupting the queue): + MOODYCAMEL_TRY { + new ((*this->tailBlock)[currentTailIndex]) T(std::forward(element)); + } + MOODYCAMEL_CATCH (...) { + // Revert change to the current block, but leave the new block available + // for next time + pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; + this->tailBlock = startBlock == nullptr ? 
this->tailBlock : startBlock; + MOODYCAMEL_RETHROW; + } + } + else { + (void)startBlock; + (void)originalBlockIndexSlotsUsed; + } + + // Add block to block index + auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront]; + entry.base = currentTailIndex; + entry.block = this->tailBlock; + blockIndex.load(std::memory_order_relaxed)->front.store(pr_blockIndexFront, std::memory_order_release); + pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1); + + MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast(nullptr)) T(std::forward(element)))) { + this->tailIndex.store(newTailIndex, std::memory_order_release); + return true; + } + } + + // Enqueue + new ((*this->tailBlock)[currentTailIndex]) T(std::forward(element)); + + this->tailIndex.store(newTailIndex, std::memory_order_release); + return true; + } + + template + bool dequeue(U& element) + { + auto tail = this->tailIndex.load(std::memory_order_relaxed); + auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed); + if (details::circular_less_than(this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit, tail)) { + // Might be something to dequeue, let's give it a try + + // Note that this if is purely for performance purposes in the common case when the queue is + // empty and the values are eventually consistent -- we may enter here spuriously. + + // Note that whatever the values of overcommit and tail are, they are not going to change (unless we + // change them) and must be the same value at this point (inside the if) as when the if condition was + // evaluated. + + // We insert an acquire fence here to synchronize-with the release upon incrementing dequeueOvercommit below. + // This ensures that whatever the value we got loaded into overcommit, the load of dequeueOptisticCount in + // the fetch_add below will result in a value at least as recent as that (and therefore at least as large). + // Note that I believe a compiler (signal) fence here would be sufficient due to the nature of fetch_add (all + // read-modify-write operations are guaranteed to work on the latest value in the modification order), but + // unfortunately that can't be shown to be correct using only the C++11 standard. + // See http://stackoverflow.com/questions/18223161/what-are-the-c11-memory-ordering-guarantees-in-this-corner-case + std::atomic_thread_fence(std::memory_order_acquire); + + // Increment optimistic counter, then check if it went over the boundary + auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(1, std::memory_order_relaxed); + + // Note that since dequeueOvercommit must be <= dequeueOptimisticCount (because dequeueOvercommit is only ever + // incremented after dequeueOptimisticCount -- this is enforced in the `else` block below), and since we now + // have a version of dequeueOptimisticCount that is at least as recent as overcommit (due to the release upon + // incrementing dequeueOvercommit and the acquire above that synchronizes with it), overcommit <= myDequeueCount. + // However, we can't assert this since both dequeueOptimisticCount and dequeueOvercommit may (independently) + // overflow; in such a case, though, the logic still holds since the difference between the two is maintained. + + // Note that we reload tail here in case it changed; it will be the same value as before or greater, since + // this load is sequenced after (happens after) the earlier load above. 
This is supported by read-read + // coherency (as defined in the standard), explained here: http://en.cppreference.com/w/cpp/atomic/memory_order + tail = this->tailIndex.load(std::memory_order_acquire); + if ((details::likely)(details::circular_less_than(myDequeueCount - overcommit, tail))) { + // Guaranteed to be at least one element to dequeue! + + // Get the index. Note that since there's guaranteed to be at least one element, this + // will never exceed tail. We need to do an acquire-release fence here since it's possible + // that whatever condition got us to this point was for an earlier enqueued element (that + // we already see the memory effects for), but that by the time we increment somebody else + // has incremented it, and we need to see the memory effects for *that* element, which is + // in such a case is necessarily visible on the thread that incremented it in the first + // place with the more current condition (they must have acquired a tail that is at least + // as recent). + auto index = this->headIndex.fetch_add(1, std::memory_order_acq_rel); + + + // Determine which block the element is in + + auto localBlockIndex = blockIndex.load(std::memory_order_acquire); + auto localBlockIndexHead = localBlockIndex->front.load(std::memory_order_acquire); + + // We need to be careful here about subtracting and dividing because of index wrap-around. + // When an index wraps, we need to preserve the sign of the offset when dividing it by the + // block size (in order to get a correct signed block count offset in all cases): + auto headBase = localBlockIndex->entries[localBlockIndexHead].base; + auto blockBaseIndex = index & ~static_cast(BLOCK_SIZE - 1); + auto offset = static_cast(static_cast::type>(blockBaseIndex - headBase) / BLOCK_SIZE); + auto block = localBlockIndex->entries[(localBlockIndexHead + offset) & (localBlockIndex->size - 1)].block; + + // Dequeue + auto& el = *((*block)[index]); + if (!MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, element = std::move(el))) { + // Make sure the element is still fully dequeued and destroyed even if the assignment + // throws + struct Guard { + Block* block; + index_t index; + + ~Guard() + { + (*block)[index]->~T(); + block->ConcurrentQueue::Block::template set_empty(index); + } + } guard = { block, index }; + + element = std::move(el); // NOLINT + } + else { + element = std::move(el); // NOLINT + el.~T(); // NOLINT + block->ConcurrentQueue::Block::template set_empty(index); + } + + return true; + } + else { + // Wasn't anything to dequeue after all; make the effective dequeue count eventually consistent + this->dequeueOvercommit.fetch_add(1, std::memory_order_release); // Release so that the fetch_add on dequeueOptimisticCount is guaranteed to happen before this write + } + } + + return false; + } + + template + bool MOODYCAMEL_NO_TSAN enqueue_bulk(It itemFirst, size_t count) + { + // First, we need to make sure we have enough room to enqueue all of the elements; + // this means pre-allocating blocks and putting them in the block index (but only if + // all the allocations succeeded). 
+ index_t startTailIndex = this->tailIndex.load(std::memory_order_relaxed); + auto startBlock = this->tailBlock; + auto originalBlockIndexFront = pr_blockIndexFront; + auto originalBlockIndexSlotsUsed = pr_blockIndexSlotsUsed; + + Block* firstAllocatedBlock = nullptr; + + // Figure out how many blocks we'll need to allocate, and do so + size_t blockBaseDiff = ((startTailIndex + count - 1) & ~static_cast(BLOCK_SIZE - 1)) - ((startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1)); + index_t currentTailIndex = (startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1); + if (blockBaseDiff > 0) { + // Allocate as many blocks as possible from ahead + while (blockBaseDiff > 0 && this->tailBlock != nullptr && this->tailBlock->next != firstAllocatedBlock && this->tailBlock->next->ConcurrentQueue::Block::template is_empty()) { + blockBaseDiff -= static_cast(BLOCK_SIZE); + currentTailIndex += static_cast(BLOCK_SIZE); + + this->tailBlock = this->tailBlock->next; + firstAllocatedBlock = firstAllocatedBlock == nullptr ? this->tailBlock : firstAllocatedBlock; + + auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront]; + entry.base = currentTailIndex; + entry.block = this->tailBlock; + pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1); + } + + // Now allocate as many blocks as necessary from the block pool + while (blockBaseDiff > 0) { + blockBaseDiff -= static_cast(BLOCK_SIZE); + currentTailIndex += static_cast(BLOCK_SIZE); + + auto head = this->headIndex.load(std::memory_order_relaxed); + assert(!details::circular_less_than(currentTailIndex, head)); + bool full = !details::circular_less_than(head, currentTailIndex + BLOCK_SIZE) || (MAX_SUBQUEUE_SIZE != details::const_numeric_max::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head)); + if (pr_blockIndexRaw == nullptr || pr_blockIndexSlotsUsed == pr_blockIndexSize || full) { + MOODYCAMEL_CONSTEXPR_IF (allocMode == CannotAlloc) { + // Failed to allocate, undo changes (but keep injected blocks) + pr_blockIndexFront = originalBlockIndexFront; + pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; + this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock; + return false; + } + else if (full || !new_block_index(originalBlockIndexSlotsUsed)) { + // Failed to allocate, undo changes (but keep injected blocks) + pr_blockIndexFront = originalBlockIndexFront; + pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; + this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock; + return false; + } + + // pr_blockIndexFront is updated inside new_block_index, so we need to + // update our fallback value too (since we keep the new index even if we + // later fail) + originalBlockIndexFront = originalBlockIndexSlotsUsed; + } + + // Insert a new block in the circular linked list + auto newBlock = this->parent->ConcurrentQueue::template requisition_block(); + if (newBlock == nullptr) { + pr_blockIndexFront = originalBlockIndexFront; + pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; + this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock; + return false; + } + +#ifdef MCDBGQ_TRACKMEM + newBlock->owner = this; +#endif + newBlock->ConcurrentQueue::Block::template set_all_empty(); + if (this->tailBlock == nullptr) { + newBlock->next = newBlock; + } + else { + newBlock->next = this->tailBlock->next; + this->tailBlock->next = newBlock; + } + this->tailBlock = newBlock; + firstAllocatedBlock = firstAllocatedBlock == nullptr ? 
this->tailBlock : firstAllocatedBlock; + + ++pr_blockIndexSlotsUsed; + + auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront]; + entry.base = currentTailIndex; + entry.block = this->tailBlock; + pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1); + } + + // Excellent, all allocations succeeded. Reset each block's emptiness before we fill them up, and + // publish the new block index front + auto block = firstAllocatedBlock; + while (true) { + block->ConcurrentQueue::Block::template reset_empty(); + if (block == this->tailBlock) { + break; + } + block = block->next; + } + + MOODYCAMEL_CONSTEXPR_IF (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast(nullptr)) T(details::deref_noexcept(itemFirst)))) { + blockIndex.load(std::memory_order_relaxed)->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1), std::memory_order_release); + } + } + + // Enqueue, one block at a time + index_t newTailIndex = startTailIndex + static_cast(count); + currentTailIndex = startTailIndex; + auto endBlock = this->tailBlock; + this->tailBlock = startBlock; + assert((startTailIndex & static_cast(BLOCK_SIZE - 1)) != 0 || firstAllocatedBlock != nullptr || count == 0); + if ((startTailIndex & static_cast(BLOCK_SIZE - 1)) == 0 && firstAllocatedBlock != nullptr) { + this->tailBlock = firstAllocatedBlock; + } + while (true) { + index_t stopIndex = (currentTailIndex & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + if (details::circular_less_than(newTailIndex, stopIndex)) { + stopIndex = newTailIndex; + } + MOODYCAMEL_CONSTEXPR_IF (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast(nullptr)) T(details::deref_noexcept(itemFirst)))) { + while (currentTailIndex != stopIndex) { + new ((*this->tailBlock)[currentTailIndex++]) T(*itemFirst++); + } + } + else { + MOODYCAMEL_TRY { + while (currentTailIndex != stopIndex) { + // Must use copy constructor even if move constructor is available + // because we may have to revert if there's an exception. + // Sorry about the horrible templated next line, but it was the only way + // to disable moving *at compile time*, which is important because a type + // may only define a (noexcept) move constructor, and so calls to the + // cctor will not compile, even if they are in an if branch that will never + // be executed + new ((*this->tailBlock)[currentTailIndex]) T(details::nomove_if(nullptr)) T(details::deref_noexcept(itemFirst)))>::eval(*itemFirst)); + ++currentTailIndex; + ++itemFirst; + } + } + MOODYCAMEL_CATCH (...) { + // Oh dear, an exception's been thrown -- destroy the elements that + // were enqueued so far and revert the entire bulk operation (we'll keep + // any allocated blocks in our linked list for later, though). + auto constructedStopIndex = currentTailIndex; + auto lastBlockEnqueued = this->tailBlock; + + pr_blockIndexFront = originalBlockIndexFront; + pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; + this->tailBlock = startBlock == nullptr ? 
firstAllocatedBlock : startBlock; + + if (!details::is_trivially_destructible::value) { + auto block = startBlock; + if ((startTailIndex & static_cast(BLOCK_SIZE - 1)) == 0) { + block = firstAllocatedBlock; + } + currentTailIndex = startTailIndex; + while (true) { + stopIndex = (currentTailIndex & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + if (details::circular_less_than(constructedStopIndex, stopIndex)) { + stopIndex = constructedStopIndex; + } + while (currentTailIndex != stopIndex) { + (*block)[currentTailIndex++]->~T(); + } + if (block == lastBlockEnqueued) { + break; + } + block = block->next; + } + } + MOODYCAMEL_RETHROW; + } + } + + if (this->tailBlock == endBlock) { + assert(currentTailIndex == newTailIndex); + break; + } + this->tailBlock = this->tailBlock->next; + } + + MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast(nullptr)) T(details::deref_noexcept(itemFirst)))) { + if (firstAllocatedBlock != nullptr) + blockIndex.load(std::memory_order_relaxed)->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1), std::memory_order_release); + } + + this->tailIndex.store(newTailIndex, std::memory_order_release); + return true; + } + + template + size_t dequeue_bulk(It& itemFirst, size_t max) + { + auto tail = this->tailIndex.load(std::memory_order_relaxed); + auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed); + auto desiredCount = static_cast(tail - (this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit)); + if (details::circular_less_than(0, desiredCount)) { + desiredCount = desiredCount < max ? desiredCount : max; + std::atomic_thread_fence(std::memory_order_acquire); + + auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(desiredCount, std::memory_order_relaxed); + + tail = this->tailIndex.load(std::memory_order_acquire); + auto actualCount = static_cast(tail - (myDequeueCount - overcommit)); + if (details::circular_less_than(0, actualCount)) { + actualCount = desiredCount < actualCount ? desiredCount : actualCount; + if (actualCount < desiredCount) { + this->dequeueOvercommit.fetch_add(desiredCount - actualCount, std::memory_order_release); + } + + // Get the first index. Note that since there's guaranteed to be at least actualCount elements, this + // will never exceed tail. + auto firstIndex = this->headIndex.fetch_add(actualCount, std::memory_order_acq_rel); + + // Determine which block the first element is in + auto localBlockIndex = blockIndex.load(std::memory_order_acquire); + auto localBlockIndexHead = localBlockIndex->front.load(std::memory_order_acquire); + + auto headBase = localBlockIndex->entries[localBlockIndexHead].base; + auto firstBlockBaseIndex = firstIndex & ~static_cast(BLOCK_SIZE - 1); + auto offset = static_cast(static_cast::type>(firstBlockBaseIndex - headBase) / BLOCK_SIZE); + auto indexIndex = (localBlockIndexHead + offset) & (localBlockIndex->size - 1); + + // Iterate the blocks and dequeue + auto index = firstIndex; + do { + auto firstIndexInBlock = index; + index_t endIndex = (index & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + endIndex = details::circular_less_than(firstIndex + static_cast(actualCount), endIndex) ? 
firstIndex + static_cast(actualCount) : endIndex; + auto block = localBlockIndex->entries[indexIndex].block; + if (MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, details::deref_noexcept(itemFirst) = std::move((*(*block)[index])))) { + while (index != endIndex) { + auto& el = *((*block)[index]); + *itemFirst++ = std::move(el); + el.~T(); + ++index; + } + } + else { + MOODYCAMEL_TRY { + while (index != endIndex) { + auto& el = *((*block)[index]); + *itemFirst = std::move(el); + ++itemFirst; + el.~T(); + ++index; + } + } + MOODYCAMEL_CATCH (...) { + // It's too late to revert the dequeue, but we can make sure that all + // the dequeued objects are properly destroyed and the block index + // (and empty count) are properly updated before we propagate the exception + do { + block = localBlockIndex->entries[indexIndex].block; + while (index != endIndex) { + (*block)[index++]->~T(); + } + block->ConcurrentQueue::Block::template set_many_empty(firstIndexInBlock, static_cast(endIndex - firstIndexInBlock)); + indexIndex = (indexIndex + 1) & (localBlockIndex->size - 1); + + firstIndexInBlock = index; + endIndex = (index & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + endIndex = details::circular_less_than(firstIndex + static_cast(actualCount), endIndex) ? firstIndex + static_cast(actualCount) : endIndex; + } while (index != firstIndex + actualCount); + + MOODYCAMEL_RETHROW; + } + } + block->ConcurrentQueue::Block::template set_many_empty(firstIndexInBlock, static_cast(endIndex - firstIndexInBlock)); + indexIndex = (indexIndex + 1) & (localBlockIndex->size - 1); + } while (index != firstIndex + actualCount); + + return actualCount; + } + else { + // Wasn't anything to dequeue after all; make the effective dequeue count eventually consistent + this->dequeueOvercommit.fetch_add(desiredCount, std::memory_order_release); + } + } + + return 0; + } + + private: + struct BlockIndexEntry + { + index_t base; + Block* block; + }; + + struct BlockIndexHeader + { + size_t size; + std::atomic front; // Current slot (not next, like pr_blockIndexFront) + BlockIndexEntry* entries; + void* prev; + }; + + + bool new_block_index(size_t numberOfFilledSlotsToExpose) + { + auto prevBlockSizeMask = pr_blockIndexSize - 1; + + // Create the new block + pr_blockIndexSize <<= 1; + auto newRawPtr = static_cast((Traits::malloc)(sizeof(BlockIndexHeader) + std::alignment_of::value - 1 + sizeof(BlockIndexEntry) * pr_blockIndexSize)); + if (newRawPtr == nullptr) { + pr_blockIndexSize >>= 1; // Reset to allow graceful retry + return false; + } + + auto newBlockIndexEntries = reinterpret_cast(details::align_for(newRawPtr + sizeof(BlockIndexHeader))); + + // Copy in all the old indices, if any + size_t j = 0; + if (pr_blockIndexSlotsUsed != 0) { + auto i = (pr_blockIndexFront - pr_blockIndexSlotsUsed) & prevBlockSizeMask; + do { + newBlockIndexEntries[j++] = pr_blockIndexEntries[i]; + i = (i + 1) & prevBlockSizeMask; + } while (i != pr_blockIndexFront); + } + + // Update everything + auto header = new (newRawPtr) BlockIndexHeader; + header->size = pr_blockIndexSize; + header->front.store(numberOfFilledSlotsToExpose - 1, std::memory_order_relaxed); + header->entries = newBlockIndexEntries; + header->prev = pr_blockIndexRaw; // we link the new block to the old one so we can free it later + + pr_blockIndexFront = j; + pr_blockIndexEntries = newBlockIndexEntries; + pr_blockIndexRaw = newRawPtr; + blockIndex.store(header, std::memory_order_release); + + return true; + } + + private: + std::atomic blockIndex; + + // To be used by producer 
only -- consumer must use the ones in referenced by blockIndex + size_t pr_blockIndexSlotsUsed; + size_t pr_blockIndexSize; + size_t pr_blockIndexFront; // Next slot (not current) + BlockIndexEntry* pr_blockIndexEntries; + void* pr_blockIndexRaw; + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + public: + ExplicitProducer* nextExplicitProducer; + private: +#endif + +#ifdef MCDBGQ_TRACKMEM + friend struct MemStats; +#endif + }; + + + ////////////////////////////////// + // Implicit queue + ////////////////////////////////// + + struct ImplicitProducer : public ProducerBase + { + ImplicitProducer(ConcurrentQueue* parent_) : + ProducerBase(parent_, false), + nextBlockIndexCapacity(IMPLICIT_INITIAL_INDEX_SIZE), + blockIndex(nullptr) + { + new_block_index(); + } + + ~ImplicitProducer() + { + // Note that since we're in the destructor we can assume that all enqueue/dequeue operations + // completed already; this means that all undequeued elements are placed contiguously across + // contiguous blocks, and that only the first and last remaining blocks can be only partially + // empty (all other remaining blocks must be completely full). + +#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED + // Unregister ourselves for thread termination notification + if (!this->inactive.load(std::memory_order_relaxed)) { + details::ThreadExitNotifier::unsubscribe(&threadExitListener); + } +#endif + + // Destroy all remaining elements! + auto tail = this->tailIndex.load(std::memory_order_relaxed); + auto index = this->headIndex.load(std::memory_order_relaxed); + Block* block = nullptr; + assert(index == tail || details::circular_less_than(index, tail)); + bool forceFreeLastBlock = index != tail; // If we enter the loop, then the last (tail) block will not be freed + while (index != tail) { + if ((index & static_cast(BLOCK_SIZE - 1)) == 0 || block == nullptr) { + if (block != nullptr) { + // Free the old block + this->parent->add_block_to_free_list(block); + } + + block = get_block_index_entry_for_index(index)->value.load(std::memory_order_relaxed); + } + + ((*block)[index])->~T(); + ++index; + } + // Even if the queue is empty, there's still one block that's not on the free list + // (unless the head index reached the end of it, in which case the tail will be poised + // to create a new block). 
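+			// If any elements were destroyed above, the loop deliberately left the block holding the last of
+			// them (which is this->tailBlock) un-freed, so forceFreeLastBlock makes sure it is handed back
+			// below even when `tail` lands exactly on a block boundary.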
+ if (this->tailBlock != nullptr && (forceFreeLastBlock || (tail & static_cast(BLOCK_SIZE - 1)) != 0)) { + this->parent->add_block_to_free_list(this->tailBlock); + } + + // Destroy block index + auto localBlockIndex = blockIndex.load(std::memory_order_relaxed); + if (localBlockIndex != nullptr) { + for (size_t i = 0; i != localBlockIndex->capacity; ++i) { + localBlockIndex->index[i]->~BlockIndexEntry(); + } + do { + auto prev = localBlockIndex->prev; + localBlockIndex->~BlockIndexHeader(); + (Traits::free)(localBlockIndex); + localBlockIndex = prev; + } while (localBlockIndex != nullptr); + } + } + + template + inline bool enqueue(U&& element) + { + index_t currentTailIndex = this->tailIndex.load(std::memory_order_relaxed); + index_t newTailIndex = 1 + currentTailIndex; + if ((currentTailIndex & static_cast(BLOCK_SIZE - 1)) == 0) { + // We reached the end of a block, start a new one + auto head = this->headIndex.load(std::memory_order_relaxed); + assert(!details::circular_less_than(currentTailIndex, head)); + if (!details::circular_less_than(head, currentTailIndex + BLOCK_SIZE) || (MAX_SUBQUEUE_SIZE != details::const_numeric_max::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head))) { + return false; + } +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + debug::DebugLock lock(mutex); +#endif + // Find out where we'll be inserting this block in the block index + BlockIndexEntry* idxEntry; + if (!insert_block_index_entry(idxEntry, currentTailIndex)) { + return false; + } + + // Get ahold of a new block + auto newBlock = this->parent->ConcurrentQueue::template requisition_block(); + if (newBlock == nullptr) { + rewind_block_index_tail(); + idxEntry->value.store(nullptr, std::memory_order_relaxed); + return false; + } +#ifdef MCDBGQ_TRACKMEM + newBlock->owner = this; +#endif + newBlock->ConcurrentQueue::Block::template reset_empty(); + + MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast(nullptr)) T(std::forward(element)))) { + // May throw, try to insert now before we publish the fact that we have this new block + MOODYCAMEL_TRY { + new ((*newBlock)[currentTailIndex]) T(std::forward(element)); + } + MOODYCAMEL_CATCH (...) 
{ + rewind_block_index_tail(); + idxEntry->value.store(nullptr, std::memory_order_relaxed); + this->parent->add_block_to_free_list(newBlock); + MOODYCAMEL_RETHROW; + } + } + + // Insert the new block into the index + idxEntry->value.store(newBlock, std::memory_order_relaxed); + + this->tailBlock = newBlock; + + MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast(nullptr)) T(std::forward(element)))) { + this->tailIndex.store(newTailIndex, std::memory_order_release); + return true; + } + } + + // Enqueue + new ((*this->tailBlock)[currentTailIndex]) T(std::forward(element)); + + this->tailIndex.store(newTailIndex, std::memory_order_release); + return true; + } + + template + bool dequeue(U& element) + { + // See ExplicitProducer::dequeue for rationale and explanation + index_t tail = this->tailIndex.load(std::memory_order_relaxed); + index_t overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed); + if (details::circular_less_than(this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit, tail)) { + std::atomic_thread_fence(std::memory_order_acquire); + + index_t myDequeueCount = this->dequeueOptimisticCount.fetch_add(1, std::memory_order_relaxed); + tail = this->tailIndex.load(std::memory_order_acquire); + if ((details::likely)(details::circular_less_than(myDequeueCount - overcommit, tail))) { + index_t index = this->headIndex.fetch_add(1, std::memory_order_acq_rel); + + // Determine which block the element is in + auto entry = get_block_index_entry_for_index(index); + + // Dequeue + auto block = entry->value.load(std::memory_order_relaxed); + auto& el = *((*block)[index]); + + if (!MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, element = std::move(el))) { +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + // Note: Acquiring the mutex with every dequeue instead of only when a block + // is released is very sub-optimal, but it is, after all, purely debug code. + debug::DebugLock lock(producer->mutex); +#endif + struct Guard { + Block* block; + index_t index; + BlockIndexEntry* entry; + ConcurrentQueue* parent; + + ~Guard() + { + (*block)[index]->~T(); + if (block->ConcurrentQueue::Block::template set_empty(index)) { + entry->value.store(nullptr, std::memory_order_relaxed); + parent->add_block_to_free_list(block); + } + } + } guard = { block, index, entry, this->parent }; + + element = std::move(el); // NOLINT + } + else { + element = std::move(el); // NOLINT + el.~T(); // NOLINT + + if (block->ConcurrentQueue::Block::template set_empty(index)) { + { +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + debug::DebugLock lock(mutex); +#endif + // Add the block back into the global free pool (and remove from block index) + entry->value.store(nullptr, std::memory_order_relaxed); + } + this->parent->add_block_to_free_list(block); // releases the above store + } + } + + return true; + } + else { + this->dequeueOvercommit.fetch_add(1, std::memory_order_release); + } + } + + return false; + } + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable: 4706) // assignment within conditional expression +#endif + template + bool enqueue_bulk(It itemFirst, size_t count) + { + // First, we need to make sure we have enough room to enqueue all of the elements; + // this means pre-allocating blocks and putting them in the block index (but only if + // all the allocations succeeded). 
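+			// Put differently, this is a two-phase operation: phase one walks every block boundary covered by
+			// [startTailIndex, startTailIndex + count), inserting a block index entry and requisitioning a block
+			// for each one (rolling all reservations back if any step fails); phase two then constructs the
+			// elements block by block and publishes tailIndex only once everything has succeeded.
+			// Hypothetical worked example with BLOCK_SIZE == 32, startTailIndex == 30 and count == 10:
+			// blockBaseDiff (computed below) = ((30 + 10 - 1) & ~31) - ((30 - 1) & ~31) = 32 - 0 = 32, i.e.
+			// exactly one new block is reserved; items 30 and 31 land in the current tail block and items
+			// 32..39 in the new one.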
+ + // Note that the tailBlock we start off with may not be owned by us any more; + // this happens if it was filled up exactly to the top (setting tailIndex to + // the first index of the next block which is not yet allocated), then dequeued + // completely (putting it on the free list) before we enqueue again. + + index_t startTailIndex = this->tailIndex.load(std::memory_order_relaxed); + auto startBlock = this->tailBlock; + Block* firstAllocatedBlock = nullptr; + auto endBlock = this->tailBlock; + + // Figure out how many blocks we'll need to allocate, and do so + size_t blockBaseDiff = ((startTailIndex + count - 1) & ~static_cast(BLOCK_SIZE - 1)) - ((startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1)); + index_t currentTailIndex = (startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1); + if (blockBaseDiff > 0) { +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + debug::DebugLock lock(mutex); +#endif + do { + blockBaseDiff -= static_cast(BLOCK_SIZE); + currentTailIndex += static_cast(BLOCK_SIZE); + + // Find out where we'll be inserting this block in the block index + BlockIndexEntry* idxEntry = nullptr; // initialization here unnecessary but compiler can't always tell + Block* newBlock; + bool indexInserted = false; + auto head = this->headIndex.load(std::memory_order_relaxed); + assert(!details::circular_less_than(currentTailIndex, head)); + bool full = !details::circular_less_than(head, currentTailIndex + BLOCK_SIZE) || (MAX_SUBQUEUE_SIZE != details::const_numeric_max::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head)); + + if (full || !(indexInserted = insert_block_index_entry(idxEntry, currentTailIndex)) || (newBlock = this->parent->ConcurrentQueue::template requisition_block()) == nullptr) { + // Index allocation or block allocation failed; revert any other allocations + // and index insertions done so far for this operation + if (indexInserted) { + rewind_block_index_tail(); + idxEntry->value.store(nullptr, std::memory_order_relaxed); + } + currentTailIndex = (startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1); + for (auto block = firstAllocatedBlock; block != nullptr; block = block->next) { + currentTailIndex += static_cast(BLOCK_SIZE); + idxEntry = get_block_index_entry_for_index(currentTailIndex); + idxEntry->value.store(nullptr, std::memory_order_relaxed); + rewind_block_index_tail(); + } + this->parent->add_blocks_to_free_list(firstAllocatedBlock); + this->tailBlock = startBlock; + + return false; + } + +#ifdef MCDBGQ_TRACKMEM + newBlock->owner = this; +#endif + newBlock->ConcurrentQueue::Block::template reset_empty(); + newBlock->next = nullptr; + + // Insert the new block into the index + idxEntry->value.store(newBlock, std::memory_order_relaxed); + + // Store the chain of blocks so that we can undo if later allocations fail, + // and so that we can find the blocks when we do the actual enqueueing + if ((startTailIndex & static_cast(BLOCK_SIZE - 1)) != 0 || firstAllocatedBlock != nullptr) { + assert(this->tailBlock != nullptr); + this->tailBlock->next = newBlock; + } + this->tailBlock = newBlock; + endBlock = newBlock; + firstAllocatedBlock = firstAllocatedBlock == nullptr ? 
newBlock : firstAllocatedBlock; + } while (blockBaseDiff > 0); + } + + // Enqueue, one block at a time + index_t newTailIndex = startTailIndex + static_cast(count); + currentTailIndex = startTailIndex; + this->tailBlock = startBlock; + assert((startTailIndex & static_cast(BLOCK_SIZE - 1)) != 0 || firstAllocatedBlock != nullptr || count == 0); + if ((startTailIndex & static_cast(BLOCK_SIZE - 1)) == 0 && firstAllocatedBlock != nullptr) { + this->tailBlock = firstAllocatedBlock; + } + while (true) { + index_t stopIndex = (currentTailIndex & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + if (details::circular_less_than(newTailIndex, stopIndex)) { + stopIndex = newTailIndex; + } + MOODYCAMEL_CONSTEXPR_IF (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast(nullptr)) T(details::deref_noexcept(itemFirst)))) { + while (currentTailIndex != stopIndex) { + new ((*this->tailBlock)[currentTailIndex++]) T(*itemFirst++); + } + } + else { + MOODYCAMEL_TRY { + while (currentTailIndex != stopIndex) { + new ((*this->tailBlock)[currentTailIndex]) T(details::nomove_if(nullptr)) T(details::deref_noexcept(itemFirst)))>::eval(*itemFirst)); + ++currentTailIndex; + ++itemFirst; + } + } + MOODYCAMEL_CATCH (...) { + auto constructedStopIndex = currentTailIndex; + auto lastBlockEnqueued = this->tailBlock; + + if (!details::is_trivially_destructible::value) { + auto block = startBlock; + if ((startTailIndex & static_cast(BLOCK_SIZE - 1)) == 0) { + block = firstAllocatedBlock; + } + currentTailIndex = startTailIndex; + while (true) { + stopIndex = (currentTailIndex & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + if (details::circular_less_than(constructedStopIndex, stopIndex)) { + stopIndex = constructedStopIndex; + } + while (currentTailIndex != stopIndex) { + (*block)[currentTailIndex++]->~T(); + } + if (block == lastBlockEnqueued) { + break; + } + block = block->next; + } + } + + currentTailIndex = (startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1); + for (auto block = firstAllocatedBlock; block != nullptr; block = block->next) { + currentTailIndex += static_cast(BLOCK_SIZE); + auto idxEntry = get_block_index_entry_for_index(currentTailIndex); + idxEntry->value.store(nullptr, std::memory_order_relaxed); + rewind_block_index_tail(); + } + this->parent->add_blocks_to_free_list(firstAllocatedBlock); + this->tailBlock = startBlock; + MOODYCAMEL_RETHROW; + } + } + + if (this->tailBlock == endBlock) { + assert(currentTailIndex == newTailIndex); + break; + } + this->tailBlock = this->tailBlock->next; + } + this->tailIndex.store(newTailIndex, std::memory_order_release); + return true; + } +#ifdef _MSC_VER +#pragma warning(pop) +#endif + + template + size_t dequeue_bulk(It& itemFirst, size_t max) + { + auto tail = this->tailIndex.load(std::memory_order_relaxed); + auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed); + auto desiredCount = static_cast(tail - (this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit)); + if (details::circular_less_than(0, desiredCount)) { + desiredCount = desiredCount < max ? desiredCount : max; + std::atomic_thread_fence(std::memory_order_acquire); + + auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(desiredCount, std::memory_order_relaxed); + + tail = this->tailIndex.load(std::memory_order_acquire); + auto actualCount = static_cast(tail - (myDequeueCount - overcommit)); + if (details::circular_less_than(0, actualCount)) { + actualCount = desiredCount < actualCount ? 
desiredCount : actualCount; + if (actualCount < desiredCount) { + this->dequeueOvercommit.fetch_add(desiredCount - actualCount, std::memory_order_release); + } + + // Get the first index. Note that since there's guaranteed to be at least actualCount elements, this + // will never exceed tail. + auto firstIndex = this->headIndex.fetch_add(actualCount, std::memory_order_acq_rel); + + // Iterate the blocks and dequeue + auto index = firstIndex; + BlockIndexHeader* localBlockIndex; + auto indexIndex = get_block_index_index_for_index(index, localBlockIndex); + do { + auto blockStartIndex = index; + index_t endIndex = (index & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + endIndex = details::circular_less_than(firstIndex + static_cast(actualCount), endIndex) ? firstIndex + static_cast(actualCount) : endIndex; + + auto entry = localBlockIndex->index[indexIndex]; + auto block = entry->value.load(std::memory_order_relaxed); + if (MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, details::deref_noexcept(itemFirst) = std::move((*(*block)[index])))) { + while (index != endIndex) { + auto& el = *((*block)[index]); + *itemFirst++ = std::move(el); + el.~T(); + ++index; + } + } + else { + MOODYCAMEL_TRY { + while (index != endIndex) { + auto& el = *((*block)[index]); + *itemFirst = std::move(el); + ++itemFirst; + el.~T(); + ++index; + } + } + MOODYCAMEL_CATCH (...) { + do { + entry = localBlockIndex->index[indexIndex]; + block = entry->value.load(std::memory_order_relaxed); + while (index != endIndex) { + (*block)[index++]->~T(); + } + + if (block->ConcurrentQueue::Block::template set_many_empty(blockStartIndex, static_cast(endIndex - blockStartIndex))) { +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + debug::DebugLock lock(mutex); +#endif + entry->value.store(nullptr, std::memory_order_relaxed); + this->parent->add_block_to_free_list(block); + } + indexIndex = (indexIndex + 1) & (localBlockIndex->capacity - 1); + + blockStartIndex = index; + endIndex = (index & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + endIndex = details::circular_less_than(firstIndex + static_cast(actualCount), endIndex) ? firstIndex + static_cast(actualCount) : endIndex; + } while (index != firstIndex + actualCount); + + MOODYCAMEL_RETHROW; + } + } + if (block->ConcurrentQueue::Block::template set_many_empty(blockStartIndex, static_cast(endIndex - blockStartIndex))) { + { +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + debug::DebugLock lock(mutex); +#endif + // Note that the set_many_empty above did a release, meaning that anybody who acquires the block + // we're about to free can use it safely since our writes (and reads!) will have happened-before then. 
+ entry->value.store(nullptr, std::memory_order_relaxed); + } + this->parent->add_block_to_free_list(block); // releases the above store + } + indexIndex = (indexIndex + 1) & (localBlockIndex->capacity - 1); + } while (index != firstIndex + actualCount); + + return actualCount; + } + else { + this->dequeueOvercommit.fetch_add(desiredCount, std::memory_order_release); + } + } + + return 0; + } + + private: + // The block size must be > 1, so any number with the low bit set is an invalid block base index + static const index_t INVALID_BLOCK_BASE = 1; + + struct BlockIndexEntry + { + std::atomic key; + std::atomic value; + }; + + struct BlockIndexHeader + { + size_t capacity; + std::atomic tail; + BlockIndexEntry* entries; + BlockIndexEntry** index; + BlockIndexHeader* prev; + }; + + template + inline bool insert_block_index_entry(BlockIndexEntry*& idxEntry, index_t blockStartIndex) + { + auto localBlockIndex = blockIndex.load(std::memory_order_relaxed); // We're the only writer thread, relaxed is OK + if (localBlockIndex == nullptr) { + return false; // this can happen if new_block_index failed in the constructor + } + size_t newTail = (localBlockIndex->tail.load(std::memory_order_relaxed) + 1) & (localBlockIndex->capacity - 1); + idxEntry = localBlockIndex->index[newTail]; + if (idxEntry->key.load(std::memory_order_relaxed) == INVALID_BLOCK_BASE || + idxEntry->value.load(std::memory_order_relaxed) == nullptr) { + + idxEntry->key.store(blockStartIndex, std::memory_order_relaxed); + localBlockIndex->tail.store(newTail, std::memory_order_release); + return true; + } + + // No room in the old block index, try to allocate another one! + MOODYCAMEL_CONSTEXPR_IF (allocMode == CannotAlloc) { + return false; + } + else if (!new_block_index()) { + return false; + } + localBlockIndex = blockIndex.load(std::memory_order_relaxed); + newTail = (localBlockIndex->tail.load(std::memory_order_relaxed) + 1) & (localBlockIndex->capacity - 1); + idxEntry = localBlockIndex->index[newTail]; + assert(idxEntry->key.load(std::memory_order_relaxed) == INVALID_BLOCK_BASE); + idxEntry->key.store(blockStartIndex, std::memory_order_relaxed); + localBlockIndex->tail.store(newTail, std::memory_order_release); + return true; + } + + inline void rewind_block_index_tail() + { + auto localBlockIndex = blockIndex.load(std::memory_order_relaxed); + localBlockIndex->tail.store((localBlockIndex->tail.load(std::memory_order_relaxed) - 1) & (localBlockIndex->capacity - 1), std::memory_order_relaxed); + } + + inline BlockIndexEntry* get_block_index_entry_for_index(index_t index) const + { + BlockIndexHeader* localBlockIndex; + auto idx = get_block_index_index_for_index(index, localBlockIndex); + return localBlockIndex->index[idx]; + } + + inline size_t get_block_index_index_for_index(index_t index, BlockIndexHeader*& localBlockIndex) const + { +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + debug::DebugLock lock(mutex); +#endif + index &= ~static_cast(BLOCK_SIZE - 1); + localBlockIndex = blockIndex.load(std::memory_order_acquire); + auto tail = localBlockIndex->tail.load(std::memory_order_acquire); + auto tailBase = localBlockIndex->index[tail]->key.load(std::memory_order_relaxed); + assert(tailBase != INVALID_BLOCK_BASE); + // Note: Must use division instead of shift because the index may wrap around, causing a negative + // offset, whose negativity we want to preserve + auto offset = static_cast(static_cast::type>(index - tailBase) / BLOCK_SIZE); + size_t idx = (tail + offset) & (localBlockIndex->capacity - 1); + 
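+		// Hypothetical worked example: with BLOCK_SIZE == 32, tailBase == 96 and index == 32 (an older,
+		// still-registered block), the signed cast makes the difference -64 and the division yields
+		// offset == -2, so after the unsigned wrap-around in (tail + offset) the mask resolves idx to the
+		// entry two slots behind the tail slot, modulo the index capacity.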
assert(localBlockIndex->index[idx]->key.load(std::memory_order_relaxed) == index && localBlockIndex->index[idx]->value.load(std::memory_order_relaxed) != nullptr); + return idx; + } + + bool new_block_index() + { + auto prev = blockIndex.load(std::memory_order_relaxed); + size_t prevCapacity = prev == nullptr ? 0 : prev->capacity; + auto entryCount = prev == nullptr ? nextBlockIndexCapacity : prevCapacity; + auto raw = static_cast((Traits::malloc)( + sizeof(BlockIndexHeader) + + std::alignment_of::value - 1 + sizeof(BlockIndexEntry) * entryCount + + std::alignment_of::value - 1 + sizeof(BlockIndexEntry*) * nextBlockIndexCapacity)); + if (raw == nullptr) { + return false; + } + + auto header = new (raw) BlockIndexHeader; + auto entries = reinterpret_cast(details::align_for(raw + sizeof(BlockIndexHeader))); + auto index = reinterpret_cast(details::align_for(reinterpret_cast(entries) + sizeof(BlockIndexEntry) * entryCount)); + if (prev != nullptr) { + auto prevTail = prev->tail.load(std::memory_order_relaxed); + auto prevPos = prevTail; + size_t i = 0; + do { + prevPos = (prevPos + 1) & (prev->capacity - 1); + index[i++] = prev->index[prevPos]; + } while (prevPos != prevTail); + assert(i == prevCapacity); + } + for (size_t i = 0; i != entryCount; ++i) { + new (entries + i) BlockIndexEntry; + entries[i].key.store(INVALID_BLOCK_BASE, std::memory_order_relaxed); + index[prevCapacity + i] = entries + i; + } + header->prev = prev; + header->entries = entries; + header->index = index; + header->capacity = nextBlockIndexCapacity; + header->tail.store((prevCapacity - 1) & (nextBlockIndexCapacity - 1), std::memory_order_relaxed); + + blockIndex.store(header, std::memory_order_release); + + nextBlockIndexCapacity <<= 1; + + return true; + } + + private: + size_t nextBlockIndexCapacity; + std::atomic blockIndex; + +#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED + public: + details::ThreadExitListener threadExitListener; + private: +#endif + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + public: + ImplicitProducer* nextImplicitProducer; + private: +#endif + +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + mutable debug::DebugMutex mutex; +#endif +#ifdef MCDBGQ_TRACKMEM + friend struct MemStats; +#endif + }; + + + ////////////////////////////////// + // Block pool manipulation + ////////////////////////////////// + + void populate_initial_block_list(size_t blockCount) + { + initialBlockPoolSize = blockCount; + if (initialBlockPoolSize == 0) { + initialBlockPool = nullptr; + return; + } + + initialBlockPool = create_array(blockCount); + if (initialBlockPool == nullptr) { + initialBlockPoolSize = 0; + } + for (size_t i = 0; i < initialBlockPoolSize; ++i) { + initialBlockPool[i].dynamicallyAllocated = false; + } + } + + inline Block* try_get_block_from_initial_pool() + { + if (initialBlockPoolIndex.load(std::memory_order_relaxed) >= initialBlockPoolSize) { + return nullptr; + } + + auto index = initialBlockPoolIndex.fetch_add(1, std::memory_order_relaxed); + + return index < initialBlockPoolSize ? 
(initialBlockPool + index) : nullptr; + } + + inline void add_block_to_free_list(Block* block) + { +#ifdef MCDBGQ_TRACKMEM + block->owner = nullptr; +#endif + freeList.add(block); + } + + inline void add_blocks_to_free_list(Block* block) + { + while (block != nullptr) { + auto next = block->next; + add_block_to_free_list(block); + block = next; + } + } + + inline Block* try_get_block_from_free_list() + { + return freeList.try_get(); + } + + // Gets a free block from one of the memory pools, or allocates a new one (if applicable) + template + Block* requisition_block() + { + auto block = try_get_block_from_initial_pool(); + if (block != nullptr) { + return block; + } + + block = try_get_block_from_free_list(); + if (block != nullptr) { + return block; + } + + MOODYCAMEL_CONSTEXPR_IF (canAlloc == CanAlloc) { + return create(); + } + else { + return nullptr; + } + } + + +#ifdef MCDBGQ_TRACKMEM + public: + struct MemStats { + size_t allocatedBlocks; + size_t usedBlocks; + size_t freeBlocks; + size_t ownedBlocksExplicit; + size_t ownedBlocksImplicit; + size_t implicitProducers; + size_t explicitProducers; + size_t elementsEnqueued; + size_t blockClassBytes; + size_t queueClassBytes; + size_t implicitBlockIndexBytes; + size_t explicitBlockIndexBytes; + + friend class ConcurrentQueue; + + private: + static MemStats getFor(ConcurrentQueue* q) + { + MemStats stats = { 0 }; + + stats.elementsEnqueued = q->size_approx(); + + auto block = q->freeList.head_unsafe(); + while (block != nullptr) { + ++stats.allocatedBlocks; + ++stats.freeBlocks; + block = block->freeListNext.load(std::memory_order_relaxed); + } + + for (auto ptr = q->producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) { + bool implicit = dynamic_cast(ptr) != nullptr; + stats.implicitProducers += implicit ? 1 : 0; + stats.explicitProducers += implicit ? 
0 : 1; + + if (implicit) { + auto prod = static_cast(ptr); + stats.queueClassBytes += sizeof(ImplicitProducer); + auto head = prod->headIndex.load(std::memory_order_relaxed); + auto tail = prod->tailIndex.load(std::memory_order_relaxed); + auto hash = prod->blockIndex.load(std::memory_order_relaxed); + if (hash != nullptr) { + for (size_t i = 0; i != hash->capacity; ++i) { + if (hash->index[i]->key.load(std::memory_order_relaxed) != ImplicitProducer::INVALID_BLOCK_BASE && hash->index[i]->value.load(std::memory_order_relaxed) != nullptr) { + ++stats.allocatedBlocks; + ++stats.ownedBlocksImplicit; + } + } + stats.implicitBlockIndexBytes += hash->capacity * sizeof(typename ImplicitProducer::BlockIndexEntry); + for (; hash != nullptr; hash = hash->prev) { + stats.implicitBlockIndexBytes += sizeof(typename ImplicitProducer::BlockIndexHeader) + hash->capacity * sizeof(typename ImplicitProducer::BlockIndexEntry*); + } + } + for (; details::circular_less_than(head, tail); head += BLOCK_SIZE) { + //auto block = prod->get_block_index_entry_for_index(head); + ++stats.usedBlocks; + } + } + else { + auto prod = static_cast(ptr); + stats.queueClassBytes += sizeof(ExplicitProducer); + auto tailBlock = prod->tailBlock; + bool wasNonEmpty = false; + if (tailBlock != nullptr) { + auto block = tailBlock; + do { + ++stats.allocatedBlocks; + if (!block->ConcurrentQueue::Block::template is_empty() || wasNonEmpty) { + ++stats.usedBlocks; + wasNonEmpty = wasNonEmpty || block != tailBlock; + } + ++stats.ownedBlocksExplicit; + block = block->next; + } while (block != tailBlock); + } + auto index = prod->blockIndex.load(std::memory_order_relaxed); + while (index != nullptr) { + stats.explicitBlockIndexBytes += sizeof(typename ExplicitProducer::BlockIndexHeader) + index->size * sizeof(typename ExplicitProducer::BlockIndexEntry); + index = static_cast(index->prev); + } + } + } + + auto freeOnInitialPool = q->initialBlockPoolIndex.load(std::memory_order_relaxed) >= q->initialBlockPoolSize ? 0 : q->initialBlockPoolSize - q->initialBlockPoolIndex.load(std::memory_order_relaxed); + stats.allocatedBlocks += freeOnInitialPool; + stats.freeBlocks += freeOnInitialPool; + + stats.blockClassBytes = sizeof(Block) * stats.allocatedBlocks; + stats.queueClassBytes += sizeof(ConcurrentQueue); + + return stats; + } + }; + + // For debugging only. Not thread-safe. + MemStats getMemStats() + { + return MemStats::getFor(this); + } + private: + friend struct MemStats; +#endif + + + ////////////////////////////////// + // Producer list manipulation + ////////////////////////////////// + + ProducerBase* recycle_or_create_producer(bool isExplicit) + { + bool recycled; + return recycle_or_create_producer(isExplicit, recycled); + } + + ProducerBase* recycle_or_create_producer(bool isExplicit, bool& recycled) + { +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH + debug::DebugLock lock(implicitProdMutex); +#endif + // Try to re-use one first + for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) { + if (ptr->inactive.load(std::memory_order_relaxed) && ptr->isExplicit == isExplicit) { + bool expected = true; + if (ptr->inactive.compare_exchange_strong(expected, /* desired */ false, std::memory_order_acquire, std::memory_order_relaxed)) { + // We caught one! It's been marked as activated, the caller can have it + recycled = true; + return ptr; + } + } + } + + recycled = false; + return add_producer(isExplicit ? 
static_cast(create(this)) : create(this)); + } + + ProducerBase* add_producer(ProducerBase* producer) + { + // Handle failed memory allocation + if (producer == nullptr) { + return nullptr; + } + + producerCount.fetch_add(1, std::memory_order_relaxed); + + // Add it to the lock-free list + auto prevTail = producerListTail.load(std::memory_order_relaxed); + do { + producer->next = prevTail; + } while (!producerListTail.compare_exchange_weak(prevTail, producer, std::memory_order_release, std::memory_order_relaxed)); + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + if (producer->isExplicit) { + auto prevTailExplicit = explicitProducers.load(std::memory_order_relaxed); + do { + static_cast(producer)->nextExplicitProducer = prevTailExplicit; + } while (!explicitProducers.compare_exchange_weak(prevTailExplicit, static_cast(producer), std::memory_order_release, std::memory_order_relaxed)); + } + else { + auto prevTailImplicit = implicitProducers.load(std::memory_order_relaxed); + do { + static_cast(producer)->nextImplicitProducer = prevTailImplicit; + } while (!implicitProducers.compare_exchange_weak(prevTailImplicit, static_cast(producer), std::memory_order_release, std::memory_order_relaxed)); + } +#endif + + return producer; + } + + void reown_producers() + { + // After another instance is moved-into/swapped-with this one, all the + // producers we stole still think their parents are the other queue. + // So fix them up! + for (auto ptr = producerListTail.load(std::memory_order_relaxed); ptr != nullptr; ptr = ptr->next_prod()) { + ptr->parent = this; + } + } + + + ////////////////////////////////// + // Implicit producer hash + ////////////////////////////////// + + struct ImplicitProducerKVP + { + std::atomic key; + ImplicitProducer* value; // No need for atomicity since it's only read by the thread that sets it in the first place + + ImplicitProducerKVP() : value(nullptr) { } + + ImplicitProducerKVP(ImplicitProducerKVP&& other) MOODYCAMEL_NOEXCEPT + { + key.store(other.key.load(std::memory_order_relaxed), std::memory_order_relaxed); + value = other.value; + } + + inline ImplicitProducerKVP& operator=(ImplicitProducerKVP&& other) MOODYCAMEL_NOEXCEPT + { + swap(other); + return *this; + } + + inline void swap(ImplicitProducerKVP& other) MOODYCAMEL_NOEXCEPT + { + if (this != &other) { + details::swap_relaxed(key, other.key); + std::swap(value, other.value); + } + } + }; + + template + friend void moodycamel::swap(typename ConcurrentQueue::ImplicitProducerKVP&, typename ConcurrentQueue::ImplicitProducerKVP&) MOODYCAMEL_NOEXCEPT; + + struct ImplicitProducerHash + { + size_t capacity; + ImplicitProducerKVP* entries; + ImplicitProducerHash* prev; + }; + + inline void populate_initial_implicit_producer_hash() + { + MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) { + return; + } + else { + implicitProducerHashCount.store(0, std::memory_order_relaxed); + auto hash = &initialImplicitProducerHash; + hash->capacity = INITIAL_IMPLICIT_PRODUCER_HASH_SIZE; + hash->entries = &initialImplicitProducerHashEntries[0]; + for (size_t i = 0; i != INITIAL_IMPLICIT_PRODUCER_HASH_SIZE; ++i) { + initialImplicitProducerHashEntries[i].key.store(details::invalid_thread_id, std::memory_order_relaxed); + } + hash->prev = nullptr; + implicitProducerHash.store(hash, std::memory_order_relaxed); + } + } + + void swap_implicit_producer_hashes(ConcurrentQueue& other) + { + MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) { + return; + } + else { + // Swap (assumes our implicit producer hash is 
initialized) + initialImplicitProducerHashEntries.swap(other.initialImplicitProducerHashEntries); + initialImplicitProducerHash.entries = &initialImplicitProducerHashEntries[0]; + other.initialImplicitProducerHash.entries = &other.initialImplicitProducerHashEntries[0]; + + details::swap_relaxed(implicitProducerHashCount, other.implicitProducerHashCount); + + details::swap_relaxed(implicitProducerHash, other.implicitProducerHash); + if (implicitProducerHash.load(std::memory_order_relaxed) == &other.initialImplicitProducerHash) { + implicitProducerHash.store(&initialImplicitProducerHash, std::memory_order_relaxed); + } + else { + ImplicitProducerHash* hash; + for (hash = implicitProducerHash.load(std::memory_order_relaxed); hash->prev != &other.initialImplicitProducerHash; hash = hash->prev) { + continue; + } + hash->prev = &initialImplicitProducerHash; + } + if (other.implicitProducerHash.load(std::memory_order_relaxed) == &initialImplicitProducerHash) { + other.implicitProducerHash.store(&other.initialImplicitProducerHash, std::memory_order_relaxed); + } + else { + ImplicitProducerHash* hash; + for (hash = other.implicitProducerHash.load(std::memory_order_relaxed); hash->prev != &initialImplicitProducerHash; hash = hash->prev) { + continue; + } + hash->prev = &other.initialImplicitProducerHash; + } + } + } + + // Only fails (returns nullptr) if memory allocation fails + ImplicitProducer* get_or_add_implicit_producer() + { + // Note that since the data is essentially thread-local (key is thread ID), + // there's a reduced need for fences (memory ordering is already consistent + // for any individual thread), except for the current table itself. + + // Start by looking for the thread ID in the current and all previous hash tables. + // If it's not found, it must not be in there yet, since this same thread would + // have added it previously to one of the tables that we traversed. + + // Code and algorithm adapted from http://preshing.com/20130605/the-worlds-simplest-lock-free-hash-table + +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH + debug::DebugLock lock(implicitProdMutex); +#endif + + auto id = details::thread_id(); + auto hashedId = details::hash_thread_id(id); + + auto mainHash = implicitProducerHash.load(std::memory_order_acquire); + assert(mainHash != nullptr); // silence clang-tidy and MSVC warnings (hash cannot be null) + for (auto hash = mainHash; hash != nullptr; hash = hash->prev) { + // Look for the id in this hash + auto index = hashedId; + while (true) { // Not an infinite loop because at least one slot is free in the hash table + index &= hash->capacity - 1; + + auto probedKey = hash->entries[index].key.load(std::memory_order_relaxed); + if (probedKey == id) { + // Found it! If we had to search several hashes deep, though, we should lazily add it + // to the current main hash table to avoid the extended search next time. + // Note there's guaranteed to be room in the current hash table since every subsequent + // table implicitly reserves space for all previous tables (there's only one + // implicitProducerHashCount). 
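+					// Note that the promotion below only CASes the key slot; the matching value member is
+					// written with a plain store, which is fine for the same reason noted on
+					// ImplicitProducerKVP::value -- an entry's value is only ever read back by the thread
+					// whose id occupies that slot.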
+ auto value = hash->entries[index].value; + if (hash != mainHash) { + index = hashedId; + while (true) { + index &= mainHash->capacity - 1; + probedKey = mainHash->entries[index].key.load(std::memory_order_relaxed); + auto empty = details::invalid_thread_id; +#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED + auto reusable = details::invalid_thread_id2; + if ((probedKey == empty && mainHash->entries[index].key.compare_exchange_strong(empty, id, std::memory_order_relaxed, std::memory_order_relaxed)) || + (probedKey == reusable && mainHash->entries[index].key.compare_exchange_strong(reusable, id, std::memory_order_acquire, std::memory_order_acquire))) { +#else + if ((probedKey == empty && mainHash->entries[index].key.compare_exchange_strong(empty, id, std::memory_order_relaxed, std::memory_order_relaxed))) { +#endif + mainHash->entries[index].value = value; + break; + } + ++index; + } + } + + return value; + } + if (probedKey == details::invalid_thread_id) { + break; // Not in this hash table + } + ++index; + } + } + + // Insert! + auto newCount = 1 + implicitProducerHashCount.fetch_add(1, std::memory_order_relaxed); + while (true) { + // NOLINTNEXTLINE(clang-analyzer-core.NullDereference) + if (newCount >= (mainHash->capacity >> 1) && !implicitProducerHashResizeInProgress.test_and_set(std::memory_order_acquire)) { + // We've acquired the resize lock, try to allocate a bigger hash table. + // Note the acquire fence synchronizes with the release fence at the end of this block, and hence when + // we reload implicitProducerHash it must be the most recent version (it only gets changed within this + // locked block). + mainHash = implicitProducerHash.load(std::memory_order_acquire); + if (newCount >= (mainHash->capacity >> 1)) { + auto newCapacity = mainHash->capacity << 1; + while (newCount >= (newCapacity >> 1)) { + newCapacity <<= 1; + } + auto raw = static_cast((Traits::malloc)(sizeof(ImplicitProducerHash) + std::alignment_of::value - 1 + sizeof(ImplicitProducerKVP) * newCapacity)); + if (raw == nullptr) { + // Allocation failed + implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed); + implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed); + return nullptr; + } + + auto newHash = new (raw) ImplicitProducerHash; + newHash->capacity = static_cast(newCapacity); + newHash->entries = reinterpret_cast(details::align_for(raw + sizeof(ImplicitProducerHash))); + for (size_t i = 0; i != newCapacity; ++i) { + new (newHash->entries + i) ImplicitProducerKVP; + newHash->entries[i].key.store(details::invalid_thread_id, std::memory_order_relaxed); + } + newHash->prev = mainHash; + implicitProducerHash.store(newHash, std::memory_order_release); + implicitProducerHashResizeInProgress.clear(std::memory_order_release); + mainHash = newHash; + } + else { + implicitProducerHashResizeInProgress.clear(std::memory_order_release); + } + } + + // If it's < three-quarters full, add to the old one anyway so that we don't have to wait for the next table + // to finish being allocated by another thread (and if we just finished allocating above, the condition will + // always be true) + if (newCount < (mainHash->capacity >> 1) + (mainHash->capacity >> 2)) { + bool recycled; + auto producer = static_cast(recycle_or_create_producer(false, recycled)); + if (producer == nullptr) { + implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed); + return nullptr; + } + if (recycled) { + implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed); + } + +#ifdef 
MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED + producer->threadExitListener.callback = &ConcurrentQueue::implicit_producer_thread_exited_callback; + producer->threadExitListener.userData = producer; + details::ThreadExitNotifier::subscribe(&producer->threadExitListener); +#endif + + auto index = hashedId; + while (true) { + index &= mainHash->capacity - 1; + auto probedKey = mainHash->entries[index].key.load(std::memory_order_relaxed); + + auto empty = details::invalid_thread_id; +#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED + auto reusable = details::invalid_thread_id2; + if ((probedKey == empty && mainHash->entries[index].key.compare_exchange_strong(empty, id, std::memory_order_relaxed, std::memory_order_relaxed)) || + (probedKey == reusable && mainHash->entries[index].key.compare_exchange_strong(reusable, id, std::memory_order_acquire, std::memory_order_acquire))) { +#else + if ((probedKey == empty && mainHash->entries[index].key.compare_exchange_strong(empty, id, std::memory_order_relaxed, std::memory_order_relaxed))) { +#endif + mainHash->entries[index].value = producer; + break; + } + ++index; + } + return producer; + } + + // Hmm, the old hash is quite full and somebody else is busy allocating a new one. + // We need to wait for the allocating thread to finish (if it succeeds, we add, if not, + // we try to allocate ourselves). + mainHash = implicitProducerHash.load(std::memory_order_acquire); + } + } + +#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED + void implicit_producer_thread_exited(ImplicitProducer* producer) + { + // Remove from thread exit listeners + details::ThreadExitNotifier::unsubscribe(&producer->threadExitListener); + + // Remove from hash +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH + debug::DebugLock lock(implicitProdMutex); +#endif + auto hash = implicitProducerHash.load(std::memory_order_acquire); + assert(hash != nullptr); // The thread exit listener is only registered if we were added to a hash in the first place + auto id = details::thread_id(); + auto hashedId = details::hash_thread_id(id); + details::thread_id_t probedKey; + + // We need to traverse all the hashes just in case other threads aren't on the current one yet and are + // trying to add an entry thinking there's a free slot (because they reused a producer) + for (; hash != nullptr; hash = hash->prev) { + auto index = hashedId; + do { + index &= hash->capacity - 1; + probedKey = hash->entries[index].key.load(std::memory_order_relaxed); + if (probedKey == id) { + hash->entries[index].key.store(details::invalid_thread_id2, std::memory_order_release); + break; + } + ++index; + } while (probedKey != details::invalid_thread_id); // Can happen if the hash has changed but we weren't put back in it yet, or if we weren't added to this hash in the first place + } + + // Mark the queue as being recyclable + producer->inactive.store(true, std::memory_order_release); + } + + static void implicit_producer_thread_exited_callback(void* userData) + { + auto producer = static_cast(userData); + auto queue = producer->parent; + queue->implicit_producer_thread_exited(producer); + } +#endif + + ////////////////////////////////// + // Utility functions + ////////////////////////////////// + + template + static inline void* aligned_malloc(size_t size) + { + MOODYCAMEL_CONSTEXPR_IF (std::alignment_of::value <= std::alignment_of::value) + return (Traits::malloc)(size); + else { + size_t alignment = std::alignment_of::value; + void* raw = (Traits::malloc)(size + alignment - 1 + sizeof(void*)); + if (!raw) + return nullptr; + char* 
ptr = details::align_for(reinterpret_cast(raw) + sizeof(void*)); + *(reinterpret_cast(ptr) - 1) = raw; + return ptr; + } + } + + template + static inline void aligned_free(void* ptr) + { + MOODYCAMEL_CONSTEXPR_IF (std::alignment_of::value <= std::alignment_of::value) + return (Traits::free)(ptr); + else + (Traits::free)(ptr ? *(reinterpret_cast(ptr) - 1) : nullptr); + } + + template + static inline U* create_array(size_t count) + { + assert(count > 0); + U* p = static_cast(aligned_malloc(sizeof(U) * count)); + if (p == nullptr) + return nullptr; + + for (size_t i = 0; i != count; ++i) + new (p + i) U(); + return p; + } + + template + static inline void destroy_array(U* p, size_t count) + { + if (p != nullptr) { + assert(count > 0); + for (size_t i = count; i != 0; ) + (p + --i)->~U(); + } + aligned_free(p); + } + + template + static inline U* create() + { + void* p = aligned_malloc(sizeof(U)); + return p != nullptr ? new (p) U : nullptr; + } + + template + static inline U* create(A1&& a1) + { + void* p = aligned_malloc(sizeof(U)); + return p != nullptr ? new (p) U(std::forward(a1)) : nullptr; + } + + template + static inline void destroy(U* p) + { + if (p != nullptr) + p->~U(); + aligned_free(p); + } + +private: + std::atomic producerListTail; + std::atomic producerCount; + + std::atomic initialBlockPoolIndex; + Block* initialBlockPool; + size_t initialBlockPoolSize; + +#ifndef MCDBGQ_USEDEBUGFREELIST + FreeList freeList; +#else + debug::DebugFreeList freeList; +#endif + + std::atomic implicitProducerHash; + std::atomic implicitProducerHashCount; // Number of slots logically used + ImplicitProducerHash initialImplicitProducerHash; + std::array initialImplicitProducerHashEntries; + std::atomic_flag implicitProducerHashResizeInProgress; + + std::atomic nextExplicitConsumerId; + std::atomic globalExplicitConsumerOffset; + +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH + debug::DebugMutex implicitProdMutex; +#endif + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + std::atomic explicitProducers; + std::atomic implicitProducers; +#endif +}; + + +template +ProducerToken::ProducerToken(ConcurrentQueue& queue) + : producer(queue.recycle_or_create_producer(true)) +{ + if (producer != nullptr) { + producer->token = this; + } +} + +template +ProducerToken::ProducerToken(BlockingConcurrentQueue& queue) + : producer(reinterpret_cast*>(&queue)->recycle_or_create_producer(true)) +{ + if (producer != nullptr) { + producer->token = this; + } +} + +template +ConsumerToken::ConsumerToken(ConcurrentQueue& queue) + : itemsConsumedFromCurrent(0), currentProducer(nullptr), desiredProducer(nullptr) +{ + initialOffset = queue.nextExplicitConsumerId.fetch_add(1, std::memory_order_release); + lastKnownGlobalOffset = static_cast(-1); +} + +template +ConsumerToken::ConsumerToken(BlockingConcurrentQueue& queue) + : itemsConsumedFromCurrent(0), currentProducer(nullptr), desiredProducer(nullptr) +{ + initialOffset = reinterpret_cast*>(&queue)->nextExplicitConsumerId.fetch_add(1, std::memory_order_release); + lastKnownGlobalOffset = static_cast(-1); +} + +template +inline void swap(ConcurrentQueue& a, ConcurrentQueue& b) MOODYCAMEL_NOEXCEPT +{ + a.swap(b); +} + +inline void swap(ProducerToken& a, ProducerToken& b) MOODYCAMEL_NOEXCEPT +{ + a.swap(b); +} + +inline void swap(ConsumerToken& a, ConsumerToken& b) MOODYCAMEL_NOEXCEPT +{ + a.swap(b); +} + +template +inline void swap(typename ConcurrentQueue::ImplicitProducerKVP& a, typename ConcurrentQueue::ImplicitProducerKVP& b) MOODYCAMEL_NOEXCEPT +{ + a.swap(b); +} + +} + +#if 
defined(_MSC_VER) && (!defined(_HAS_CXX17) || !_HAS_CXX17) +#pragma warning(pop) +#endif + +#if defined(__GNUC__) && !defined(__INTEL_COMPILER) +#pragma GCC diagnostic pop +#endif + diff --git a/deps/concurrentqueue/lightweightsemaphore.h b/deps/concurrentqueue/lightweightsemaphore.h new file mode 100644 index 000000000..bb7c7a403 --- /dev/null +++ b/deps/concurrentqueue/lightweightsemaphore.h @@ -0,0 +1,412 @@ +// Provides an efficient implementation of a semaphore (LightweightSemaphore). +// This is an extension of Jeff Preshing's sempahore implementation (licensed +// under the terms of its separate zlib license) that has been adapted and +// extended by Cameron Desrochers. + +#pragma once + +#include // For std::size_t +#include +#include // For std::make_signed + +#if defined(_WIN32) +// Avoid including windows.h in a header; we only need a handful of +// items, so we'll redeclare them here (this is relatively safe since +// the API generally has to remain stable between Windows versions). +// I know this is an ugly hack but it still beats polluting the global +// namespace with thousands of generic names or adding a .cpp for nothing. +extern "C" { + struct _SECURITY_ATTRIBUTES; + __declspec(dllimport) void* __stdcall CreateSemaphoreW(_SECURITY_ATTRIBUTES* lpSemaphoreAttributes, long lInitialCount, long lMaximumCount, const wchar_t* lpName); + __declspec(dllimport) int __stdcall CloseHandle(void* hObject); + __declspec(dllimport) unsigned long __stdcall WaitForSingleObject(void* hHandle, unsigned long dwMilliseconds); + __declspec(dllimport) int __stdcall ReleaseSemaphore(void* hSemaphore, long lReleaseCount, long* lpPreviousCount); +} +#elif defined(__MACH__) +#include +#elif defined(__unix__) +#include +#endif + +namespace moodycamel +{ +namespace details +{ + +// Code in the mpmc_sema namespace below is an adaptation of Jeff Preshing's +// portable + lightweight semaphore implementations, originally from +// https://github.com/preshing/cpp11-on-multicore/blob/master/common/sema.h +// LICENSE: +// Copyright (c) 2015 Jeff Preshing +// +// This software is provided 'as-is', without any express or implied +// warranty. In no event will the authors be held liable for any damages +// arising from the use of this software. +// +// Permission is granted to anyone to use this software for any purpose, +// including commercial applications, and to alter it and redistribute it +// freely, subject to the following restrictions: +// +// 1. The origin of this software must not be misrepresented; you must not +// claim that you wrote the original software. If you use this software +// in a product, an acknowledgement in the product documentation would be +// appreciated but is not required. +// 2. Altered source versions must be plainly marked as such, and must not be +// misrepresented as being the original software. +// 3. This notice may not be removed or altered from any source distribution. 
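+// What follows are three thin platform wrappers (Win32 semaphores, Mach semaphores on Apple platforms,
+// and POSIX sem_t on __unix__ systems) exposing the same wait()/try_wait()/timed_wait()/signal() surface;
+// the LightweightSemaphore further down layers an atomic spin-then-block fast path on top of whichever
+// wrapper gets selected. A minimal usage sketch (not part of this header; it simply exercises the names
+// defined below):
+//
+//     moodycamel::LightweightSemaphore sema(0);   // no tokens available initially
+//     // producer thread:  sema.signal();         // publish one token
+//     // consumer thread:  sema.wait();           // spins briefly, then falls back to the OS semaphore
+//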
+#if defined(_WIN32) +class Semaphore +{ +private: + void* m_hSema; + + Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; + Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; + +public: + Semaphore(int initialCount = 0) + { + assert(initialCount >= 0); + const long maxLong = 0x7fffffff; + m_hSema = CreateSemaphoreW(nullptr, initialCount, maxLong, nullptr); + assert(m_hSema); + } + + ~Semaphore() + { + CloseHandle(m_hSema); + } + + bool wait() + { + const unsigned long infinite = 0xffffffff; + return WaitForSingleObject(m_hSema, infinite) == 0; + } + + bool try_wait() + { + return WaitForSingleObject(m_hSema, 0) == 0; + } + + bool timed_wait(std::uint64_t usecs) + { + return WaitForSingleObject(m_hSema, (unsigned long)(usecs / 1000)) == 0; + } + + void signal(int count = 1) + { + while (!ReleaseSemaphore(m_hSema, count, nullptr)); + } +}; +#elif defined(__MACH__) +//--------------------------------------------------------- +// Semaphore (Apple iOS and OSX) +// Can't use POSIX semaphores due to http://lists.apple.com/archives/darwin-kernel/2009/Apr/msg00010.html +//--------------------------------------------------------- +class Semaphore +{ +private: + semaphore_t m_sema; + + Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; + Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; + +public: + Semaphore(int initialCount = 0) + { + assert(initialCount >= 0); + kern_return_t rc = semaphore_create(mach_task_self(), &m_sema, SYNC_POLICY_FIFO, initialCount); + assert(rc == KERN_SUCCESS); + (void)rc; + } + + ~Semaphore() + { + semaphore_destroy(mach_task_self(), m_sema); + } + + bool wait() + { + return semaphore_wait(m_sema) == KERN_SUCCESS; + } + + bool try_wait() + { + return timed_wait(0); + } + + bool timed_wait(std::uint64_t timeout_usecs) + { + mach_timespec_t ts; + ts.tv_sec = static_cast(timeout_usecs / 1000000); + ts.tv_nsec = static_cast((timeout_usecs % 1000000) * 1000); + + // added in OSX 10.10: https://developer.apple.com/library/prerelease/mac/documentation/General/Reference/APIDiffsMacOSX10_10SeedDiff/modules/Darwin.html + kern_return_t rc = semaphore_timedwait(m_sema, ts); + return rc == KERN_SUCCESS; + } + + void signal() + { + while (semaphore_signal(m_sema) != KERN_SUCCESS); + } + + void signal(int count) + { + while (count-- > 0) + { + while (semaphore_signal(m_sema) != KERN_SUCCESS); + } + } +}; +#elif defined(__unix__) +//--------------------------------------------------------- +// Semaphore (POSIX, Linux) +//--------------------------------------------------------- +class Semaphore +{ +private: + sem_t m_sema; + + Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; + Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; + +public: + Semaphore(int initialCount = 0) + { + assert(initialCount >= 0); + int rc = sem_init(&m_sema, 0, static_cast(initialCount)); + assert(rc == 0); + (void)rc; + } + + ~Semaphore() + { + sem_destroy(&m_sema); + } + + bool wait() + { + // http://stackoverflow.com/questions/2013181/gdb-causes-sem-wait-to-fail-with-eintr-error + int rc; + do { + rc = sem_wait(&m_sema); + } while (rc == -1 && errno == EINTR); + return rc == 0; + } + + bool try_wait() + { + int rc; + do { + rc = sem_trywait(&m_sema); + } while (rc == -1 && errno == EINTR); + return rc == 0; + } + + bool timed_wait(std::uint64_t usecs) + { + struct timespec ts; + const int usecs_in_1_sec = 1000000; + const int nsecs_in_1_sec = 1000000000; + clock_gettime(CLOCK_REALTIME, &ts); + ts.tv_sec += 
(time_t)(usecs / usecs_in_1_sec); + ts.tv_nsec += (long)(usecs % usecs_in_1_sec) * 1000; + // sem_timedwait bombs if you have more than 1e9 in tv_nsec + // so we have to clean things up before passing it in + if (ts.tv_nsec >= nsecs_in_1_sec) { + ts.tv_nsec -= nsecs_in_1_sec; + ++ts.tv_sec; + } + + int rc; + do { + rc = sem_timedwait(&m_sema, &ts); + } while (rc == -1 && errno == EINTR); + return rc == 0; + } + + void signal() + { + while (sem_post(&m_sema) == -1); + } + + void signal(int count) + { + while (count-- > 0) + { + while (sem_post(&m_sema) == -1); + } + } +}; +#else +#error Unsupported platform! (No semaphore wrapper available) +#endif + +} // end namespace details + + +//--------------------------------------------------------- +// LightweightSemaphore +//--------------------------------------------------------- +class LightweightSemaphore +{ +public: + typedef std::make_signed::type ssize_t; + +private: + std::atomic m_count; + details::Semaphore m_sema; + int m_maxSpins; + + bool waitWithPartialSpinning(std::int64_t timeout_usecs = -1) + { + ssize_t oldCount; + int spin = m_maxSpins; + while (--spin >= 0) + { + oldCount = m_count.load(std::memory_order_relaxed); + if ((oldCount > 0) && m_count.compare_exchange_strong(oldCount, oldCount - 1, std::memory_order_acquire, std::memory_order_relaxed)) + return true; + std::atomic_signal_fence(std::memory_order_acquire); // Prevent the compiler from collapsing the loop. + } + oldCount = m_count.fetch_sub(1, std::memory_order_acquire); + if (oldCount > 0) + return true; + if (timeout_usecs < 0) + { + if (m_sema.wait()) + return true; + } + if (timeout_usecs > 0 && m_sema.timed_wait((std::uint64_t)timeout_usecs)) + return true; + // At this point, we've timed out waiting for the semaphore, but the + // count is still decremented indicating we may still be waiting on + // it. So we have to re-adjust the count, but only if the semaphore + // wasn't signaled enough times for us too since then. If it was, we + // need to release the semaphore too. + while (true) + { + oldCount = m_count.load(std::memory_order_acquire); + if (oldCount >= 0 && m_sema.try_wait()) + return true; + if (oldCount < 0 && m_count.compare_exchange_strong(oldCount, oldCount + 1, std::memory_order_relaxed, std::memory_order_relaxed)) + return false; + } + } + + ssize_t waitManyWithPartialSpinning(ssize_t max, std::int64_t timeout_usecs = -1) + { + assert(max > 0); + ssize_t oldCount; + int spin = m_maxSpins; + while (--spin >= 0) + { + oldCount = m_count.load(std::memory_order_relaxed); + if (oldCount > 0) + { + ssize_t newCount = oldCount > max ? 
oldCount - max : 0; + if (m_count.compare_exchange_strong(oldCount, newCount, std::memory_order_acquire, std::memory_order_relaxed)) + return oldCount - newCount; + } + std::atomic_signal_fence(std::memory_order_acquire); + } + oldCount = m_count.fetch_sub(1, std::memory_order_acquire); + if (oldCount <= 0) + { + if ((timeout_usecs == 0) || (timeout_usecs < 0 && !m_sema.wait()) || (timeout_usecs > 0 && !m_sema.timed_wait((std::uint64_t)timeout_usecs))) + { + while (true) + { + oldCount = m_count.load(std::memory_order_acquire); + if (oldCount >= 0 && m_sema.try_wait()) + break; + if (oldCount < 0 && m_count.compare_exchange_strong(oldCount, oldCount + 1, std::memory_order_relaxed, std::memory_order_relaxed)) + return 0; + } + } + } + if (max > 1) + return 1 + tryWaitMany(max - 1); + return 1; + } + +public: + LightweightSemaphore(ssize_t initialCount = 0, int maxSpins = 10000) : m_count(initialCount), m_maxSpins(maxSpins) + { + assert(initialCount >= 0); + assert(maxSpins >= 0); + } + + bool tryWait() + { + ssize_t oldCount = m_count.load(std::memory_order_relaxed); + while (oldCount > 0) + { + if (m_count.compare_exchange_weak(oldCount, oldCount - 1, std::memory_order_acquire, std::memory_order_relaxed)) + return true; + } + return false; + } + + bool wait() + { + return tryWait() || waitWithPartialSpinning(); + } + + bool wait(std::int64_t timeout_usecs) + { + return tryWait() || waitWithPartialSpinning(timeout_usecs); + } + + // Acquires between 0 and (greedily) max, inclusive + ssize_t tryWaitMany(ssize_t max) + { + assert(max >= 0); + ssize_t oldCount = m_count.load(std::memory_order_relaxed); + while (oldCount > 0) + { + ssize_t newCount = oldCount > max ? oldCount - max : 0; + if (m_count.compare_exchange_weak(oldCount, newCount, std::memory_order_acquire, std::memory_order_relaxed)) + return oldCount - newCount; + } + return 0; + } + + // Acquires at least one, and (greedily) at most max + ssize_t waitMany(ssize_t max, std::int64_t timeout_usecs) + { + assert(max >= 0); + ssize_t result = tryWaitMany(max); + if (result == 0 && max > 0) + result = waitManyWithPartialSpinning(max, timeout_usecs); + return result; + } + + ssize_t waitMany(ssize_t max) + { + ssize_t result = waitMany(max, -1); + assert(result > 0); + return result; + } + + void signal(ssize_t count = 1) + { + assert(count >= 0); + ssize_t oldCount = m_count.fetch_add(count, std::memory_order_release); + ssize_t toRelease = -oldCount < count ? -oldCount : count; + if (toRelease > 0) + { + m_sema.signal((int)toRelease); + } + } + + std::size_t availableApprox() const + { + ssize_t count = m_count.load(std::memory_order_relaxed); + return count > 0 ? static_cast(count) : 0; + } +}; + +} // end namespace moodycamel + diff --git a/keydb.conf b/keydb.conf index 1808fdb74..247bcb9ad 100644 --- a/keydb.conf +++ b/keydb.conf @@ -2041,3 +2041,14 @@ server-threads 2 # Enable FLASH support? (Enterprise Only) # storage-provider flash /path/to/flash/db + +# KeyDB will attempt to balance clients across threads evenly; However, replica clients +# are usually much more expensive than a normal client, and so KeyDB will try to assign +# fewer clients to threads with a replica. The weighting factor below is intented to help tune +# this behavior. A replica weighting factor of 2 means we treat a replica as the equivalent +# of two normal clients. Adjusting this value may improve performance when replication is +# used. The best weighting is workload specific - e.g. read heavy workloads should set +# this to 1. 
Very write heavy workloads may benefit from higher numbers. +# +# By default KeyDB sets this to 2. +replica-weighting-factor 2 \ No newline at end of file diff --git a/src/IStorage.h b/src/IStorage.h index d5808c4c1..d1f316022 100644 --- a/src/IStorage.h +++ b/src/IStorage.h @@ -12,6 +12,7 @@ public: virtual const char *name() const = 0; virtual size_t totalDiskspaceUsed() const = 0; virtual bool FSlow() const = 0; + virtual size_t filedsRequired() const { return 0; } }; class IStorage diff --git a/src/Makefile b/src/Makefile index 2a5f0fdad..fd6906043 100644 --- a/src/Makefile +++ b/src/Makefile @@ -246,7 +246,7 @@ endif endif # Include paths to dependencies FINAL_CFLAGS+= -I../deps/hiredis -I../deps/linenoise -I../deps/lua/src -I../deps/hdr_histogram -I../deps/license/ -FINAL_CXXFLAGS+= -I../deps/hiredis -I../deps/linenoise -I../deps/lua/src -I../deps/hdr_histogram -I../deps/rocksdb/include/ -I../deps/license +FINAL_CXXFLAGS+= -I../deps/hiredis -I../deps/linenoise -I../deps/lua/src -I../deps/hdr_histogram -I../deps/rocksdb/include/ -I../deps/license -I../deps/concurrentqueue # Determine systemd support and/or build preference (defaulting to auto-detection) BUILD_WITH_SYSTEMD=no diff --git a/src/StorageCache.cpp b/src/StorageCache.cpp index 6e1d4af9a..af2ac12b7 100644 --- a/src/StorageCache.cpp +++ b/src/StorageCache.cpp @@ -136,9 +136,10 @@ void StorageCache::retrieve(sds key, IStorage::callbackSingle fn) const size_t StorageCache::count() const { - std::unique_lock ul(m_lock); + std::unique_lock ul(m_lock, std::defer_lock); + bool fLocked = ul.try_lock(); size_t count = m_spstorage->count(); - if (m_pdict != nullptr) { + if (m_pdict != nullptr && fLocked) { serverAssert(bulkInsertsInProgress.load(std::memory_order_seq_cst) || count == (dictSize(m_pdict) + m_collisionCount)); } return count; @@ -147,4 +148,14 @@ size_t StorageCache::count() const void StorageCache::beginWriteBatch() { serverAssert(GlobalLocksAcquired()); // Otherwise we deadlock m_spstorage->beginWriteBatch(); +} + +void StorageCache::emergencyFreeCache() { + dict *d = m_pdict; + m_pdict = nullptr; + if (d != nullptr) { + g_pserver->asyncworkqueue->AddWorkFunction([d]{ + dictRelease(d); + }); + } } \ No newline at end of file diff --git a/src/StorageCache.h b/src/StorageCache.h index 9f92f75c0..184eb60bd 100644 --- a/src/StorageCache.h +++ b/src/StorageCache.h @@ -43,6 +43,8 @@ public: void bulkInsert(sds *rgkeys, sds *rgvals, size_t celem); void retrieve(sds key, IStorage::callbackSingle fn) const; bool erase(sds key); + void emergencyFreeCache(); + bool keycacheIsEnabled() const { return m_pdict != nullptr; } bool enumerate(IStorage::callback fn) const { return m_spstorage->enumerate(fn); } diff --git a/src/ae.cpp b/src/ae.cpp index b55504c54..cf4db75bb 100644 --- a/src/ae.cpp +++ b/src/ae.cpp @@ -787,6 +787,7 @@ void aeSetAfterSleepProc(aeEventLoop *eventLoop, aeBeforeSleepProc *aftersleep, } thread_local spin_worker tl_worker = nullptr; +thread_local bool fOwnLockOverride = false; void setAeLockSetThreadSpinWorker(spin_worker worker) { tl_worker = worker; @@ -807,9 +808,14 @@ void aeReleaseLock() g_lock.unlock(); } +void aeSetThreadOwnsLockOverride(int fOverride) +{ + fOwnLockOverride = fOverride; +} + int aeThreadOwnsLock() { - return g_lock.fOwnLock(); + return fOwnLockOverride || g_lock.fOwnLock(); } int aeLockContested(int threshold) diff --git a/src/ae.h b/src/ae.h index 0565fe15c..c22624ad6 100644 --- a/src/ae.h +++ b/src/ae.h @@ -168,6 +168,7 @@ void aeAcquireLock(); int aeTryAcquireLock(int fWeak); void 
aeReleaseLock(); int aeThreadOwnsLock(); +void aeSetThreadOwnsLockOverride(int fOverride); int aeLockContested(int threshold); int aeLockContention(); // returns the number of instantaneous threads waiting on the lock diff --git a/src/config.cpp b/src/config.cpp index 49d539ddf..6147bc429 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -357,6 +357,7 @@ bool initializeStorageProvider(const char **err) { // Create The Storage Factory (if necessary) serverLog(LL_NOTICE, "Initializing FLASH storage provider (this may take a long time)"); + adjustOpenFilesLimit(); g_pserver->m_pstorageFactory = CreateRocksDBStorageFactory(g_sdsArgs, cserver.dbnum, cserver.storage_conf, cserver.storage_conf ? strlen(cserver.storage_conf) : 0); } else if (!strcasecmp(g_sdsProvider, "test") && g_sdsArgs == nullptr) @@ -2468,7 +2469,10 @@ static int updateJemallocBgThread(int val, int prev, const char **err) { static int updateReplBacklogSize(long long val, long long prev, const char **err) { /* resizeReplicationBacklog sets g_pserver->repl_backlog_size, and relies on * being able to tell when the size changes, so restore prev before calling it. */ - UNUSED(err); + if (cserver.repl_backlog_disk_size) { + *err = "Unable to dynamically resize the backlog because disk backlog is enabled"; + return 0; + } g_pserver->repl_backlog_size = prev; g_pserver->repl_backlog_config_size = val; resizeReplicationBacklog(val); @@ -2723,7 +2727,9 @@ standardConfig configs[] = { createBoolConfig("delete-on-evict", NULL, MODIFIABLE_CONFIG, cserver.delete_on_evict, 0, NULL, NULL), createBoolConfig("use-fork", NULL, IMMUTABLE_CONFIG, cserver.fForkBgSave, 0, NULL, NULL), createBoolConfig("io-threads-do-reads", NULL, IMMUTABLE_CONFIG, fDummy, 0, NULL, NULL), + createBoolConfig("time-thread-priority", NULL, IMMUTABLE_CONFIG, cserver.time_thread_priority, 0, NULL, NULL), createBoolConfig("prefetch-enabled", NULL, MODIFIABLE_CONFIG, g_pserver->prefetch_enabled, 1, NULL, NULL), + createBoolConfig("allow-rdb-resize-op", NULL, MODIFIABLE_CONFIG, g_pserver->allowRdbResizeOp, 1, NULL, NULL), createBoolConfig("crash-log-enabled", NULL, MODIFIABLE_CONFIG, g_pserver->crashlog_enabled, 1, NULL, updateSighandlerEnabled), createBoolConfig("crash-memcheck-enabled", NULL, MODIFIABLE_CONFIG, g_pserver->memcheck_enabled, 1, NULL, NULL), createBoolConfig("use-exit-on-panic", NULL, MODIFIABLE_CONFIG, g_pserver->use_exit_on_panic, 0, NULL, NULL), @@ -2802,6 +2808,7 @@ standardConfig configs[] = { createIntConfig("min-clients-per-thread", NULL, MODIFIABLE_CONFIG, 0, 400, cserver.thread_min_client_threshold, 20, INTEGER_CONFIG, NULL, NULL), createIntConfig("storage-flush-period", NULL, MODIFIABLE_CONFIG, 1, 10000, g_pserver->storage_flush_period, 500, INTEGER_CONFIG, NULL, NULL), createIntConfig("replica-quorum", NULL, MODIFIABLE_CONFIG, -1, INT_MAX, g_pserver->repl_quorum, -1, INTEGER_CONFIG, NULL, NULL), + createIntConfig("replica-weighting-factor", NULL, MODIFIABLE_CONFIG, 1, INT_MAX, g_pserver->replicaIsolationFactor, 2, INTEGER_CONFIG, NULL, NULL), /* Unsigned int configs */ createUIntConfig("maxclients", NULL, MODIFIABLE_CONFIG, 1, UINT_MAX, g_pserver->maxclients, 10000, INTEGER_CONFIG, NULL, updateMaxclients), createUIntConfig("loading-process-events-interval-keys", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, g_pserver->loading_process_events_interval_keys, 8192, MEMORY_CONFIG, NULL, NULL), @@ -2819,6 +2826,7 @@ standardConfig configs[] = { createLongLongConfig("proto-max-bulk-len", NULL, MODIFIABLE_CONFIG, 1024*1024, LLONG_MAX, 
g_pserver->proto_max_bulk_len, 512ll*1024*1024, MEMORY_CONFIG, NULL, NULL), /* Bulk request max size */ createLongLongConfig("stream-node-max-entries", NULL, MODIFIABLE_CONFIG, 0, LLONG_MAX, g_pserver->stream_node_max_entries, 100, INTEGER_CONFIG, NULL, NULL), createLongLongConfig("repl-backlog-size", NULL, MODIFIABLE_CONFIG, 1, LLONG_MAX, g_pserver->repl_backlog_size, 1024*1024, MEMORY_CONFIG, NULL, updateReplBacklogSize), /* Default: 1mb */ + createLongLongConfig("repl-backlog-disk-reserve", NULL, IMMUTABLE_CONFIG, 0, LLONG_MAX, cserver.repl_backlog_disk_size, 0, MEMORY_CONFIG, NULL, NULL), /* Unsigned Long Long configs */ createULongLongConfig("maxmemory", NULL, MODIFIABLE_CONFIG, 0, LLONG_MAX, g_pserver->maxmemory, 0, MEMORY_CONFIG, NULL, updateMaxmemory), diff --git a/src/db.cpp b/src/db.cpp index 6a23848ff..4448d578e 100644 --- a/src/db.cpp +++ b/src/db.cpp @@ -274,7 +274,7 @@ robj *lookupKeyWriteOrReply(client *c, robj *key, robj *reply) { return o; } -bool dbAddCore(redisDb *db, sds key, robj *val, bool fUpdateMvcc, bool fAssumeNew = false) { +bool dbAddCore(redisDb *db, sds key, robj *val, bool fUpdateMvcc, bool fAssumeNew = false, dict_iter *piterExisting = nullptr) { serverAssert(!val->FExpires()); sds copy = sdsdupshared(key); @@ -283,7 +283,7 @@ bool dbAddCore(redisDb *db, sds key, robj *val, bool fUpdateMvcc, bool fAssumeNe setMvccTstamp(val, mvcc); } - bool fInserted = db->insert(copy, val, fAssumeNew); + bool fInserted = db->insert(copy, val, fAssumeNew, piterExisting); if (fInserted) { @@ -353,8 +353,12 @@ void redisDb::dbOverwriteCore(redisDb::iter itr, sds keySds, robj *val, bool fUp * This function does not modify the expire time of the existing key. * * The program is aborted if the key was not already present. */ -void dbOverwrite(redisDb *db, robj *key, robj *val, bool fRemoveExpire) { - auto itr = db->find(key); +void dbOverwrite(redisDb *db, robj *key, robj *val, bool fRemoveExpire, dict_iter *pitrExisting) { + redisDb::iter itr; + if (pitrExisting != nullptr) + itr = *pitrExisting; + else + itr = db->find(key); serverAssertWithInfo(NULL,key,itr != nullptr); lookupKeyUpdateObj(itr.val(), LOOKUP_NONE); @@ -398,8 +402,9 @@ int dbMerge(redisDb *db, sds key, robj *val, int fReplace) * in a context where there is no clear client performing the operation. 
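The dbAddCore()/dbOverwrite() changes above thread a dict_iter back to the caller so that a failed insert can be turned into an overwrite without paying for a second hash lookup in genericSetKey(). A minimal sketch of that single-lookup upsert pattern, with std::unordered_map standing in for the dict and addCore/setKey as illustrative names:

// Sketch of the single-lookup "insert or overwrite" pattern (illustrative only).
#include <cstdio>
#include <string>
#include <unordered_map>

using Map = std::unordered_map<std::string, int>;

// Try to insert; if the key already exists, return false and hand back an
// iterator to the existing entry so the caller can overwrite it without a
// second hash lookup.
static bool addCore(Map &m, const std::string &key, int val, Map::iterator *itExisting) {
    auto [it, inserted] = m.try_emplace(key, val);
    if (!inserted && itExisting)
        *itExisting = it;
    return inserted;
}

static void setKey(Map &m, const std::string &key, int val) {
    Map::iterator it;
    if (!addCore(m, key, val, &it))
        it->second = val;   // overwrite in place, reusing the iterator
}

int main() {
    Map m;
    setKey(m, "k", 1);      // insert path
    setKey(m, "k", 2);      // overwrite path, still a single lookup
    printf("k = %d\n", m["k"]);
}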
*/ void genericSetKey(client *c, redisDb *db, robj *key, robj *val, int keepttl, int signal) { db->prepOverwriteForSnapshot(szFromObj(key)); - if (!dbAddCore(db, szFromObj(key), val, true /* fUpdateMvcc */)) { - dbOverwrite(db, key, val, !keepttl); + dict_iter iter; + if (!dbAddCore(db, szFromObj(key), val, true /* fUpdateMvcc */, false /*fAssumeNew*/, &iter)) { + dbOverwrite(db, key, val, !keepttl, &iter); } incrRefCount(val); if (signal) signalModifiedKey(c,db,key); @@ -2577,6 +2582,12 @@ void redisDbPersistentData::setStorageProvider(StorageCache *pstorage) m_spstorage = std::unique_ptr(pstorage); } +void redisDbPersistentData::endStorageProvider() +{ + serverAssert(m_spstorage != nullptr); + m_spstorage.reset(); +} + void clusterStorageLoadCallback(const char *rgchkey, size_t cch, void *) { slotToKeyUpdateKeyCore(rgchkey, cch, true /*add*/); @@ -2605,11 +2616,20 @@ void redisDb::storageProviderInitialize() } } -bool redisDbPersistentData::insert(char *key, robj *o, bool fAssumeNew) +void redisDb::storageProviderDelete() { - if (!fAssumeNew) + if (g_pserver->m_pstorageFactory != nullptr) + { + this->endStorageProvider(); + } +} + +bool redisDbPersistentData::insert(char *key, robj *o, bool fAssumeNew, dict_iter *piterExisting) +{ + if (!fAssumeNew && (g_pserver->m_pstorageFactory != nullptr || m_pdbSnapshot != nullptr)) ensure(key); - int res = dictAdd(m_pdict, key, o); + dictEntry *de; + int res = dictAdd(m_pdict, key, o, &de); serverAssert(FImplies(fAssumeNew, res == DICT_OK)); if (res == DICT_OK) { @@ -2621,6 +2641,11 @@ bool redisDbPersistentData::insert(char *key, robj *o, bool fAssumeNew) #endif trackkey(key, false /* fUpdate */); } + else + { + if (piterExisting) + *piterExisting = dict_iter(m_pdict, de); + } return (res == DICT_OK); } @@ -2912,6 +2937,35 @@ bool redisDbPersistentData::processChanges(bool fSnapshot) return (m_spstorage != nullptr); } +void redisDbPersistentData::processChangesAsync(std::atomic &pendingJobs) +{ + ++pendingJobs; + serverAssert(!m_fAllChanged); + dictEmpty(m_dictChanged, nullptr); + dict *dictNew = dictCreate(&dbDictType, nullptr); + std::swap(dictNew, m_pdict); + m_cnewKeysPending = 0; + g_pserver->asyncworkqueue->AddWorkFunction([dictNew, this, &pendingJobs]{ + dictIterator *di = dictGetIterator(dictNew); + dictEntry *de; + std::vector veckeys; + std::vector vecvals; + while ((de = dictNext(di)) != nullptr) + { + robj *o = (robj*)dictGetVal(de); + sds temp = serializeStoredObjectAndExpire(this, (const char*) dictGetKey(de), o); + veckeys.push_back((sds)dictGetKey(de)); + vecvals.push_back(temp); + } + m_spstorage->bulkInsert(veckeys.data(), vecvals.data(), veckeys.size()); + for (auto val : vecvals) + sdsfree(val); + dictReleaseIterator(di); + dictRelease(dictNew); + --pendingJobs; + }); +} + void redisDbPersistentData::commitChanges(const redisDbPersistentDataSnapshot **psnapshotFree) { if (m_pdbSnapshotStorageFlush) @@ -2985,7 +3039,7 @@ size_t redisDbPersistentData::size(bool fCachedOnly) const + (m_pdbSnapshot ? 
(m_pdbSnapshot->size(fCachedOnly) - dictSize(m_pdictTombstone)) : 0); } -bool redisDbPersistentData::removeCachedValue(const char *key) +bool redisDbPersistentData::removeCachedValue(const char *key, dictEntry **ppde) { serverAssert(m_spstorage != nullptr); // First ensure its not a pending key @@ -3001,7 +3055,11 @@ bool redisDbPersistentData::removeCachedValue(const char *key) } // since we write ASAP the database already has a valid copy so safe to delete - dictDelete(m_pdict, key); + if (ppde != nullptr) { + *ppde = dictUnlink(m_pdict, key); + } else { + dictDelete(m_pdict, key); + } if (m_spstorage != nullptr) m_spstorage->batch_unlock(); @@ -3045,6 +3103,20 @@ void redisDbPersistentData::removeAllCachedValues() } } +void redisDbPersistentData::disableKeyCache() +{ + if (m_spstorage == nullptr) + return; + m_spstorage->emergencyFreeCache(); +} + +bool redisDbPersistentData::keycacheIsEnabled() +{ + if (m_spstorage == nullptr) + return false; + return m_spstorage->keycacheIsEnabled(); +} + void redisDbPersistentData::trackkey(const char *key, bool fUpdate) { if (m_fTrackingChanges && !m_fAllChanged && m_spstorage) { @@ -3152,7 +3224,7 @@ int dbnumFromDb(redisDb *db) serverPanic("invalid database pointer"); } -void redisDbPersistentData::prefetchKeysAsync(client *c, parsed_command &command) +bool redisDbPersistentData::prefetchKeysAsync(client *c, parsed_command &command, bool fExecOK) { if (m_spstorage == nullptr) { #if defined(__x86_64__) || defined(__i386__) @@ -3183,7 +3255,7 @@ void redisDbPersistentData::prefetchKeysAsync(client *c, parsed_command &command } } #endif - return; + return false; } AeLocker lock; @@ -3193,7 +3265,7 @@ void redisDbPersistentData::prefetchKeysAsync(client *c, parsed_command &command getKeysResult result = GETKEYS_RESULT_INIT; auto cmd = lookupCommand(szFromObj(command.argv[0])); if (cmd == nullptr) - return; // Bad command? It's not for us to judge, just bail + return false; // Bad command? 
It's not for us to judge, just bail int numkeys = getKeysFromCommand(cmd, command.argv, command.argc, &result); for (int ikey = 0; ikey < numkeys; ++ikey) { @@ -3225,41 +3297,61 @@ void redisDbPersistentData::prefetchKeysAsync(client *c, parsed_command &command } } - lock.arm(c); - for (auto &tuple : vecInserts) - { - sds sharedKey = std::get<0>(tuple); - robj *o = std::get<1>(tuple); - std::unique_ptr spexpire = std::move(std::get<2>(tuple)); - - if (o != nullptr) + bool fNoInsert = false; + if (!vecInserts.empty()) { + lock.arm(c); + for (auto &tuple : vecInserts) { - if (this->find_cached_threadsafe(sharedKey) != nullptr) + sds sharedKey = std::get<0>(tuple); + robj *o = std::get<1>(tuple); + std::unique_ptr spexpire = std::move(std::get<2>(tuple)); + + if (o != nullptr) { - // While unlocked this was already ensured - decrRefCount(o); - sdsfree(sharedKey); + if (this->find_cached_threadsafe(sharedKey) != nullptr) + { + // While unlocked this was already ensured + decrRefCount(o); + sdsfree(sharedKey); + fNoInsert = true; + } + else + { + if (spexpire != nullptr) { + if (spexpire->when() < mstime()) { + fNoInsert = true; + break; + } + } + dictAdd(m_pdict, sharedKey, o); + o->SetFExpires(spexpire != nullptr); + + if (spexpire != nullptr) + { + auto itr = m_setexpire->find(sharedKey); + if (itr != m_setexpire->end()) + m_setexpire->erase(itr); + m_setexpire->insert(std::move(*spexpire)); + serverAssert(m_setexpire->find(sharedKey) != m_setexpire->end()); + } + serverAssert(o->FExpires() == (m_setexpire->find(sharedKey) != m_setexpire->end())); + } } else { - dictAdd(m_pdict, sharedKey, o); - o->SetFExpires(spexpire != nullptr); - - if (spexpire != nullptr) - { - auto itr = m_setexpire->find(sharedKey); - if (itr != m_setexpire->end()) - m_setexpire->erase(itr); - m_setexpire->insert(std::move(*spexpire)); - serverAssert(m_setexpire->find(sharedKey) != m_setexpire->end()); - } - serverAssert(o->FExpires() == (m_setexpire->find(sharedKey) != m_setexpire->end())); + if (sharedKey != nullptr) + sdsfree(sharedKey); // BUG but don't bother crashing } } - else - { - if (sharedKey != nullptr) - sdsfree(sharedKey); // BUG but don't bother crashing + lock.disarm(); + } + + if (fExecOK && !fNoInsert && cmd->proc == getCommand && !vecInserts.empty()) { + robj *o = std::get<1>(vecInserts[0]); + if (o != nullptr) { + addReplyBulk(c, o); + return true; } } + return false; } \ No newline at end of file diff --git a/src/dict.cpp b/src/dict.cpp index 252e3185f..e6e707c71 100644 --- a/src/dict.cpp +++ b/src/dict.cpp @@ -408,7 +408,7 @@ dictAsyncRehashCtl *dictRehashAsyncStart(dict *d, int buckets) { d->asyncdata = new dictAsyncRehashCtl(d, d->asyncdata); - int empty_visits = buckets; + int empty_visits = buckets*10; while (d->asyncdata->queue.size() < (size_t)buckets && (size_t)d->rehashidx < d->ht[0].size) { dictEntry *de; @@ -577,9 +577,9 @@ static void _dictRehashStep(dict *d) { } /* Add an element to the target hash table */ -int dictAdd(dict *d, void *key, void *val) +int dictAdd(dict *d, void *key, void *val, dictEntry **existing) { - dictEntry *entry = dictAddRaw(d,key,NULL); + dictEntry *entry = dictAddRaw(d,key,existing); if (!entry) return DICT_ERR; dictSetVal(d, entry, val); @@ -713,6 +713,7 @@ static dictEntry *dictGenericDelete(dict *d, const void *key, int nofree) { } } d->ht[table].used--; + _dictExpandIfNeeded(d); return he; } prevHe = he; diff --git a/src/dict.h b/src/dict.h index c4e1931d8..72d50dd2c 100644 --- a/src/dict.h +++ b/src/dict.h @@ -206,7 +206,7 @@ typedef void 
(dictScanBucketFunction)(void *privdata, dictEntry **bucketref); dict *dictCreate(dictType *type, void *privDataPtr); int dictExpand(dict *d, unsigned long size, bool fShrink = false); int dictTryExpand(dict *d, unsigned long size, bool fShrink); -int dictAdd(dict *d, void *key, void *val); +int dictAdd(dict *d, void *key, void *val, dictEntry **existing = nullptr); dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing); dictEntry *dictAddOrFind(dict *d, void *key); int dictReplace(dict *d, void *key, void *val); diff --git a/src/evict.cpp b/src/evict.cpp index 012a6afc3..20ebc9058 100644 --- a/src/evict.cpp +++ b/src/evict.cpp @@ -34,6 +34,7 @@ #include "bio.h" #include "atomicvar.h" #include +#include #include /* ---------------------------------------------------------------------------- @@ -461,6 +462,69 @@ int getMaxmemoryState(size_t *total, size_t *logical, size_t *tofree, float *lev return C_ERR; } +class FreeMemoryLazyFree : public ICollectable +{ + ssize_t m_cb = 0; + std::vector>> vecdictvecde; + +public: + static std::atomic s_clazyFreesInProgress; + + FreeMemoryLazyFree() { + s_clazyFreesInProgress++; + } + + FreeMemoryLazyFree(const FreeMemoryLazyFree&) = delete; + FreeMemoryLazyFree(FreeMemoryLazyFree&&) = default; + + ~FreeMemoryLazyFree() { + aeAcquireLock(); + for (auto &pair : vecdictvecde) { + for (auto de : pair.second) { + dictFreeUnlinkedEntry(pair.first, de); + } + } + aeReleaseLock(); + --s_clazyFreesInProgress; + } + + ssize_t addEntry(dict *d, dictEntry *de) { + ssize_t cbFreedNow = 0; + ssize_t cb = sizeof(dictEntry); + cb += sdsAllocSize((sds)dictGetKey(de)); + robj *o = (robj*)dictGetVal(de); + switch (o->type) { + case OBJ_STRING: + cb += getStringObjectSdsUsedMemory(o)+sizeof(robj); + break; + + default: + // If we don't know about it we can't accurately track the memory so free now + cbFreedNow = zmalloc_used_memory(); + decrRefCount(o); + cbFreedNow -= zmalloc_used_memory(); + de->v.val = nullptr; + } + + auto itr = std::lower_bound(vecdictvecde.begin(), vecdictvecde.end(), d, + [](const std::pair> &a, dict *d) -> bool { + return a.first < d; + } + ); + if (itr == vecdictvecde.end() || itr->first != d) { + itr = vecdictvecde.insert(itr, std::make_pair(d, std::vector())); + } + serverAssert(itr->first == d); + itr->second.push_back(de); + m_cb += cb; + return cb + cbFreedNow; + } + + size_t memory_queued() { return m_cb; } +}; + +std::atomic FreeMemoryLazyFree::s_clazyFreesInProgress {0}; + /* Return 1 if used memory is more than maxmemory after allocating more memory, * return 0 if not. Redis may reject user's requests or evict some keys if used * memory exceeds maxmemory, especially, when we allocate huge memory at once. */ @@ -509,6 +573,9 @@ static int isSafeToPerformEvictions(void) { * and just be masters exact copies. */ if (g_pserver->m_pstorageFactory == nullptr && listLength(g_pserver->masters) && g_pserver->repl_slave_ignore_maxmemory && !g_pserver->fActiveReplica) return 0; + /* If we have a lazy free obj pending, our amounts will be off, wait for it to go away */ + if (FreeMemoryLazyFree::s_clazyFreesInProgress > 0) return 0; + /* When clients are paused the dataset should be static not just from the * POV of clients not being able to write, but also from the POV of * expires and evictions of keys not being performed. 
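FreeMemoryLazyFree above defers freeing evicted dictEntry objects: entries are unlinked now, grouped per dict in a vector kept sorted by dict pointer (std::lower_bound), and only released later, in the destructor, under the AE lock. A simplified standalone sketch of that bookkeeping, with illustrative stand-in types for dict/dictEntry:

// Simplified sketch of the lazy-free bookkeeping; Dict/Entry are stand-ins.
#include <algorithm>
#include <cstdio>
#include <utility>
#include <vector>

struct Dict { const char *name; };   // stand-in for the real dict
struct Entry { int payload; };       // stand-in for an already-unlinked dictEntry

class LazyFree {
    // (dict, entries unlinked from it), kept sorted by dict pointer
    std::vector<std::pair<Dict*, std::vector<Entry*>>> groups;
    size_t bytesQueued = 0;

public:
    size_t addEntry(Dict *d, Entry *e) {
        auto it = std::lower_bound(groups.begin(), groups.end(), d,
            [](const std::pair<Dict*, std::vector<Entry*>> &g, Dict *dd) { return g.first < dd; });
        if (it == groups.end() || it->first != d)
            it = groups.insert(it, {d, {}});
        it->second.push_back(e);
        bytesQueued += sizeof(Entry);   // the real code also counts key and value sizes
        return sizeof(Entry);
    }

    size_t memoryQueued() const { return bytesQueued; }

    ~LazyFree() {                       // the real destructor does this under the AE lock
        for (auto &g : groups)
            for (Entry *e : g.second)
                delete e;
    }
};

int main() {
    Dict db{"db0"};
    LazyFree lazy;
    lazy.addEntry(&db, new Entry{1});
    lazy.addEntry(&db, new Entry{2});
    printf("queued: %zu bytes in %s\n", lazy.memoryQueued(), db.name);
}   // LazyFree goes out of scope here and frees the deferred entries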
*/ @@ -573,6 +640,8 @@ int performEvictions(bool fPreSnapshot) { int result = EVICT_FAIL; int ckeysFailed = 0; + std::unique_ptr splazy = std::make_unique(); + if (getMaxmemoryState(&mem_reported,NULL,&mem_tofree,NULL,false,fPreSnapshot) == C_OK) return EVICT_OK; @@ -662,7 +731,7 @@ int performEvictions(bool fPreSnapshot) { /* volatile-random and allkeys-random policy */ if (g_pserver->maxmemory_policy == MAXMEMORY_ALLKEYS_RANDOM || g_pserver->maxmemory_policy == MAXMEMORY_VOLATILE_RANDOM - || fEvictToStorage) + || fFallback) { /* When evicting a random key, we try to evict a key for * each DB, so we use the static 'next_db' variable to @@ -698,9 +767,9 @@ int performEvictions(bool fPreSnapshot) { if (fEvictToStorage) { // This key is in the storage so we only need to free the object - delta = (long long) zmalloc_used_memory(); - if (db->removeCachedValue(bestkey)) { - delta -= (long long) zmalloc_used_memory(); + dictEntry *deT; + if (db->removeCachedValue(bestkey, &deT)) { + mem_freed += splazy->addEntry(db->dictUnsafeKeyOnly(), deT); ckeysFailed = 0; } else { @@ -709,7 +778,6 @@ int performEvictions(bool fPreSnapshot) { if (ckeysFailed > 1024) goto cant_free; } - mem_freed += delta; } else { @@ -764,7 +832,7 @@ int performEvictions(bool fPreSnapshot) { /* After some time, exit the loop early - even if memory limit * hasn't been reached. If we suddenly need to free a lot of * memory, don't want to spend too much time here. */ - if (elapsedUs(evictionTimer) > eviction_time_limit_us) { + if (g_pserver->m_pstorageFactory == nullptr && elapsedUs(evictionTimer) > eviction_time_limit_us) { // We still need to free memory - start eviction timer proc if (!isEvictionProcRunning && serverTL->el != nullptr) { isEvictionProcRunning = 1; @@ -781,6 +849,10 @@ int performEvictions(bool fPreSnapshot) { /* at this point, the memory is OK, or we have reached the time limit */ result = (isEvictionProcRunning) ? EVICT_RUNNING : EVICT_OK; + if (splazy != nullptr && splazy->memory_queued() > 0 && !serverTL->gcEpoch.isReset()) { + g_pserver->garbageCollector.enqueue(serverTL->gcEpoch, std::move(splazy)); + } + cant_free: if (g_pserver->m_pstorageFactory) { @@ -796,10 +868,15 @@ cant_free: redisDb *db = g_pserver->db[idb]; if (db->FStorageProvider()) { - serverLog(LL_WARNING, "Failed to evict keys, falling back to flushing entire cache. Consider increasing maxmemory-samples."); - db->removeAllCachedValues(); - if (((mem_reported - zmalloc_used_memory()) + mem_freed) >= mem_tofree) - result = EVICT_OK; + if (db->size() != 0 && db->size(true /*fcachedOnly*/) == 0 && db->keycacheIsEnabled()) { + serverLog(LL_WARNING, "Key cache exceeds maxmemory, freeing - performance may be affected increase maxmemory if possible"); + db->disableKeyCache(); + } else if (db->size(true /*fCachedOnly*/)) { + serverLog(LL_WARNING, "Failed to evict keys, falling back to flushing entire cache. Consider increasing maxmemory-samples."); + db->removeAllCachedValues(); + if (((mem_reported - zmalloc_used_memory()) + mem_freed) >= mem_tofree) + result = EVICT_OK; + } } } } diff --git a/src/networking.cpp b/src/networking.cpp index 6b40d24f0..add345ac9 100644 --- a/src/networking.cpp +++ b/src/networking.cpp @@ -158,7 +158,6 @@ client *createClient(connection *conn, int iel) { c->flags = 0; c->fPendingAsyncWrite = FALSE; c->fPendingAsyncWriteHandler = FALSE; - c->fPendingReplicaWrite = FALSE; c->ctime = c->lastinteraction = g_pserver->unixtime; /* If the default user does not require authentication, the user is * directly authenticated. 
*/ @@ -318,7 +317,7 @@ int prepareClientToWrite(client *c) { /* Schedule the client to write the output buffers to the socket, unless * it should already be setup to do so (it has already pending data). */ - if (!fAsync && !clientHasPendingReplies(c) && !c->fPendingReplicaWrite) clientInstallWriteHandler(c); + if (!fAsync && (c->flags & CLIENT_SLAVE || !clientHasPendingReplies(c))) clientInstallWriteHandler(c); if (fAsync && !(c->fPendingAsyncWrite)) clientInstallAsyncWriteHandler(c); /* Authorize the caller to queue in the output buffer of this client. */ @@ -1132,7 +1131,7 @@ void copyClientOutputBuffer(client *dst, client *src) { /* Return true if the specified client has pending reply buffers to write to * the socket. */ int clientHasPendingReplies(client *c) { - return (c->bufpos || listLength(c->reply)); + return (c->bufpos || listLength(c->reply) || c->FPendingReplicaWrite()); } static std::atomic rgacceptsInFlight[MAX_EVENT_LOOPS]; @@ -1145,6 +1144,8 @@ int chooseBestThreadForAccept() int cclientsThread; atomicGet(g_pserver->rgthreadvar[iel].cclients, cclientsThread); cclientsThread += rgacceptsInFlight[iel].load(std::memory_order_relaxed); + // Note: Its repl factor less one because cclients also includes replicas, so we don't want to double count + cclientsThread += (g_pserver->rgthreadvar[iel].cclientsReplica) * (g_pserver->replicaIsolationFactor-1); if (cclientsThread < cserver.thread_min_client_threshold) return iel; if (cclientsThread < cclientsMin) @@ -1313,7 +1314,9 @@ void acceptOnThread(connection *conn, int flags, char *cip) } else { - ielTarget = chooseBestThreadForAccept(); + // Cluster connections are more transient, so its not worth the cost to balance + // we can trust that SO_REUSEPORT is doing its job of distributing connections + ielTarget = g_pserver->cluster_enabled ? ielCur : chooseBestThreadForAccept(); } rgacceptsInFlight[ielTarget].fetch_add(1, std::memory_order_relaxed); @@ -1667,6 +1670,7 @@ bool freeClient(client *c) { ln = listSearchKey(l,c); serverAssert(ln != NULL); listDelNode(l,ln); + g_pserver->rgthreadvar[c->iel].cclientsReplica--; /* We need to remember the time when we started to have zero * attached slaves, as after some time we'll free the replication * backlog. */ @@ -1785,104 +1789,105 @@ int writeToClient(client *c, int handler_installed) { std::unique_locklock)> lock(c->lock); - while(clientHasPendingReplies(c)) { - serverAssert(!(c->flags & CLIENT_SLAVE) || c->flags & CLIENT_MONITOR); - if (c->bufpos > 0) { - nwritten = connWrite(c->conn,c->buf+c->sentlen,c->bufpos-c->sentlen); - if (nwritten <= 0) break; - c->sentlen += nwritten; - totwritten += nwritten; - - /* If the buffer was sent, set bufpos to zero to continue with - * the remainder of the reply. */ - if ((int)c->sentlen == c->bufpos) { - c->bufpos = 0; - c->sentlen = 0; - } - } else { - o = (clientReplyBlock*)listNodeValue(listFirst(c->reply)); - if (o->used == 0) { - c->reply_bytes -= o->size; - listDelNode(c->reply,listFirst(c->reply)); - continue; - } - - nwritten = connWrite(c->conn, o->buf() + c->sentlen, o->used - c->sentlen); - if (nwritten <= 0) break; - c->sentlen += nwritten; - totwritten += nwritten; - - /* If we fully sent the object on head go to the next one */ - if (c->sentlen == o->used) { - c->reply_bytes -= o->size; - listDelNode(c->reply,listFirst(c->reply)); - c->sentlen = 0; - /* If there are no longer objects in the list, we expect - * the count of reply bytes to be exactly zero. 
*/ - if (listLength(c->reply) == 0) - serverAssert(c->reply_bytes == 0); - } - } - /* Note that we avoid to send more than NET_MAX_WRITES_PER_EVENT - * bytes, in a single threaded server it's a good idea to serve - * other clients as well, even if a very large request comes from - * super fast link that is always able to accept data (in real world - * scenario think about 'KEYS *' against the loopback interface). - * - * However if we are over the maxmemory limit we ignore that and - * just deliver as much data as it is possible to deliver. - * - * Moreover, we also send as much as possible if the client is - * a replica or a monitor (otherwise, on high-speed traffic, the - * replication/output buffer will grow indefinitely) */ - if (totwritten > NET_MAX_WRITES_PER_EVENT && - (g_pserver->maxmemory == 0 || - zmalloc_used_memory() < g_pserver->maxmemory) && - !(c->flags & CLIENT_SLAVE)) break; - } - /* We can only directly read from the replication backlog if the client is a replica, so only attempt to do so if that's the case. */ if (c->flags & CLIENT_SLAVE && !(c->flags & CLIENT_MONITOR)) { - std::unique_lock repl_backlog_lock (g_pserver->repl_backlog_lock); - long long repl_end_idx = getReplIndexFromOffset(c->repl_end_off); - serverAssert(c->repl_curr_off != -1); - if (c->repl_curr_off != c->repl_end_off){ - long long repl_curr_idx = getReplIndexFromOffset(c->repl_curr_off); - long long nwritten2ndStage = 0; /* How much was written from the start of the replication backlog - * in the event of a wrap around write */ - /* normal case with no wrap around */ - if (repl_end_idx >= repl_curr_idx){ - nwritten = connWrite(c->conn, g_pserver->repl_backlog + repl_curr_idx, repl_end_idx - repl_curr_idx); - /* wrap around case */ + while (clientHasPendingReplies(c)) { + long long repl_end_idx = getReplIndexFromOffset(c->repl_end_off); + serverAssert(c->repl_curr_off != -1); + + if (c->repl_curr_off != c->repl_end_off){ + long long repl_curr_idx = getReplIndexFromOffset(c->repl_curr_off); + long long nwritten2ndStage = 0; /* How much was written from the start of the replication backlog + * in the event of a wrap around write */ + /* normal case with no wrap around */ + if (repl_end_idx >= repl_curr_idx){ + nwritten = connWrite(c->conn, g_pserver->repl_backlog + repl_curr_idx, repl_end_idx - repl_curr_idx); + /* wrap around case */ + } else { + nwritten = connWrite(c->conn, g_pserver->repl_backlog + repl_curr_idx, g_pserver->repl_backlog_size - repl_curr_idx); + /* only attempt wrapping if we write the correct number of bytes */ + if (nwritten == g_pserver->repl_backlog_size - repl_curr_idx){ + nwritten2ndStage = connWrite(c->conn, g_pserver->repl_backlog, repl_end_idx); + if (nwritten2ndStage != -1) + nwritten += nwritten2ndStage; + } + } + + /* only increment bytes if an error didn't occur */ + if (nwritten > 0){ + totwritten += nwritten; + c->repl_curr_off += nwritten; + serverAssert(c->repl_curr_off <= c->repl_end_off); + } + + /* If the second part of a write didn't go through, we still need to register that */ + if (nwritten2ndStage == -1) nwritten = -1; + if (nwritten == -1) + break; } else { - nwritten = connWrite(c->conn, g_pserver->repl_backlog + repl_curr_idx, g_pserver->repl_backlog_size - repl_curr_idx); - /* only attempt wrapping if we write the correct number of bytes */ - if (nwritten == g_pserver->repl_backlog_size - repl_curr_idx){ - nwritten2ndStage = connWrite(c->conn, g_pserver->repl_backlog, repl_end_idx); - if (nwritten2ndStage != -1) - nwritten += nwritten2ndStage; - } + break; 
} - - /* only increment bytes if an error didn't occur */ - if (nwritten > 0){ + } + } else { + while(clientHasPendingReplies(c)) { + serverAssert(!(c->flags & CLIENT_SLAVE) || c->flags & CLIENT_MONITOR); + if (c->bufpos > 0) { + nwritten = connWrite(c->conn,c->buf+c->sentlen,c->bufpos-c->sentlen); + if (nwritten <= 0) break; + c->sentlen += nwritten; totwritten += nwritten; - c->repl_curr_off += nwritten; - serverAssert(c->repl_curr_off <= c->repl_end_off); - /* If the client's current offset matches the last offset it can read from, there is no pending write */ - if (c->repl_curr_off == c->repl_end_off){ - c->fPendingReplicaWrite = false; + + /* If the buffer was sent, set bufpos to zero to continue with + * the remainder of the reply. */ + if ((int)c->sentlen == c->bufpos) { + c->bufpos = 0; + c->sentlen = 0; + } + } else { + o = (clientReplyBlock*)listNodeValue(listFirst(c->reply)); + if (o->used == 0) { + c->reply_bytes -= o->size; + listDelNode(c->reply,listFirst(c->reply)); + continue; + } + + nwritten = connWrite(c->conn, o->buf() + c->sentlen, o->used - c->sentlen); + if (nwritten <= 0) break; + c->sentlen += nwritten; + totwritten += nwritten; + + /* If we fully sent the object on head go to the next one */ + if (c->sentlen == o->used) { + c->reply_bytes -= o->size; + listDelNode(c->reply,listFirst(c->reply)); + c->sentlen = 0; + /* If there are no longer objects in the list, we expect + * the count of reply bytes to be exactly zero. */ + if (listLength(c->reply) == 0) + serverAssert(c->reply_bytes == 0); } } - - /* If the second part of a write didn't go through, we still need to register that */ - if (nwritten2ndStage == -1) nwritten = -1; + /* Note that we avoid to send more than NET_MAX_WRITES_PER_EVENT + * bytes, in a single threaded server it's a good idea to serve + * other clients as well, even if a very large request comes from + * super fast link that is always able to accept data (in real world + * scenario think about 'KEYS *' against the loopback interface). + * + * However if we are over the maxmemory limit we ignore that and + * just deliver as much data as it is possible to deliver. + * + * Moreover, we also send as much as possible if the client is + * a replica or a monitor (otherwise, on high-speed traffic, the + * replication/output buffer will grow indefinitely) */ + if (totwritten > NET_MAX_WRITES_PER_EVENT && + (g_pserver->maxmemory == 0 || + zmalloc_used_memory() < g_pserver->maxmemory) && + !(c->flags & CLIENT_SLAVE)) break; } } - g_pserver->stat_net_output_bytes += totwritten; if (nwritten == -1) { if (connGetState(c->conn) != CONN_STATE_CONNECTED) { @@ -1900,7 +1905,7 @@ int writeToClient(client *c, int handler_installed) { * We just rely on data / pings received for timeout detection. */ if (!(c->flags & CLIENT_MASTER)) c->lastinteraction = g_pserver->unixtime; } - if (!clientHasPendingReplies(c) && !c->fPendingReplicaWrite) { + if (!clientHasPendingReplies(c)) { c->sentlen = 0; if (handler_installed) connSetWriteHandler(c->conn, NULL); @@ -2024,7 +2029,6 @@ void ProcessPendingAsyncWrites() * need to use a syscall in order to install the writable event handler, * get it called, and so forth. 
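The replica branch of writeToClient() above sends data straight out of the circular replication backlog, between repl_curr_off and repl_end_off, using a second-stage write when the span wraps past the end of the buffer. A minimal sketch of that two-stage copy, with string appends standing in for connWrite() and all names illustrative:

// Copy the logical span [curIdx, endIdx) out of a ring buffer; when the span
// wraps past the end it is written in two stages, mirroring the two connWrite()
// calls above. Appending to a string stands in for the socket write.
#include <cstdio>
#include <string>
#include <vector>

static long long ringWrite(const std::vector<char> &backlog,
                           long long curIdx, long long endIdx, std::string &out) {
    long long size = (long long)backlog.size();
    if (endIdx >= curIdx) {                     // normal case: one contiguous copy
        out.append(&backlog[curIdx], endIdx - curIdx);
        return endIdx - curIdx;
    }
    long long firstStage = size - curIdx;       // wrap-around: tail of the buffer...
    out.append(&backlog[curIdx], firstStage);
    out.append(&backlog[0], endIdx);            // ...then the head, as a second stage
    return firstStage + endIdx;                 // caller advances its offset by this much
}

int main() {
    std::vector<char> backlog = {'C', 'D', 'E', 'A', 'B'};   // physical ring storage
    std::string out;
    long long n = ringWrite(backlog, 3 /*curIdx*/, 2 /*endIdx*/, out);  // wrapped span
    printf("sent %lld bytes: %s\n", n, out.c_str());                    // 4 bytes, "ABCD"
}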
*/ int handleClientsWithPendingWrites(int iel, int aof_state) { - std::unique_lock lockf(g_pserver->rgthreadvar[iel].lockPendingWrite); int processed = 0; serverAssert(iel == (serverTL - g_pserver->rgthreadvar)); @@ -2047,7 +2051,9 @@ int handleClientsWithPendingWrites(int iel, int aof_state) { ae_flags |= AE_BARRIER; } + std::unique_lock lockf(g_pserver->rgthreadvar[iel].lockPendingWrite); auto vec = std::move(g_pserver->rgthreadvar[iel].clients_pending_write); + lockf.unlock(); processed += (int)vec.size(); for (client *c : vec) { @@ -2064,7 +2070,7 @@ int handleClientsWithPendingWrites(int iel, int aof_state) { /* Don't write to clients that are going to be closed anyway. */ if (c->flags & CLIENT_CLOSE_ASAP) continue; - /* Try to write buffers to the client socket. */ + /* Try to write buffers to the client socket, unless its a replica in multithread mode */ if (writeToClient(c,0) == C_ERR) { if (c->flags & CLIENT_CLOSE_ASAP) @@ -2080,7 +2086,7 @@ int handleClientsWithPendingWrites(int iel, int aof_state) { /* If after the synchronous writes above we still have data to * output to the client, we need to install the writable handler. */ - if (clientHasPendingReplies(c) || c->fPendingReplicaWrite) { + if (clientHasPendingReplies(c)) { if (connSetWriteHandlerWithBarrier(c->conn, sendReplyToClient, ae_flags, true) == C_ERR) { freeClientAsync(c); } @@ -2551,11 +2557,13 @@ void parseClientCommandBuffer(client *c) { } /* Prefetch outside the lock for better perf */ - if (g_pserver->prefetch_enabled && cserver.cthreads > 1 && cqueriesStart < c->vecqueuedcmd.size() && + if (g_pserver->prefetch_enabled && (cserver.cthreads > 1 || g_pserver->m_pstorageFactory) && cqueriesStart < c->vecqueuedcmd.size() && (g_pserver->m_pstorageFactory || aeLockContested(cserver.cthreads/2) || cserver.cthreads == 1) && !GlobalLocksAcquired()) { auto &query = c->vecqueuedcmd.back(); if (query.argc > 0 && query.argc == query.argcMax) { - c->db->prefetchKeysAsync(c, query); + if (c->db->prefetchKeysAsync(c, query, c->vecqueuedcmd.size() == 1)) { + c->vecqueuedcmd.erase(c->vecqueuedcmd.begin()); + } } } c->reqtype = 0; @@ -2710,7 +2718,7 @@ void readQueryFromClient(connection *conn) { return; } - if (cserver.cthreads > 1) { + if (cserver.cthreads > 1 || g_pserver->m_pstorageFactory) { parseClientCommandBuffer(c); if (g_pserver->enable_async_commands && !serverTL->disable_async_commands && listLength(g_pserver->monitors) == 0 && (aeLockContention() || serverTL->rgdbSnapshot[c->db->id] || g_fTestMode)) { // Frequent writers aren't good candidates for this optimization, they cause us to renew the snapshot too often @@ -3738,7 +3746,7 @@ void rewriteClientCommandArgument(client *c, int i, robj *newval) { /* In the case of a replica client, writes to said replica are using data from the replication backlog * as opposed to it's own internal buffer, this number should keep track of that */ unsigned long getClientReplicationBacklogSharedUsage(client *c) { - return (!(c->flags & CLIENT_SLAVE) || !c->fPendingReplicaWrite ) ? 0 : g_pserver->master_repl_offset - c->repl_curr_off; + return (!(c->flags & CLIENT_SLAVE) || !c->FPendingReplicaWrite() ) ? 
0 : g_pserver->master_repl_offset - c->repl_curr_off; } /* This function returns the number of bytes that Redis is diff --git a/src/new.cpp b/src/new.cpp index 4775e207a..4e6b07dfd 100644 --- a/src/new.cpp +++ b/src/new.cpp @@ -27,14 +27,17 @@ void *operator new(std::size_t size, const std::nothrow_t &) noexcept return zmalloc(size, MALLOC_LOCAL); } +//need to do null checks for delete since the compiler can optimize out null checks in zfree void operator delete(void * p) noexcept { - zfree(p); + if (p != nullptr) + zfree(p); } void operator delete(void *p, std::size_t) noexcept { - zfree(p); + if (p != nullptr) + zfree(p); } #endif diff --git a/src/rdb.cpp b/src/rdb.cpp index 957c3fa88..bbd320b6b 100644 --- a/src/rdb.cpp +++ b/src/rdb.cpp @@ -2563,6 +2563,355 @@ void stopSaving(int success) { NULL); } + +class JobBase +{ +public: + enum class JobType { + Function, + Insert + }; + + JobType type; + + JobBase(JobType type) + : type(type) + {} + + virtual ~JobBase() = default; +}; + +struct rdbInsertJob : public JobBase +{ + redisDb *db = nullptr; + sds key = nullptr; + robj *val = nullptr; + long long lru_clock; + long long expiretime; + long long lru_idle; + long long lfu_freq; + std::vector> vecsubexpires; + + void addSubexpireKey(robj *subkey, long long when) { + vecsubexpires.push_back(std::make_pair(robj_sharedptr(subkey), when)); + decrRefCount(subkey); + } + + rdbInsertJob() + : JobBase(JobBase::JobType::Insert) + {} + + rdbInsertJob(rdbInsertJob &&src) + : JobBase(JobBase::JobType::Insert) + { + db = src.db; + src.db = nullptr; + key = src.key; + src.key = nullptr; + val = src.val; + src.val = nullptr; + lru_clock = src.lru_clock; + expiretime = src.expiretime; + lru_idle = src.lru_idle; + lfu_freq = src.lfu_freq; + vecsubexpires = std::move(src.vecsubexpires); + } + + ~rdbInsertJob() { + if (key) + sdsfree(key); + if (val) + decrRefCount(val); + } +}; + +struct rdbFunctionJob : public JobBase +{ +public: + std::function m_fn; + + rdbFunctionJob(std::function &&fn) + : JobBase(JobBase::JobType::Function), m_fn(fn) + {} +}; + +class rdbAsyncWorkThread +{ + rdbSaveInfo *rsi; + int rdbflags; + moodycamel::BlockingConcurrentQueue queueJobs; + fastlock m_lockPause { "rdbAsyncWork-Pause"}; + bool fLaunched = false; + std::atomic fExit {false}; + std::atomic ckeysLoaded; + std::atomic cstorageWritesInFlight; + std::atomic workerThreadDone; + std::thread m_thread; + std::vector vecbatch; + long long now; + long long lastPing = -1; + + static void listFreeMethod(const void *v) { + delete reinterpret_cast(v); + } + +public: + + rdbAsyncWorkThread(rdbSaveInfo *rsi, int rdbflags, long long now) + : rsi(rsi), rdbflags(rdbflags), now(now) + { + ckeysLoaded = 0; + cstorageWritesInFlight = 0; + } + + ~rdbAsyncWorkThread() { + fExit = true; + while (m_lockPause.fOwnLock()) + m_lockPause.unlock(); + if (m_thread.joinable()) + endWork(); + } + + void start() { + serverAssert(!fLaunched); + m_thread = std::thread(&rdbAsyncWorkThread::loadWorkerThreadMain, this); + fLaunched = true; + } + + void throttle() { + if (g_pserver->m_pstorageFactory && (getMaxmemoryState(NULL,NULL,NULL,NULL) != C_OK)) { + while ((cstorageWritesInFlight.load(std::memory_order_relaxed) || queueJobs.size_approx()) && (getMaxmemoryState(NULL,NULL,NULL,NULL) != C_OK)) { + usleep(1); + pauseExecution(); + ProcessWhileBlocked(); + resumeExecution(); + } + + if ((getMaxmemoryState(NULL,NULL,NULL,NULL) != C_OK)) { + for (int idb = 0; idb < cserver.dbnum; ++idb) { + redisDb *db = g_pserver->db[idb]; + if (db->size() > 0 && 
db->keycacheIsEnabled()) { + serverLog(LL_WARNING, "Key cache %d exceeds maxmemory during load, freeing - performance may be affected increase maxmemory if possible", idb); + db->disableKeyCache(); + } + } + } + } + } + + void enqueue(std::unique_ptr &spjob) { + vecbatch.push_back(spjob.release()); + if (vecbatch.size() >= 64) { + queueJobs.enqueue_bulk(vecbatch.data(), vecbatch.size()); + vecbatch.clear(); + throttle(); + } + } + + void pauseExecution() { + m_lockPause.lock(); + } + + void resumeExecution() { + m_lockPause.unlock(); + } + + void enqueue(std::function &&fn) { + std::unique_ptr spjob = std::make_unique(std::move(fn)); + queueJobs.enqueue(spjob.release()); + throttle(); + } + + void ProcessWhileBlocked() { + if ((mstime() - lastPing) > 1000) { // Ping if its been a second or longer + listIter li; + listNode *ln; + listRewind(g_pserver->masters, &li); + while ((ln = listNext(&li))) + { + struct redisMaster *mi = (struct redisMaster*)listNodeValue(ln); + if (mi->masterhost && mi->repl_state == REPL_STATE_TRANSFER) + replicationSendNewlineToMaster(mi); + } + lastPing = mstime(); + } + + processEventsWhileBlocked(serverTL - g_pserver->rgthreadvar); + } + + size_t ckeys() { return ckeysLoaded; } + + size_t endWork() { + if (!vecbatch.empty()) { + queueJobs.enqueue_bulk(vecbatch.data(), vecbatch.size()); + vecbatch.clear(); + } + std::atomic_thread_fence(std::memory_order_seq_cst); // The queue must have transferred to the consumer before we call fExit + serverAssert(fLaunched); + fExit = true; + if (g_pserver->m_pstorageFactory) { + // If we have a storage provider it can take some time to complete and we want to process events in the meantime + while (!workerThreadDone) { + usleep(10); + pauseExecution(); + ProcessWhileBlocked(); + resumeExecution(); + } + } + m_thread.join(); + while (cstorageWritesInFlight.load(std::memory_order_seq_cst)) { + usleep(10); + ProcessWhileBlocked(); + } + fLaunched = false; + fExit = false; + serverAssert(queueJobs.size_approx() == 0); + return ckeysLoaded; + } + + void processJob(rdbInsertJob &job) { + redisObjectStack keyobj; + initStaticStringObject(keyobj,job.key); + + bool f1024thKey = false; + bool fStaleMvccKey = (this->rsi) ? mvccFromObj(job.val) < this->rsi->mvccMinThreshold : false; + + /* Check if the key already expired. This function is used when loading + * an RDB file from disk, either at startup, or when an RDB was + * received from the master. In the latter case, the master is + * responsible for key expiry. If we would expire keys here, the + * snapshot taken by the master may not be reflected on the replica. */ + bool fExpiredKey = iAmMaster() && !(this->rdbflags&RDBFLAGS_AOF_PREAMBLE) && job.expiretime != -1 && job.expiretime < this->now; + if (fStaleMvccKey || fExpiredKey) { + if (fStaleMvccKey && !fExpiredKey && this->rsi != nullptr && this->rsi->mi != nullptr && this->rsi->mi->staleKeyMap != nullptr && lookupKeyRead(job.db, &keyobj) == nullptr) { + // We have a key that we've already deleted and is not back in our database. 
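pauseExecution()/resumeExecution() above work because the worker takes m_lockPause around each batch it processes (see the dequeue loop in loadWorkerThreadMain further below), so whoever holds the lock stalls the worker between batches. A small sketch of that handshake, with std::mutex standing in for fastlock and all names illustrative:

// Pause/resume handshake: the worker locks per batch, so holding the lock
// freezes it between batches. Illustrative only.
#include <atomic>
#include <chrono>
#include <cstdio>
#include <mutex>
#include <thread>

int main() {
    std::mutex pauseLock;
    std::atomic<bool> stop{false};
    std::atomic<int> batches{0};

    std::thread worker([&] {
        while (!stop.load()) {
            std::unique_lock<std::mutex> ul(pauseLock);  // blocks while "paused"
            batches++;                                   // process one batch under the lock
            ul.unlock();
            std::this_thread::sleep_for(std::chrono::milliseconds(1));
        }
    });

    std::this_thread::sleep_for(std::chrono::milliseconds(10));
    {
        std::lock_guard<std::mutex> pause(pauseLock);    // pauseExecution()
        int frozen = batches.load();
        std::this_thread::sleep_for(std::chrono::milliseconds(10));
        printf("paused: %d == %d\n", frozen, batches.load());  // no progress while held
    }                                                    // resumeExecution()
    stop = true;
    worker.join();
}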
+ // We'll need to inform the sending master of the delete if it is also a replica of us + robj_sharedptr objKeyDup(createStringObject(job.key, sdslen(job.key))); + this->rsi->mi->staleKeyMap->operator[](job.db->id).push_back(objKeyDup); + } + sdsfree(job.key); + job.key = nullptr; + decrRefCount(job.val); + job.val = nullptr; + } else { + /* Add the new object in the hash table */ + int fInserted = dbMerge(job.db, job.key, job.val, (this->rsi && this->rsi->fForceSetKey) || (this->rdbflags & RDBFLAGS_ALLOW_DUP)); // Note: dbMerge will incrRef + + if (fInserted) + { + auto ckeys = this->ckeysLoaded.fetch_add(1, std::memory_order_relaxed); + f1024thKey = (ckeys % 1024) == 0; + + /* Set the expire time if needed */ + if (job.expiretime != -1) + { + setExpire(NULL,job.db,&keyobj,nullptr,job.expiretime); + } + + /* Set usage information (for eviction). */ + objectSetLRUOrLFU(job.val,job.lfu_freq,job.lru_idle,job.lru_clock,1000); + + /* call key space notification on key loaded for modules only */ + moduleNotifyKeyspaceEvent(NOTIFY_LOADED, "loaded", &keyobj, job.db->id); + + replicationNotifyLoadedKey(job.db, &keyobj, job.val, job.expiretime); + + for (auto &pair : job.vecsubexpires) + { + setExpire(NULL, job.db, &keyobj, pair.first, pair.second); + replicateSubkeyExpire(job.db, &keyobj, pair.first.get(), pair.second); + } + + job.val = nullptr; // don't free this as we moved ownership to the DB + } + } + + /* If we have a storage provider check if we need to evict some keys to stay under our memory limit, + do this every 16 keys to limit the perf impact */ + if (g_pserver->m_pstorageFactory && f1024thKey) + { + bool fHighMemory = (getMaxmemoryState(NULL,NULL,NULL,NULL) != C_OK); + if (fHighMemory || f1024thKey) + { + for (int idb = 0; idb < cserver.dbnum; ++idb) + { + if (g_pserver->m_pstorageFactory) { + g_pserver->db[idb]->processChangesAsync(this->cstorageWritesInFlight); + fHighMemory = false; + } + } + if (fHighMemory) + performEvictions(false /* fPreSnapshot*/); + } + g_pserver->garbageCollector.endEpoch(serverTL->gcEpoch); + serverTL->gcEpoch = g_pserver->garbageCollector.startEpoch(); + } + } + + static void loadWorkerThreadMain(rdbAsyncWorkThread *pqueue) { + rdbAsyncWorkThread &queue = *pqueue; + redisServerThreadVars vars = {}; + vars.clients_pending_asyncwrite = listCreate(); + serverTL = &vars; + aeSetThreadOwnsLockOverride(true); + + // We will inheret the server thread's affinity mask, clear it as we want to run on a different core. 
+ cpu_set_t *cpuset = CPU_ALLOC(std::thread::hardware_concurrency()); + if (cpuset != nullptr) { + size_t size = CPU_ALLOC_SIZE(std::thread::hardware_concurrency()); + CPU_ZERO_S(size, cpuset); + for (unsigned i = 0; i < std::thread::hardware_concurrency(); ++i) { + CPU_SET_S(i, size, cpuset); + } + pthread_setaffinity_np(pthread_self(), size, cpuset); + CPU_FREE(cpuset); + } + + for (;;) { + if (queue.queueJobs.size_approx() == 0) { + if (queue.fExit.load(std::memory_order_relaxed)) + break; + } + + if (queue.fExit.load(std::memory_order_seq_cst) && queue.queueJobs.size_approx() == 0) + break; + + vars.gcEpoch = g_pserver->garbageCollector.startEpoch(); + JobBase *rgjob[64]; + int cjobs = 0; + while ((cjobs = pqueue->queueJobs.wait_dequeue_bulk_timed(rgjob, 64, std::chrono::milliseconds(5))) > 0) { + std::unique_lock ulPause(pqueue->m_lockPause); + + for (int ijob = 0; ijob < cjobs; ++ijob) { + JobBase *pjob = rgjob[ijob]; + switch (pjob->type) + { + case JobBase::JobType::Insert: + pqueue->processJob(*static_cast(pjob)); + break; + + case JobBase::JobType::Function: + static_cast(pjob)->m_fn(); + break; + } + delete pjob; + } + } + g_pserver->garbageCollector.endEpoch(vars.gcEpoch); + } + + if (g_pserver->m_pstorageFactory) { + for (int idb = 0; idb < cserver.dbnum; ++idb) + g_pserver->db[idb]->processChangesAsync(queue.cstorageWritesInFlight); + } + + queue.workerThreadDone = true; + ProcessPendingAsyncWrites(); + listRelease(vars.clients_pending_asyncwrite); + aeSetThreadOwnsLockOverride(false); + } +}; + /* Track loading progress in order to serve client's from time to time and if needed calculate rdb checksum */ void rdbLoadProgressCallback(rio *r, const void *buf, size_t len) { @@ -2574,6 +2923,8 @@ void rdbLoadProgressCallback(rio *r, const void *buf, size_t len) { (g_pserver->loading_process_events_interval_keys && (r->keys_since_last_callback >= g_pserver->loading_process_events_interval_keys))) { + rdbAsyncWorkThread *pwthread = reinterpret_cast(r->chksum_arg); + listIter li; listNode *ln; listRewind(g_pserver->masters, &li); @@ -2584,7 +2935,13 @@ void rdbLoadProgressCallback(rio *r, const void *buf, size_t len) { replicationSendNewlineToMaster(mi); } loadingProgress(r->processed_bytes); + + if (pwthread) + pwthread->pauseExecution(); processEventsWhileBlocked(serverTL - g_pserver->rgthreadvar); + if (pwthread) + pwthread->resumeExecution(); + processModuleLoadingProgressEvent(0); robj *ping_argv[1]; @@ -2597,28 +2954,38 @@ void rdbLoadProgressCallback(rio *r, const void *buf, size_t len) { } } + /* Load an RDB file from the rio stream 'rdb'. On success C_OK is returned, * otherwise C_ERR is returned and 'errno' is set accordingly. */ int rdbLoadRio(rio *rdb, int rdbflags, rdbSaveInfo *rsi) { uint64_t dbid = 0; int type, rdbver; - redisDb *db = g_pserver->db[dbid]; + redisDb *dbCur = g_pserver->db[dbid]; char buf[1024]; /* Key-specific attributes, set by opcodes before the key type. 
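loadWorkerThreadMain() above drains jobs in batches of up to 64 via wait_dequeue_bulk_timed(), while the loader thread batches its enqueues (vecbatch/enqueue_bulk) and signals completion with a flag. A stripped-down sketch of that producer/consumer shape, assuming the bundled deps/concurrentqueue/blockingconcurrentqueue.h is on the include path; Job and the batch size are illustrative:

// Batched producer/consumer over moodycamel::BlockingConcurrentQueue (sketch).
#include <atomic>
#include <chrono>
#include <cstdio>
#include <thread>
#include <vector>
#include "blockingconcurrentqueue.h"

struct Job { int id; };

int main() {
    moodycamel::BlockingConcurrentQueue<Job*> queue;
    std::atomic<bool> done{false};
    std::atomic<int> processed{0};

    std::thread worker([&] {
        Job *batch[64];
        for (;;) {
            size_t n = queue.wait_dequeue_bulk_timed(batch, 64, std::chrono::milliseconds(5));
            for (size_t i = 0; i < n; ++i) { processed++; delete batch[i]; }
            // Stop only once the producer is finished AND the queue is drained.
            if (done.load() && queue.size_approx() == 0) break;
        }
    });

    std::vector<Job*> pending;                  // local batch, flushed at 64 like vecbatch
    for (int i = 0; i < 1000; ++i) {
        pending.push_back(new Job{i});
        if (pending.size() >= 64) {
            queue.enqueue_bulk(pending.data(), pending.size());
            pending.clear();
        }
    }
    queue.enqueue_bulk(pending.data(), pending.size());  // flush the tail
    done = true;
    worker.join();
    printf("processed %d jobs\n", processed.load());
}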
*/ long long lru_idle = -1, lfu_freq = -1, expiretime = -1, now; long long lru_clock = 0; + unsigned long long ckeysLoaded = 0; uint64_t mvcc_tstamp = OBJ_MVCC_INVALID; - size_t ckeysLoaded = 0; + now = mstime(); + rdbAsyncWorkThread wqueue(rsi, rdbflags, now); robj *subexpireKey = nullptr; sds key = nullptr; bool fLastKeyExpired = false; + std::unique_ptr spjob; - for (int idb = 0; idb < cserver.dbnum; ++idb) - { - g_pserver->db[idb]->trackChanges(true, 1024); + // If we're tracking changes we need to reset this + bool fTracking = g_pserver->db[0]->FTrackingChanges(); + if (fTracking) { + // We don't want to track here because processChangesAsync is outside the normal scope handling + for (int idb = 0; idb < cserver.dbnum; ++idb) { + if (g_pserver->db[idb]->processChanges(false)) + g_pserver->db[idb]->commitChanges(); + } } rdb->update_cksum = rdbLoadProgressCallback; + rdb->chksum_arg = &wqueue; rdb->max_processing_chunk = g_pserver->loading_process_events_interval_bytes; if (rioRead(rdb,buf,9) == 0) goto eoferr; buf[9] = '\0'; @@ -2634,8 +3001,8 @@ int rdbLoadRio(rio *rdb, int rdbflags, rdbSaveInfo *rsi) { return C_ERR; } - now = mstime(); lru_clock = LRU_CLOCK(); + wqueue.start(); while(1) { robj *val; @@ -2683,7 +3050,7 @@ int rdbLoadRio(rio *rdb, int rdbflags, rdbSaveInfo *rsi) { "databases. Exiting\n", cserver.dbnum); exit(1); } - db = g_pserver->db[dbid]; + dbCur = g_pserver->db[dbid]; continue; /* Read next opcode. */ } else if (type == RDB_OPCODE_RESIZEDB) { /* RESIZEDB: Hint about the size of the keys in the currently @@ -2693,7 +3060,11 @@ int rdbLoadRio(rio *rdb, int rdbflags, rdbSaveInfo *rsi) { goto eoferr; if ((expires_size = rdbLoadLen(rdb,NULL)) == RDB_LENERR) goto eoferr; - db->expand(db_size); + if (g_pserver->allowRdbResizeOp && !g_pserver->m_pstorageFactory) { + wqueue.enqueue([dbCur, db_size]{ + dbCur->expand(db_size); + }); + } continue; /* Read next opcode. */ } else if (type == RDB_OPCODE_AUX) { /* AUX: generic string-string fields. Use to add state to RDB @@ -2768,12 +3139,10 @@ int rdbLoadRio(rio *rdb, int rdbflags, rdbSaveInfo *rsi) { } } else { - redisObjectStack keyobj; - initStaticStringObject(keyobj,key); long long expireT = strtoll(szFromObj(auxval), nullptr, 10); - setExpire(NULL, db, &keyobj, subexpireKey, expireT); - replicateSubkeyExpire(db, &keyobj, subexpireKey, expireT); - decrRefCount(subexpireKey); + serverAssert(spjob != nullptr); + serverAssert(sdscmp(key, spjob->key) == 0); + spjob->addSubexpireKey(subexpireKey, expireT); subexpireKey = nullptr; } } else { @@ -2846,7 +3215,7 @@ int rdbLoadRio(rio *rdb, int rdbflags, rdbSaveInfo *rsi) { key = nullptr; } - if ((key = (sds)rdbGenericLoadStringObject(rdb,RDB_LOAD_SDS,NULL)) == NULL) + if ((key = (sds)rdbGenericLoadStringObject(rdb,RDB_LOAD_SDS_SHARED,NULL)) == NULL) goto eoferr; /* Read value */ if ((val = rdbLoadObject(type,rdb,key,mvcc_tstamp)) == NULL) { @@ -2854,7 +3223,20 @@ int rdbLoadRio(rio *rdb, int rdbflags, rdbSaveInfo *rsi) { key = nullptr; goto eoferr; } + + bool fStaleMvccKey = (rsi) ? mvccFromObj(val) < rsi->mvccMinThreshold : false; + if (spjob != nullptr) + wqueue.enqueue(spjob); + spjob = std::make_unique(); + spjob->db = dbCur; + spjob->key = sdsdupshared(key); + spjob->val = val; + spjob->lru_clock = lru_clock; + spjob->expiretime = expiretime; + spjob->lru_idle = lru_idle; + spjob->lfu_freq = lfu_freq; + val = nullptr; /* Check if the key already expired. 
This function is used when loading * an RDB file from disk, either at startup, or when an RDB was @@ -2864,75 +3246,15 @@ int rdbLoadRio(rio *rdb, int rdbflags, rdbSaveInfo *rsi) { * Similarly if the RDB is the preamble of an AOF file, we want to * load all the keys as they are, since the log of operations later * assume to work in an exact keyspace state. */ - redisObjectStack keyobj; - initStaticStringObject(keyobj,key); bool fExpiredKey = iAmMaster() && !(rdbflags&RDBFLAGS_AOF_PREAMBLE) && expiretime != -1 && expiretime < now; - if (fStaleMvccKey || fExpiredKey) { - if (fStaleMvccKey && !fExpiredKey && rsi != nullptr && rsi->mi != nullptr && rsi->mi->staleKeyMap != nullptr && lookupKeyRead(db, &keyobj) == nullptr) { - // We have a key that we've already deleted and is not back in our database. - // We'll need to inform the sending master of the delete if it is also a replica of us - robj_sharedptr objKeyDup(createStringObject(key, sdslen(key))); - rsi->mi->staleKeyMap->operator[](db->id).push_back(objKeyDup); - } - fLastKeyExpired = true; - sdsfree(key); - key = nullptr; - decrRefCount(val); - val = nullptr; - } else { - /* If we have a storage provider check if we need to evict some keys to stay under our memory limit, - do this every 16 keys to limit the perf impact */ - if (g_pserver->m_pstorageFactory && (ckeysLoaded % 128) == 0) - { + fLastKeyExpired = fStaleMvccKey || fExpiredKey; + + ckeysLoaded++; + if (g_pserver->m_pstorageFactory && (ckeysLoaded % 128) == 0) + { + if (!serverTL->gcEpoch.isReset()) { g_pserver->garbageCollector.endEpoch(serverTL->gcEpoch); serverTL->gcEpoch = g_pserver->garbageCollector.startEpoch(); - bool fHighMemory = (getMaxmemoryState(NULL,NULL,NULL,NULL) != C_OK); - if (fHighMemory || (ckeysLoaded % (1024)) == 0) - { - for (int idb = 0; idb < cserver.dbnum; ++idb) - { - if (g_pserver->db[idb]->processChanges(false)) - g_pserver->db[idb]->commitChanges(); - if (fHighMemory && !(rsi && rsi->fForceSetKey)) { - g_pserver->db[idb]->removeAllCachedValues(); // During load we don't go through the normal eviction unless we're merging (i.e. an active replica) - fHighMemory = false; // we took care of it - } - g_pserver->db[idb]->trackChanges(false, 1024); - } - if (fHighMemory) - performEvictions(false /* fPreSnapshot*/); - } - } - - redisObjectStack keyobj; - initStaticStringObject(keyobj,key); - - /* Add the new object in the hash table */ - int fInserted = dbMerge(db, key, val, (rsi && rsi->fForceSetKey) || (rdbflags & RDBFLAGS_ALLOW_DUP)); // Note: dbMerge will incrRef - fLastKeyExpired = false; - - if (fInserted) - { - ++ckeysLoaded; - - /* Set the expire time if needed */ - if (expiretime != -1) - { - setExpire(NULL,db,&keyobj,nullptr,expiretime); - } - - /* Set usage information (for eviction). 
*/ - objectSetLRUOrLFU(val,lfu_freq,lru_idle,lru_clock,1000); - - /* call key space notification on key loaded for modules only */ - moduleNotifyKeyspaceEvent(NOTIFY_LOADED, "loaded", &keyobj, db->id); - - replicationNotifyLoadedKey(db, &keyobj, val, expiretime); - } - else - { - decrRefCount(val); - val = nullptr; } } @@ -2948,6 +3270,9 @@ int rdbLoadRio(rio *rdb, int rdbflags, rdbSaveInfo *rsi) { lru_idle = -1; } + if (spjob != nullptr) + wqueue.enqueue(spjob); + if (key != nullptr) { sdsfree(key); @@ -2980,11 +3305,14 @@ int rdbLoadRio(rio *rdb, int rdbflags, rdbSaveInfo *rsi) { } } - for (int idb = 0; idb < cserver.dbnum; ++idb) - { - if (g_pserver->db[idb]->processChanges(false)) - g_pserver->db[idb]->commitChanges(); + wqueue.endWork(); + if (fTracking) { + // Reset track changes + for (int idb = 0; idb < cserver.dbnum; ++idb) { + g_pserver->db[idb]->trackChanges(false); + } } + return C_OK; /* Unexpected end of file is handled here calling rdbReportReadError(): @@ -2992,6 +3320,14 @@ int rdbLoadRio(rio *rdb, int rdbflags, rdbSaveInfo *rsi) { * the RDB file from a socket during initial SYNC (diskless replica mode), * we'll report the error to the caller, so that we can retry. */ eoferr: + if (fTracking) { + // Reset track changes + for (int idb = 0; idb < cserver.dbnum; ++idb) { + g_pserver->db[idb]->trackChanges(false); + } + } + + wqueue.endWork(); if (key != nullptr) { sdsfree(key); diff --git a/src/rdb.h b/src/rdb.h index ffff0ab66..c13fa2b14 100644 --- a/src/rdb.h +++ b/src/rdb.h @@ -119,10 +119,11 @@ #define RDB_MODULE_OPCODE_STRING 5 /* String. */ /* rdbLoad...() functions flags. */ -#define RDB_LOAD_NONE 0 -#define RDB_LOAD_ENC (1<<0) -#define RDB_LOAD_PLAIN (1<<1) -#define RDB_LOAD_SDS (1<<2) +#define RDB_LOAD_NONE 0 +#define RDB_LOAD_ENC (1<<0) +#define RDB_LOAD_PLAIN (1<<1) +#define RDB_LOAD_SDS (1<<2) +#define RDB_LOAD_SDS_SHARED ((1 << 3) | RDB_LOAD_SDS) /* flags on the purpose of rdb save or load */ #define RDBFLAGS_NONE 0 /* No special RDB loading. 
*/ diff --git a/src/replication.cpp b/src/replication.cpp index 4197df0c9..c7c242e08 100644 --- a/src/replication.cpp +++ b/src/replication.cpp @@ -46,6 +46,7 @@ #include #include #include +#include void replicationDiscardCachedMaster(redisMaster *mi); void replicationResurrectCachedMaster(redisMaster *mi, connection *conn); @@ -184,8 +185,39 @@ int bg_unlink(const char *filename) { /* ---------------------------------- MASTER -------------------------------- */ +bool createDiskBacklog() { + // Lets create some disk backed pages and add them here + std::string path = "./repl-backlog-temp" + std::to_string(gettid()); + int fd = open(path.c_str(), O_CREAT | O_RDWR | O_LARGEFILE, S_IRUSR | S_IWUSR); + if (fd < 0) { + return false; + } + size_t alloc = cserver.repl_backlog_disk_size; + int result = truncate(path.c_str(), alloc); + unlink(path.c_str()); // ensure the fd is the only ref + if (result == -1) { + close (fd); + return false; + } + + g_pserver->repl_backlog_disk = (char*)mmap(nullptr, alloc, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + close(fd); + if (g_pserver->repl_backlog_disk == MAP_FAILED) { + g_pserver->repl_backlog_disk = nullptr; + return false; + } + + serverLog(LL_VERBOSE, "Disk Backed Replication Allocated"); + return true; +} + void createReplicationBacklog(void) { serverAssert(g_pserver->repl_backlog == NULL); + if (cserver.repl_backlog_disk_size) { + if (!createDiskBacklog()) { + serverLog(LL_WARNING, "Failed to create disk backlog, will use memory only"); + } + } g_pserver->repl_backlog = (char*)zmalloc(g_pserver->repl_backlog_size, MALLOC_LOCAL); g_pserver->repl_backlog_histlen = 0; g_pserver->repl_backlog_idx = 0; @@ -234,9 +266,22 @@ void resizeReplicationBacklog(long long newsize) { long long earliest_off = g_pserver->repl_lowest_off.load(); if (earliest_off != -1) { - // We need to keep critical data so we can't shrink less than the hot data in the buffer + char *backlog = nullptr; newsize = std::max(newsize, g_pserver->master_repl_offset - earliest_off); - char *backlog = (char*)zmalloc(newsize); + + if (cserver.repl_backlog_disk_size != 0) { + if (newsize > g_pserver->repl_backlog_config_size) { + if (g_pserver->repl_backlog == g_pserver->repl_backlog_disk) + return; // Can't do anything more + serverLog(LL_NOTICE, "Switching to disk backed replication backlog due to exceeding memory limits"); + backlog = g_pserver->repl_backlog_disk; + newsize = cserver.repl_backlog_disk_size; + } + } + + // We need to keep critical data so we can't shrink less than the hot data in the buffer + if (backlog == nullptr) + backlog = (char*)zmalloc(newsize); g_pserver->repl_backlog_histlen = g_pserver->master_repl_offset - earliest_off; long long earliest_idx = getReplIndexFromOffset(earliest_off); @@ -251,7 +296,10 @@ void resizeReplicationBacklog(long long newsize) { auto cbActiveBacklog = cbPhase1 + g_pserver->repl_backlog_idx; serverAssert(g_pserver->repl_backlog_histlen == cbActiveBacklog); } - zfree(g_pserver->repl_backlog); + if (g_pserver->repl_backlog != g_pserver->repl_backlog_disk) + zfree(g_pserver->repl_backlog); + else + serverLog(LL_NOTICE, "Returning to memory backed replication backlog"); g_pserver->repl_backlog = backlog; g_pserver->repl_backlog_idx = g_pserver->repl_backlog_histlen; if (g_pserver->repl_batch_idxStart >= 0) { @@ -261,7 +309,10 @@ void resizeReplicationBacklog(long long newsize) { } g_pserver->repl_backlog_start = earliest_off; } else { - zfree(g_pserver->repl_backlog); + if (g_pserver->repl_backlog != g_pserver->repl_backlog_disk) + 
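createDiskBacklog() above relies on a classic trick: create a temporary file, size it, unlink it immediately so the descriptor is the only reference, and mmap it so the backlog pages are disk-backed but vanish automatically once the mapping is gone. A standalone sketch of that pattern (it uses ftruncate() on the open descriptor instead of truncate() on the path, which is equivalent here; names are illustrative):

#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
#include <cstddef>
#include <string>

// Returns a pointer to `len` bytes of disk-backed memory, or nullptr on error.
char *mapDiskBackedBuffer(const std::string &path, size_t len)
{
    int fd = open(path.c_str(), O_CREAT | O_RDWR, S_IRUSR | S_IWUSR);
    if (fd < 0) return nullptr;
    unlink(path.c_str());                  // file disappears once fd is closed
    if (ftruncate(fd, (off_t)len) != 0) {  // reserve the backing space
        close(fd);
        return nullptr;
    }
    void *p = mmap(nullptr, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    close(fd);                             // the mapping keeps the pages alive
    return p == MAP_FAILED ? nullptr : static_cast<char*>(p);
}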
zfree(g_pserver->repl_backlog); + else + serverLog(LL_NOTICE, "Returning to memory backed replication backlog"); g_pserver->repl_backlog = (char*)zmalloc(newsize); g_pserver->repl_backlog_histlen = 0; g_pserver->repl_backlog_idx = 0; @@ -311,6 +362,8 @@ void feedReplicationBacklog(const void *ptr, size_t len) { long long maxClientBuffer = (long long)cserver.client_obuf_limits[CLIENT_TYPE_SLAVE].hard_limit_bytes; if (maxClientBuffer <= 0) maxClientBuffer = LLONG_MAX; // infinite essentially + if (cserver.repl_backlog_disk_size) + maxClientBuffer = std::max(g_pserver->repl_backlog_size, cserver.repl_backlog_disk_size); long long min_offset = LLONG_MAX; int listening_replicas = 0; while ((ln = listNext(&li))) { @@ -761,7 +814,6 @@ long long addReplyReplicationBacklog(client *c, long long offset) { /* Force the partial sync to be queued */ prepareClientToWrite(c); - c->fPendingReplicaWrite = true; return len; } @@ -890,6 +942,7 @@ int masterTryPartialResynchronization(client *c) { c->repl_ack_time = g_pserver->unixtime; c->repl_put_online_on_ack = 0; listAddNodeTail(g_pserver->slaves,c); + g_pserver->rgthreadvar[c->iel].cclientsReplica++; /* We can't use the connection buffers since they are used to accumulate * new commands at this stage. But we are sure the socket send buffer is @@ -993,6 +1046,7 @@ int startBgsaveForReplication(int mincapa) { replica->replstate = REPL_STATE_NONE; replica->flags &= ~CLIENT_SLAVE; listDelNode(g_pserver->slaves,ln); + g_pserver->rgthreadvar[replica->iel].cclientsReplica--; addReplyError(replica, "BGSAVE failed, replication can't continue"); replica->flags |= CLIENT_CLOSE_AFTER_REPLY; @@ -1122,6 +1176,7 @@ void syncCommand(client *c) { c->repldbfd = -1; c->flags |= CLIENT_SLAVE; listAddNodeTail(g_pserver->slaves,c); + g_pserver->rgthreadvar[c->iel].cclientsReplica++; /* Create the replication backlog if needed. 
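resizeReplicationBacklog() preserves the live span of the circular backlog when it swaps buffers: the hot data may wrap around the end of the old buffer, so it is copied out in two phases (cbPhase1 up to the wrap point, then the remainder from index 0, which the assert above cross-checks against the history length). A simplified sketch of that two-phase copy with generic parameters in place of the repl_backlog_* globals:

#include <algorithm>
#include <cstddef>
#include <cstring>

// Copy the `histlen` most recent bytes, which end at write index `idx` of the
// circular buffer `oldbuf` (capacity `oldsize`), into the start of `newbuf`.
// Assumes histlen <= oldsize and idx < oldsize.
void copyCircularTail(const char *oldbuf, size_t oldsize, size_t idx,
                      size_t histlen, char *newbuf)
{
    size_t start  = (idx + oldsize - histlen) % oldsize; // where live data begins
    size_t phase1 = std::min(histlen, oldsize - start);  // bytes up to the wrap point
    std::memcpy(newbuf, oldbuf + start, phase1);
    std::memcpy(newbuf + phase1, oldbuf, histlen - phase1); // wrapped remainder, if any
}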
*/ if (listLength(g_pserver->slaves) == 1 && g_pserver->repl_backlog == NULL) { @@ -4992,6 +5047,22 @@ void flushReplBacklogToClients() long long min_offset = LONG_LONG_MAX; // Ensure no overflow serverAssert(g_pserver->repl_batch_offStart < g_pserver->master_repl_offset); + if (g_pserver->master_repl_offset - g_pserver->repl_batch_offStart > g_pserver->repl_backlog_size) { + // We overflowed + listIter li; + listNode *ln; + listRewind(g_pserver->slaves, &li); + while ((ln = listNext(&li))) { + client *c = (client*)listNodeValue(ln); + sds sdsClient = catClientInfoString(sdsempty(),c); + freeClientAsync(c); + serverLog(LL_WARNING,"Client %s scheduled to be closed ASAP for overcoming of output buffer limits.", sdsClient); + sdsfree(sdsClient); + } + goto LDone; + } + + // Ensure no overflow if we get here serverAssert(g_pserver->master_repl_offset - g_pserver->repl_batch_offStart <= g_pserver->repl_backlog_size); serverAssert(g_pserver->repl_batch_idxStart != g_pserver->repl_backlog_idx); @@ -5022,16 +5093,13 @@ void flushReplBacklogToClients() replica->repl_end_off = g_pserver->master_repl_offset; /* Only if the there isn't already a pending write do we prepare the client to write */ - if (!replica->fPendingReplicaWrite){ - serverAssert(replica->repl_curr_off != g_pserver->master_repl_offset); - prepareClientToWrite(replica); - replica->fPendingReplicaWrite = true; - } - + serverAssert(replica->repl_curr_off != g_pserver->master_repl_offset); + prepareClientToWrite(replica); } if (fAsyncWrite) ProcessPendingAsyncWrites(); +LDone: // This may be called multiple times per "frame" so update with our progress flushing to clients g_pserver->repl_batch_idxStart = g_pserver->repl_backlog_idx; g_pserver->repl_batch_offStart = g_pserver->master_repl_offset; diff --git a/src/rio.cpp b/src/rio.cpp index 72024ada2..809d028ef 100644 --- a/src/rio.cpp +++ b/src/rio.cpp @@ -99,6 +99,7 @@ static const rio rioBufferIO = { rioBufferTell, rioBufferFlush, NULL, /* update_checksum */ + NULL, /* update checksum arg */ 0, /* current checksum */ 0, /* flags */ 0, /* bytes read or written */ @@ -113,6 +114,7 @@ static const rio rioConstBufferIO = { rioBufferTell, rioBufferFlush, NULL, /* update_checksum */ + NULL, /* update checksum arg */ 0, /* current checksum */ 0, /* flags */ 0, /* bytes read or written */ @@ -176,6 +178,7 @@ static const rio rioFileIO = { rioFileTell, rioFileFlush, NULL, /* update_checksum */ + NULL, /* update checksum arg */ 0, /* current checksum */ 0, /* flags */ 0, /* bytes read or written */ @@ -275,6 +278,7 @@ static const rio rioConnIO = { rioConnTell, rioConnFlush, NULL, /* update_checksum */ + NULL, /* update checksum arg */ 0, /* current checksum */ 0, /* flags */ 0, /* bytes read or written */ @@ -394,6 +398,7 @@ static const rio rioFdIO = { rioFdTell, rioFdFlush, NULL, /* update_checksum */ + NULL, /* update checksum arg */ 0, /* current checksum */ 0, /* flags */ 0, /* bytes read or written */ diff --git a/src/rio.h b/src/rio.h index d48474fcb..86f3fa465 100644 --- a/src/rio.h +++ b/src/rio.h @@ -58,6 +58,7 @@ struct _rio { * and len fields pointing to the new block of data to add to the checksum * computation. */ void (*update_cksum)(struct _rio *, const void *buf, size_t len); + void *chksum_arg; /* The current checksum and flags (see RIO_FLAG_*) */ uint64_t cksum, flags; diff --git a/src/server.cpp b/src/server.cpp index 10fae5779..eb4bc974a 100644 --- a/src/server.cpp +++ b/src/server.cpp @@ -1707,12 +1707,12 @@ void tryResizeHashTables(int dbid) { * is returned. 
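The new guard in flushReplBacklogToClients() covers the case where more data was produced since the last flush than the circular backlog can hold: the range the replicas still need has already been overwritten, so the replica clients are scheduled to be freed rather than served a corrupt range. A distilled sketch of that decision using simplified types; the callbacks stand in for freeClientAsync() on each replica:

#include <functional>
#include <vector>

struct BacklogBatch {
    long long offStart;     // master_repl_offset when the batch began
    long long offEnd;       // master_repl_offset now
    long long backlogSize;  // capacity of the circular backlog
};

// Returns false (after dropping every consumer) when the batch has outgrown
// the backlog; otherwise the whole batch is still addressable inside it.
bool flushOrDropConsumers(const BacklogBatch &b,
                          const std::vector<std::function<void()>> &dropConsumer)
{
    if (b.offEnd - b.offStart > b.backlogSize) {
        for (const auto &drop : dropConsumer)
            drop();            // slow replicas can no longer catch up from the backlog
        return false;
    }
    return true;
}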
*/ int redisDbPersistentData::incrementallyRehash() { /* Keys dictionary */ - if (dictIsRehashing(m_pdict) || dictIsRehashing(m_pdictTombstone)) { - int result = dictRehashMilliseconds(m_pdict,1); - result += dictRehashMilliseconds(m_pdictTombstone,1); - return result; /* already used our millisecond for this loop... */ - } - return 0; + int result = 0; + if (dictIsRehashing(m_pdict)) + result += dictRehashMilliseconds(m_pdict,1); + if (dictIsRehashing(m_pdictTombstone)) + dictRehashMilliseconds(m_pdictTombstone,1); // don't count this + return result; /* already used our millisecond for this loop... */ } /* This function is called once a background process of some kind terminates, @@ -2115,12 +2115,17 @@ void databasesCron(bool fMainThread) { if (g_pserver->activerehashing) { for (j = 0; j < dbs_per_call; j++) { if (serverTL->rehashCtl != nullptr) { - if (dictRehashSomeAsync(serverTL->rehashCtl, 5)) { - break; - } else { - dictCompleteRehashAsync(serverTL->rehashCtl, true /*fFree*/); - serverTL->rehashCtl = nullptr; + if (!serverTL->rehashCtl->done.load(std::memory_order_relaxed)) { + aeReleaseLock(); + if (dictRehashSomeAsync(serverTL->rehashCtl, rehashes_per_ms)) { + aeAcquireLock(); + break; + } + aeAcquireLock(); } + + dictCompleteRehashAsync(serverTL->rehashCtl, true /*fFree*/); + serverTL->rehashCtl = nullptr; } serverAssert(serverTL->rehashCtl == nullptr); @@ -2128,22 +2133,27 @@ void databasesCron(bool fMainThread) { /* Are we async rehashing? And if so is it time to re-calibrate? */ /* The recalibration limit is a prime number to ensure balancing across threads */ if (rehashes_per_ms > 0 && async_rehashes < 131 && !cserver.active_defrag_enabled && cserver.cthreads > 1 && dictSize(dict) > 2048 && dictIsRehashing(dict) && !g_pserver->loading) { - serverTL->rehashCtl = dictRehashAsyncStart(dict, rehashes_per_ms); - ++async_rehashes; + serverTL->rehashCtl = dictRehashAsyncStart(dict, rehashes_per_ms * ((1000 / g_pserver->hz) / 10)); // Estimate 10% CPU time spent in lock contention + if (serverTL->rehashCtl) + ++async_rehashes; } if (serverTL->rehashCtl) break; - + // Before starting anything new, can we end the rehash of a blocked thread? - if (dict->asyncdata != nullptr) { + while (dict->asyncdata != nullptr) { auto asyncdata = dict->asyncdata; if (asyncdata->done) { dictCompleteRehashAsync(asyncdata, false /*fFree*/); // Don't free because we don't own the pointer serverAssert(dict->asyncdata != asyncdata); - break; // completion can be expensive, don't do anything else + } else { + break; } } + if (dict->asyncdata) + break; + rehashes_per_ms = g_pserver->db[rehash_db]->incrementallyRehash(); async_rehashes = 0; if (rehashes_per_ms > 0) { @@ -2364,13 +2374,8 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) { UNUSED(id); UNUSED(clientData); - if (serverTL->rehashCtl != nullptr && !serverTL->rehashCtl->done) { - aeReleaseLock(); - // If there is not enough lock contention we may not have made enough progress on the async - // rehash. Ensure we finish it outside the lock. - dictRehashSomeAsync(serverTL->rehashCtl, serverTL->rehashCtl->queue.size()); - aeAcquireLock(); - } + if (g_pserver->maxmemory && g_pserver->m_pstorageFactory) + performEvictions(false); /* If another threads unblocked one of our clients, and this thread has been idle then beforeSleep won't have a chance to process the unblocking. 
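incrementallyRehash() and the databasesCron() changes above budget rehashing by time rather than by bucket count (roughly one millisecond per loop, with the async batch size scaled by an estimate of time lost to lock contention). A small, generic sketch of a time-budgeted rehash loop; rehashStep is a hypothetical stand-in for one increment of work inside dictRehashMilliseconds()/dictRehashSomeAsync():

#include <chrono>
#include <functional>

// Run rehash steps until roughly `ms` milliseconds have elapsed or the step
// function reports there is nothing left to do; returns the number of steps.
int rehashForMilliseconds(const std::function<bool()> &rehashStep, int ms)
{
    using clock = std::chrono::steady_clock;
    const auto deadline = clock::now() + std::chrono::milliseconds(ms);
    int steps = 0;
    while (clock::now() < deadline && rehashStep())
        ++steps;
    return steps;
}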
So we also @@ -2614,7 +2619,7 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) { g_pserver->rdb_bgsave_scheduled = 0; } - if (cserver.storage_memory_model == STORAGE_WRITEBACK && g_pserver->m_pstorageFactory) { + if (cserver.storage_memory_model == STORAGE_WRITEBACK && g_pserver->m_pstorageFactory && !g_pserver->loading) { run_with_period(g_pserver->storage_flush_period) { flushStorageWeak(); } @@ -2660,13 +2665,8 @@ int serverCronLite(struct aeEventLoop *eventLoop, long long id, void *clientData UNUSED(id); UNUSED(clientData); - if (serverTL->rehashCtl != nullptr && !serverTL->rehashCtl->done) { - aeReleaseLock(); - // If there is not enough lock contention we may not have made enough progress on the async - // rehash. Ensure we finish it outside the lock. - dictRehashSomeAsync(serverTL->rehashCtl, serverTL->rehashCtl->queue.size()); - aeAcquireLock(); - } + if (g_pserver->maxmemory && g_pserver->m_pstorageFactory) + performEvictions(false); int iel = ielFromEventLoop(eventLoop); serverAssert(iel != IDX_EVENT_LOOP_MAIN); @@ -2895,7 +2895,7 @@ void beforeSleep(struct aeEventLoop *eventLoop) { static thread_local bool fFirstRun = true; // note: we also copy the DB pointer in case a DB swap is done while the lock is released std::vector vecdb; // note we cache the database pointer in case a dbswap is done while the lock is released - if (cserver.storage_memory_model == STORAGE_WRITETHROUGH && g_pserver->m_pstorageFactory != nullptr) + if (cserver.storage_memory_model == STORAGE_WRITETHROUGH && g_pserver->m_pstorageFactory != nullptr && !g_pserver->loading) { if (!fFirstRun) { mstime_t storage_process_latency; @@ -2967,6 +2967,16 @@ void beforeSleep(struct aeEventLoop *eventLoop) { serverAssert(sleeping_threads <= cserver.cthreads); } + if (!g_pserver->garbageCollector.empty()) { + // Server threads don't free the GC, but if we don't have a + // a bgsave or some other async task then we'll hold onto the + // data for too long + g_pserver->asyncworkqueue->AddWorkFunction([]{ + auto epoch = g_pserver->garbageCollector.startEpoch(); + g_pserver->garbageCollector.endEpoch(epoch); + }, true /*fHiPri*/); + } + /* Determine whether the modules are enabled before sleeping, and use that result both here, and after wakeup to avoid double acquire or release of the GIL */ serverTL->modulesEnabledThisAeLoop = !!moduleCount(); @@ -3477,6 +3487,8 @@ int setOOMScoreAdj(int process_class) { * g_pserver->maxclients to the value that we can actually handle. 
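The beforeSleep() addition opens and immediately closes a GC epoch on the async work queue so deferred frees get reclaimed even when no bgsave or other epoch-holding task is running. The toy reclaimer below only models that "open, close, drain" idea; the real GarbageCollector is epoch- and thread-aware, and the interface shown here is a simplified assumption, not KeyDB's:

#include <functional>
#include <utility>
#include <vector>

// Toy reclaimer: callers defer frees, and closing an epoch releases everything
// deferred so far.
class ToyGarbageCollector {
    std::vector<std::function<void()>> pending;
public:
    struct Epoch {};
    void defer(std::function<void()> fn) { pending.push_back(std::move(fn)); }
    bool empty() const { return pending.empty(); }
    Epoch startEpoch() { return Epoch{}; }
    void endEpoch(Epoch) {
        for (auto &f : pending) f();
        pending.clear();
    }
};

void drainIfIdle(ToyGarbageCollector &gc)
{
    if (gc.empty()) return;      // nothing queued, skip the extra work
    auto e = gc.startEpoch();    // mirrors the beforeSleep() hunk above
    gc.endEpoch(e);              // closing the no-op epoch forces the drain
}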
*/ void adjustOpenFilesLimit(void) { rlim_t maxfiles = g_pserver->maxclients+CONFIG_MIN_RESERVED_FDS; + if (g_pserver->m_pstorageFactory) + maxfiles += g_pserver->m_pstorageFactory->filedsRequired(); struct rlimit limit; if (getrlimit(RLIMIT_NOFILE,&limit) == -1) { @@ -3795,8 +3807,6 @@ static void initServerThread(struct redisServerThreadVars *pvar, int fMain) pvar->in_eval = 0; pvar->in_exec = 0; pvar->el = aeCreateEventLoop(g_pserver->maxclients+CONFIG_FDSET_INCR); - aeSetBeforeSleepProc(pvar->el, beforeSleep, AE_SLEEP_THREADSAFE); - aeSetAfterSleepProc(pvar->el, afterSleep, AE_SLEEP_THREADSAFE); pvar->current_client = nullptr; pvar->fRetrySetAofEvent = false; if (pvar->el == NULL) { @@ -3805,6 +3815,8 @@ static void initServerThread(struct redisServerThreadVars *pvar, int fMain) strerror(errno)); exit(1); } + aeSetBeforeSleepProc(pvar->el, beforeSleep, AE_SLEEP_THREADSAFE); + aeSetAfterSleepProc(pvar->el, afterSleep, AE_SLEEP_THREADSAFE); fastlock_init(&pvar->lockPendingWrite, "lockPendingWrite"); @@ -4010,6 +4022,9 @@ void InitServerLast() { g_pserver->initial_memory_usage = zmalloc_used_memory(); g_pserver->asyncworkqueue = new (MALLOC_LOCAL) AsyncWorkQueue(cserver.cthreads); + + // Allocate the repl backlog + } /* Parse the flags string description 'strflags' and set them to the @@ -6637,6 +6652,7 @@ static void sigShutdownHandler(int sig) { if (g_pserver->shutdown_asap && sig == SIGINT) { serverLogFromHandler(LL_WARNING, "You insist... exiting now."); rdbRemoveTempFile(g_pserver->rdbThreadVars.tmpfileNum, 1); + g_pserver->garbageCollector.shutdown(); exit(1); /* Exit with an error since this was not a clean shutdown. */ } else if (g_pserver->loading) { serverLogFromHandler(LL_WARNING, "Received shutdown signal during loading, exiting now."); @@ -6818,6 +6834,7 @@ void loadDataFromDisk(void) { serverLog(LL_NOTICE, "Loading the RDB even though we have a storage provider because the database is empty"); } + serverTL->gcEpoch = g_pserver->garbageCollector.startEpoch(); if (g_pserver->aof_state == AOF_ON) { if (loadAppendOnlyFile(g_pserver->aof_filename) == C_OK) serverLog(LL_NOTICE,"DB loaded from append only file: %.3f seconds",(float)(ustime()-start)/1000000); @@ -6863,6 +6880,8 @@ void loadDataFromDisk(void) { exit(1); } } + g_pserver->garbageCollector.endEpoch(serverTL->gcEpoch); + serverTL->gcEpoch.reset(); } void redisOutOfMemoryHandler(size_t allocation_size) { @@ -7137,13 +7156,6 @@ void *workerThreadMain(void *parg) serverAssert(!GlobalLocksAcquired()); aeDeleteEventLoop(el); - aeAcquireLock(); - for (int idb = 0; idb < cserver.dbnum; ++idb) { - if (g_pserver->rgthreadvar[iel].rgdbSnapshot[idb] != nullptr) - g_pserver->db[idb]->endSnapshot(g_pserver->rgthreadvar[iel].rgdbSnapshot[idb]); - } - aeReleaseLock(); - return NULL; } @@ -7463,7 +7475,13 @@ int main(int argc, char **argv) { } InitServerLast(); - loadDataFromDisk(); + + try { + loadDataFromDisk(); + } catch (ShutdownException) { + exit(EXIT_SUCCESS); + } + if (g_pserver->cluster_enabled) { if (verifyClusterConfigWithData() == C_ERR) { serverLog(LL_WARNING, @@ -7519,6 +7537,11 @@ int main(int argc, char **argv) { serverAssert(cserver.cthreads > 0 && cserver.cthreads <= MAX_EVENT_LOOPS); pthread_create(&cserver.time_thread_id, nullptr, timeThreadMain, nullptr); + if (cserver.time_thread_priority) { + struct sched_param time_thread_priority; + time_thread_priority.sched_priority = sched_get_priority_max(SCHED_FIFO); + pthread_setschedparam(cserver.time_thread_id, SCHED_FIFO, &time_thread_priority); + } pthread_attr_t 
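The new time-thread-priority block promotes the time thread to SCHED_FIFO at maximum priority but ignores the return value of pthread_setschedparam(). A hedged sketch of the same call with error reporting, since the request typically fails for unprivileged processes (it usually needs CAP_SYS_NICE or root):

#include <cstdio>
#include <cstring>
#include <pthread.h>
#include <sched.h>

// Raise `tid` to the highest SCHED_FIFO priority; returns false on failure.
bool setRealtimePriority(pthread_t tid)
{
    struct sched_param sp;
    std::memset(&sp, 0, sizeof(sp));
    sp.sched_priority = sched_get_priority_max(SCHED_FIFO);
    int err = pthread_setschedparam(tid, SCHED_FIFO, &sp);
    if (err != 0) {
        std::fprintf(stderr, "pthread_setschedparam: %s\n", std::strerror(err));
        return false;
    }
    return true;
}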
tattr; pthread_attr_init(&tattr); @@ -7560,8 +7583,7 @@ int main(int argc, char **argv) { if (!fLockAcquired) g_fInCrash = true; // We don't actually crash right away, because we want to sync any storage providers for (int idb = 0; idb < cserver.dbnum; ++idb) { - delete g_pserver->db[idb]; - g_pserver->db[idb] = nullptr; + g_pserver->db[idb]->storageProviderDelete(); } // If we couldn't acquire the global lock it means something wasn't shutdown and we'll probably deadlock serverAssert(fLockAcquired); diff --git a/src/server.h b/src/server.h index f83756d95..29441aae5 100644 --- a/src/server.h +++ b/src/server.h @@ -39,6 +39,9 @@ #include "rio.h" #include "atomicvar.h" +#include +#include + #include #include #include @@ -1070,6 +1073,9 @@ class dict_iter : public dict_const_iter { dict *m_dict = nullptr; public: + dict_iter() + : dict_const_iter(nullptr) + {} explicit dict_iter(nullptr_t) : dict_const_iter(nullptr) {} @@ -1134,7 +1140,7 @@ public: void getStats(char *buf, size_t bufsize) { dictGetStats(buf, bufsize, m_pdict); } void getExpireStats(char *buf, size_t bufsize) { m_setexpire->getstats(buf, bufsize); } - bool insert(char *k, robj *o, bool fAssumeNew = false); + bool insert(char *k, robj *o, bool fAssumeNew = false, dict_iter *existing = nullptr); void tryResize(); int incrementallyRehash(); void updateValue(dict_iter itr, robj *val); @@ -1156,14 +1162,17 @@ public: bool FRehashing() const { return dictIsRehashing(m_pdict) || dictIsRehashing(m_pdictTombstone); } void setStorageProvider(StorageCache *pstorage); + void endStorageProvider(); void trackChanges(bool fBulk, size_t sizeHint = 0); + bool FTrackingChanges() const { return !!m_fTrackingChanges; } // Process and commit changes for secondary storage. Note that process and commit are seperated // to allow you to release the global lock before commiting. 
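Replacing the fPendingReplicaWrite flag with FPendingReplicaWrite() derives "is there data left to flush to this replica" from repl_curr_off and repl_end_off instead of maintaining a separate boolean that must be kept in sync at every call site. A minimal model of that change; the struct below is illustrative, not the client object itself:

#include <cassert>

struct ReplicaProgress {
    long long repl_curr_off = -1;  // last offset actually written to the replica
    long long repl_end_off  = -1;  // offset the backlog says it should reach
    bool pendingWrite() const { return repl_curr_off != repl_end_off; }
};

int main()
{
    ReplicaProgress r;
    assert(!r.pendingWrite());   // nothing queued yet
    r.repl_end_off = 100;        // backlog advanced past the replica
    assert(r.pendingWrite());
    r.repl_curr_off = 100;       // flush caught up
    assert(!r.pendingWrite());
    return 0;
}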
To prevent deadlocks you *must* // either release the global lock or keep the same global lock between the two functions as // a second look is kept to ensure writes to secondary storage are ordered bool processChanges(bool fSnapshot); + void processChangesAsync(std::atomic &pendingJobs); void commitChanges(const redisDbPersistentDataSnapshot **psnapshotFree = nullptr); // This should only be used if you look at the key, we do not fixup @@ -1179,10 +1188,12 @@ public: void restoreSnapshot(const redisDbPersistentDataSnapshot *psnapshot); bool FStorageProvider() { return m_spstorage != nullptr; } - bool removeCachedValue(const char *key); + bool removeCachedValue(const char *key, dictEntry **ppde = nullptr); void removeAllCachedValues(); + void disableKeyCache(); + bool keycacheIsEnabled(); - void prefetchKeysAsync(client *c, struct parsed_command &command); + bool prefetchKeysAsync(client *c, struct parsed_command &command, bool fExecOK); bool FSnapshot() const { return m_spdbSnapshotHOLDER != nullptr; } @@ -1297,6 +1308,7 @@ struct redisDb : public redisDbPersistentDataSnapshot void initialize(int id); void storageProviderInitialize(); + void storageProviderDelete(); virtual ~redisDb(); void dbOverwriteCore(redisDb::iter itr, sds keySds, robj *val, bool fUpdateMvcc, bool fRemoveExpire); @@ -1330,17 +1342,21 @@ struct redisDb : public redisDbPersistentDataSnapshot using redisDbPersistentData::setExpire; using redisDbPersistentData::trackChanges; using redisDbPersistentData::processChanges; + using redisDbPersistentData::processChangesAsync; using redisDbPersistentData::commitChanges; using redisDbPersistentData::setexpireUnsafe; using redisDbPersistentData::setexpire; using redisDbPersistentData::endSnapshot; using redisDbPersistentData::restoreSnapshot; using redisDbPersistentData::removeAllCachedValues; + using redisDbPersistentData::disableKeyCache; + using redisDbPersistentData::keycacheIsEnabled; using redisDbPersistentData::dictUnsafeKeyOnly; using redisDbPersistentData::resortExpire; using redisDbPersistentData::prefetchKeysAsync; using redisDbPersistentData::prepOverwriteForSnapshot; using redisDbPersistentData::FRehashing; + using redisDbPersistentData::FTrackingChanges; public: const redisDbPersistentDataSnapshot *createSnapshot(uint64_t mvccCheckpoint, bool fOptional) { @@ -1607,7 +1623,6 @@ struct client { * when sending data to this replica. */ long long repl_end_off = -1; /* Replication offset to write to, stored in the replica, as opposed to using the global offset * to prevent needing the global lock */ - int fPendingReplicaWrite; /* Is there a write queued for this replica? */ char replid[CONFIG_RUN_ID_SIZE+1]; /* Master replication ID (if master). */ int slave_listening_port; /* As configured with: REPLCONF listening-port */ @@ -1670,6 +1685,10 @@ struct client { robj **argv; size_t argv_len_sumActive = 0; + bool FPendingReplicaWrite() const { + return repl_curr_off != repl_end_off; + } + // post a function from a non-client thread to run on its client thread bool postFunction(std::function fn, bool fLock = true); size_t argv_len_sum() const; @@ -2011,7 +2030,7 @@ public: // Per-thread variabels that may be accessed without a lock struct redisServerThreadVars { - aeEventLoop *el; + aeEventLoop *el = nullptr; socketFds ipfd; /* TCP socket file descriptors */ socketFds tlsfd; /* TLS socket file descriptors */ int in_eval; /* Are we inside EVAL? 
*/ @@ -2020,6 +2039,7 @@ struct redisServerThreadVars { list *unblocked_clients; /* list of clients to unblock before next loop NOT THREADSAFE */ list *clients_pending_asyncwrite; int cclients; + int cclientsReplica = 0; client *current_client; /* Current client */ long fixed_time_expire = 0; /* If > 0, expire keys against server.mstime. */ client *lua_client = nullptr; /* The "fake client" to query Redis from Lua */ @@ -2139,6 +2159,8 @@ struct redisServerConst { int storage_memory_model = STORAGE_WRITETHROUGH; char *storage_conf = nullptr; int fForkBgSave = false; + int time_thread_priority = false; + long long repl_backlog_disk_size = 0; }; struct redisServer { @@ -2215,6 +2237,8 @@ struct redisServer { int active_expire_enabled; /* Can be disabled for testing purposes. */ + int replicaIsolationFactor = 1; + /* Fields used only for stats */ long long stat_numcommands; /* Number of processed commands */ long long stat_numconnections; /* Number of connections received */ @@ -2312,6 +2336,7 @@ struct redisServer { sds aof_child_diff; /* AOF diff accumulator child side. */ int aof_rewrite_pending = 0; /* is a call to aofChildWriteDiffData already queued? */ /* RDB persistence */ + int allowRdbResizeOp; /* Debug situations we may want rehash to be ocurring, so ignore resize */ long long dirty; /* Changes to DB from the last save */ long long dirty_before_bgsave; /* Used to restore dirty on failed BGSAVE */ struct _rdbThreadVars @@ -2376,6 +2401,7 @@ struct redisServer { int replicaseldb; /* Last SELECTed DB in replication output */ int repl_ping_slave_period; /* Master pings the replica every N seconds */ char *repl_backlog; /* Replication backlog for partial syncs */ + char *repl_backlog_disk = nullptr; long long repl_backlog_size; /* Backlog circular buffer size */ long long repl_backlog_config_size; /* The repl backlog may grow but we want to know what the user set it to */ long long repl_backlog_histlen; /* Backlog actual data length */ @@ -3343,7 +3369,7 @@ int objectSetLRUOrLFU(robj *val, long long lfu_freq, long long lru_idle, #define LOOKUP_NONOTIFY (1<<1) #define LOOKUP_UPDATEMVCC (1<<2) void dbAdd(redisDb *db, robj *key, robj *val); -void dbOverwrite(redisDb *db, robj *key, robj *val, bool fRemoveExpire = false); +void dbOverwrite(redisDb *db, robj *key, robj *val, bool fRemoveExpire = false, dict_iter *pitrExisting = nullptr); int dbMerge(redisDb *db, sds key, robj *val, int fReplace); void genericSetKey(client *c, redisDb *db, robj *key, robj *val, int keepttl, int signal); void setKey(client *c, redisDb *db, robj *key, robj *val); diff --git a/tests/integration/rdb.tcl b/tests/integration/rdb.tcl index efd57b2c8..0e804c7d9 100644 --- a/tests/integration/rdb.tcl +++ b/tests/integration/rdb.tcl @@ -202,6 +202,18 @@ test {client freed during loading} { } } +test {repeated load} { + start_server [list overrides [list server-threads 3 allow-rdb-resize-op no]] { + r debug populate 500000 key 1000 + + set digest [r debug digest] + for {set j 0} {$j < 10} {incr j} { + r debug reload + assert_equal $digest [r debug digest] + } + } +} + # Our COW metrics (Private_Dirty) work only on Linux set system_name [string tolower [exec uname -s]] if {$system_name eq {linux}} { diff --git a/tests/integration/replication-2.tcl b/tests/integration/replication-2.tcl index 9524f3563..a87953879 100644 --- a/tests/integration/replication-2.tcl +++ b/tests/integration/replication-2.tcl @@ -88,5 +88,28 @@ start_server {tags {"repl"}} { } assert_equal [r debug digest] [r -1 debug digest] } + + test {REPL 
Backlog handles large value} { + # initialize bigval to 64-bytes + r flushall + r config set repl-backlog-size 1K + r config set client-output-buffer-limit "replica 1024 1024 0" + set bigval "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + for {set i 0} { $i < 20 } { incr i } { + append bigval $bigval + } + r set bigkey $bigval + # We expect the replication to be disconnected so wait a bit + wait_for_condition 50 100 { + [s -1 master_link_status] eq {down} + } else { + fail "Memory limit exceeded but not detected" + } + wait_for_condition 50 100 { + [r debug digest] eq [r -1 debug digest] + } else { + fail "Replica did not reconnect" + } + } } } diff --git a/tests/unit/introspection.tcl b/tests/unit/introspection.tcl index 753cc6e60..45dbd9838 100644 --- a/tests/unit/introspection.tcl +++ b/tests/unit/introspection.tcl @@ -44,6 +44,11 @@ start_server {tags {"introspection"}} { set e } {ERR*} + test {replica-weighting-factor does not accept values less than 1} { + catch {r config set replica-weighting-factor 0} e + set e + } {ERR*} + test {CLIENT SETNAME can assign a name to this connection} { assert_equal [r client setname myname] {OK} r client list @@ -109,6 +114,7 @@ start_server {tags {"introspection"}} { server_cpulist bio_cpulist aof_rewrite_cpulist + time-thread-priority bgsave_cpulist storage-cache-mode storage-provider-options @@ -117,6 +123,7 @@ start_server {tags {"introspection"}} { active-replica bind set-proc-title + repl-backlog-disk-reserve } if {!$::tls} {