
/*
 * Copyright (C) 2017 - 2018 Intel Corporation.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 * 1. Redistributions of source code must retain the above copyright notice(s),
 *    this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice(s),
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
 * EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "common.h"
#include "random_sizes_allocator.h"
#include "proc_stat.h"
#include "allocator_perf_tool/GTestAdapter.hpp"
#include "allocator_perf_tool/Allocation_info.hpp"

#include <memkind.h>

#include <algorithm>
#include <condition_variable>
#include <functional>
#include <mutex>
#include <random>
#include <thread>
#include <vector>

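// Wraps a RandomSizesAllocator: each call to work() either allocates or frees a
// random-sized block (a malloc is chosen with malloc_probability, or always when
// nothing is allocated yet) and keeps a running sum of the requested bytes.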
class Worker
{
public:
    Worker(RandomSizesAllocator &&allocator, double malloc_probability)
        : allocator(std::move(allocator)),
          malloc_probability(malloc_probability) {}

    void work()
    {
        if (allocator.empty() || get_random_bool(malloc_probability)) {
            requested_memory_sum += allocator.malloc_random_memory();
        } else {
            requested_memory_sum -= allocator.free_random_memory();
        }
    }

    size_t get_requested_memory_sum_bytes() const
    {
        return requested_memory_sum;
    }

private:
    bool get_random_bool(double probability)
    {
        std::bernoulli_distribution distribution(probability);
        return distribution(generator);
    }

    std::default_random_engine generator;
    RandomSizesAllocator allocator;
    size_t requested_memory_sum = 0;
    double malloc_probability;
};

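// Tracks the memory-footprint overhead of the process (via ProcStat) relative to
// the memory explicitly requested by the worker. The virtual-memory overhead of a
// sample is the growth of the virtual memory size minus the requested bytes; the
// physical overhead is the growth of the physical memory size. Per-operation
// averages and maxima are reported through GTestAdapter::RecordProperty() in log_data().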
class MemoryFootprintStats
{
public:
    void reset()
    {
        std::lock_guard<std::mutex> lk(sample_guard);
        initial_virtual_memory = proc_stat.get_virtual_memory_size_bytes();
        initial_physical_memory = proc_stat.get_physical_memory_size_bytes();
        vm_overhead_sum = 0;
        current_vm_overhead = 0;
        max_vm_overhead = 0;

        current_phys_overhead = 0;
        phys_overhead_sum = 0;
        max_phys_overhead = 0;

        requested_memory = 0;
        sample_count = 0;
    }

    void sample(long long requested_memory_bytes)
    {
        std::lock_guard<std::mutex> lk(sample_guard);
        current_vm = proc_stat.get_virtual_memory_size_bytes();
        current_phys = proc_stat.get_physical_memory_size_bytes();

        sample_count++;

        requested_memory = requested_memory_bytes;

        current_vm_overhead = current_vm - initial_virtual_memory - requested_memory;
        vm_overhead_sum += current_vm_overhead;
        max_vm_overhead = std::max(max_vm_overhead, current_vm_overhead);

        current_phys_overhead = current_phys - initial_physical_memory;
        phys_overhead_sum += current_phys_overhead;
        max_phys_overhead = std::max(max_phys_overhead, current_phys_overhead);
    }

    void log_data() const
    {
        std::lock_guard<std::mutex> lk(sample_guard);
        GTestAdapter::RecordProperty("avg_vm_overhead_per_operation_mb",
                                     convert_bytes_to_mb(vm_overhead_sum) / sample_count);
        GTestAdapter::RecordProperty("avg_vm_overhead_growth_per_operation_mb",
                                     convert_bytes_to_mb(current_vm_overhead) / sample_count);
        GTestAdapter::RecordProperty("max_vm_overhead_mb",
                                     convert_bytes_to_mb(max_vm_overhead));
        GTestAdapter::RecordProperty("avg_phys_overhead_per_operation_mb",
                                     convert_bytes_to_mb(phys_overhead_sum) / sample_count);
        GTestAdapter::RecordProperty("max_phys_overhead_mb",
                                     convert_bytes_to_mb(max_phys_overhead));
        GTestAdapter::RecordProperty("overhead_to_requested_memory_ratio_percent",
                                     100.f * current_vm_overhead / requested_memory);
        GTestAdapter::RecordProperty("requested_memory_mb",
                                     convert_bytes_to_mb(requested_memory));
    }

private:
    long long initial_virtual_memory;
    long long vm_overhead_sum = 0;
    long long current_vm_overhead = 0;
    long long max_vm_overhead = 0;

    long long initial_physical_memory;
    long long current_phys_overhead = 0;
    long long phys_overhead_sum = 0;
    long long max_phys_overhead = 0;

    long long requested_memory = 0;
    long long sample_count = 0;

    long long current_vm;
    long long current_phys;

    ProcStat proc_stat;
    mutable std::mutex sample_guard;
};

/* Executes func n_calls times across n_threads threads.
 * The execution is multithreaded, but the func calls are ordered sequentially.
 * func takes the current thread id and the operation id as arguments and must
 * return the id of the next thread to run; thread ids are in the range [0, n_threads-1].
 * init_thread_id specifies the initial thread id.
 */
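/* A minimal usage sketch (hypothetical call, not part of the tests below):
 * ten calls passed round-robin between two threads, starting at thread 0:
 *
 *   run_multithreaded_seq_exec(2, 0, 10,
 *       [](unsigned tid, unsigned id) { return (tid + 1) % 2; });
 */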
void run_multithreaded_seq_exec(unsigned n_threads, unsigned init_thread_id,
                                unsigned n_calls, std::function<unsigned(unsigned, unsigned)> func)
{
    std::vector<std::thread> threads;
    std::mutex mutex;
    std::condition_variable turns_holder;
    unsigned current_call = 0;
    unsigned current_tid = init_thread_id;

    threads.reserve(n_threads);

    mutex.lock();

    for (unsigned tid = 0; tid < n_threads; ++tid) {
        threads.emplace_back([&, tid]() {
            while (current_call < n_calls) {
                std::unique_lock<std::mutex> lk(mutex);
                turns_holder.wait(lk, [&, tid] {
                    return current_tid == tid || current_call == n_calls;
                });
                if (current_call == n_calls) {
                    return;
                }
                current_tid = func(tid, current_call);
                ASSERT_LT(current_tid, n_threads) << "Incorrect thread id!";
                current_call++;
                lk.unlock();
                turns_holder.notify_all();
            }
        });
    }

    mutex.unlock();

    for (auto &thread : threads) {
        thread.join();
    }
}

/*
 * Creates threads and measures the cost of maintaining allocations made from those threads.
 * The allocation order is sequential (otherwise the results might be highly nondeterministic).
 */
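/* Parameter sketch (inferred from the call sites below, not authoritative):
 * min_size and max_size bound each random allocation in bytes, malloc_probability
 * is the chance that an operation is a malloc rather than a free, and n_calls is
 * the total number of operations spread round-robin over n_threads threads.
 */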
void run_test(memkind_t kind, size_t min_size, size_t max_size,
              unsigned n_threads, double malloc_probability=1.0, unsigned n_calls=1000)
{
    Worker worker(RandomSizesAllocator(kind, min_size, max_size, n_calls),
                  malloc_probability);

    MemoryFootprintStats mem_footprint_stats;

    auto func = [&](unsigned tid, unsigned id) -> unsigned {
        if (id == 0) {
            mem_footprint_stats.reset();
        }

        worker.work();
        mem_footprint_stats.sample(worker.get_requested_memory_sum_bytes());

        return (tid + 1) % n_threads; // next thread id
    };
    run_multithreaded_seq_exec(n_threads, 0, n_calls, func);
    mem_footprint_stats.log_data();
}

class MemoryFootprintTest: public ::testing::Test
{};

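// The tests below exercise three allocation-size classes (small: 128 B - 15 KB,
// medium: 16 KB - 1 MB, large: 2 MB - 10 MB) for MEMKIND_DEFAULT and MEMKIND_HBW,
// with 1 or 10 threads, in malloc-only and 80% malloc / 20% free variants.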
TEST_F(MemoryFootprintTest,
       test_TC_MEMKIND_DEFAULT_only_malloc_small_allocations_1_thread)
{
    run_test(MEMKIND_DEFAULT, 128, 15 * KB, 1);
}

TEST_F(MemoryFootprintTest,
       test_TC_MEMKIND_DEFAULT_only_malloc_small_allocations_10_thread)
{
    run_test(MEMKIND_DEFAULT, 128, 15 * KB, 10);
}

TEST_F(MemoryFootprintTest,
       test_TC_MEMKIND_DEFAULT_only_malloc_medium_allocations_1_thread)
{
    run_test(MEMKIND_DEFAULT, 16 * KB, 1 * MB, 1);
}

TEST_F(MemoryFootprintTest,
       test_TC_MEMKIND_DEFAULT_only_malloc_medium_allocations_10_thread)
{
    run_test(MEMKIND_DEFAULT, 16 * KB, 1 * MB, 10);
}

TEST_F(MemoryFootprintTest,
       test_TC_MEMKIND_DEFAULT_only_malloc_large_allocations_1_thread)
{
    run_test(MEMKIND_DEFAULT, 2 * MB, 10 * MB, 1, 1.0, 100);
}

TEST_F(MemoryFootprintTest,
       test_TC_MEMKIND_DEFAULT_only_malloc_large_allocations_10_thread)
{
    run_test(MEMKIND_DEFAULT, 2 * MB, 10 * MB, 10, 1.0, 100);
}

TEST_F(MemoryFootprintTest,
       test_TC_MEMKIND_DEFAULT_random_malloc80_free20_random_small_allocations_1_thread)
{
    run_test(MEMKIND_DEFAULT, 128, 15 * KB, 1, 0.8);
}

TEST_F(MemoryFootprintTest,
       test_TC_MEMKIND_DEFAULT_random_malloc80_free20_random_small_allocations_10_thread)
{
    run_test(MEMKIND_DEFAULT, 128, 15 * KB, 10, 0.8);
}

TEST_F(MemoryFootprintTest,
       test_TC_MEMKIND_DEFAULT_random_malloc80_free20_random_medium_allocations_1_thread)
{
    run_test(MEMKIND_DEFAULT, 16 * KB, 1 * MB, 1, 0.8);
}

TEST_F(MemoryFootprintTest,
       test_TC_MEMKIND_DEFAULT_random_malloc80_free20_random_large_allocations_10_thread)
{
    run_test(MEMKIND_DEFAULT, 2 * MB, 10 * MB, 10, 0.8, 100);
}

TEST_F(MemoryFootprintTest,
       test_TC_MEMKIND_HBW_only_malloc_small_allocations_1_thread)
{
    run_test(MEMKIND_HBW, 128, 15 * KB, 1);
}

TEST_F(MemoryFootprintTest,
       test_TC_MEMKIND_HBW_only_malloc_small_allocations_10_thread)
{
    run_test(MEMKIND_HBW, 128, 15 * KB, 10);
}

TEST_F(MemoryFootprintTest,
       test_TC_MEMKIND_HBW_only_malloc_medium_allocations_1_thread)
{
    run_test(MEMKIND_HBW, 16 * KB, 1 * MB, 1);
}

TEST_F(MemoryFootprintTest,
       test_TC_MEMKIND_HBW_only_malloc_medium_allocations_10_thread)
{
    run_test(MEMKIND_HBW, 16 * KB, 1 * MB, 10);
}

TEST_F(MemoryFootprintTest,
       test_TC_MEMKIND_HBW_only_malloc_large_allocations_1_thread)
{
    run_test(MEMKIND_HBW, 2 * MB, 10 * MB, 1, 1.0, 100);
}

TEST_F(MemoryFootprintTest,
       test_TC_MEMKIND_HBW_only_malloc_large_allocations_10_thread)
{
    run_test(MEMKIND_HBW, 2 * MB, 10 * MB, 10, 1.0, 100);
}

TEST_F(MemoryFootprintTest,
       test_TC_MEMKIND_HBW_random_malloc80_free20_random_small_allocations_1_thread)
{
    run_test(MEMKIND_HBW, 128, 15 * KB, 1, 0.8);
}

TEST_F(MemoryFootprintTest,
       test_TC_MEMKIND_HBW_random_malloc80_free20_random_small_allocations_10_thread)
{
    run_test(MEMKIND_HBW, 128, 15 * KB, 10, 0.8);
}

TEST_F(MemoryFootprintTest,
       test_TC_MEMKIND_HBW_random_malloc80_free20_random_medium_allocations_1_thread)
{
    run_test(MEMKIND_HBW, 16 * KB, 1 * MB, 1, 0.8);
}

TEST_F(MemoryFootprintTest,
       test_TC_MEMKIND_HBW_random_malloc80_free20_random_medium_allocations_10_thread)
{
    run_test(MEMKIND_HBW, 16 * KB, 1 * MB, 10, 0.8);
}

TEST_F(MemoryFootprintTest,
       test_TC_MEMKIND_HBW_random_malloc80_free20_random_large_allocations_1_thread)
{
    run_test(MEMKIND_HBW, 2 * MB, 10 * MB, 1, 0.8, 100);
}

TEST_F(MemoryFootprintTest,
       test_TC_MEMKIND_HBW_random_malloc80_free20_random_large_allocations_10_thread)
{
    run_test(MEMKIND_HBW, 2 * MB, 10 * MB, 10, 0.8, 100);
}