
git-subtree-dir: deps/memkind/src git-subtree-split: bb9f19dd1b3ed6cc5e1b35919564ccf6f4b32f69
358 lines
12 KiB
C++
358 lines
12 KiB
C++
/*
|
|
* Copyright (C) 2014 - 2018 Intel Corporation.
|
|
* All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions are met:
|
|
* 1. Redistributions of source code must retain the above copyright notice(s),
|
|
* this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright notice(s),
|
|
* this list of conditions and the following disclaimer in the documentation
|
|
* and/or other materials provided with the distribution.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY EXPRESS
|
|
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
|
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
|
* EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
|
* OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
|
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
#include <algorithm> // sort
|
|
#include <math.h> // log2
|
|
#include <cassert>
|
|
#include <iostream>
|
|
#include "framework.hpp"
|
|
|
|
namespace performance_tests
|
|
{
|
|
namespace ch = std::chrono;
|
|
using std::cout;
|
|
using std::endl;
|
|
using std::unique_lock;
|
|
using std::mutex;
|
|
|
|
|
|
#ifdef __DEBUG
// Globals that exist only in builds where __DEBUG is defined.
// NOTE(review): presumably consumed by the EMIT macro from framework.hpp
// (a mutex for serialized output and a verbosity threshold) - confirm.
mutex g_coutMutex;
int g_msgLevel{1};
#endif
|
|
|
|
void Barrier::wait()
|
|
{
|
|
unique_lock<mutex> lock(m_barrierMutex);
|
|
// Decrement number of threads awaited at the barrier
|
|
m_waiting--;
|
|
if (m_waiting == 0) {
|
|
// Called by the last expected thread - notify all waiting threads and exit
|
|
m_cVar.notify_all();
|
|
// Store the time when barrier was released
|
|
if (m_releasedAt.tv_sec == 0 && m_releasedAt.tv_nsec == 0) {
|
|
clock_gettime(CLOCK_MONOTONIC, &m_releasedAt);
|
|
}
|
|
return;
|
|
}
|
|
// Wait unitl the last expected thread calls wait() on Barrier instance, or timeout occurs
|
|
m_cVar.wait_until(lock, ch::system_clock::now() + ch::seconds(10), []() {
|
|
return GetInstance().m_waiting == 0;
|
|
});
|
|
}
|
|
|
|
// Worker class
|
|
// Builds a worker with room for actionsCount actions.
//
// actionsCount    - number of action slots; every slot starts as nullptr
// allocationSizes - sizes stored for later use by init()
// freeOperation   - only inspected by the assertion below: it must be the
//                   Free operation
// kind            - memory kind stored for the actions created in init()
Worker::Worker(
    uint32_t actionsCount,
    const vector<size_t> &allocationSizes,
    Operation *freeOperation,
    memkind_t kind)
    : m_actionsCount(actionsCount),
      m_allocationSizes(allocationSizes),
      m_actions(actionsCount, nullptr),
      m_kind(kind)
{
    assert(freeOperation->getName() == OperationName::Free);
}
|
|
|
|
// Releases every Action object held in m_actions.
Worker::~Worker()
{
    for (size_t slot = 0; slot < m_actions.size(); ++slot) {
        // Deleting a nullptr slot is a no-op, so unset slots are fine.
        delete m_actions[slot];
    }
}
|
|
|
|
void Worker::init(const vector<Operation *> &testOperations,
|
|
Operation *&freeOperation)
|
|
{
|
|
for(uint32_t i = 0 ; i < m_actionsCount ; i++) {
|
|
int bucketSize = rand() % Operation::MaxBucketSize;
|
|
|
|
for (Operation *operation : testOperations) { //each operation
|
|
if (operation->checkCondition(bucketSize)) {
|
|
size_t size = m_allocationSizes[m_allocationSizes.size() > 1 ? rand() %
|
|
m_allocationSizes.size() : 0];
|
|
m_actions[i] = new Action(
|
|
operation,
|
|
freeOperation,
|
|
m_kind,
|
|
size,
|
|
log2(rand() % size),
|
|
sizeof(void *) * (1 << ((rand() % Operation::MemalignMaxMultiplier))));
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
void Worker::run()
|
|
{
|
|
m_thread = new thread(&Worker::work, this);
|
|
}
|
|
|
|
#ifdef __DEBUG
// Returns the thread identifier stored in this worker.
uint16_t Worker::getId()
{
    const uint16_t id = m_threadId;
    return id;
}

// Stores threadId as the identifier of this worker.
void Worker::setId(uint16_t threadId)
{
    this->m_threadId = threadId;
}
#endif
|
|
|
|
void Worker::finish()
|
|
{
|
|
if (m_thread != nullptr) {
|
|
m_thread->join();
|
|
delete m_thread;
|
|
}
|
|
}
|
|
|
|
void Worker::work()
|
|
{
|
|
EMIT(1, "Entering barrier " << m_threadId)
|
|
Barrier::GetInstance().wait();
|
|
EMIT(1, "Starting thread " << m_threadId)
|
|
for (Action *action : m_actions) {
|
|
action->alloc();
|
|
}
|
|
}
|
|
|
|
void Worker::clean()
|
|
{
|
|
EMIT(2, "Cleaning thread " << m_threadId)
|
|
for (Action *action : m_actions) {
|
|
action->free();
|
|
}
|
|
EMIT(1, "Thread " << m_threadId << " finished")
|
|
}
|
|
|
|
// PerformanceTest class
|
|
// Stores the test dimensions.
//
// repeatsCount    - number of test repeats; also the base of the discard
//                   count, computed as repeatsCount * distardPercent / 100
// threadsCount    - number of worker threads
// operationsCount - number of operations performed by each thread
//
// The execution mode starts as ExecutionMode::SingleInteration.
PerformanceTest::PerformanceTest(
    size_t repeatsCount,
    size_t threadsCount,
    size_t operationsCount)
    : m_repeatsCount(repeatsCount),
      m_discardCount(repeatsCount * (distardPercent / 100.0)),
      m_threadsCount(threadsCount),
      m_operationsCount(operationsCount),
      m_executionMode(ExecutionMode::SingleInteration)
{
}
|
|
|
|
void PerformanceTest::setAllocationSizes(const vector<size_t> &allocationSizes)
|
|
{
|
|
m_allocationSizes = allocationSizes;
|
|
}
|
|
|
|
void PerformanceTest::setOperations(const vector<vector<Operation *>>
|
|
&testOperations, Operation *freeOperation)
|
|
{
|
|
m_testOperations = testOperations;
|
|
m_freeOperation = freeOperation;
|
|
}
|
|
|
|
// Selects the execution mode used by the test.
void PerformanceTest::setExecutionMode(ExecutionMode executionMode)
{
    this->m_executionMode = executionMode;
}
|
|
|
|
void PerformanceTest::setKind(const vector<memkind_t> &kinds)
|
|
{
|
|
m_kinds = kinds;
|
|
}
|
|
|
|
inline void PerformanceTest::runIteration()
|
|
{
|
|
timespec iterationStop, iterationStart;
|
|
|
|
Barrier::GetInstance().reset(m_threadsCount);
|
|
for (Worker *worker : m_workers) {
|
|
worker->run();
|
|
}
|
|
for (Worker *worker : m_workers) {
|
|
worker->finish();
|
|
}
|
|
EMIT(1, "Alloc completed");
|
|
clock_gettime(CLOCK_MONOTONIC, &iterationStop);
|
|
iterationStart = Barrier::GetInstance().releasedAt();
|
|
m_durations.push_back(
|
|
(iterationStop.tv_sec * NanoSecInSec + iterationStop.tv_nsec) -
|
|
(iterationStart.tv_sec * NanoSecInSec + iterationStart.tv_nsec)
|
|
);
|
|
for (Worker *worker : m_workers) {
|
|
worker->clean();
|
|
}
|
|
}
|
|
|
|
void PerformanceTest::prepareWorkers()
|
|
{
|
|
for (size_t threadId = 0; threadId < m_threadsCount; threadId++) {
|
|
m_workers.push_back(
|
|
new Worker(
|
|
m_operationsCount,
|
|
m_allocationSizes,
|
|
m_freeOperation,
|
|
m_kinds.size() > 0 ? m_kinds[threadId % m_kinds.size()] : nullptr)
|
|
);
|
|
#ifdef __DEBUG
|
|
m_workers.back()->setId(threadId);
|
|
#endif
|
|
if (m_executionMode == ExecutionMode::SingleInteration) {
|
|
// In ManyIterations mode, operations will be set for each thread at the beginning of each iteration
|
|
m_workers.back()->init(m_testOperations[threadId % m_testOperations.size()],
|
|
m_freeOperation);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Computes the summary metrics from the recorded iteration durations.
//
// The durations are sorted ascending and the m_discardCount largest values
// are removed before the remaining ones are summed up.
// NOTE: this modifies m_durations (sorting and trimming), so every call
// discards again.
//
// Returns a Metrics object with the number of executed operations, the total
// duration in nanoseconds, the repeat and iteration durations in seconds, the
// operations per second and the average operation duration in nanoseconds.
Metrics PerformanceTest::getMetrics()
{
    uint64_t totalDuration = 0;

    std::sort(m_durations.begin(), m_durations.end());

    // Never try to discard more entries than were recorded: stepping an
    // iterator back past begin() is undefined behaviour.
    const size_t discardCount = std::min<size_t>(m_discardCount,
                                                 m_durations.size());
    m_durations.erase(m_durations.end() - discardCount, m_durations.end());
    for (uint64_t &duration : m_durations) {
        totalDuration += duration;
    }

    Metrics metrics;

    metrics.executedOperations = m_durations.size() * m_threadsCount *
                                 m_operationsCount;
    metrics.totalDuration = totalDuration;
    metrics.repeatDuration = static_cast<double>(totalDuration) /
                             (static_cast<uint64_t>(m_durations.size()) * NanoSecInSec);
    metrics.iterationDuration = metrics.repeatDuration;
    if (m_executionMode == ExecutionMode::ManyIterations) {
        metrics.executedOperations *= m_testOperations.size();
        metrics.iterationDuration /= m_testOperations.size();
    }
    metrics.operationsPerSecond = static_cast<double>(metrics.executedOperations) *
                                  NanoSecInSec / totalDuration;
    metrics.avgOperationDuration = static_cast<double>(totalDuration) /
                                   metrics.executedOperations;
    assert(metrics.iterationDuration != 0.0);
    return metrics;
}
|
|
|
|
void PerformanceTest::writeMetrics(const string &suiteName,
|
|
const string &caseName, const string &fileName)
|
|
{
|
|
Metrics metrics = getMetrics();
|
|
|
|
// For thousands separation
|
|
setlocale(LC_ALL, "");
|
|
if (!fileName.empty()) {
|
|
FILE *f;
|
|
if((f = fopen(fileName.c_str(), "a+"))) {
|
|
fprintf(f,
|
|
"%s;%s;%zu;%zu;%lu;%f;%f;%f;%f\n",
|
|
suiteName.c_str(),
|
|
caseName.c_str(),
|
|
m_repeatsCount,
|
|
m_threadsCount,
|
|
metrics.executedOperations,
|
|
metrics.operationsPerSecond,
|
|
metrics.avgOperationDuration,
|
|
metrics.iterationDuration,
|
|
metrics.repeatDuration);
|
|
fclose(f);
|
|
}
|
|
|
|
}
|
|
printf("Operations/sec:\t\t\t%'f\n"
|
|
"Avg. operation duration:\t%f nsec\n"
|
|
"Iteration duration:\t\t%f sec\n"
|
|
"Repeat duration:\t\t%f sec\n",
|
|
metrics.operationsPerSecond,
|
|
metrics.avgOperationDuration,
|
|
metrics.iterationDuration,
|
|
metrics.repeatDuration);
|
|
}
|
|
|
|
int PerformanceTest::run()
|
|
{
|
|
if (m_testOperations.empty() ||
|
|
m_allocationSizes.empty() ||
|
|
m_freeOperation == nullptr) {
|
|
cout << "ERROR: Test not initialized" << endl;
|
|
return 1;
|
|
}
|
|
// Create threads
|
|
prepareWorkers();
|
|
//warmup kinds
|
|
void *alloc = nullptr;
|
|
|
|
for (const memkind_t &kind : m_kinds) {
|
|
m_testOperations[0][0]->perform(kind, alloc, 1e6);
|
|
m_freeOperation->perform(kind, alloc);
|
|
}
|
|
for (size_t repeat = 0; repeat < m_repeatsCount; repeat++) {
|
|
EMIT(1, "Test run #" << repeat)
|
|
if (m_executionMode == ExecutionMode::SingleInteration) {
|
|
runIteration();
|
|
} else {
|
|
// Perform each operations list in separate iteration, for each thread
|
|
for (vector<Operation *> &ops : m_testOperations) {
|
|
for (Worker *worker : m_workers) {
|
|
worker->init(ops, m_freeOperation);
|
|
}
|
|
runIteration();
|
|
}
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
void PerformanceTest::showInfo()
|
|
{
|
|
printf("Test parameters: %lu repeats, %lu threads, %d operations per thread\n",
|
|
m_repeatsCount,
|
|
m_threadsCount,
|
|
m_operationsCount);
|
|
printf("Thread memory allocation operations:\n");
|
|
for (unsigned long i = 0; i < m_testOperations.size(); i++) {
|
|
if (m_executionMode == ExecutionMode::SingleInteration) {
|
|
printf("\tThread %lu,%lu,...\n", i, i + (m_testOperations.size()));
|
|
} else {
|
|
printf("\tIteration %lu\n", i);
|
|
}
|
|
for (const Operation *op : m_testOperations[i]) {
|
|
printf("\t\t %s (bucket size: %d)\n", op->getNameStr().c_str(),
|
|
op->getBucketSize());
|
|
}
|
|
}
|
|
printf("Memory free operation:\n\t\t%s\n",
|
|
m_freeOperation->getNameStr().c_str());
|
|
printf("Allocation sizes:\n");
|
|
for (size_t size : m_allocationSizes) {
|
|
printf("\t\t%lu bytes\n", size);
|
|
}
|
|
}
|
|
}
|