futriix/src/storage.c

268 lines
6.5 KiB
C
Raw Normal View History

#include "server.h"
#include <stdlib.h>
#include <stdio.h>
#include <memkind.h>
#include <sys/ioctl.h>
#include <linux/fs.h>
#include <unistd.h>
#include <inttypes.h>
#include <fcntl.h>
#include "storage.h"
// Kind used for MALLOC_SHARED allocations. NULL until storage_init() runs, then
// MEMKIND_DEFAULT (RAM mode) or the kind created by memkind_create_pmem().
struct memkind *mkdisk = NULL;
// Private copy of the scratch-file path handed to storage_init(); stays NULL
// while running in RAM mode. forkFile() passes it to memkind_tmpfile().
static const char *PMEM_DIR = NULL;
// Declared locally rather than pulled from a header; kindFromPtr() uses it to
// test whether a pointer belongs to `kind`.
// NOTE(review): presumably exported by the bundled memkind build -- confirm.
int memkind_pmem_iskind(struct memkind *kind, const void *pv);
// fork() hooks; storage_init() registers them with pthread_atfork().
void handle_prefork();
void handle_postfork_parent();
void handle_postfork_child();
// Number of object slots held by one object_page.
#define OBJECT_PAGE_BUFFER_SIZE 8192 //(size in objs)
// Number of slots tracked by each uint64_t word of object_page::allocmap.
#define OBJ_PAGE_BITS_PER_WORD 64
// One page of a fixed-size object pool: an allocation bitmap, a link to the
// next page, and the slot storage itself.
struct object_page
{
// One bit per slot (set = in use). 8192 bits / 64 bits per word = 128 words.
uint64_t allocmap[OBJECT_PAGE_BUFFER_SIZE/(8*sizeof(uint64_t))];
// Next page in the pool's singly linked list, or NULL for the last page.
struct object_page *pnext;
// Slot storage; pool_allocate_page() sizes it as cbObject * OBJECT_PAGE_BUFFER_SIZE bytes.
char rgb[];
};
// A pool of equally sized objects backed by a linked list of object_page.
struct alloc_pool
{
// Size in bytes of one slot; pool_initialize() rounds it up to a multiple of 8.
unsigned cbObject;
// First page of the pool; further pages are chained through object_page::pnext.
struct object_page *pobjpageHead;
};
// Allocates one zero-filled pool page able to hold OBJECT_PAGE_BUFFER_SIZE
// objects of `cbObject` bytes each.
//
// The page comes from scalloc() with MALLOC_SHARED, so the bitmap starts out
// with every slot marked free and pnext is NULL. Returns whatever scalloc()
// returns (no failure handling is done here).
struct object_page *pool_allocate_page(int cbObject)
{
    size_t cbSlots = ((size_t)cbObject) * OBJECT_PAGE_BUFFER_SIZE;
    size_t cbPage = cbSlots + sizeof(struct object_page);

    struct object_page *page = scalloc(cbPage, 1, MALLOC_SHARED);
    return page;
}
// Prepares `ppool` to hand out objects of at least `cbObject` bytes.
//
// The object size is rounded up to the next multiple of 8 and the pool's first
// page is allocated immediately.
void pool_initialize(struct alloc_pool *ppool, int cbObject)
{
    int cbExcess = cbObject % 8;
    if (cbExcess != 0)
    {
        // Pad up to the next 8-byte boundary.
        cbObject += 8 - cbExcess;
    }

    ppool->pobjpageHead = pool_allocate_page(cbObject);
    ppool->cbObject = cbObject;
}
// Claims the first free slot on `page` and returns its index.
//
// Scans the allocation bitmap one word at a time, finds the lowest clear bit,
// sets it, and returns the corresponding slot index.
//
// Returns the slot index, or -1 when every slot on the page is in use.
static int IdxAllocObject(struct object_page *page)
{
    // Derive the word count from the bitmap itself. The map has more words than
    // OBJ_PAGE_BITS_PER_WORD, so using that bits-per-word constant as the loop
    // bound left the upper part of every page permanently unused.
    const size_t cword = sizeof(page->allocmap) / sizeof(page->allocmap[0]);

    for (size_t iword = 0; iword < cword; ++iword)
    {
        uint64_t bitword = page->allocmap[iword];
        if (~bitword == 0)
        {
            continue; // every slot tracked by this word is taken
        }

        // Locate the lowest clear bit.
        int ibit = 0;
        while (bitword & 1)
        {
            bitword >>= 1;
            ++ibit;
        }

        page->allocmap[iword] |= 1ULL << ibit;
        return (int)((iword * OBJ_PAGE_BITS_PER_WORD) + ibit);
    }
    return -1;
}
// Returns a pointer to an unused object slot in `ppool`.
//
// Pages are searched in list order; when the last page is full a new page is
// allocated and linked onto the end of the list.
//
// Returns NULL if the pool has no head page or a new page cannot be allocated.
// Previously either case dereferenced a NULL page.
void *pool_alloc(struct alloc_pool *ppool)
{
    struct object_page *cur = ppool->pobjpageHead;
    while (cur != NULL)
    {
        int idx = IdxAllocObject(cur);
        if (idx >= 0)
        {
            return cur->rgb + (((size_t)ppool->cbObject) * (size_t)idx);
        }

        if (cur->pnext == NULL)
        {
            // Grow the pool. If this fails pnext stays NULL and the loop ends.
            cur->pnext = pool_allocate_page(ppool->cbObject);
        }
        cur = cur->pnext;
    }
    return NULL;
}
void pool_free(struct alloc_pool *ppool, void *pv)
{
struct object_page *cur = ppool->pobjpageHead;
char *obj = pv;
for (;cur != NULL;)
{
if (obj >= cur->rgb && (obj < (cur->rgb + (OBJECT_PAGE_BUFFER_SIZE * ppool->cbObject))))
{
// Its on this page
int idx = (obj - cur->rgb) / ppool->cbObject;
cur->allocmap[idx / OBJ_PAGE_BITS_PER_WORD] &= ~(1ULL << (idx % OBJ_PAGE_BITS_PER_WORD));
return;
}
cur = cur->pnext;
}
serverLog(LOG_CRIT, "obj not from pool");
sfree(obj); // we don't know where it came from
return;
}
// Slot size used for poolembstrobj: an robj, an sdshdr8 header, a string of up
// to OBJ_ENCODING_EMBSTR_SIZE_LIMIT bytes, and one extra byte (presumably the
// NUL terminator -- confirm against the embstr encoder).
#define EMBSTR_ROBJ_SIZE (sizeof(robj)+sizeof(struct sdshdr8)+OBJ_ENCODING_EMBSTR_SIZE_LIMIT+1)
// Pool of sizeof(robj) slots; set up by storage_init() when a scratch path is given.
struct alloc_pool poolobj;
// Pool of EMBSTR_ROBJ_SIZE slots; set up alongside poolobj.
struct alloc_pool poolembstrobj;
// Creates a new temporary file in PMEM_DIR and makes it a clone of the file
// backing mkdisk via the FICLONE ioctl.
//
// Returns the new file descriptor (owned by the caller), or -1 if the temporary
// file could not be created or the clone failed. No descriptor is leaked on
// failure.
int forkFile()
{
    // Start from a known-invalid value so a failed memkind_tmpfile() that never
    // writes the out-parameter is detected instead of using garbage.
    int fdT = -1;
    int errTmp = memkind_tmpfile(PMEM_DIR, &fdT);
    if (errTmp != 0 || fdT < 0)
    {
        if (fdT >= 0)
        {
            close(fdT);
        }
        return -1;
    }

    if (ioctl(fdT, FICLONE, memkind_fd(mkdisk)) == -1)
    {
        // The clone failed (e.g. filesystem without reflink support); don't leak the temp file.
        close(fdT);
        return -1;
    }
    return fdT;
}
// Initialize the memory subsystem.
//
// NOTE: This may be called twice: first with NULL, meaning shared allocations
// live in RAM, and again after the configuration file is loaded with the path
// where the scratch file should be placed.
//
// tmpfilePath   - NULL for RAM mode, otherwise the scratch-file location handed
//                 to memkind_create_pmem().
// cbFileReserve - number of bytes to preallocate in the scratch file; 0 selects
//                 a 1 GB default. Ignored in RAM mode.
//
// Any failure to create the pool or to clone the scratch file is fatal: it is
// logged and the process exits. A failure to preallocate is logged only.
void storage_init(const char *tmpfilePath, size_t cbFileReserve)
{
    if (tmpfilePath == NULL)
    {
        serverAssert(mkdisk == NULL);
        mkdisk = MEMKIND_DEFAULT;
        return;
    }

    // File-backed mode is only entered after the RAM-mode initialization.
    serverAssert(mkdisk == MEMKIND_DEFAULT);

    // Keep a private copy of the path. The buffer needs strlen + 1 bytes: the
    // previous strlen-sized buffer let strcpy write the NUL past the end.
    char *dirCopy = memkind_malloc(MEMKIND_DEFAULT, strlen(tmpfilePath) + 1);
    if (dirCopy == NULL)
    {
        serverLog(LOG_CRIT, "Memory pool creation failed: %s", "out of memory copying scratch file path");
        exit(EXIT_FAILURE);
    }
    strcpy(dirCopy, tmpfilePath);
    PMEM_DIR = dirCopy;

    // First create the file
    int errv = memkind_create_pmem(PMEM_DIR, 0, &mkdisk);
    if (errv == MEMKIND_ERROR_INVALID)
    {
        serverLog(LOG_CRIT, "Memory pool creation failed: %s", strerror(errno));
        exit(EXIT_FAILURE);
    }
    else if (errv)
    {
        char msgbuf[1024];
        memkind_error_message(errv, msgbuf, sizeof(msgbuf));
        serverLog(LOG_CRIT, "Memory pool creation failed: %s", msgbuf);
        exit(EXIT_FAILURE);
    }

    // Next test if COW is working
    int fdTest = forkFile();
    if (fdTest < 0)
    {
        serverLog(LOG_ERR, "Scratch file system does not support Copy on Write. To fix this scratch-file-path must point to a path on a filesystem which supports copy on write, such as btrfs.");
        exit(EXIT_FAILURE);
    }
    close(fdTest);

    // Now lets make the file big
    if (cbFileReserve == 0)
    {
        cbFileReserve = (size_t)1024 * 1024 * 1024; // 1 GB (enough to be interesting)
    }
    // posix_fallocate returns an error number directly rather than setting errno.
    // Preallocation stays best-effort, but a failure is no longer silent.
    int errReserve = posix_fallocate64(memkind_fd(mkdisk), 0, cbFileReserve);
    if (errReserve != 0)
    {
        serverLog(LOG_ERR, "Failed to reserve space in scratch file: %s", strerror(errReserve));
    }

    pool_initialize(&poolobj, sizeof(robj));
    pool_initialize(&poolembstrobj, EMBSTR_ROBJ_SIZE);

    // Give each forked child its own clone of the scratch file.
    pthread_atfork(handle_prefork, handle_postfork_parent, handle_postfork_child);
}
// Allocates storage for one robj from poolobj (see pool_alloc()).
// poolobj is only initialized by storage_init() when a scratch path is given.
struct redisObject *salloc_obj()
{
return pool_alloc(&poolobj);
}
// Returns `obj` to poolobj. pool_free() logs and falls back to sfree() if the
// pointer does not belong to any page of that pool.
void sfree_obj(struct redisObject *obj)
{
pool_free(&poolobj, obj);
}
// Allocates an EMBSTR_ROBJ_SIZE slot from poolembstrobj (see pool_alloc()),
// i.e. room for an robj together with its embedded string.
struct redisObject *salloc_objembstr()
{
return pool_alloc(&poolembstrobj);
}
// Returns `obj` to poolembstrobj. pool_free() logs and falls back to sfree() if
// the pointer does not belong to any page of that pool.
void sfree_objembstr(robj *obj)
{
pool_free(&poolembstrobj, obj);
}
// Determines which memkind kind owns the allocation `pv`.
//
// Returns mkdisk only when mkdisk is not the default kind and
// memkind_pmem_iskind() reports that `pv` belongs to it; otherwise returns
// MEMKIND_DEFAULT.
static memkind_t kindFromPtr(const void *pv)
{
    if (mkdisk != MEMKIND_DEFAULT && memkind_pmem_iskind(mkdisk, pv))
    {
        return mkdisk;
    }
    return MEMKIND_DEFAULT;
}
size_t salloc_usable_size(void *ptr)
{
return memkind_malloc_usable_size(kindFromPtr(ptr), ptr);
}
// Maps an allocation class to the memkind kind that serves it:
// MALLOC_SHARED uses mkdisk, every other class uses MEMKIND_DEFAULT.
static memkind_t kindFromClass(enum MALLOC_CLASS class)
{
    if (class == MALLOC_SHARED)
    {
        return mkdisk;
    }
    return MEMKIND_DEFAULT;
}
// Allocates `cb` bytes from the kind selected by `class`.
// A zero-byte request is bumped to one byte before reaching memkind_malloc().
void *salloc(size_t cb, enum MALLOC_CLASS class)
{
    const size_t cbRequest = (cb != 0) ? cb : 1;
    return memkind_malloc(kindFromClass(class), cbRequest);
}
void *scalloc(size_t cb, size_t c, enum MALLOC_CLASS class)
{
return memkind_calloc(kindFromClass(class), cb, c);
}
void sfree(void *pv)
{
memkind_free(kindFromPtr(pv), pv);
}
void *srealloc(void *pv, size_t cb, enum MALLOC_CLASS class)
{
return memkind_realloc(kindFromClass(class), pv, cb);
}
// Descriptor of the scratch-file clone made by handle_prefork(); -1 when there
// is none. Consumed by the post-fork handlers.
int fdNew = -1;
// pthread_atfork "prepare" hook: clones the scratch file before the fork and
// stores the clone's descriptor in fdNew. A failed clone is logged; fdNew then
// holds the negative value returned by forkFile().
void handle_prefork()
{
    fdNew = forkFile();
    if (fdNew >= 0)
    {
        return;
    }
    serverLog(LOG_ERR, "Failed to clone scratch file");
}
// pthread_atfork "parent" hook: the parent has no use for the cloned scratch
// file, so it drops the descriptor created by handle_prefork().
void handle_postfork_parent()
{
    // fdNew is negative when the clone failed. Skip the close in that case:
    // close(-1) fails with EBADF and would overwrite the caller's errno.
    if (fdNew >= 0)
    {
        close(fdNew);
    }
    fdNew = -1;
}
// pthread_atfork "child" hook: switches mkdisk over to the clone made by
// handle_prefork() and closes the descriptor it was using before.
// NOTE(review): fdNew is negative if the clone failed in handle_prefork(); that
// value is still passed to memkind_pmem_remapfd() here -- confirm how it reacts.
void handle_postfork_child()
{
// Remember the current descriptor before the kind is pointed elsewhere.
int fdOriginal = memkind_fd(mkdisk);
// Presumably re-maps the kind's backing store onto fdNew -- confirm against the memkind build in use.
memkind_pmem_remapfd(mkdisk, fdNew);
close(fdOriginal);
}