2019-01-29 18:10:46 -05:00
# include "server.h"
# include <stdlib.h>
# include <stdio.h>
# include <memkind.h>
# include <sys/ioctl.h>
# include <linux/fs.h>
# include <unistd.h>
# include <inttypes.h>
2019-02-04 16:56:13 -05:00
# include <fcntl.h>
2019-01-29 18:10:46 -05:00
# include "storage.h"
// Memory kind that backs MALLOC_SHARED allocations. NULL until storage_init
// runs; storage_init sets it to MEMKIND_DEFAULT or to the kind produced by
// memkind_create_pmem.
struct memkind * mkdisk = NULL ;
2019-02-01 13:54:59 -05:00
// Copy of the scratch directory path handed to storage_init; forkFile passes
// it to memkind_tmpfile. Stays NULL when no path was configured.
static const char * PMEM_DIR = NULL ;
2019-01-29 18:10:46 -05:00
2019-02-09 13:05:06 -05:00
// Local prototype; kindFromPtr treats a nonzero result as "pv belongs to kind".
// NOTE(review): presumably declared here because memkind.h does not expose it - confirm.
int memkind_pmem_iskind ( struct memkind * kind , const void * pv ) ;
2019-01-30 01:15:10 -05:00
// Fork hooks; storage_init registers these three with pthread_atfork.
void handle_prefork ( ) ;
void handle_postfork_parent ( ) ;
void handle_postfork_child ( ) ;
2019-01-29 18:10:46 -05:00
// Number of object slots held by a single object_page (also the number of
// bits in its allocation bitmap).
# define OBJECT_PAGE_BUFFER_SIZE 8192 //(size in objs)
2019-01-30 01:15:10 -05:00
// Number of slots tracked by each uint64_t word of object_page.allocmap.
# define OBJ_PAGE_BITS_PER_WORD 64
2019-01-29 18:10:46 -05:00
// One slab of fixed-size object slots together with its occupancy bitmap.
// Pages of the same pool are chained through pnext.
struct object_page
{
    // One bit per slot; a set bit means the slot is currently handed out.
    uint64_t allocmap[OBJECT_PAGE_BUFFER_SIZE / (sizeof(uint64_t) * 8)];
    // Next page in the chain, or NULL for the last page.
    struct object_page *pnext;
    // Slot storage: OBJECT_PAGE_BUFFER_SIZE slots of the owning pool's size.
    char rgb[];
};
2019-01-30 01:15:10 -05:00
// A pool of equally sized objects backed by a chain of object pages.
struct alloc_pool
{
    // Size in bytes of every slot (pool_initialize rounds it up to a multiple of 8).
    unsigned cbObject;
    // First page of the chain; further pages hang off pnext.
    struct object_page *pobjpageHead;
};
// Allocate one zero-filled page able to hold OBJECT_PAGE_BUFFER_SIZE objects
// of cbObject bytes each. The page comes from the MALLOC_SHARED class.
// Returns whatever scalloc returns (no failure handling is done here).
struct object_page *pool_allocate_page(int cbObject)
{
    size_t cbSlots = (size_t)cbObject * OBJECT_PAGE_BUFFER_SIZE;
    size_t cbTotal = cbSlots + sizeof(struct object_page);
    return scalloc(cbTotal, 1, MALLOC_SHARED);
}
void pool_initialize ( struct alloc_pool * ppool , int cbObject )
{
if ( ( cbObject % 8 ) ! = 0 )
2019-01-29 18:10:46 -05:00
{
2019-01-30 01:15:10 -05:00
cbObject + = 8 - ( cbObject % 8 ) ;
2019-01-29 18:10:46 -05:00
}
2019-01-30 01:15:10 -05:00
ppool - > cbObject = cbObject ;
ppool - > pobjpageHead = pool_allocate_page ( cbObject ) ;
2019-01-29 18:10:46 -05:00
}
2019-01-30 01:15:10 -05:00
static int IdxAllocObject ( struct object_page * page )
2019-01-29 18:10:46 -05:00
{
for ( size_t iword = 0 ; iword < OBJ_PAGE_BITS_PER_WORD ; + + iword )
{
if ( ( page - > allocmap [ iword ] + 1 ) ! = 0 )
{
int ibit = 0 ;
uint64_t bitword = page - > allocmap [ iword ] ;
while ( bitword & 1 )
{
bitword > > = 1 ;
+ + ibit ;
}
2019-01-30 01:15:10 -05:00
page - > allocmap [ iword ] | = 1ULL < < ibit ;
2019-01-29 18:10:46 -05:00
return ( iword * OBJ_PAGE_BITS_PER_WORD ) + ibit ;
}
}
return - 1 ;
}
2019-01-30 01:15:10 -05:00
void * pool_alloc ( struct alloc_pool * ppool )
2019-01-29 18:10:46 -05:00
{
2019-01-30 01:15:10 -05:00
struct object_page * cur = ppool - > pobjpageHead ;
2019-01-29 18:10:46 -05:00
for ( ; ; )
{
int idx = IdxAllocObject ( cur ) ;
if ( idx > = 0 )
{
2019-01-30 01:15:10 -05:00
return cur - > rgb + ( ( ( size_t ) ppool - > cbObject ) * idx ) ;
2019-01-29 18:10:46 -05:00
}
if ( cur - > pnext = = NULL )
{
2019-01-30 01:15:10 -05:00
cur - > pnext = pool_allocate_page ( ppool - > cbObject ) ;
2019-01-29 18:10:46 -05:00
}
cur = cur - > pnext ;
}
}
2019-01-30 01:15:10 -05:00
void pool_free ( struct alloc_pool * ppool , void * pv )
2019-01-29 18:10:46 -05:00
{
2019-01-30 01:15:10 -05:00
struct object_page * cur = ppool - > pobjpageHead ;
char * obj = pv ;
for ( ; cur ! = NULL ; )
2019-01-29 18:10:46 -05:00
{
2019-01-30 01:15:10 -05:00
if ( obj > = cur - > rgb & & ( obj < ( cur - > rgb + ( OBJECT_PAGE_BUFFER_SIZE * ppool - > cbObject ) ) ) )
2019-01-29 18:10:46 -05:00
{
// Its on this page
2019-01-30 01:15:10 -05:00
int idx = ( obj - cur - > rgb ) / ppool - > cbObject ;
cur - > allocmap [ idx / OBJ_PAGE_BITS_PER_WORD ] & = ~ ( 1ULL < < ( idx % OBJ_PAGE_BITS_PER_WORD ) ) ;
return ;
2019-01-29 18:10:46 -05:00
}
cur = cur - > pnext ;
}
2019-01-30 01:15:10 -05:00
serverLog ( LOG_CRIT , " obj not from pool " ) ;
sfree ( obj ) ; // we don't know where it came from
2019-01-29 18:10:46 -05:00
return ;
}
2019-01-30 01:15:10 -05:00
// Slot size used for the embedded-string pool: an robj header, an sdshdr8
// header, the largest embedded string payload and one extra byte.
# define EMBSTR_ROBJ_SIZE (sizeof(robj)+sizeof(struct sdshdr8)+OBJ_ENCODING_EMBSTR_SIZE_LIMIT+1)
// Pool serving salloc_obj / sfree_obj (slots of sizeof(robj)).
struct alloc_pool poolobj ;
// Pool serving salloc_objembstr / sfree_objembstr (slots of EMBSTR_ROBJ_SIZE).
struct alloc_pool poolembstrobj ;
2019-02-01 13:54:59 -05:00
int forkFile ( )
2019-01-30 01:15:10 -05:00
{
2019-02-01 13:54:59 -05:00
int fdT ;
memkind_tmpfile ( PMEM_DIR , & fdT ) ;
if ( ioctl ( fdT , FICLONE , memkind_fd ( mkdisk ) ) = = - 1 )
2019-01-30 01:15:10 -05:00
{
2019-02-01 13:54:59 -05:00
return - 1 ;
2019-01-30 01:15:10 -05:00
}
2019-02-01 13:54:59 -05:00
return fdT ;
}
// Initialize the memory subsystem.
// NOTE: This may be called twice: first with NULL, specifying that we should
// use RAM; and later, after the configuration file is loaded, with a path to
// where we should place our temporary file.
2019-02-04 16:56:13 -05:00
void storage_init ( const char * tmpfilePath , size_t cbFileReserve )
2019-02-01 13:54:59 -05:00
{
if ( tmpfilePath = = NULL )
{
serverAssert ( mkdisk = = NULL ) ;
mkdisk = MEMKIND_DEFAULT ;
}
else
{
// First create the file
serverAssert ( mkdisk = = MEMKIND_DEFAULT ) ;
PMEM_DIR = memkind_malloc ( MEMKIND_DEFAULT , strlen ( tmpfilePath ) ) ;
strcpy ( ( char * ) PMEM_DIR , tmpfilePath ) ;
int errv = memkind_create_pmem ( PMEM_DIR , 0 , & mkdisk ) ;
if ( errv = = MEMKIND_ERROR_INVALID )
{
serverLog ( LOG_CRIT , " Memory pool creation failed: %s " , strerror ( errno ) ) ;
exit ( EXIT_FAILURE ) ;
}
else if ( errv )
{
char msgbuf [ 1024 ] ;
memkind_error_message ( errv , msgbuf , 1024 ) ;
serverLog ( LOG_CRIT , " Memory pool creation failed: %s " , msgbuf ) ;
exit ( EXIT_FAILURE ) ;
}
// Next test if COW is working
int fdTest = forkFile ( ) ;
if ( fdTest < 0 )
{
serverLog ( LOG_ERR , " Scratch file system does not support Copy on Write. To fix this scratch-file-path must point to a path on a filesystem which supports copy on write, such as btrfs. " ) ;
exit ( EXIT_FAILURE ) ;
}
close ( fdTest ) ;
2019-01-30 01:15:10 -05:00
2019-02-04 16:56:13 -05:00
// Now lets make the file big
if ( cbFileReserve = = 0 )
cbFileReserve = 1 * 1024 * 1024 * 1024 ; // 1 GB (enough to be interesting)
posix_fallocate64 ( memkind_fd ( mkdisk ) , 0 , cbFileReserve ) ;
2019-02-01 13:54:59 -05:00
pool_initialize ( & poolobj , sizeof ( robj ) ) ;
pool_initialize ( & poolembstrobj , EMBSTR_ROBJ_SIZE ) ;
pthread_atfork ( handle_prefork , handle_postfork_parent , handle_postfork_child ) ;
}
2019-01-30 01:15:10 -05:00
}
// Allocate one object slot from the robj pool.
// Returns whatever pool_alloc returns for poolobj.
struct redisObject *salloc_obj()
{
    void *pvSlot = pool_alloc(&poolobj);
    return pvSlot;
}
// Give an object obtained from salloc_obj back to the robj pool.
void sfree_obj(struct redisObject *obj)
{
    struct alloc_pool *ppool = &poolobj;
    pool_free(ppool, obj);
}
// Allocate one slot from the embedded-string object pool.
// Returns whatever pool_alloc returns for poolembstrobj.
struct redisObject *salloc_objembstr()
{
    void *pvSlot = pool_alloc(&poolembstrobj);
    return pvSlot;
}
// Give an object obtained from salloc_objembstr back to its pool.
void sfree_objembstr(robj *obj)
{
    struct alloc_pool *ppool = &poolembstrobj;
    pool_free(ppool, obj);
}
2019-02-09 13:05:06 -05:00
// Determine which memory kind an existing allocation belongs to.
// Returns mkdisk only when mkdisk is not the default kind and
// memkind_pmem_iskind reports that pv belongs to it; otherwise returns
// MEMKIND_DEFAULT.
static memkind_t kindFromPtr(const void *pv)
{
    if (mkdisk != MEMKIND_DEFAULT && memkind_pmem_iskind(mkdisk, pv))
    {
        return mkdisk;
    }
    return MEMKIND_DEFAULT;
}
2019-02-04 16:56:13 -05:00
size_t salloc_usable_size ( void * ptr )
{
2019-02-09 13:05:06 -05:00
return memkind_malloc_usable_size ( kindFromPtr ( ptr ) , ptr ) ;
2019-02-04 16:56:13 -05:00
}
static memkind_t kindFromClass ( enum MALLOC_CLASS class )
2019-01-29 18:10:46 -05:00
{
switch ( class )
{
case MALLOC_SHARED :
2019-02-04 16:56:13 -05:00
return mkdisk ;
2019-01-29 18:10:46 -05:00
default :
2019-02-04 16:56:13 -05:00
break ;
2019-01-29 18:10:46 -05:00
}
2019-02-04 16:56:13 -05:00
return MEMKIND_DEFAULT ;
}
// Allocate cb bytes from the kind selected by class.
// A request for 0 bytes is served as a 1-byte allocation.
void *salloc(size_t cb, enum MALLOC_CLASS class)
{
    size_t cbAlloc = (cb != 0) ? cb : 1;
    return memkind_malloc(kindFromClass(class), cbAlloc);
}
void * scalloc ( size_t cb , size_t c , enum MALLOC_CLASS class )
{
2019-02-04 16:56:13 -05:00
return memkind_calloc ( kindFromClass ( class ) , cb , c ) ;
2019-01-29 18:10:46 -05:00
}
void sfree ( void * pv )
{
2019-02-09 13:05:06 -05:00
memkind_free ( kindFromPtr ( pv ) , pv ) ;
2019-01-29 18:10:46 -05:00
}
2019-02-04 16:56:13 -05:00
void * srealloc ( void * pv , size_t cb , enum MALLOC_CLASS class )
2019-01-29 18:10:46 -05:00
{
2019-02-04 16:56:13 -05:00
return memkind_realloc ( kindFromClass ( class ) , pv , cb ) ;
2019-01-29 18:10:46 -05:00
}
// Descriptor of the scratch-file clone made by handle_prefork; the post-fork
// handlers consume it. -1 when no clone is pending.
int fdNew = - 1 ;
// Pre-fork hook: clone the scratch file and stash the descriptor in fdNew.
// A failed clone is logged and leaves fdNew negative.
void handle_prefork()
{
    fdNew = forkFile();
    if (fdNew >= 0)
    {
        return;
    }
    serverLog(LOG_ERR, " Failed to clone scratch file ");
}
2019-01-30 01:15:10 -05:00
void handle_postfork_parent ( )
2019-01-29 18:10:46 -05:00
{
2019-01-30 01:15:10 -05:00
// Parent, close fdNew
close ( fdNew ) ;
fdNew = - 1 ;
}
// Post-fork hook for the child: switch mkdisk over to the cloned scratch
// file and close the descriptor of the original file.
//
// When the pre-fork clone failed (fdNew negative) nothing is remapped and
// the original descriptor is left open, so the existing mapping stays
// usable. NOTE(review): in that case the child keeps using the same file as
// the parent - confirm whether the child should abort instead.
void handle_postfork_child()
{
    if (fdNew < 0)
    {
        return;
    }

    int fdOriginal = memkind_fd(mkdisk);
    memkind_pmem_remapfd(mkdisk, fdNew);
    close(fdOriginal);
}