Merge pull request #219 from Snapchat/emit-non-empty-shard-less-replica-issue

Add a metric emit for non-empty primaries with fewer than 2 connected replicas
This commit is contained in:
Zhaozhen Liang 2023-08-22 11:34:09 -06:00 committed by GitHub Enterprise
commit c97f696ea2

View File

@ -67,6 +67,8 @@ public:
/* constants */ /* constants */
static time_t c_infoUpdateSeconds = 10; static time_t c_infoUpdateSeconds = 10;
// In the current Redis Cluster setup the replication factor is configured as 2: with 3 zones in each region, every non-empty master node should have 2 replicas.
static const int EXPECTED_NUMBER_OF_REPLICAS = 2;
StatsdClientWrapper *g_stats = nullptr; StatsdClientWrapper *g_stats = nullptr;
std::string m_strPrefix { "keydb" }; std::string m_strPrefix { "keydb" };
@ -544,6 +546,34 @@ void emit_system_free_memory() {
} }
} }
/*
 * Increments the "lessThanExpectedReplicas_error" counter when this node is a
 * non-empty primary whose ROLE reply lists fewer than
 * EXPECTED_NUMBER_OF_REPLICAS replicas.
 *
 * ctx  - module context used to issue the ROLE command.
 * keys - number of keys held by this node; nothing is emitted when it is <= 0.
 *
 * If the ROLE call fails or its reply does not have the expected shape
 * (array whose first element is the role string and, for a primary, whose
 * third element is the array of replicas), the function returns without
 * emitting anything. The reply is always released before returning.
 */
void emit_metrics_for_insufficient_replicas(struct RedisModuleCtx *ctx, long long keys) {
    // Empty nodes are not expected to have replicas; skip them.
    if (keys <= 0) {
        return;
    }

    RedisModuleCallReply *reply = RedisModule_Call(ctx, "ROLE", "");
    // RedisModule_Call returns NULL on failure; there is nothing to inspect
    // and nothing to free in that case.
    // NOTE(review): older servers appear to dereference the pointer passed to
    // RedisModule_FreeCallReply, so NULL must never reach it - confirm.
    if (!reply) {
        return;
    }
    if (RedisModule_CallReplyType(reply) != REDISMODULE_REPLY_ARRAY) {
        RedisModule_FreeCallReply(reply);
        return;
    }

    RedisModuleCallReply *roleReply = RedisModule_CallReplyArrayElement(reply, 0);
    if (RedisModule_CallReplyType(roleReply) != REDISMODULE_REPLY_STRING) {
        RedisModule_FreeCallReply(reply);
        return;
    }

    size_t len = 0;
    const char *role = RedisModule_CallReplyStringPtr(roleReply, &len);

    // The reply string is not NUL-terminated, so compare length and bytes
    // explicitly. A bare strncmp(role, "master", len) would also accept an
    // empty string or any prefix of "master".
    static const char primaryRole[] = "master";
    if (role && len == sizeof(primaryRole) - 1 && memcmp(role, primaryRole, len) == 0) {
        // For a primary, the third element of the ROLE reply is the list of replicas.
        RedisModuleCallReply *replicasReply = RedisModule_CallReplyArrayElement(reply, 2);
        // Only count replicas when the element exists and really is an array.
        if (RedisModule_CallReplyType(replicasReply) == REDISMODULE_REPLY_ARRAY &&
            RedisModule_CallReplyLength(replicasReply) < (size_t)EXPECTED_NUMBER_OF_REPLICAS) {
            g_stats->increment("lessThanExpectedReplicas_error", 1);
        }
    }

    // Nested elements belong to the top-level reply; freeing it releases them all.
    RedisModule_FreeCallReply(reply);
}
void event_cron_handler(struct RedisModuleCtx *ctx, RedisModuleEvent eid, uint64_t subevent, void *data) { void event_cron_handler(struct RedisModuleCtx *ctx, RedisModuleEvent eid, uint64_t subevent, void *data) {
static time_t lastTime = 0; static time_t lastTime = 0;
time_t curTime = time(nullptr); time_t curTime = time(nullptr);
@ -619,11 +649,16 @@ void event_cron_handler(struct RedisModuleCtx *ctx, RedisModuleEvent eid, uint64
g_stats->timing("emit_free_system_memory_time_taken_us", ustime() - commandStartTime); g_stats->timing("emit_free_system_memory_time_taken_us", ustime() - commandStartTime);
/* Log Keys */ /* Log Keys */
commandStartTime = ustime();
reply = RedisModule_Call(ctx, "dbsize", ""); reply = RedisModule_Call(ctx, "dbsize", "");
long long keys = RedisModule_CallReplyInteger(reply); long long keys = RedisModule_CallReplyInteger(reply);
RedisModule_FreeCallReply(reply); RedisModule_FreeCallReply(reply);
g_stats->gauge("keys", keys); g_stats->gauge("keys", keys);
RedisModule_Log(ctx, REDISMODULE_LOGLEVEL_DEBUG, "Emitting metric \"keys\": %llu", keys); RedisModule_Log(ctx, REDISMODULE_LOGLEVEL_DEBUG, "Emitting metric \"keys\": %llu", keys);
g_stats->timing("emit_keys_metric_time_taken_us", ustime() - commandStartTime);
emit_metrics_for_insufficient_replicas(ctx, keys);
g_stats->timing("metrics_time_taken_us", ustime() - startTime); g_stats->timing("metrics_time_taken_us", ustime() - startTime);
lastTime = curTime; lastTime = curTime;