Skip to content

Commit ace783a

Browse files
msotheeswaran-sc authored and GitHub Enterprise committed
1 parent 7d4f461 commit ace783a

19 files changed

Lines changed: 497 additions & 205 deletions

machamp_scripts/build.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22

33
# make the build
44
git submodule init && git submodule update
5-
make BUILD_TLS=yes -j$(nproc) KEYDB_CFLAGS='-Werror' KEYDB_CXXFLAGS='-Werror'
5+
make BUILD_TLS=yes ENABLE_FLASH=yes -j$(nproc) KEYDB_CFLAGS='-Werror' KEYDB_CXXFLAGS='-Werror'
66

77
# gen-cert
88
./utils/gen-test-certs.sh

src/IStorage.h

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
#pragma once
22
#include <functional>
33
#include "sds.h"
4+
#include <string>
45

56
#define METADATA_DB_IDENTIFIER "c299fde0-6d42-4ec4-b939-34f680ffe39f"
67

@@ -43,6 +44,11 @@ class IStorage
4344
endWriteBatch();
4445
}
4546

47+
virtual std::vector<std::string> getExpirationCandidates(unsigned int count) = 0;
48+
virtual std::vector<std::string> getEvictionCandidates(unsigned int count) = 0;
49+
virtual void setExpire(const char *key, size_t cchKey, long long expire) = 0;
50+
virtual void removeExpire(const char *key, size_t cchKey, long long expire) = 0;
51+
4652
virtual void beginWriteBatch() {} // NOP
4753
virtual void endWriteBatch() {} // NOP
4854

src/StorageCache.cpp

Lines changed: 31 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -84,19 +84,31 @@ void StorageCache::cacheKey(const char *rgch, size_t cch)
8484

8585
bool StorageCache::erase(sds key)
8686
{
87+
unsigned long long when = 0;
88+
m_spstorage->retrieve(key, sdslen(key), [&when](const char *, size_t, const void * data, size_t cbdata) {
89+
auto e = deserializeExpire((const char *)data, cbdata, nullptr);
90+
if (e != nullptr)
91+
when = e->when();
92+
});
8793
bool result = m_spstorage->erase(key, sdslen(key));
8894
std::unique_lock<fastlock> ul(m_lock);
89-
if (result && m_pdict != nullptr)
95+
if (result)
9096
{
91-
uint64_t hash = dictSdsHash(key);
92-
dictEntry *de = dictFind(m_pdict, reinterpret_cast<void*>(hash));
93-
serverAssert(de != nullptr);
94-
de->v.s64--;
95-
serverAssert(de->v.s64 >= 0);
96-
if (de->v.s64 == 0) {
97-
dictDelete(m_pdict, reinterpret_cast<void*>(hash));
98-
} else {
99-
m_collisionCount--;
97+
if (m_pdict != nullptr)
98+
{
99+
uint64_t hash = dictSdsHash(key);
100+
dictEntry *de = dictFind(m_pdict, reinterpret_cast<void*>(hash));
101+
serverAssert(de != nullptr);
102+
de->v.s64--;
103+
serverAssert(de->v.s64 >= 0);
104+
if (de->v.s64 == 0) {
105+
dictDelete(m_pdict, reinterpret_cast<void*>(hash));
106+
} else {
107+
m_collisionCount--;
108+
}
109+
}
110+
if (when != 0) {
111+
m_spstorage->removeExpire(key, sdslen(key), when);
100112
}
101113
}
102114
return result;
@@ -111,6 +123,9 @@ void StorageCache::insert(sds key, const void *data, size_t cbdata, bool fOverwr
111123
}
112124
ul.unlock();
113125
m_spstorage->insert(key, sdslen(key), (void*)data, cbdata, fOverwrite);
126+
auto e = deserializeExpire((const char *)data, cbdata, nullptr);
127+
if (e != nullptr)
128+
m_spstorage->setExpire(key, sdslen(key), e->when());
114129
}
115130

116131
long _dictKeyIndex(dict *d, const void *key, uint64_t hash, dictEntry **existing);
@@ -119,13 +134,18 @@ void StorageCache::bulkInsert(char **rgkeys, size_t *rgcbkeys, char **rgvals, si
119134
std::vector<dictEntry*> vechashes;
120135
if (m_pdict != nullptr) {
121136
vechashes.reserve(celem);
137+
}
122138

123-
for (size_t ielem = 0; ielem < celem; ++ielem) {
139+
for (size_t ielem = 0; ielem < celem; ++ielem) {
140+
if (m_pdict != nullptr) {
124141
dictEntry *de = (dictEntry*)zmalloc(sizeof(dictEntry));
125142
de->key = (void*)dictGenHashFunction(rgkeys[ielem], (int)rgcbkeys[ielem]);
126143
de->v.u64 = 1;
127144
vechashes.push_back(de);
128145
}
146+
auto e = deserializeExpire(rgvals[ielem], rgcbvals[ielem], nullptr);
147+
if (e != nullptr)
148+
m_spstorage->setExpire(rgkeys[ielem], rgcbkeys[ielem], e->when());
129149
}
130150

131151
std::unique_lock<fastlock> ul(m_lock);

src/StorageCache.h

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -51,11 +51,18 @@ class StorageCache
5151
bool enumerate(IStorage::callback fn) const { return m_spstorage->enumerate(fn); }
5252
bool enumerate_hashslot(IStorage::callback fn, unsigned int hashslot) const { return m_spstorage->enumerate_hashslot(fn, hashslot); }
5353

54+
std::vector<std::string> getExpirationCandidates(unsigned int count) { return m_spstorage->getExpirationCandidates(count); }
55+
std::vector<std::string> getEvictionCandidates(unsigned int count) { return m_spstorage->getEvictionCandidates(count); }
56+
void setExpire(const char *key, size_t cchKey, long long expire) { m_spstorage->setExpire(key, cchKey, expire); }
57+
void removeExpire(const char *key, size_t cchKey, long long expire) { m_spstorage->removeExpire(key, cchKey, expire); }
58+
5459
void beginWriteBatch();
5560
void endWriteBatch() { m_spstorage->endWriteBatch(); }
5661
void batch_lock() { return m_spstorage->batch_lock(); }
5762
void batch_unlock() { return m_spstorage->batch_unlock(); }
5863

64+
void flush() { m_spstorage->flush(); }
65+
5966
size_t count() const;
6067

6168
const StorageCache *clone();

src/config.cpp

Lines changed: 14 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2597,6 +2597,19 @@ static int updateMaxmemory(long long val, long long prev, const char **err) {
25972597
return 1;
25982598
}
25992599

2600+
static int updateFlashMaxmemory(long long val, long long prev, const char **err) {
2601+
UNUSED(prev);
2602+
UNUSED(err);
2603+
if (val && g_pserver->m_pstorageFactory) {
2604+
size_t used = g_pserver->m_pstorageFactory->totalDiskspaceUsed();
2605+
if ((unsigned long long)val < used) {
2606+
serverLog(LL_WARNING,"WARNING: the new maxstorage value set via CONFIG SET (%llu) is smaller than the current storage usage (%zu). This will result in key eviction and/or the inability to accept new write commands depending on the maxmemory-policy.", g_pserver->maxstorage, used);
2607+
}
2608+
performEvictions(false /*fPreSnapshot*/);
2609+
}
2610+
return 1;
2611+
}
2612+
26002613
static int updateGoodSlaves(long long val, long long prev, const char **err) {
26012614
UNUSED(val);
26022615
UNUSED(prev);
@@ -2940,7 +2953,7 @@ standardConfig configs[] = {
29402953

29412954
/* Unsigned Long Long configs */
29422955
createULongLongConfig("maxmemory", NULL, MODIFIABLE_CONFIG, 0, LLONG_MAX, g_pserver->maxmemory, 0, MEMORY_CONFIG, NULL, updateMaxmemory),
2943-
createULongLongConfig("maxstorage", NULL, MODIFIABLE_CONFIG, 0, LLONG_MAX, g_pserver->maxstorage, 0, MEMORY_CONFIG, NULL, NULL),
2956+
createULongLongConfig("maxstorage", NULL, MODIFIABLE_CONFIG, 0, LLONG_MAX, g_pserver->maxstorage, 0, MEMORY_CONFIG, NULL, updateFlashMaxmemory),
29442957

29452958
/* Size_t configs */
29462959
createSizeTConfig("hash-max-ziplist-entries", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, g_pserver->hash_max_ziplist_entries, 512, INTEGER_CONFIG, NULL, NULL),

src/db.cpp

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -2875,10 +2875,10 @@ void redisDbPersistentData::ensure(const char *sdsKey, dictEntry **pde)
28752875
{
28762876
dictAdd(m_pdict, sdsNewKey, o);
28772877

2878+
o->SetFExpires(spexpire != nullptr);
28782879
if (spexpire != nullptr) {
28792880
o->expire = std::move(*spexpire);
28802881
}
2881-
o->SetFExpires(spexpire != nullptr);
28822882
g_pserver->stat_storage_provider_read_hits++;
28832883
} else {
28842884
sdsfree(sdsNewKey);
@@ -3249,8 +3249,8 @@ std::unique_ptr<expireEntry> deserializeExpire(const char *str, size_t cch, size
32493249
if (subkey)
32503250
sdsfree(subkey);
32513251
}
3252-
3253-
*poffset = offset;
3252+
if (poffset != nullptr)
3253+
*poffset = offset;
32543254
return spexpire;
32553255
}
32563256

src/debug.cpp

Lines changed: 16 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -308,7 +308,7 @@ void computeDatasetDigest(unsigned char *final) {
308308
mixDigest(final,&aux,sizeof(aux));
309309

310310
/* Iterate this DB writing every entry */
311-
db->iterate_threadsafe([final, db](const char *key, robj_roptr o)->bool {
311+
db->iterate_threadsafe([final](const char *key, robj_roptr o)->bool {
312312
unsigned char digest[20];
313313
robj *keyobj;
314314

@@ -932,6 +932,21 @@ NULL
932932
mallctl_string(c, c->argv+2, c->argc-2);
933933
return;
934934
#endif
935+
} else if(!strcasecmp(szFromObj(c->argv[1]),"flush-storage") && c->argc == 2) {
936+
if (g_pserver->m_pstorageFactory != nullptr) {
937+
for (int i = 0; i < cserver.dbnum; i++) {
938+
g_pserver->db[i]->getStorageCache()->flush();
939+
}
940+
addReply(c,shared.ok);
941+
} else {
942+
addReplyError(c, "Can't flush storage if no storage provider is set");
943+
}
944+
} else if (!strcasecmp(szFromObj(c->argv[1]),"get-storage-usage") && c->argc == 2) {
945+
if (g_pserver->m_pstorageFactory != nullptr) {
946+
addReplyLongLong(c, g_pserver->m_pstorageFactory->totalDiskspaceUsed());
947+
} else {
948+
addReplyLongLong(c, 0);
949+
}
935950
} else {
936951
addReplySubcommandSyntaxError(c);
937952
return;

src/evict.cpp

Lines changed: 96 additions & 44 deletions
Original file line number | Diff line number | Diff line change
@@ -100,6 +100,36 @@ unsigned long long estimateObjectIdleTime(robj_roptr o) {
100100
}
101101
}
102102

103+
unsigned long long getIdle(robj *obj, const expireEntry *e) {
104+
unsigned long long idle;
105+
/* Calculate the idle time according to the policy. This is called
106+
* idle just because the code initially handled LRU, but is in fact
107+
* just a score where an higher score means better candidate. */
108+
if (g_pserver->maxmemory_policy & MAXMEMORY_FLAG_LRU) {
109+
idle = (obj != nullptr) ? estimateObjectIdleTime(obj) : 0;
110+
} else if (g_pserver->maxmemory_policy & MAXMEMORY_FLAG_LFU) {
111+
/* When we use an LRU policy, we sort the keys by idle time
112+
* so that we expire keys starting from greater idle time.
113+
* However when the policy is an LFU one, we have a frequency
114+
* estimation, and we want to evict keys with lower frequency
115+
* first. So inside the pool we put objects using the inverted
116+
* frequency subtracting the actual frequency to the maximum
117+
* frequency of 255. */
118+
idle = 255-LFUDecrAndReturn(obj);
119+
} else if (g_pserver->maxmemory_policy == MAXMEMORY_VOLATILE_TTL) {
120+
/* In this case the sooner the expire the better. */
121+
if (e != nullptr)
122+
idle = ULLONG_MAX - e->when();
123+
else
124+
idle = 0;
125+
} else if (g_pserver->maxmemory_policy & MAXMEMORY_FLAG_ALLKEYS) {
126+
idle = ULLONG_MAX;
127+
} else {
128+
serverPanic("Unknown eviction policy in storage eviction");
129+
}
130+
return idle;
131+
}
132+
103133
/* LRU approximation algorithm
104134
*
105135
* Redis uses an approximation of the LRU algorithm that runs in constant
@@ -137,28 +167,7 @@ void evictionPoolAlloc(void) {
137167

138168
void processEvictionCandidate(int dbid, sds key, robj *o, const expireEntry *e, struct evictionPoolEntry *pool)
139169
{
140-
unsigned long long idle;
141-
142-
/* Calculate the idle time according to the policy. This is called
143-
* idle just because the code initially handled LRU, but is in fact
144-
* just a score where an higher score means better candidate. */
145-
if (g_pserver->maxmemory_policy & MAXMEMORY_FLAG_LRU) {
146-
idle = (o != nullptr) ? estimateObjectIdleTime(o) : 0;
147-
} else if (g_pserver->maxmemory_policy & MAXMEMORY_FLAG_LFU) {
148-
/* When we use an LRU policy, we sort the keys by idle time
149-
* so that we expire keys starting from greater idle time.
150-
* However when the policy is an LFU one, we have a frequency
151-
* estimation, and we want to evict keys with lower frequency
152-
* first. So inside the pool we put objects using the inverted
153-
* frequency subtracting the actual frequency to the maximum
154-
* frequency of 255. */
155-
idle = 255-LFUDecrAndReturn(o);
156-
} else if (g_pserver->maxmemory_policy == MAXMEMORY_VOLATILE_TTL) {
157-
/* In this case the sooner the expire the better. */
158-
idle = ULLONG_MAX - e->when();
159-
} else {
160-
serverPanic("Unknown eviction policy in evictionPoolPopulate()");
161-
}
170+
unsigned long long idle = getIdle(o,e);
162171

163172
/* Insert the element inside the pool.
164173
* First, find the first empty bucket or the first populated
@@ -600,6 +609,31 @@ static unsigned long evictionTimeLimitUs() {
600609
return ULONG_MAX; /* No limit to eviction time */
601610
}
602611

612+
void evict(redisDb *db, robj *keyobj) {
613+
mstime_t eviction_latency;
614+
propagateExpire(db,keyobj,g_pserver->lazyfree_lazy_eviction);
615+
/* We compute the amount of memory freed by db*Delete() alone.
616+
* It is possible that actually the memory needed to propagate
617+
* the DEL in AOF and replication link is greater than the one
618+
* we are freeing removing the key, but we can't account for
619+
* that otherwise we would never exit the loop.
620+
*
621+
* AOF and Output buffer memory will be freed eventually so
622+
* we only care about memory used by the key space. */
623+
latencyStartMonitor(eviction_latency);
624+
if (g_pserver->lazyfree_lazy_eviction)
625+
dbAsyncDelete(db,keyobj);
626+
else
627+
dbSyncDelete(db,keyobj);
628+
latencyEndMonitor(eviction_latency);
629+
latencyAddSampleIfNeeded("eviction-del",eviction_latency);
630+
631+
signalModifiedKey(NULL,db,keyobj);
632+
notifyKeyspaceEvent(NOTIFY_EVICTED, "evicted",
633+
keyobj, db->id);
634+
decrRefCount(keyobj);
635+
}
636+
603637
static void updateSysAvailableMemory() {
604638
if (g_pserver->force_eviction_percent) {
605639
g_pserver->cron_malloc_stats.sys_available = getMemAvailable();
@@ -637,7 +671,7 @@ int performEvictions(bool fPreSnapshot) {
637671
int keys_freed = 0;
638672
size_t mem_reported, mem_tofree;
639673
long long mem_freed; /* May be negative */
640-
mstime_t latency, eviction_latency;
674+
mstime_t latency;
641675
long long delta;
642676
int slaves = listLength(g_pserver->slaves);
643677
const bool fEvictToStorage = !cserver.delete_on_evict && g_pserver->db[0]->FStorageProvider();
@@ -662,6 +696,43 @@ int performEvictions(bool fPreSnapshot) {
662696
monotime evictionTimer;
663697
elapsedStart(&evictionTimer);
664698

699+
if (g_pserver->maxstorage && g_pserver->m_pstorageFactory != nullptr) {
700+
while (g_pserver->m_pstorageFactory->totalDiskspaceUsed() >= g_pserver->maxstorage && elapsedUs(evictionTimer) < eviction_time_limit_us) {
701+
redisDb *db;
702+
std::vector<std::string> evictionPool;
703+
robj *bestkey = nullptr;
704+
redisDb *bestdb = nullptr;
705+
unsigned long long bestidle = 0;
706+
for (int i = 0; i < cserver.dbnum; i++) {
707+
db = g_pserver->db[i];
708+
evictionPool = db->getStorageCache()->getEvictionCandidates(g_pserver->maxmemory_samples);
709+
for (std::string key : evictionPool) {
710+
robj *keyobj = createStringObject(key.c_str(), key.size());
711+
robj *obj = db->find(szFromObj(keyobj));
712+
if (obj != nullptr) {
713+
expireEntry *e = db->getExpire(keyobj);
714+
unsigned long long idle = getIdle(obj, e);
715+
716+
if (bestkey == nullptr || bestidle < idle) {
717+
if (bestkey != nullptr)
718+
decrRefCount(bestkey);
719+
incrRefCount(keyobj);
720+
bestkey = keyobj;
721+
bestidle = idle;
722+
bestdb = db;
723+
}
724+
}
725+
decrRefCount(keyobj);
726+
}
727+
}
728+
if (bestkey) {
729+
evict(bestdb, bestkey);
730+
} else {
731+
break; //could not find a key to evict so stop now
732+
}
733+
}
734+
}
735+
665736
if (g_pserver->maxstorage && g_pserver->m_pstorageFactory != nullptr && g_pserver->m_pstorageFactory->totalDiskspaceUsed() >= g_pserver->maxstorage)
666737
goto cant_free_storage;
667738

@@ -776,7 +847,7 @@ int performEvictions(bool fPreSnapshot) {
776847
if (db->removeCachedValue(bestkey, &deT)) {
777848
mem_freed += splazy->addEntry(db->dictUnsafeKeyOnly(), deT);
778849
ckeysFailed = 0;
779-
g_pserver->stat_evictedkeys++;
850+
g_pserver->stat_evictedkeys++;
780851
}
781852
else {
782853
delta = 0;
@@ -788,30 +859,11 @@ int performEvictions(bool fPreSnapshot) {
788859
else
789860
{
790861
robj *keyobj = createStringObject(bestkey,sdslen(bestkey));
791-
propagateExpire(db,keyobj,g_pserver->lazyfree_lazy_eviction);
792-
/* We compute the amount of memory freed by db*Delete() alone.
793-
* It is possible that actually the memory needed to propagate
794-
* the DEL in AOF and replication link is greater than the one
795-
* we are freeing removing the key, but we can't account for
796-
* that otherwise we would never exit the loop.
797-
*
798-
* AOF and Output buffer memory will be freed eventually so
799-
* we only care about memory used by the key space. */
800862
delta = (long long) zmalloc_used_memory();
801-
latencyStartMonitor(eviction_latency);
802-
if (g_pserver->lazyfree_lazy_eviction)
803-
dbAsyncDelete(db,keyobj);
804-
else
805-
dbSyncDelete(db,keyobj);
806-
latencyEndMonitor(eviction_latency);
807-
latencyAddSampleIfNeeded("eviction-del",eviction_latency);
863+
evict(db, keyobj);
808864
delta -= (long long) zmalloc_used_memory();
809865
mem_freed += delta;
810866
g_pserver->stat_evictedkeys++;
811-
signalModifiedKey(NULL,db,keyobj);
812-
notifyKeyspaceEvent(NOTIFY_EVICTED, "evicted",
813-
keyobj, db->id);
814-
decrRefCount(keyobj);
815867
}
816868
keys_freed++;
817869

0 commit comments

Comments
 (0)