redis-serverプロセスCPU 100%問題

11286 ワード

結論:redisのバグかどうかを確認する必要がある。プロセスが実際に消費しているメモリは設定した最大メモリ(maxmemory)よりはるかに小さく、メモリ不足によるキーの淘汰(eviction)は本来発生しないはずだからである。
(バグとして確認済み。4.0.11で修正。詳細は以下を参照:https://blog.csdn.net/Aquester/article/details/88718701)

CPU 100%のredis-serverプロセスのクラスタ状態:slave

一時的な解決方法:gdbを使用してd.ht[0].usedの値を0に変更する。

理由:dictGetRandomKey()内の分岐「if (dictSize(d) == 0) return NULL;」に入らない限り、関数dbRandomKey()は無限ループ(デッドループ)から抜けられない。usedを0にするとこの分岐に入り、ループを抜ける。

バージョン:
Redis server v=3.2.0 sha=00000000:0 malloc=jemalloc-4.0.3 bits=64 build=9894db3ef433c070

現象1:CPU 100%
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
25636 redis 20 0 38492 4096 1360 R 100.0 2578:10 redis-server

現象2:大量のCLOSE_WAIT状態の接続:
tcp     2417      0 1.49.26.98:1382       1.49.26.98:37268      CLOSE_WAIT  -
tcp     2521      0 1.49.26.98:11382      1.49.26.98:35141      CLOSE_WAIT  -
tcp     2521      0 1.49.26.98:11382      1.49.26.98:57181      CLOSE_WAIT  -

プロセスステータス:
redis 25636 30.0 0.0 38492 4096 ? Rsl 3月23 2579:55 /data/redis/bin/redis-server *:1382 [cluster]

最大メモリ構成(1G):
maxmemory 1073741824

実行ログ:
25636:S 28 Mar 00:21:24.526 - 1 clients connected (0 slaves), 1312384 bytes in use
25636:S 28 Mar 00:21:29.531 - DB 0: 1 keys (1 volatile) in 8 slots HT.
25636:S 28 Mar 00:21:29.531 - 1 clients connected (0 slaves), 1312384 bytes in use
25636:S 28 Mar 00:21:32.585 - Accepted 1.118.14.7:58132

呼び出しスタック:
#0  dictGenHashFunction (key=<optimized out>, len=5) at dict.c:123
#1  0x00000000004232e6 in dictFind (d=0x7f71c2a17240, key=key@entry=0x7f71c2a15001) at dict.c:499
#2  0x000000000043a00a in dbRandomKey (db=0x7f71c2a24800) at db.c:176
#3  0x000000000043a0a2 in randomkeyCommand (c=0x7f71c2aae1c0) at db.c:355
#4  0x0000000000426b95 in call (c=c@entry=0x7f71c2aae1c0, flags=flags@entry=15) at server.c:2221
#5  0x0000000000429ba7 in processCommand (c=0x7f71c2aae1c0) at server.c:2500
#6  0x0000000000436515 in processInputBuffer (c=0x7f71c2aae1c0) at networking.c:1296
#7  0x0000000000421338 in aeProcessEvents (eventLoop=eventLoop@entry=0x7f71c2a2e050, flags=flags@entry=3) at ae.c:412
#8  0x00000000004215eb in aeMain (eventLoop=0x7f71c2a2e050) at ae.c:455
#9  0x000000000041e5df in main (argc=2, argv=0x7ffef34b2418) at server.c:4079

#0  0x00007f71c2fbc3a2 in random () from /lib64/libc.so.6
#1  0x0000000000423745 in dictGetRandomKey (d=0x7f71c2a171e0) at dict.c:646
#2  0x0000000000439fc0 in dbRandomKey (db=0x7f71c2a24800) at db.c:171
#3  0x000000000043a0a2 in randomkeyCommand (c=0x7f71c2aae1c0) at db.c:355
#4  0x0000000000426b95 in call (c=c@entry=0x7f71c2aae1c0, flags=flags@entry=15) at server.c:2221
#5  0x0000000000429ba7 in processCommand (c=0x7f71c2aae1c0) at server.c:2500
#6  0x0000000000436515 in processInputBuffer (c=0x7f71c2aae1c0) at networking.c:1296
#7  0x0000000000421338 in aeProcessEvents (eventLoop=eventLoop@entry=0x7f71c2a2e050, flags=flags@entry=3) at ae.c:412
#8  0x00000000004215eb in aeMain (eventLoop=0x7f71c2a2e050) at ae.c:455
#9  0x000000000041e5df in main (argc=2, argv=0x7ffef34b2418) at server.c:4079

#0  0x00007f71c30e17e4 in __memcmp_sse4_1 () from /lib64/libc.so.6
#1  0x0000000000424219 in dictSdsKeyCompare (privdata=<optimized out>, key1=<optimized out>, key2=<optimized out>) at server.c:445
#2  0x000000000042331d in dictFind (d=0x7f71c2a17240, key=0x7f71c2a27e73) at dict.c:504
#3  0x0000000000439494 in getExpire (db=0x7f71c2a24800, key=0x7f71c2a27e60) at db.c:824
#4  0x0000000000439c4f in expireIfNeeded (db=0x7f71c2a24800, key=0x7f71c2a27e60) at db.c:858
#5  0x000000000043a01a in dbRandomKey (db=0x7f71c2a24800) at db.c:177
#6  0x000000000043a0a2 in randomkeyCommand (c=0x7f71c2aae1c0) at db.c:355
#7  0x0000000000426b95 in call (c=c@entry=0x7f71c2aae1c0, flags=flags@entry=15) at server.c:2221
#8  0x0000000000429ba7 in processCommand (c=0x7f71c2aae1c0) at server.c:2500
#9  0x0000000000436515 in processInputBuffer (c=0x7f71c2aae1c0) at networking.c:1296
#10 0x0000000000421338 in aeProcessEvents (eventLoop=eventLoop@entry=0x7f71c2a2e050, flags=flags@entry=3) at ae.c:412
#11 0x00000000004215eb in aeMain (eventLoop=0x7f71c2a2e050) at ae.c:455
#12 0x000000000041e5df in main (argc=2, argv=0x7ffef34b2418) at server.c:4079

#0  dictGetRandomKey (d=<optimized out>) at dict.c:663
#1  0x0000000000439fc0 in dbRandomKey (db=0x7f71c2a24800) at db.c:171
#2  0x000000000043a0a2 in randomkeyCommand (c=0x7f71c2aae1c0) at db.c:355
#3  0x0000000000426b95 in call (c=c@entry=0x7f71c2aae1c0, flags=flags@entry=15) at server.c:2221
#4  0x0000000000429ba7 in processCommand (c=0x7f71c2aae1c0) at server.c:2500
#5  0x0000000000436515 in processInputBuffer (c=0x7f71c2aae1c0) at networking.c:1296
#6  0x0000000000421338 in aeProcessEvents (eventLoop=eventLoop@entry=0x7f71c2a2e050, flags=flags@entry=3) at ae.c:412
#7  0x00000000004215eb in aeMain (eventLoop=0x7f71c2a2e050) at ae.c:455
#8  0x000000000041e5df in main (argc=2, argv=0x7ffef34b2418) at server.c:4079

推測:最大メモリに達してキーの淘汰ロジックに入るが、淘汰条件に合致するキーがなく、無限ループになる。
 
関連コード:
/* RANDOMKEY command handler.
 *
 * Asks dbRandomKey() for a key of the client's currently selected database
 * and sends it back as a bulk reply. When dbRandomKey() yields NULL the
 * shared null-bulk reply is sent instead. The key object obtained from
 * dbRandomKey() is released here once the reply has been queued. */
void randomkeyCommand(client *c) {
    robj *picked = dbRandomKey(c->db);

    if (picked != NULL) {
        addReplyBulk(c,picked);
        decrRefCount(picked);
    } else {
        addReply(c,shared.nullbulk);
    }
}

/* Return a random key, in form of a Redis object.
 * If there are no keys, NULL is returned.
 *
 * The returned object is newly created; the caller must release it with
 * decrRefCount() when done.
 *
 * The function tries to return keys that are not already expired. There is
 * one exception: on a slave (server.masterhost set) whose database holds
 * only keys with an expire, every key may already be logically expired
 * while none of them gets removed locally. In that situation the loop
 * below could never terminate (dictGetRandomKey() keeps returning entries
 * and expireIfNeeded() keeps reporting them as expired), so after a bounded
 * number of attempts a possibly-expired key is returned instead. */
robj *dbRandomKey(redisDb *db) {
    dictEntry *de;
    int maxtries = 100;
    /* True when every key in the keyspace also has an entry in the expires
     * dictionary, i.e. there is no persistent key we could fall back to. */
    int allvolatile = dictSize(db->dict) == dictSize(db->expires);

    while(1) {
        sds key;
        robj *keyobj;

        de = dictGetRandomKey(db->dict);
        if (de == NULL) return NULL;

        key = dictGetKey(de);
        keyobj = createStringObject(key,sdslen(key));
        if (dictFind(db->expires,key)) {
            if (allvolatile && server.masterhost && --maxtries == 0) {
                /* All keys are volatile and we are a slave: the keys may all
                 * be logically expired without ever being deleted here, so
                 * neither exit condition of this loop can trigger. Give up
                 * after some tries and return a key name that may already
                 * be expired rather than spinning at 100% CPU forever. */
                return keyobj;
            }
            if (expireIfNeeded(db,keyobj)) {
                decrRefCount(keyobj);
                continue; /* search for another key. This expired. */
            }
        }
        return keyobj;
    }
}

/* Execute the command attached to the client by invoking c->cmd->proc(c).
 *
 * The visible part of the function forwards the command to MONITOR clients
 * when applicable, clears the per-call propagation flags, resets the
 * server.also_propagate operation array, runs the command, and records how
 * long it took and by how much server.dirty changed while it ran.
 *
 * NOTE: this excerpt is truncated (see the elision marker below); 'flags'
 * and 'client_old_flags' are presumably consumed in the elided part. */
void call(client *c, int flags) {
    long long dirty, start, duration;
    int client_old_flags = c->flags;

    /* Send the command to clients in MONITOR mode, only if the commands are
     * not generated from reading an AOF. Commands flagged CMD_SKIP_MONITOR
     * or CMD_ADMIN are never forwarded. */
    if (listLength(server.monitors) &&
        !server.loading &&
        !(c->cmd->flags & (CMD_SKIP_MONITOR|CMD_ADMIN)))
    {
        replicationFeedMonitors(c,server.monitors,c->db->id,c->argv,c->argc);
    }

    /* Initialization: clear the flags that must be set by the command on
     * demand, and initialize the array for additional commands propagation. */
    c->flags &= ~(CLIENT_FORCE_AOF|CLIENT_FORCE_REPL|CLIENT_PREVENT_PROP);
    redisOpArrayInit(&server.also_propagate);

    /* Call the command. 'duration' is the elapsed ustime() delta around the
     * handler (presumably microseconds); 'dirty' becomes the increase of
     * server.dirty caused by the handler, clamped to zero if negative. */
    dirty = server.dirty;
    start = ustime();
    c->cmd->proc(c);
    duration = ustime()-start;
    dirty = server.dirty-dirty;
    if (dirty < 0) dirty = 0;
    。。。。。。
}

/* With multiplexing we need to take per-client state.
 * Clients are taken in a linked list.
 *
 * Only the command pointers are shown in this excerpt; all other fields
 * are elided.
 *
 * NOTE(review): the typedef name after the closing brace is missing in this
 * excerpt. Other code here uses the type as `client`, so the full source
 * presumably ends the declaration with `} client;` — confirm. */
typedef struct client {
    。。。。。。
    /* 'cmd' is the command whose proc is invoked by call(); 'lastcmd' is
     * presumably the previously executed command. */
    struct redisCommand *cmd, *lastcmd;  /* Last command executed. */
    。。。。。。
};

/* Signature of a command implementation: receives the client that issued
 * the command and returns nothing. */
typedef void redisCommandProc(client *c);

/* Signature of a key-extraction callback: given the command descriptor and
 * the argument vector it returns an int array and reports a count through
 * 'numkeys'. */
typedef int *redisGetKeysProc(struct redisCommand *cmd, robj **argv, int argc, int *numkeys);

/* Descriptor of a single command. The field order is significant and must
 * not be changed. */
struct redisCommand {
    char *name;                     /* Command name (presumably as typed by clients). */
    redisCommandProc *proc;         /* Handler invoked to execute the command. */
    int arity;                      /* Argument count constraint; exact semantics not shown here. */
    char *sflags;                   /* Flags as string representation, one char per flag. */
    int flags;                      /* The actual flags, obtained from the 'sflags' field. */
    redisGetKeysProc *getkeys_proc; /* Function used to determine the key arguments in a
                                     * command line. Used for Redis Cluster redirect. */
    int firstkey;                   /* The first argument that's a key (0 = no keys) */
    int lastkey;                    /* The last argument that's a key */
    int keystep;                    /* The step between first and last key */
    long long microseconds;         /* Presumably the accumulated execution time. */
    long long calls;                /* Presumably the number of invocations. */
};

/* This is our hash table structure. Every dictionary has two of this as we
 * implement incremental rehashing, for the old to the new table. */
typedef struct dictht {
    dictEntry **table;      /* Array of buckets; each bucket is a chain of entries linked via 'next', or NULL when empty. */
    unsigned long size;     /* Number of buckets in 'table'. */
    unsigned long sizemask; /* Mask ANDed with a value to obtain a bucket index into 'table'. */
    unsigned long used;     /* Number of entries stored; dictSize() sums this over both tables. */
} dictht;

/* A dictionary: two hash tables plus the state needed to migrate entries
 * incrementally from ht[0] to ht[1] while a rehash is in progress. */
typedef struct dict {
    dictType *type;  /* Type descriptor for this dictionary (presumably the per-type callbacks — confirm). */
    void *privdata;  /* Opaque pointer; presumably handed to the 'type' callbacks — confirm. */
    dictht ht[2];    /* ht[0] is the table in use; ht[1] is only populated while rehashing. */
    long rehashidx; /* rehashing not in progress if rehashidx == -1 */
    int iterators; /* number of iterators currently running */
} dict;

/* Pick one entry of the dictionary at random.
 *
 * Returns NULL when the dictionary reports zero entries. Otherwise a
 * non-empty bucket is drawn at random (retrying until one is found) and
 * then one element of that bucket's chain is drawn at random.
 *
 * Note that the bucket search trusts the 'used' counters: it keeps drawing
 * until it hits a non-NULL bucket. */
dictEntry *dictGetRandomKey(dict *d)
{
    dictEntry *bucket, *cur;
    unsigned int slot;
    int chainlen, pick;

    // gdb session from the incident: forcing ht[0].used to 0 makes the
    // dictSize() check below return NULL.
    //
    // (gdb) p *d
    // $1 = {type = 0x71d940 , privdata = 0x0, ht = {{table = 0x7f71c2a1e480, size = 8, sizemask = 7, used = 1}, {table = 0x0, size = 0, sizemask = 0, used = 0}}, rehashidx = -1, iterators = 0}
    //
    // (gdb) p d.ht[0] 
    // $3 = {table = 0x7f71c2a1e480, size = 8, sizemask = 7, used = 1}
    // (gdb) p d.ht[1]
    // $4 = {table = 0x0, size = 0, sizemask = 0, used = 0}
    //
    // (gdb) set variable d.ht[0].used=0
    // (gdb) p d.ht[0].used      
    // $7 = 0

    // #define dictSize(d) ((d)->ht[0].used+(d)->ht[1].used)
    if (dictSize(d) == 0) return NULL;

    /* Take the opportunity to advance an in-progress rehash by one step. */
    if (dictIsRehashing(d)) _dictRehashStep(d);

    if (dictIsRehashing(d)) {
        /* Both tables may hold entries. Indexes 0..rehashidx-1 of ht[0] are
         * known to be empty, so draw from the remaining range of the two
         * tables laid end to end. */
        for (;;) {
            slot = d->rehashidx + (random() % (d->ht[0].size +
                                               d->ht[1].size -
                                               d->rehashidx));
            if (slot >= d->ht[0].size)
                bucket = d->ht[1].table[slot - d->ht[0].size];
            else
                bucket = d->ht[0].table[slot];
            if (bucket != NULL) break;
        }
    } else {
        /* Only ht[0] is populated. */
        for (;;) {
            slot = random() & d->ht[0].sizemask;
            bucket = d->ht[0].table[slot];
            if (bucket != NULL) break;
        }
    }

    /* The bucket is a linked list: count its elements, then walk to a
     * randomly chosen position. */
    chainlen = 0;
    for (cur = bucket; cur != NULL; cur = cur->next)
        chainlen++;

    pick = random() % chainlen;
    cur = bucket;
    while (pick > 0) {
        cur = cur->next;
        pick--;
    }
    return cur;
}

/* Perform a single step of incremental rehashing, unless iterators are
 * currently running on the dictionary.
 *
 * While iterators are active the two hash tables must not be touched,
 * otherwise elements could be missed or visited twice. Lookup and update
 * operations call this so that entries migrate from the old table to the
 * new one as the dictionary is used. */
static void _dictRehashStep(dict *d) {
    if (d->iterators != 0) return;
    dictRehash(d,1);
}

 
プロセスメモリ(問題を解消して無限ループを抜けた後に取得したもの。結果はpsの表示と一致する):
Memory
used_memory:1375320
used_memory_human:1.31M
used_memory_rss:4321280
used_memory_rss_human:4.12M
used_memory_peak:2468448
used_memory_peak_human:2.35M
total_system_memory:33453797376
total_system_memory_human:31.16G
used_memory_lua:34816
used_memory_lua_human:34.00K
maxmemory:1073741824
maxmemory_human:1.00G
maxmemory_policy:allkeys-lru
mem_fragmentation_ratio:3.14
mem_allocator:jemalloc-4.0.3