数据库
首页 > 数据库 > Redis源码分析--Sentinel(4)实例处理的Acting half

Redis源码分析--Sentinel(4)实例处理的Acting half

作者:互联网

Acting half:

一、进入故障转移状态之前:

 /* Per-instance handler, acting half.
  *
  * This listing is an excerpt: the earlier part of the function is elided
  * (the "// ..." lines below). What is shown runs the down-state checks on
  * `ri` and, for instances flagged SRI_MASTER, the failover-related steps.
  *
  * ri: the instance (master, slave or sentinel) being processed. */
 void sentinelHandleRedisInstance(sentinelRedisInstance *ri) {
    // ... (earlier part of the function omitted in this excerpt)
    // ...
    /* ============== ACTING HALF ============= */
    /* We don't proceed with the acting half if we are in TILT mode.
     * TILT happens when we find something odd with the time, like a
     * sudden change in the clock. */
    if (sentinel.tilt) {
        if (mstime()-sentinel.tilt_start_time < SENTINEL_TILT_PERIOD) return;
        /* SENTINEL_TILT_PERIOD ms have passed since tilt_start_time:
         * leave TILT mode and log the event. */
        sentinel.tilt = 0;
        sentinelEvent(REDIS_WARNING,"-tilt",NULL,"#tilt mode exited");
    }

    /* Every kind of instance */
    sentinelCheckSubjectivelyDown(ri);

    /* Masters and slaves */
    if (ri->flags & (SRI_MASTER|SRI_SLAVE)) {
        /* Nothing so far. */
    }

   /* Only masters */
    if (ri->flags & SRI_MASTER) {
        /* Replies from other sentinels arrive asynchronously, so this call
         * may not be able to reach a verdict right away; the objectively-down
         * check is repeated every time this handler runs for the master. */
        sentinelCheckObjectivelyDown(ri);
        /* Decide whether to start a failover. When one is started,
         * master->failover_state changes, and the forced ask below then
         * carries this sentinel's runid, i.e. it asks the other sentinels
         * to elect this sentinel as leader. */
        if (sentinelStartFailoverIfNeeded(ri))
             /* Forced ask issued only on the call that actually starts the
              * failover (sentinelStartFailoverIfNeeded returned non-zero). */
            sentinelAskMasterStateToOtherSentinels(ri,SENTINEL_ASK_FORCED);
        sentinelFailoverStateMachine(ri);
        /* Non-forced ask, issued on every pass. Depending on
         * master->failover_state it is either a plain down-state query or a
         * vote request; NOTE(review): the article states it keeps being sent
         * until a leader is elected -- confirm against the state machine. */
        sentinelAskMasterStateToOtherSentinels(ri,SENTINEL_NO_FLAGS);
    }
}

二、发送命令 sentinelAskMasterStateToOtherSentinels:

这个函数很特殊,所以专门用一节讲它。首先说明,这个函数会发送如下命令:

SENTINEL IS-MASTER-DOWN-BY-ADDR <ip> <port> <current_epoch> <runid>

而根据 <runid> 参数的不同,该命令会执行两个不同的功能:当 runid 为 "*" 时,仅询问对方是否认为该 master 已下线;当 runid 为本机 sentinel 的 runid 时,则是要求对方选举本机为 leader:

/* Send
 *
 *   SENTINEL is-master-down-by-addr <ip> <port> <current_epoch> <runid>
 *
 * to the sentinels stored in master->sentinels.
 *
 * master: the monitored master the question is about.
 * flags:  when SENTINEL_ASK_FORCED is set, the per-sentinel
 *         SENTINEL_ASK_PERIOD rate limit is skipped.
 *
 * The <runid> argument is server.runid when master->failover_state is past
 * SENTINEL_FAILOVER_STATE_NONE, and "*" otherwise. How the receiver treats
 * the two forms is not visible in this function; the surrounding article
 * describes them as "vote for me as leader" and "plain down-state query"
 * respectively.
 *
 * Each successfully queued command increments ri->pending_commands. */
void sentinelAskMasterStateToOtherSentinels(sentinelRedisInstance *master, int flags) {
    dictIterator *di;
    dictEntry *de;

    di = dictGetIterator(master->sentinels);
    while((de = dictNext(di)) != NULL) {
        sentinelRedisInstance *ri = dictGetVal(de);
        mstime_t elapsed = mstime() - ri->last_master_down_reply_time;
        char port[32];
        int retval;

        /* If the master state from other sentinel is too old, we clear it. */
        if (elapsed > SENTINEL_ASK_PERIOD*5) {
            ri->flags &= ~SRI_MASTER_DOWN;
            sdsfree(ri->leader);
            ri->leader = NULL;
        }

        /* Only ask if master is down to other sentinels if:
         *
         * 1) We believe it is down, or there is a failover in progress.
         * 2) Sentinel is connected.
         * 3) We did not receive the info within SENTINEL_ASK_PERIOD ms
         *    (this last check is skipped when SENTINEL_ASK_FORCED is set). */
        if ((master->flags & SRI_S_DOWN) == 0) {
            continue;
        }
        if (ri->flags & SRI_DISCONNECTED) {
            continue;
        }
        if (!(flags & SENTINEL_ASK_FORCED) &&
            mstime() - ri->last_master_down_reply_time < SENTINEL_ASK_PERIOD) {
            continue;
        }

        /* Ask */
        ll2string(port,sizeof(port),master->addr->port);
        retval = redisAsyncCommand(ri->cc,
                    sentinelReceiveIsMasterDownReply, NULL,
                    "SENTINEL is-master-down-by-addr %s %s %llu %s",
                    master->addr->ip, port,
                    /* Cast so the variadic argument matches %llu however
                     * current_epoch is declared; sentinelStartFailover()
                     * applies the same cast when logging this field. */
                    (unsigned long long) sentinel.current_epoch,
                    /* master->failover_state selects the form of the request:
                     * our own runid once a failover has been started,
                     * "*" otherwise. */
                    (master->failover_state > SENTINEL_FAILOVER_STATE_NONE) ?
                    server.runid : "*");
        if (retval == REDIS_OK) {
            ri->pending_commands++;
        }
    }
    dictReleaseIterator(di);
}


三、进入故障转移的条件:

/* Start a failover for `master` when every precondition holds.
 *
 * A failover is NOT started when any of the following is true (checked in
 * this order, stopping at the first match):
 *   - the master is not flagged SRI_O_DOWN;
 *   - SRI_FAILOVER_IN_PROGRESS is already set;
 *   - less than failover_timeout*2 ms have elapsed since
 *     failover_start_time.
 *
 * Returns 1 if sentinelStartFailover() was called, 0 otherwise. */
int sentinelStartFailoverIfNeeded(sentinelRedisInstance *master) {
    if ((master->flags & SRI_O_DOWN) == 0 ||
        (master->flags & SRI_FAILOVER_IN_PROGRESS) != 0 ||
        mstime() - master->failover_start_time < master->failover_timeout*2)
    {
        return 0;
    }

    sentinelStartFailover(master);
    return 1;
}
/* Put `master` into the initial failover state.
 *
 * Asserts that the instance is flagged SRI_MASTER, then:
 *   - sets failover_state to SENTINEL_FAILOVER_STATE_WAIT_START and raises
 *     SRI_FAILOVER_IN_PROGRESS;
 *   - increments sentinel.current_epoch and stores the new value in
 *     master->failover_epoch;
 *   - emits the "+new-epoch" and "+try-failover" events;
 *   - stamps failover_start_time and failover_state_change_time. */
void sentinelStartFailover(sentinelRedisInstance *master) {
    redisAssert(master->flags & SRI_MASTER);

    /* A failover_state past SENTINEL_FAILOVER_STATE_NONE is what makes
     * sentinelAskMasterStateToOtherSentinels() send our runid instead
     * of "*". */
    master->failover_state = SENTINEL_FAILOVER_STATE_WAIT_START;
    master->flags = master->flags | SRI_FAILOVER_IN_PROGRESS;

    /* Open a new epoch and tie this failover attempt to it. */
    sentinel.current_epoch += 1;
    master->failover_epoch = sentinel.current_epoch;
    sentinelEvent(REDIS_WARNING, "+new-epoch", master, "%llu",
                  (unsigned long long) sentinel.current_epoch);
    sentinelEvent(REDIS_WARNING, "+try-failover", master, "%@");

    /* Record when the attempt started and when the state last changed. */
    master->failover_start_time = mstime();
    master->failover_state_change_time = mstime();
}

标签:--,failover,Redis,SENTINEL,源码,master,sentinel,flags,ri
来源: https://www.cnblogs.com/macguz/p/15865715.html