APISIX源码剖析-熔断插件api-breaker-天翼云开发者社区

APISIX的api-breaker插件，实现API熔断逻辑，从而保护上游服务。即通过代码逻辑自动触发健康/不健康状态的次数递增运算，根据熔断配置参数判断是否开启/结束熔断策略。

当上游服务返回不健康配置中的状态码，并且（单位时间内）达到配置的不健康最大次数时，则认为上游服务处于不健康状态，开启熔断。
第一次触发不健康状态时，熔断 2 秒。超过熔断时间后，将重新开始转发请求到上游服务；如果继续返回不健康状态码，计数再次达到配置次数时，熔断 4 秒。以此类推（2，4，8，16，……），直到达到设置的最大熔断值。
当上游服务处于不健康状态时，如果转发请求到上游服务并返回配置中的健康状态码（默认为 200），并达到配置的次数时，则认为上游服务恢复至健康状态，结束熔断。

代码实现

初始化配置

主要是熔断配置初始化，包括熔断反馈信息（包括应答码，应答header，应答内容），熔断策略配置（熔断最长持续时间，上游状态码健康判断等）

-- JSON Schema for the api-breaker plugin configuration.
-- Only `break_response_code` is mandatory; every other knob has a default
-- or is optional.
local schema
do
    -- A non-empty string, used for both header names and header values.
    local function non_empty_string()
        return {
            type = "string",
            minLength = 1
        }
    end

    -- List of unique integer status codes restricted to [low, high],
    -- defaulting to a single code.
    local function status_list(low, high, fallback)
        return {
            type = "array",
            minItems = 1,
            items = {
                type = "integer",
                minimum = low,
                maximum = high,
            },
            uniqueItems = true,
            default = {fallback}
        }
    end

    -- Positive integer threshold with a default.
    local function threshold(fallback)
        return {
            type = "integer",
            minimum = 1,
            default = fallback,
        }
    end

    -- One entry of `break_response_headers`.
    local header_entry = {
        type = "object",
        properties = {
            key = non_empty_string(),
            value = non_empty_string()
        },
        required = {"key", "value"},
    }

    -- Statuses and failure count that mark the upstream as unhealthy.
    local unhealthy = {
        type = "object",
        properties = {
            http_statuses = status_list(500, 599, 500),
            failures = threshold(3)
        },
        default = {http_statuses = {500}, failures = 3}
    }

    -- Statuses and success count that mark the upstream as recovered.
    local healthy = {
        type = "object",
        properties = {
            http_statuses = status_list(200, 499, 200),
            successes = threshold(3)
        },
        default = {http_statuses = {200}, successes = 3}
    }

    schema = {
        type = "object",
        properties = {
            -- what the client receives while the breaker is open
            break_response_code = {
                type = "integer",
                minimum = 200,
                maximum = 599,
            },
            break_response_body = {
                type = "string"
            },
            break_response_headers = {
                type = "array",
                items = header_entry
            },
            -- upper bound of a single breaker period
            max_breaker_sec = {
                type = "integer",
                minimum = 3,
                default = 300,
            },
            unhealthy = unhealthy,
            healthy = healthy
        },
        required = {"break_response_code"},
    }
end

处理逻辑

处理逻辑根据历史统计的上游健康数据判断上游服务是否处于熔断状态，从而决定该请求能否正常处理：先按累计的不健康次数计算本次熔断时长（按 2 的幂次递增，且不超过配置的最大熔断时间 max_breaker_sec），再判断当前时间是否仍处于熔断期内；如果是，则直接返回配置的熔断应答（应答码，以及可选的应答 header 和应答内容），否则请求可以正常处理。

--- Access-phase handler: answers with the configured break response while
-- the breaker is open, otherwise lets the request continue.
--
-- The breaker duration is 2 ^ ceil(unhealthy_count / conf.unhealthy.failures)
-- seconds, capped at `conf.max_breaker_sec`. The breaker is open while
-- "last trip time + duration" has not yet passed.
--
-- @param conf plugin configuration
-- @param ctx  request context (passed to the key generators; `ctx.var` is
--             used to resolve variables in header values)
-- @return nothing when the request may proceed; otherwise
--         `break_response_code` and, if configured, `break_response_body`
function _M.access(conf, ctx)
    -- accumulated unhealthy responses; absent means nothing has failed yet
    local fail_key = gen_unhealthy_key(ctx)
    local fail_count, fail_err = shared_buffer:get(fail_key)
    if fail_err then
        core.log.warn("failed to get unhealthy_key: ",
                      fail_key, " err: ", fail_err)
        return
    end
    if not fail_count then
        return
    end

    -- moment at which the unhealthy state was last triggered
    local tripped_key = gen_lasttime_key(ctx)
    local tripped_at, tripped_err = shared_buffer:get(tripped_key)
    if tripped_err then
        core.log.warn("failed to get lasttime_key: ",
                      tripped_key, " err: ", tripped_err)
        return
    end
    if not tripped_at then
        return
    end

    -- exponential back-off, never below one round and never above the
    -- user-configured maximum
    local rounds = math.max(1, math.ceil(fail_count / conf.unhealthy.failures))
    local open_for = math.min(2 ^ rounds, conf.max_breaker_sec)
    core.log.info("breaker_time: ", open_for)

    -- breaker period already elapsed: forward the request as usual
    if tripped_at + open_for < ngx.time() then
        return
    end

    local body = conf.break_response_body
    if not body then
        return conf.break_response_code
    end

    -- custom headers are only sent together with a custom body
    local headers = conf.break_response_headers
    if headers then
        for _, header in ipairs(headers) do
            core.response.add_header(
                header.key, core.utils.resolve_var(header.value, ctx.var))
        end
    end

    return conf.break_response_code, body
end

日志处理--健康状态统计逻辑

健康与否的数据统计是在日志阶段（log）里实现的：根据上下文信息中的上游应答状态码判断请求是否是健康状态。如果是不健康状态，则进行不健康状态累计，并根据配置判断是否开启熔断；如果是健康状态，则进行健康累加计算后，根据配置判断是否需要将上游服务重置为正常状态，结束熔断。即这段代码主要关注熔断状态开启和结束的处理。

--- Log-phase handler: maintains the counters that open and close the breaker.
--
-- * An upstream status listed in `conf.unhealthy.http_statuses` increments
--   the unhealthy counter, clears the healthy counter and, on every multiple
--   of `conf.unhealthy.failures`, stores the current time under the
--   "last time" key (TTL `conf.max_breaker_sec`).
-- * A status listed in `conf.healthy.http_statuses` increments the healthy
--   counter, but only while an unhealthy counter exists; once it reaches
--   `conf.healthy.successes` all three keys are deleted.
-- * A missing upstream status, or one in neither list, is ignored.
--
-- @param conf plugin configuration
-- @param ctx  request context passed to the key generators
function _M.log(conf, ctx)
    local unhealthy_key = gen_unhealthy_key(ctx)
    local healthy_key = gen_healthy_key(ctx)
    local upstream_status = core.response.get_upstream_status(ctx)

    if not upstream_status then
        return
    end

    -- unhealthy process
    if core.table.array_find(conf.unhealthy.http_statuses,
                             upstream_status)
    then
        local unhealthy_count, err = shared_buffer:incr(unhealthy_key, 1, 0)
        if not unhealthy_count then
            core.log.warn("failed to incr unhealthy_key: ", unhealthy_key,
                          " err: ", err)
            -- the failed response still ends any healthy streak
            shared_buffer:delete(healthy_key)
            -- without a counter value the modulo below would raise an
            -- "arithmetic on nil" error, so stop here
            return
        end
        core.log.info("unhealthy_key: ", unhealthy_key, " count: ",
                      unhealthy_count)

        shared_buffer:delete(healthy_key)

        -- whether the user-configured number of failures has been reached,
        -- and if so, the timestamp for entering the unhealthy state.
        if unhealthy_count % conf.unhealthy.failures == 0 then
            shared_buffer:set(gen_lasttime_key(ctx), ngx.time(),
                              conf.max_breaker_sec)
            core.log.info("update unhealthy_key: ", unhealthy_key, " to ",
                          unhealthy_count)
        end

        return
    end

    -- health process
    if not core.table.array_find(conf.healthy.http_statuses, upstream_status) then
        return
    end

    local unhealthy_count, err = shared_buffer:get(unhealthy_key)
    if err then
        core.log.warn("failed to `get` unhealthy_key: ", unhealthy_key,
                      " err: ", err)
    end

    -- nothing has failed (or the lookup failed): no state to recover from
    if not unhealthy_count then
        return
    end

    local healthy_count, err = shared_buffer:incr(healthy_key, 1, 0)
    if not healthy_count then
        core.log.warn("failed to `incr` healthy_key: ", healthy_key,
                      " err: ", err)
        -- comparing nil with a number below would raise an error
        return
    end

    -- clear related status
    if healthy_count >= conf.healthy.successes then
        -- stat change to normal
        core.log.info("change to normal, ", healthy_key, " ", healthy_count)
        shared_buffer:delete(gen_lasttime_key(ctx))
        shared_buffer:delete(unhealthy_key)
        shared_buffer:delete(healthy_key)
    end

    return
end

插件应用

可以通过dashboard进行界面设置，并开启熔断插件

-- JSON Schema for the api-breaker plugin configuration.
local schema = {
    type = "object",
    properties = {
        -- status code returned to the client while the breaker is open
        break_response_code = {
            type = "integer",
            minimum = 200,
            maximum = 599,
        },
        -- optional body returned while the breaker is open
        break_response_body = {
            type = "string"
        },
        -- optional list of key/value headers for the break response
        break_response_headers = {
            type = "array",
            items = {
                type = "object",
                properties = {
                    key = {
                        type = "string",
                        minLength = 1
                    },
                    value = {
                        type = "string",
                        minLength = 1
                    }
                },
                required = {"key", "value"},
            }
        },
        -- upper bound of a single breaker period
        max_breaker_sec = {
            type = "integer",
            minimum = 3,
            default = 300,
        },
        -- statuses and failure count that mark the upstream as unhealthy
        unhealthy = {
            type = "object",
            properties = {
                http_statuses = {
                    type = "array",
                    minItems = 1,
                    items = {
                        type = "integer",
                        minimum = 500,
                        maximum = 599,
                    },
                    uniqueItems = true,
                    default = {500}
                },
                failures = {
                    type = "integer",
                    minimum = 1,
                    default = 3,
                }
            },
            default = {http_statuses = {500}, failures = 3}
        },
        -- statuses and success count that mark the upstream as recovered
        healthy = {
            type = "object",
            properties = {
                http_statuses = {
                    type = "array",
                    minItems = 1,
                    items = {
                        type = "integer",
                        minimum = 200,
                        maximum = 499,
                    },
                    uniqueItems = true,
                    default = {200}
                },
                successes = {
                    type = "integer",
                    minimum = 1,
                    default = 3,
                }
            },
            default = {http_statuses = {200}, successes = 3}
        }
    },
    required = {"break_response_code"},
}

--- Access-phase handler: answers with the configured break response while
-- the breaker is open, otherwise lets the request continue.
--
-- @param conf plugin configuration
-- @param ctx  request context (passed to the key generators; `ctx.var` is
--             used to resolve variables in header values)
-- @return nothing when the request may proceed; otherwise
--         `break_response_code` and, if configured, `break_response_body`
function _M.access(conf, ctx)
    local unhealthy_key = gen_unhealthy_key(ctx)
    -- unhealthy counts
    local unhealthy_count, err = shared_buffer:get(unhealthy_key)
    if err then
        core.log.warn("failed to get unhealthy_key: ",
                      unhealthy_key, " err: ", err)
        return
    end

    -- no recorded failure: nothing to break
    if not unhealthy_count then
        return
    end

    -- timestamp of the last time a unhealthy state was triggered
    local lasttime_key = gen_lasttime_key(ctx)
    local lasttime, err = shared_buffer:get(lasttime_key)
    if err then
        core.log.warn("failed to get lasttime_key: ",
                      lasttime_key, " err: ", err)
        return
    end

    if not lasttime then
        return
    end

    -- number of times the failure threshold has been reached (at least 1)
    local failure_times = math.ceil(unhealthy_count / conf.unhealthy.failures)
    if failure_times < 1 then
        failure_times = 1
    end

    -- cannot exceed the maximum value of the user configuration
    local breaker_time = 2 ^ failure_times
    if breaker_time > conf.max_breaker_sec then
        breaker_time = conf.max_breaker_sec
    end
    core.log.info("breaker_time: ", breaker_time)

    -- breaker
    if lasttime + breaker_time >= ngx.time() then
        if conf.break_response_body then
            -- custom headers are only sent together with a custom body
            if conf.break_response_headers then
                for _, value in ipairs(conf.break_response_headers) do
                    local val = core.utils.resolve_var(value.value, ctx.var)
                    core.response.add_header(value.key, val)
                end
            end
            return conf.break_response_code, conf.break_response_body
        end
        return conf.break_response_code
    end

    return
end

--- Log-phase handler: maintains the counters that open and close the breaker.
--
-- * An upstream status listed in `conf.unhealthy.http_statuses` increments
--   the unhealthy counter, clears the healthy counter and, on every multiple
--   of `conf.unhealthy.failures`, stores the current time under the
--   "last time" key (TTL `conf.max_breaker_sec`).
-- * A status listed in `conf.healthy.http_statuses` increments the healthy
--   counter, but only while an unhealthy counter exists; once it reaches
--   `conf.healthy.successes` all three keys are deleted.
-- * A missing upstream status, or one in neither list, is ignored.
--
-- @param conf plugin configuration
-- @param ctx  request context passed to the key generators
function _M.log(conf, ctx)
    local unhealthy_key = gen_unhealthy_key(ctx)
    local healthy_key = gen_healthy_key(ctx)
    local upstream_status = core.response.get_upstream_status(ctx)

    if not upstream_status then
        return
    end

    -- unhealthy process
    if core.table.array_find(conf.unhealthy.http_statuses,
                             upstream_status)
    then
        local unhealthy_count, err = shared_buffer:incr(unhealthy_key, 1, 0)
        if not unhealthy_count then
            core.log.warn("failed to incr unhealthy_key: ", unhealthy_key,
                          " err: ", err)
            -- the failed response still ends any healthy streak
            shared_buffer:delete(healthy_key)
            -- without a counter value the modulo below would raise an
            -- "arithmetic on nil" error, so stop here
            return
        end
        core.log.info("unhealthy_key: ", unhealthy_key, " count: ",
                      unhealthy_count)

        shared_buffer:delete(healthy_key)

        -- whether the user-configured number of failures has been reached,
        -- and if so, the timestamp for entering the unhealthy state.
        if unhealthy_count % conf.unhealthy.failures == 0 then
            shared_buffer:set(gen_lasttime_key(ctx), ngx.time(),
                              conf.max_breaker_sec)
            core.log.info("update unhealthy_key: ", unhealthy_key, " to ",
                          unhealthy_count)
        end

        return
    end

    -- health process
    if not core.table.array_find(conf.healthy.http_statuses, upstream_status) then
        return
    end

    local unhealthy_count, err = shared_buffer:get(unhealthy_key)
    if err then
        core.log.warn("failed to `get` unhealthy_key: ", unhealthy_key,
                      " err: ", err)
    end

    -- nothing has failed (or the lookup failed): no state to recover from
    if not unhealthy_count then
        return
    end

    local healthy_count, err = shared_buffer:incr(healthy_key, 1, 0)
    if not healthy_count then
        core.log.warn("failed to `incr` healthy_key: ", healthy_key,
                      " err: ", err)
        -- comparing nil with a number below would raise an error
        return
    end

    -- clear related status
    if healthy_count >= conf.healthy.successes then
        -- stat change to normal
        core.log.info("change to normal, ", healthy_key, " ", healthy_count)
        shared_buffer:delete(gen_lasttime_key(ctx))
        shared_buffer:delete(unhealthy_key)
        shared_buffer:delete(healthy_key)
    end

    return
end

智算服务

应用商城

合作伙伴

开发者

支持与服务

了解天翼云

APISIX源码剖析-熔断插件api-breaker

代码实现

初始化配置

处理逻辑

日志处理--健康状态统计逻辑

插件应用

APISIX源码剖析-熔断插件api-breaker

代码实现

初始化配置

处理逻辑

日志处理--健康状态统计逻辑

插件应用

活动

智算服务

应用商城

合作伙伴

开发者

支持与服务

了解天翼云

APISIX源码剖析-熔断插件api-breaker

代码实现

初始化配置

处理逻辑

日志处理--健康状态统计逻辑

插件应用

APISIX源码剖析-熔断插件api-breaker

代码实现

初始化配置

处理逻辑

日志处理--健康状态统计逻辑

插件应用