集群多实例负载均衡

# HAProxy
使用示例

global
    # Process-wide settings.
    # Send log messages to the local syslog socket /dev/log with facility local0.
    log /dev/log local0
    # Cap on the number of concurrent connections per HAProxy process.
    maxconn 20480
    # Detach and run in the background after startup.
    daemon

defaults
    # Settings inherited by every proxy section declared after this one.
    # Reuse the log target configured in the global section.
    log global
    # Layer-7 (HTTP) proxying, with the detailed HTTP log format.
    mode http
    option httplog
    # Do not log connections on which no data was exchanged (e.g. port probes).
    option dontlognull
    # Maximum time to wait for a TCP connection to a backend server to establish.
    timeout connect 5s
    # Maximum client-side / server-side inactivity. The long 300s values are
    # presumably sized for slow, long-running responses - confirm with the
    # workload before lowering them.
    timeout client  300s
    timeout server  300s
    # Retry a failed connection attempt up to 3 times ...
    retries 3
    # ... and allow a retry to be redistributed to another server instead of
    # insisting on the one that just failed. Without this, a request assigned
    # to a dead instance fails even though a healthy instance is available.
    option redispatch

listen stats
    # Built-in statistics page, served at http://<host>:8404/stats.
    # NOTE(review): this binds on all interfaces; restrict the bind address or
    # firewall the port if the page must not be reachable from outside.
    bind *:8404
    stats enable
    stats uri /stats
    # Auto-refresh the page in the browser every 5 seconds.
    stats refresh 5s
    # HTTP basic-auth credentials in user:password form.
    # NOTE(review): "admin:12345" is a trivial example password stored in clear
    # text - replace it before deploying.
    stats auth admin:12345

frontend openai_api
    # Client-facing entry point: accept connections on port 8080 on all
    # interfaces and hand every request to the llm_backend server pool.
    # Presumably carries OpenAI-compatible API traffic, judging by the section
    # name - confirm.
    bind *:8080
    default_backend llm_backend

backend llm_backend
    # Pool of backend instances that requests are balanced across.
    # Send each new request to the server with the fewest active connections.
    balance leastconn
    # HTTP health check: request GET /v1/models and treat only a 200 response
    # as healthy.
    # NOTE(review): assumes /v1/models answers 200 without authentication on
    # every instance - confirm, otherwise all servers will be marked down.
    option httpchk GET /v1/models
    http-check expect status 200
    # Probe every 5s; mark a server down after 3 consecutive failed checks and
    # up again after 2 consecutive successful ones.
    server llm1 10.0.0.11:8000 check inter 5s fall 3 rise 2
    server llm2 10.0.0.12:8000 check inter 5s fall 3 rise 2