k8sのStatefulSetによるetcdクラスタの実現

15591 ワード

説明
etcdをk8sのStatefulSetで構築する。1つのStatefulSetのyamlだけで、etcdクラスタの動的なスケールを実現できる。StatefulSetのreplicasを減らす(あるいはノードが故障してダウンする)と、そのノードは自動的にetcdクラスタから除去され、その後replicasを増やす(あるいは故障ノードが回復する)と、自動的にetcdクラスタへノードが追加される。ただし、クラスタ初期化時に設定した最小クラスタノード数を下回るまでノードを減らした(またはノードが故障してダウンした)場合は、そのノードはクラスタから除去されず、ノードを再び増やした(または障害ノードが回復した)時点で、etcdクラスタ上の該当メンバーの情報が自動的に更新される。なお、このStatefulSetは、k8s公式が提供しているオリジナルのyamlをもとに、テストしながら修正したものである。元ファイルの場所(最新のソースコード。古いバージョンのソースコードには存在しない場合がある): https://github.com/kubernetes/kubernetes/tree/master/test/e2e/testing-manifests/statefulset/etcd 。しかし、公式のyamlのままではetcdクラスタを正常に作成できず、多くの問題がある。各自でテストして確認できる。
修正後のyamlは以下の通りである。自分のk8s環境にストレージ(PV)のサポートがない場合は、yamlのPVC(volumeClaimTemplates)をemptyDirのvolumeに変更すればよい。また、コンテナイメージは各自でetcdの公式イメージに変更してもよい。
yamlは次のとおりです。
# Headless Service (clusterIP: None) for the etcd StatefulSet: it exposes the
# peer port 2380 and the client port 2379 of every pod labelled app=etcd.
apiVersion: v1
kind: Service
metadata:
  labels:
    app: etcd
  name: etcd
spec:
  clusterIP: None
  # Publish pod addresses even before the pods report ready.
  publishNotReadyAddresses: true
  selector:
    app: etcd
  ports:
    - name: etcd-server
      port: 2380
    - name: etcd-client
      port: 2379

--- 
# StatefulSet running an etcd cluster backed by one PersistentVolumeClaim per
# pod. The container start script picks one of three paths:
#   1. data dir already exists            -> re-join as the existing member
#   2. ordinal >= INITIAL_CLUSTER_SIZE    -> "member add" to the running cluster
#   3. otherwise                          -> bootstrap the initial cluster
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: etcd
  labels:
    app: etcd
spec:
  # Governing Service; the scripts below address pods as <pod>.${SET_NAME}.
  serviceName: etcd
  replicas: 3
  selector:
    matchLabels:
      app: etcd
  template:
    metadata:
      name: etcd
      labels:
        app: etcd
    spec:
      containers:
      - name: etcd
        # Fixed registry host: "gcr.k8s.io" does not exist. Kubernetes images
        # are served from registry.k8s.io (formerly k8s.gcr.io).
        image: registry.k8s.io/etcd:3.2.24
        imagePullPolicy: Always
        ports:
        - containerPort: 2380
          name: peer
        - containerPort: 2379
          name: client
        resources:
          requests:
            cpu: 100m
            memory: 512Mi
        env:
        # Number of members bootstrapped as the initial cluster (ordinals
        # 0..N-1); keep it a quoted string, env values must be strings.
        - name: INITIAL_CLUSTER_SIZE
          value: "3"
        # Name used by the scripts to build <SET_NAME>-<ordinal>.<SET_NAME>.
        - name: SET_NAME
          value: etcd
        # Pod namespace via the downward API. Fixed: the previous path
        # "meta.namepace" is not a valid fieldPath.
        - name: NAMESPACE
          valueFrom:
            fieldRef:
              fieldPath: metadata.namespace
        volumeMounts:
        - name: datadir
          mountPath: /var/run/etcd
        lifecycle:
          preStop:
            exec:
              command:
                - "/bin/sh"
                - "-ec"
                - |
                  # Client endpoints of the initial members.
                  EPS=""
                  for i in $(seq 0 $((${INITIAL_CLUSTER_SIZE} - 1))); do
                      EPS="${EPS}${EPS:+,}http://${SET_NAME}-${i}.${SET_NAME}:2379"
                  done

                  HOSTNAME=$(hostname)

                  # Print the leading id field of the member-list line whose
                  # peer URL belongs to this pod.
                  member_hash() {
                      etcdctl member list | grep http://${HOSTNAME}.${SET_NAME}:2380 | cut -d':' -f1 | cut -d'[' -f1
                  }

                  # Pod ordinal: everything after the last "-" of the hostname.
                  # POSIX expansion; does not depend on the length of SET_NAME.
                  SET_ID=${HOSTNAME##*-}
                  # Only pods beyond the initial cluster size deregister on stop;
                  # initial members keep their membership and data.
                  if [ "${SET_ID}" -ge ${INITIAL_CLUSTER_SIZE} ]; then
                    echo "Removing ${HOSTNAME} from etcd cluster"
                    ETCDCTL_ENDPOINT=${EPS} etcdctl member remove $(member_hash)
                    if [ $? -eq 0 ]; then
                      # Remove everything otherwise the cluster will no longer scale-up
                      rm -rf /var/run/etcd/*
                    fi
                  fi
        command:
          - "/bin/sh"
          - "-ec"
          - |
            HOSTNAME=$(hostname)

            # store member id into PVC for later member replacement
            collect_member() {
                # Poll until the local etcd answers (POSIX redirection instead
                # of the bash-only "&>").
                while ! etcdctl member list >/dev/null 2>&1; do sleep 1; done
                etcdctl member list | grep http://${HOSTNAME}.${SET_NAME}:2380 | cut -d':' -f1 | cut -d'[' -f1 > /var/run/etcd/member_id
                exit 0
            }

            # Print the comma-separated client endpoints of the initial members.
            eps() {
                EPS=""
                for i in $(seq 0 $((${INITIAL_CLUSTER_SIZE} - 1))); do
                    EPS="${EPS}${EPS:+,}http://${SET_NAME}-${i}.${SET_NAME}:2379"
                done
                echo ${EPS}
            }

            # Print the leading id field of the member-list line whose peer URL
            # belongs to this pod (empty when the pod is not a member).
            member_hash() {
                etcdctl member list | grep http://${HOSTNAME}.${SET_NAME}:2380 | cut -d':' -f1 | cut -d'[' -f1
            }

            # re-joining after failure?
            if [ -e /var/run/etcd/default.etcd ]; then
                echo "Re-joining etcd member"
                member_id=$(cat /var/run/etcd/member_id)

                # re-join member
                POD_IP=$(hostname -i)
                ETCDCTL_ENDPOINT=$(eps) etcdctl member update ${member_id} http://${HOSTNAME}.${SET_NAME}:2380
                exec etcd --name ${HOSTNAME} \
                    --listen-peer-urls http://${POD_IP}:2380 \
                    --listen-client-urls http://${POD_IP}:2379,http://127.0.0.1:2379 \
                    --advertise-client-urls http://${POD_IP}:2379 \
                    --data-dir /var/run/etcd/default.etcd
            fi

            # Pod ordinal: everything after the last "-" of the hostname.
            # POSIX expansion; does not depend on the length of SET_NAME.
            SET_ID=${HOSTNAME##*-}

            # adding a new member to existing cluster (assuming all initial pods are available)
            if [ "${SET_ID}" -ge ${INITIAL_CLUSTER_SIZE} ]; then
                export ETCDCTL_ENDPOINT=$(eps)

                # member already added?
                MEMBER_HASH=$(member_hash)

                if [ -n "${MEMBER_HASH}" ]; then
                    # the member hash exists but for some reason etcd failed
                    # as the datadir has not be created, we can remove the member
                    # and retrieve new hash
                    etcdctl member remove ${MEMBER_HASH}
                fi

                echo "Adding new member"
                # Test the pipeline directly: under "sh -e" a separate "$?"
                # check is never reached when the pipeline fails, so the
                # cleanup below would be skipped.
                if ! etcdctl member add ${HOSTNAME} http://${HOSTNAME}.${SET_NAME}:2380 | grep "^ETCD_" > /var/run/etcd/new_member_envs; then
                    echo "Exiting"
                    rm -f /var/run/etcd/new_member_envs
                    exit 1
                fi

                cat /var/run/etcd/new_member_envs
                # "." is the POSIX spelling of "source"; loads the ETCD_*
                # variables captured from "member add".
                . /var/run/etcd/new_member_envs

                collect_member &

                POD_IP=$(hostname -i)
                exec etcd --name ${HOSTNAME} \
                    --listen-peer-urls http://${POD_IP}:2380 \
                    --listen-client-urls http://${POD_IP}:2379,http://127.0.0.1:2379 \
                    --advertise-client-urls http://${POD_IP}:2379 \
                    --data-dir /var/run/etcd/default.etcd \
                    --initial-advertise-peer-urls http://${HOSTNAME}.${SET_NAME}:2380 \
                    --initial-cluster ${ETCD_INITIAL_CLUSTER} \
                    --initial-cluster-state ${ETCD_INITIAL_CLUSTER_STATE}
            fi

            # Initial bootstrap: wait until every initial member answers ping
            # on its fully qualified name (requires NAMESPACE to be set).
            for i in $(seq 0 $((${INITIAL_CLUSTER_SIZE} - 1))); do
                while true; do
                    echo "Waiting for ${SET_NAME}-${i}.${SET_NAME} to come up"
                    ping -W 1 -c 1 ${SET_NAME}-${i}.${SET_NAME}.${NAMESPACE}.svc.cluster.local > /dev/null && break
                    sleep 1s
                done
            done

            # name=peer-url list of the initial members for --initial-cluster.
            PEERS=""
            for i in $(seq 0 $((${INITIAL_CLUSTER_SIZE} - 1))); do
                PEERS="${PEERS}${PEERS:+,}${SET_NAME}-${i}=http://${SET_NAME}-${i}.${SET_NAME}:2380"
            done

            collect_member &

            # join member
            POD_IP=$(hostname -i)
            exec etcd --name ${HOSTNAME} \
                --initial-advertise-peer-urls http://${POD_IP}:2380 \
                --listen-peer-urls http://${POD_IP}:2380 \
                --listen-client-urls http://${POD_IP}:2379,http://127.0.0.1:2379 \
                --advertise-client-urls http://${POD_IP}:2379 \
                --initial-cluster-token etcd-cluster-1 \
                --initial-cluster ${PEERS} \
                --initial-cluster-state new \
                --data-dir /var/run/etcd/default.etcd
  volumeClaimTemplates:
  - metadata:
      name: datadir
    spec:
      accessModes:
        - "ReadWriteOnce"
      resources:
        requests:
          # upstream recommended max is 700M
          storage: 1Gi


emptyDirを使う場合のyamlは以下の通りです。
# Headless Service (clusterIP: None) for the emptyDir-based etcd StatefulSet:
# it exposes peer port 2380 and client port 2379 of pods labelled app=etcd.
apiVersion: v1
kind: Service
metadata:
  labels:
    app: etcd
  name: etcd
spec:
  clusterIP: None
  # Publish pod addresses even before the pods report ready.
  publishNotReadyAddresses: true
  selector:
    app: etcd
  ports:
    - name: etcd-server
      port: 2380
    - name: etcd-client
      port: 2379
---
# StatefulSet running an etcd cluster that keeps its data in an emptyDir
# volume (for clusters without persistent storage). The container start script
# picks one of three paths:
#   1. data dir already exists            -> re-join as the existing member
#   2. ordinal >= INITIAL_CLUSTER_SIZE    -> "member add" to the running cluster
#   3. otherwise                          -> bootstrap the initial cluster
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: etcd
  labels:
    app: etcd
spec:
  # Governing Service; the scripts below address pods as <pod>.${SET_NAME}.
  serviceName: etcd
  replicas: 3
  selector:
    matchLabels:
      app: etcd
  template:
    metadata:
      name: etcd
      labels:
        app: etcd
    spec:
      volumes:
      # NOTE(review): an emptyDir survives container restarts but is deleted
      # together with the pod, so the "re-join" path below only applies to
      # container restarts - confirm this is acceptable for your use.
      - name: datadir
        emptyDir: {}
      containers:
      - name: etcd
        # Fixed registry host: "gcr.k8s.io" does not exist. Kubernetes images
        # are served from registry.k8s.io (formerly k8s.gcr.io).
        image: registry.k8s.io/etcd:3.2.24
        imagePullPolicy: Always
        ports:
        - containerPort: 2380
          name: peer
        - containerPort: 2379
          name: client
        resources:
          requests:
            cpu: 100m
            memory: 512Mi
        env:
        # Number of members bootstrapped as the initial cluster (ordinals
        # 0..N-1); keep it a quoted string, env values must be strings.
        - name: INITIAL_CLUSTER_SIZE
          value: "3"
        # Name used by the scripts to build <SET_NAME>-<ordinal>.<SET_NAME>.
        - name: SET_NAME
          value: etcd
        # Pod namespace via the downward API. Fixed: the previous path
        # "meta.namepace" is not a valid fieldPath.
        - name: NAMESPACE
          valueFrom:
            fieldRef:
              fieldPath: metadata.namespace
        volumeMounts:
        - name: datadir
          mountPath: /var/run/etcd
        lifecycle:
          preStop:
            exec:
              command:
                - "/bin/sh"
                - "-ec"
                - |
                  # Client endpoints of the initial members.
                  EPS=""
                  for i in $(seq 0 $((${INITIAL_CLUSTER_SIZE} - 1))); do
                      EPS="${EPS}${EPS:+,}http://${SET_NAME}-${i}.${SET_NAME}:2379"
                  done

                  HOSTNAME=$(hostname)

                  # Print the leading id field of the member-list line whose
                  # peer URL belongs to this pod.
                  member_hash() {
                      etcdctl member list | grep http://${HOSTNAME}.${SET_NAME}:2380 | cut -d':' -f1 | cut -d'[' -f1
                  }

                  # Pod ordinal: everything after the last "-" of the hostname.
                  # POSIX expansion; does not depend on the length of SET_NAME.
                  SET_ID=${HOSTNAME##*-}
                  # Only pods beyond the initial cluster size deregister on stop;
                  # initial members keep their membership.
                  if [ "${SET_ID}" -ge ${INITIAL_CLUSTER_SIZE} ]; then
                    echo "Removing ${HOSTNAME} from etcd cluster"
                    ETCDCTL_ENDPOINT=${EPS} etcdctl member remove $(member_hash)
                    if [ $? -eq 0 ]; then
                      # Remove everything otherwise the cluster will no longer scale-up
                      rm -rf /var/run/etcd/*
                    fi
                  fi
        command:
          - "/bin/sh"
          - "-ec"
          - |
            HOSTNAME=$(hostname)

            # store member id on the data volume for later member replacement
            collect_member() {
                # Poll until the local etcd answers (POSIX redirection instead
                # of the bash-only "&>").
                while ! etcdctl member list >/dev/null 2>&1; do sleep 1; done
                etcdctl member list | grep http://${HOSTNAME}.${SET_NAME}:2380 | cut -d':' -f1 | cut -d'[' -f1 > /var/run/etcd/member_id
                exit 0
            }

            # Print the comma-separated client endpoints of the initial members.
            eps() {
                EPS=""
                for i in $(seq 0 $((${INITIAL_CLUSTER_SIZE} - 1))); do
                    EPS="${EPS}${EPS:+,}http://${SET_NAME}-${i}.${SET_NAME}:2379"
                done
                echo ${EPS}
            }

            # Print the leading id field of the member-list line whose peer URL
            # belongs to this pod (empty when the pod is not a member).
            member_hash() {
                etcdctl member list | grep http://${HOSTNAME}.${SET_NAME}:2380 | cut -d':' -f1 | cut -d'[' -f1
            }

            # re-joining after failure?
            if [ -e /var/run/etcd/default.etcd ]; then
                echo "Re-joining etcd member"
                member_id=$(cat /var/run/etcd/member_id)

                # re-join member
                POD_IP=$(hostname -i)
                ETCDCTL_ENDPOINT=$(eps) etcdctl member update ${member_id} http://${HOSTNAME}.${SET_NAME}:2380
                exec etcd --name ${HOSTNAME} \
                    --listen-peer-urls http://${POD_IP}:2380 \
                    --listen-client-urls http://${POD_IP}:2379,http://127.0.0.1:2379 \
                    --advertise-client-urls http://${POD_IP}:2379 \
                    --data-dir /var/run/etcd/default.etcd
            fi

            # Pod ordinal: everything after the last "-" of the hostname.
            # POSIX expansion; does not depend on the length of SET_NAME.
            SET_ID=${HOSTNAME##*-}

            # adding a new member to existing cluster (assuming all initial pods are available)
            if [ "${SET_ID}" -ge ${INITIAL_CLUSTER_SIZE} ]; then
                export ETCDCTL_ENDPOINT=$(eps)

                # member already added?
                MEMBER_HASH=$(member_hash)

                if [ -n "${MEMBER_HASH}" ]; then
                    # the member hash exists but for some reason etcd failed
                    # as the datadir has not be created, we can remove the member
                    # and retrieve new hash
                    etcdctl member remove ${MEMBER_HASH}
                fi

                echo "Adding new member"
                # Test the pipeline directly: under "sh -e" a separate "$?"
                # check is never reached when the pipeline fails, so the
                # cleanup below would be skipped.
                if ! etcdctl member add ${HOSTNAME} http://${HOSTNAME}.${SET_NAME}:2380 | grep "^ETCD_" > /var/run/etcd/new_member_envs; then
                    echo "Exiting"
                    rm -f /var/run/etcd/new_member_envs
                    exit 1
                fi

                cat /var/run/etcd/new_member_envs
                # "." is the POSIX spelling of "source"; loads the ETCD_*
                # variables captured from "member add".
                . /var/run/etcd/new_member_envs

                collect_member &

                POD_IP=$(hostname -i)
                exec etcd --name ${HOSTNAME} \
                    --listen-peer-urls http://${POD_IP}:2380 \
                    --listen-client-urls http://${POD_IP}:2379,http://127.0.0.1:2379 \
                    --advertise-client-urls http://${POD_IP}:2379 \
                    --data-dir /var/run/etcd/default.etcd \
                    --initial-advertise-peer-urls http://${HOSTNAME}.${SET_NAME}:2380 \
                    --initial-cluster ${ETCD_INITIAL_CLUSTER} \
                    --initial-cluster-state ${ETCD_INITIAL_CLUSTER_STATE}
            fi

            # Initial bootstrap: wait until every initial member answers ping
            # on its fully qualified name (requires NAMESPACE to be set).
            for i in $(seq 0 $((${INITIAL_CLUSTER_SIZE} - 1))); do
                while true; do
                    echo "Waiting for ${SET_NAME}-${i}.${SET_NAME} to come up"
                    ping -W 1 -c 1 ${SET_NAME}-${i}.${SET_NAME}.${NAMESPACE}.svc.cluster.local > /dev/null && break
                    sleep 1s
                done
            done

            # name=peer-url list of the initial members for --initial-cluster.
            PEERS=""
            for i in $(seq 0 $((${INITIAL_CLUSTER_SIZE} - 1))); do
                PEERS="${PEERS}${PEERS:+,}${SET_NAME}-${i}=http://${SET_NAME}-${i}.${SET_NAME}:2380"
            done

            collect_member &

            # join member
            POD_IP=$(hostname -i)
            exec etcd --name ${HOSTNAME} \
                --initial-advertise-peer-urls http://${POD_IP}:2380 \
                --listen-peer-urls http://${POD_IP}:2380 \
                --listen-client-urls http://${POD_IP}:2379,http://127.0.0.1:2379 \
                --advertise-client-urls http://${POD_IP}:2379 \
                --initial-cluster-token etcd-cluster-1 \
                --initial-cluster ${PEERS} \
                --initial-cluster-state new \
                --data-dir /var/run/etcd/default.etcd
  # volumeClaimTemplates:
  # - metadata:
  #     name: datadir
  #   spec:
  #     accessModes:
  #       - "ReadWriteOnce"
  #     resources:
  #       requests:
  #         # upstream recommended max is 700M
  #         storage: 1Gi