etcdクラスタのk8s statefulset実現
15591 ワード
説明
etcdはk8sのstatefulsetで実現する。1つのstatefulsetのyamlでetcdクラスタの動的伸縮を実現でき、statefulsetのreplicasを減らす(あるいはノードが故障してダウンする)と、自動的にノードがetcdクラスタから除去され、その後replicasを増やす(あるいは故障ノードが回復する)と、自動的にetcdクラスタにノードが追加される。ただし、クラスタ初期化時に設定した最小クラスタノード数を下回るまでノードを減らした場合(またはノードが故障してダウンした場合)は、クラスタからノードは除去されず、ノードを追加する(または故障ノードが回復する)と、自動的にetcdクラスタのメンバー情報が更新される。なお、このstatefulsetはk8sが公式に提供する原版yamlをテストし修正したものである。ファイルの場所(最新のソースコード。古いバージョンのソースコードには存在しない場合がある): https://github.com/kubernetes/kubernetes/tree/master/test/e2e/testing-manifests/statefulset/etcd のyaml。ただし、この原版yamlのままではetcdクラスタを正常に作成できず、多くの問題がある。自分でテストして確認できる。
修正後のyamlは以下の通りである。自分のk8sにストレージサポートがない場合は、yamlのpvcをemptyDirのvolumeに変更すればよい。また、イメージは自分でetcdの公式イメージに変えることができる。
yamlは次のとおりです.
emptyDirのyamlは以下の通りです.
etcdはk8sのstatefulsetで実現する。1つのstatefulsetのyamlでetcdクラスタの動的伸縮を実現でき、statefulsetのreplicasを減らす(あるいはノードが故障してダウンする)と、自動的にノードがetcdクラスタから除去され、その後replicasを増やす(あるいは故障ノードが回復する)と、自動的にetcdクラスタにノードが追加される。ただし、クラスタ初期化時に設定した最小クラスタノード数を下回るまでノードを減らした場合(またはノードが故障してダウンした場合)は、クラスタからノードは除去されず、ノードを追加する(または故障ノードが回復する)と、自動的にetcdクラスタのメンバー情報が更新される。なお、このstatefulsetはk8sが公式に提供する原版yamlをテストし修正したものである。ファイルの場所(最新のソースコード。古いバージョンのソースコードには存在しない場合がある): https://github.com/kubernetes/kubernetes/tree/master/test/e2e/testing-manifests/statefulset/etcd のyaml。ただし、この原版yamlのままではetcdクラスタを正常に作成できず、多くの問題がある。自分でテストして確認できる。
修正後のyamlは以下の通りである。自分のk8sにストレージサポートがない場合は、yamlのpvcをemptyDirのvolumeに変更すればよい。また、イメージは自分でetcdの公式イメージに変えることができる。
yamlは次のとおりです.
apiVersion: v1
kind: Service
metadata:
name: etcd
labels:
app: etcd
spec:
ports:
- port: 2380
name: etcd-server
- port: 2379
name: etcd-client
clusterIP: None
selector:
app: etcd
publishNotReadyAddresses: true
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
name: etcd
labels:
app: etcd
spec:
serviceName: etcd
replicas: 3
selector:
matchLabels:
app: etcd
template:
metadata:
name: etcd
labels:
app: etcd
spec:
containers:
- name: etcd
image: gcr.k8s.io/etcd:3.2.24
imagePullPolicy: Always
ports:
- containerPort: 2380
name: peer
- containerPort: 2379
name: client
resources:
requests:
cpu: 100m
memory: 512Mi
env:
- name: INITIAL_CLUSTER_SIZE
value: "3"
- name: SET_NAME
value: etcd
- name: NAMESPACE
valueFrom:
fieldRef:
fieldPath: meta.namepace
volumeMounts:
- name: datadir
mountPath: /var/run/etcd
lifecycle:
preStop:
exec:
command:
- "/bin/sh"
- "-ec"
- |
EPS=""
for i in $(seq 0 $((${INITIAL_CLUSTER_SIZE} - 1))); do
EPS="${EPS}${EPS:+,}http://${SET_NAME}-${i}.${SET_NAME}:2379"
done
HOSTNAME=$(hostname)
member_hash() {
etcdctl member list | grep http://${HOSTNAME}.${SET_NAME}:2380 | cut -d':' -f1 | cut -d'[' -f1
}
# Remove everything otherwise the cluster will no longer scale-up
SET_ID=${HOSTNAME:5:${#HOSTNAME}}
# adding a new member to existing cluster (assuming all initial pods are available)
if [ "${SET_ID}" -ge ${INITIAL_CLUSTER_SIZE} ]; then
echo "Removing ${HOSTNAME} from etcd cluster"
ETCDCTL_ENDPOINT=${EPS} etcdctl member remove $(member_hash)
if [ $? -eq 0 ]; then
rm -rf /var/run/etcd/*
fi
fi
command:
- "/bin/sh"
- "-ec"
- |
HOSTNAME=$(hostname)
# store member id into PVC for later member replacement
collect_member() {
while ! etcdctl member list &>/dev/null; do sleep 1; done
etcdctl member list | grep http://${HOSTNAME}.${SET_NAME}:2380 | cut -d':' -f1 | cut -d'[' -f1 > /var/run/etcd/member_id
exit 0
}
eps() {
EPS=""
for i in $(seq 0 $((${INITIAL_CLUSTER_SIZE} - 1))); do
EPS="${EPS}${EPS:+,}http://${SET_NAME}-${i}.${SET_NAME}:2379"
done
echo ${EPS}
}
member_hash() {
etcdctl member list | grep http://${HOSTNAME}.${SET_NAME}:2380 | cut -d':' -f1 | cut -d'[' -f1
}
# re-joining after failure?
if [ -e /var/run/etcd/default.etcd ]; then
echo "Re-joining etcd member"
member_id=$(cat /var/run/etcd/member_id)
# re-join member
POD_IP=$(hostname -i)
ETCDCTL_ENDPOINT=$(eps) etcdctl member update ${member_id} http://${HOSTNAME}.${SET_NAME}:2380
exec etcd --name ${HOSTNAME} \
--listen-peer-urls http://${POD_IP}:2380 \
--listen-client-urls http://${POD_IP}:2379,http://127.0.0.1:2379 \
--advertise-client-urls http://${POD_IP}:2379 \
--data-dir /var/run/etcd/default.etcd
fi
# etcd-SET_ID
SET_ID=${HOSTNAME:5:${#HOSTNAME}}
# adding a new member to existing cluster (assuming all initial pods are available)
if [ "${SET_ID}" -ge ${INITIAL_CLUSTER_SIZE} ]; then
export ETCDCTL_ENDPOINT=$(eps)
# member already added?
MEMBER_HASH=$(member_hash)
if [ -n "${MEMBER_HASH}" ]; then
# the member hash exists but for some reason etcd failed
# as the datadir has not be created, we can remove the member
# and retrieve new hash
etcdctl member remove ${MEMBER_HASH}
fi
echo "Adding new member"
etcdctl member add ${HOSTNAME} http://${HOSTNAME}.${SET_NAME}:2380 | grep "^ETCD_" > /var/run/etcd/new_member_envs
if [ $? -ne 0 ]; then
echo "Exiting"
rm -f /var/run/etcd/new_member_envs
exit 1
fi
cat /var/run/etcd/new_member_envs
source /var/run/etcd/new_member_envs
collect_member &
POD_IP=$(hostname -i)
exec etcd --name ${HOSTNAME} \
--listen-peer-urls http://${POD_IP}:2380 \
--listen-client-urls http://${POD_IP}:2379,http://127.0.0.1:2379 \
--advertise-client-urls http://${POD_IP}:2379 \
--data-dir /var/run/etcd/default.etcd \
--initial-advertise-peer-urls http://${HOSTNAME}.${SET_NAME}:2380 \
--initial-cluster ${ETCD_INITIAL_CLUSTER} \
--initial-cluster-state ${ETCD_INITIAL_CLUSTER_STATE}
fi
for i in $(seq 0 $((${INITIAL_CLUSTER_SIZE} - 1))); do
while true; do
echo "Waiting for ${SET_NAME}-${i}.${SET_NAME} to come up"
ping -W 1 -c 1 ${SET_NAME}-${i}.${SET_NAME}.${NAMESPACE}.svc.cluster.local > /dev/null && break
sleep 1s
done
done
PEERS=""
for i in $(seq 0 $((${INITIAL_CLUSTER_SIZE} - 1))); do
PEERS="${PEERS}${PEERS:+,}${SET_NAME}-${i}=http://${SET_NAME}-${i}.${SET_NAME}:2380"
done
collect_member &
# join member
POD_IP=$(hostname -i)
exec etcd --name ${HOSTNAME} \
--initial-advertise-peer-urls http://${POD_IP}:2380 \
--listen-peer-urls http://${POD_IP}:2380 \
--listen-client-urls http://${POD_IP}:2379,http://127.0.0.1:2379 \
--advertise-client-urls http://${POD_IP}:2379 \
--initial-cluster-token etcd-cluster-1 \
--initial-cluster ${PEERS} \
--initial-cluster-state new \
--data-dir /var/run/etcd/default.etcd
volumeClaimTemplates:
- metadata:
name: datadir
spec:
accessModes:
- "ReadWriteOnce"
resources:
requests:
# upstream recommended max is 700M
storage: 1Gi
emptyDirのyamlは以下の通りです.
apiVersion: v1
kind: Service
metadata:
name: etcd
labels:
app: etcd
spec:
ports:
- port: 2380
name: etcd-server
- port: 2379
name: etcd-client
clusterIP: None
selector:
app: etcd
publishNotReadyAddresses: true
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
name: etcd
labels:
app: etcd
spec:
serviceName: etcd
replicas: 3
selector:
matchLabels:
app: etcd
template:
metadata:
name: etcd
labels:
app: etcd
spec:
volumes:
- name: datadir
emptyDir: {}
containers:
- name: etcd
image: gcr.k8s.io/etcd:3.2.24
imagePullPolicy: Always
ports:
- containerPort: 2380
name: peer
- containerPort: 2379
name: client
resources:
requests:
cpu: 100m
memory: 512Mi
env:
- name: INITIAL_CLUSTER_SIZE
value: "3"
- name: SET_NAME
value: etcd
- name: NAMESPACE
valueFrom:
fieldRef:
fieldPath: meta.namepace
volumeMounts:
- name: datadir
mountPath: /var/run/etcd
lifecycle:
preStop:
exec:
command:
- "/bin/sh"
- "-ec"
- |
EPS=""
for i in $(seq 0 $((${INITIAL_CLUSTER_SIZE} - 1))); do
EPS="${EPS}${EPS:+,}http://${SET_NAME}-${i}.${SET_NAME}:2379"
done
HOSTNAME=$(hostname)
member_hash() {
etcdctl member list | grep http://${HOSTNAME}.${SET_NAME}:2380 | cut -d':' -f1 | cut -d'[' -f1
}
# Remove everything otherwise the cluster will no longer scale-up
SET_ID=${HOSTNAME:5:${#HOSTNAME}}
# adding a new member to existing cluster (assuming all initial pods are available)
if [ "${SET_ID}" -ge ${INITIAL_CLUSTER_SIZE} ]; then
echo "Removing ${HOSTNAME} from etcd cluster"
ETCDCTL_ENDPOINT=${EPS} etcdctl member remove $(member_hash)
if [ $? -eq 0 ]; then
rm -rf /var/run/etcd/*
fi
fi
command:
- "/bin/sh"
- "-ec"
- |
HOSTNAME=$(hostname)
# store member id into PVC for later member replacement
collect_member() {
while ! etcdctl member list &>/dev/null; do sleep 1; done
etcdctl member list | grep http://${HOSTNAME}.${SET_NAME}:2380 | cut -d':' -f1 | cut -d'[' -f1 > /var/run/etcd/member_id
exit 0
}
eps() {
EPS=""
for i in $(seq 0 $((${INITIAL_CLUSTER_SIZE} - 1))); do
EPS="${EPS}${EPS:+,}http://${SET_NAME}-${i}.${SET_NAME}:2379"
done
echo ${EPS}
}
member_hash() {
etcdctl member list | grep http://${HOSTNAME}.${SET_NAME}:2380 | cut -d':' -f1 | cut -d'[' -f1
}
# re-joining after failure?
if [ -e /var/run/etcd/default.etcd ]; then
echo "Re-joining etcd member"
member_id=$(cat /var/run/etcd/member_id)
# re-join member
POD_IP=$(hostname -i)
ETCDCTL_ENDPOINT=$(eps) etcdctl member update ${member_id} http://${HOSTNAME}.${SET_NAME}:2380
exec etcd --name ${HOSTNAME} \
--listen-peer-urls http://${POD_IP}:2380 \
--listen-client-urls http://${POD_IP}:2379,http://127.0.0.1:2379 \
--advertise-client-urls http://${POD_IP}:2379 \
--data-dir /var/run/etcd/default.etcd
fi
# etcd-SET_ID
SET_ID=${HOSTNAME:5:${#HOSTNAME}}
# adding a new member to existing cluster (assuming all initial pods are available)
if [ "${SET_ID}" -ge ${INITIAL_CLUSTER_SIZE} ]; then
export ETCDCTL_ENDPOINT=$(eps)
# member already added?
MEMBER_HASH=$(member_hash)
if [ -n "${MEMBER_HASH}" ]; then
# the member hash exists but for some reason etcd failed
# as the datadir has not be created, we can remove the member
# and retrieve new hash
etcdctl member remove ${MEMBER_HASH}
fi
echo "Adding new member"
etcdctl member add ${HOSTNAME} http://${HOSTNAME}.${SET_NAME}:2380 | grep "^ETCD_" > /var/run/etcd/new_member_envs
if [ $? -ne 0 ]; then
echo "Exiting"
rm -f /var/run/etcd/new_member_envs
exit 1
fi
cat /var/run/etcd/new_member_envs
source /var/run/etcd/new_member_envs
collect_member &
POD_IP=$(hostname -i)
exec etcd --name ${HOSTNAME} \
--listen-peer-urls http://${POD_IP}:2380 \
--listen-client-urls http://${POD_IP}:2379,http://127.0.0.1:2379 \
--advertise-client-urls http://${POD_IP}:2379 \
--data-dir /var/run/etcd/default.etcd \
--initial-advertise-peer-urls http://${HOSTNAME}.${SET_NAME}:2380 \
--initial-cluster ${ETCD_INITIAL_CLUSTER} \
--initial-cluster-state ${ETCD_INITIAL_CLUSTER_STATE}
fi
for i in $(seq 0 $((${INITIAL_CLUSTER_SIZE} - 1))); do
while true; do
echo "Waiting for ${SET_NAME}-${i}.${SET_NAME} to come up"
ping -W 1 -c 1 ${SET_NAME}-${i}.${SET_NAME}.${NAMESPACE}.svc.cluster.local > /dev/null && break
sleep 1s
done
done
PEERS=""
for i in $(seq 0 $((${INITIAL_CLUSTER_SIZE} - 1))); do
PEERS="${PEERS}${PEERS:+,}${SET_NAME}-${i}=http://${SET_NAME}-${i}.${SET_NAME}:2380"
done
collect_member &
# join member
POD_IP=$(hostname -i)
exec etcd --name ${HOSTNAME} \
--initial-advertise-peer-urls http://${POD_IP}:2380 \
--listen-peer-urls http://${POD_IP}:2380 \
--listen-client-urls http://${POD_IP}:2379,http://127.0.0.1:2379 \
--advertise-client-urls http://${POD_IP}:2379 \
--initial-cluster-token etcd-cluster-1 \
--initial-cluster ${PEERS} \
--initial-cluster-state new \
--data-dir /var/run/etcd/default.etcd
# volumeClaimTemplates:
# - metadata:
# name: datadir
# spec:
# accessModes:
# - "ReadWriteOnce"
# resources:
# requests:
# # upstream recommended max is 700M
# storage: 1Gi