Ceph障害臨界値テスト
7352 ワード
#! /bin/bash
#############################################################
# Input arguments are checked with "getopt" (see arg_proc).
#
# The first argument selects the operation, for example:
# -m|--mem SIZE SECONDS    : memory operation, occupy SIZE G of memory
# -c|--cpu PERCENT SECONDS : CPU operation, load PERCENT of the cores
# -n|--net ...             : network operation (delay, loss, ...)
#############################################################
# Default log destination used by the helpers in this script.
log_file="./ltrep.log"

#############################################################
# Append one timestamped line to the log file.
# Globals:   log_file (read)
# Arguments: $1 - level tag (e.g. INF, DEB, WAR, ERR)
#            $2 - message text
# Outputs:   one line appended to $log_file; lines whose level is
#            "ERR" or "error" are wrapped in ANSI red.
#############################################################
log()
{
  local level=${1-}
  local msg=${2-}
  local stamp
  stamp=$(date "+%Y-%m-%d %H:%M:%S")

  # [[ ]] does not word-split, so an empty or multi-word level can no
  # longer break the comparison.
  if [[ $level == "ERR" || $level == "error" ]]; then
    # echo -e is kept so the colour escapes (and any escapes in the
    # message) are rendered exactly as before.
    echo -e "[${stamp}] \033[31m = $level = : $msg \033[0m " >> "$log_file"
  else
    echo -e "[${stamp}] = $level = : $msg" >> "$log_file"
  fi
}
#############################################################
# Normalize the given arguments with getopt and log the result.
# Globals:   ARGS (written) - getopt's normalized argument string
# Arguments: the arguments to check
# Outputs:   a message on stdout when getopt rejects the arguments
# Returns:   exits the script with status 1 when getopt fails
#############################################################
arg_proc()
{
  # set args rule
  # "--" ends getopt's own options, so "$@" is parsed as the parameters
  # instead of having a dash glued onto its first word.
  if ! ARGS=$(getopt -o m:c:n:: --long cpu:,mem:,net:: -n "$0" -- "$@"); then
    echo "Other terminating at using \"getopt\"! "
    exit 1
  fi
  log INF "Current args is: ${ARGS}"
  # The normalized list is only logged; re-setting the positional
  # parameters stays disabled.
  #eval set -- "${ARGS}"
}
#############################################################
# Make sure the "stress" tool can be executed.
# Outputs:   an installation hint on stdout when it is missing
# Returns:   exits the script with status 1 when stress is not found
#############################################################
get_stress()
{
  # Look for the executable itself. Counting "rpm -qa | grep stress"
  # hits and requiring exactly one rejects hosts where a second package
  # name happens to contain "stress".
  if ! command -v stress >/dev/null 2>&1; then
    echo "The stress rpm is not setup, use \"yum install -y stress\" to setup stress rpm packet!"
    log ERR "The stress rpm is not setup, use \"yum install -y stress\" to setup stress rpm packet!"
    exit 1
  fi
  log INF "stress rpm is ok! continue next step."
}
#############################################################
# Refuse to continue unless the script runs as root.
# Globals:   user (written) - name reported by whoami
# Outputs:   a hint on stdout when the user is not root
# Returns:   exits the script with status 1 when not root
#############################################################
que_user()
{
  user=$(whoami)
  # The quoted comparison also rejects an empty result; unquoted, an
  # empty name broke the test and was let through as if it were root.
  if [[ "$user" != "root" ]]; then
    echo "Current user is $user you need to use \"root\" to excute this script!"
    exit 1
  fi
  log INF "Current user is $user "
}
#############################################################
# CPU usage testing, called from the main function.
#############################################################
# Processor count taken from the "physical id" lines of /proc/cpuinfo.
cpu_num=$(grep -c "physical id" /proc/cpuinfo 2>/dev/null)
if ! [[ $cpu_num =~ ^[1-9][0-9]*$ ]]; then
  # No usable count from /proc/cpuinfo: ask getconf for the number of
  # online processors instead.
  cpu_num=$(getconf _NPROCESSORS_ONLN 2>/dev/null)
fi

#############################################################
# Start "stress" CPU workers in the background.
# Globals:   cpu_num (read), log_file (read)
# Arguments: $1 - percentage of the processors to load (integer > 0)
#            $2 - run time handed to "stress -t"
# Outputs:   stress stdout and stderr are appended to $log_file
# Returns:   1 when the percentage or the processor count is not a
#            positive integer, otherwise 0
#############################################################
cpu_op()
{
  local percent=${1-}
  local duration=${2-}
  local use_c

  if ! [[ $percent =~ ^[0-9]+$ && $cpu_num =~ ^[0-9]+$ ]] \
    || (( 10#$percent == 0 || 10#$cpu_num == 0 )); then
    log ERR "cpu_op: invalid percent \"$percent\" or cpu count \"$cpu_num\""
    return 1
  fi

  use_c=$(( 10#$percent * 10#$cpu_num / 100 ))
  # Integer division rounds small requests down to zero workers; always
  # run at least one.
  if (( use_c < 1 )); then
    use_c=1
  fi

  # Log the command that is really executed (worker count, not percent).
  log INF "use \"stress -c $use_c -t $duration \" to use $use_c core!"
  # File redirection comes first so that 2>&1 sends stderr to the log too.
  stress -c "$use_c" -t "$duration" >> "$log_file" 2>&1 &
}
#############################################################
# Network disturbance testing with tc/netem, called from the main
# function.
# Arguments: $1 - netem operation (delay, loss, corrupt, reorder)
#            $2 - network device
#            $3 - value handed to the netem operation
#            $4 - how long to keep the rule, handed to sleep
# Outputs:   log entries; tc prints its own diagnostics
# Returns:   status of the final "tc qdisc del"; que_user exits the
#            script when the caller is not root
#############################################################
net_com()
{
  local mode=${1-}
  local dev=${2-}
  local value=${3-}
  local hold=${4-}
  local complement

  que_user
  log DEB "do net operate testing... "
  if [[ $mode == "reorder" ]]; then
    # awk copes with fractional values such as 0.1, which expr rejects.
    # NOTE(review): 1 - value is kept from the original expression and is
    # passed to netem as the argument after the reorder value - confirm
    # that this is the intended figure.
    complement=$(LC_ALL=C awk -v p="$value" 'BEGIN { print 1 - p }')
    # "replace" creates the netem qdisc when the device has none yet;
    # "change" alone fails in that case.
    tc qdisc replace dev "$dev" root netem delay 10ms
    tc qdisc change dev "$dev" root netem delay 10ms "$mode" "$value" "$complement"
  else
    log DEB "tc qdisc add dev $dev root netem $mode $value"
    tc qdisc add dev "$dev" root netem "$mode" "$value"
  fi
  sleep "$hold"
  log INF "restore net, tc qdisc del dev $dev root "
  tc qdisc del dev "$dev" root
}
# Run "sudo if<action> <device>", retrying once on failure, and wait for
# the hold time only when one of the attempts succeeded.
# Arguments: $1 - "down" or "up", $2 - device, $3 - hold time for sleep
_net_flash_step()
{
  local action=$1
  local dev=$2
  local hold=$3

  log INF "do $action $dev operation!"
  if ! sudo "if${action}" "$dev"; then
    log ERR "$action $dev failed! retry..."
    if ! sudo "if${action}" "$dev"; then
      log ERR "$action $dev failed again, skip waiting"
      return 1
    fi
  fi
  log INF "$action $dev successful"
  sleep "$hold"
}

#############################################################
# Flap a network interface: take it down for a while, bring it back
# up, wait 10 seconds, and repeat.
# Arguments: $1 - network device
#            $2 - how long the device stays down, handed to sleep
#            $3 - number of down/up rounds (optional, default 4)
# Outputs:   log entries; ifdown/ifup print their own diagnostics
#############################################################
net_flash()
{
  local opnet=${1-}
  local stoptime=${2-}
  local rounds=${3:-4}
  local i

  # Fall back to the default when the round count is not a number.
  [[ $rounds =~ ^[0-9]+$ ]] || rounds=4

  for (( i = 0; i < 10#$rounds; i++ )); do
    # A retry that succeeds now gets the same hold time as a first-try
    # success, so the interface really stays down for $stoptime.
    _net_flash_step down "$opnet" "$stoptime"
    _net_flash_step up "$opnet" 10
  done
}
################################################################
# Kill osd or mon processes; called from the main function.
################################################################

################################################################
# Repeatedly kill every process whose ps line contains "ceph-mon".
# Globals:   log_file (read)
# Arguments: $1 - pause between rounds, handed to sleep
# Outputs:   log entries; kill output is appended to $log_file
# Returns:   1 when the pause is not a valid sleep interval
################################################################
kill_mon()
{
  local interval=${1-}
  local round
  local -a pids

  # Without a usable pause the loop would spin through all rounds at
  # full speed.
  if ! [[ $interval =~ ^[0-9]+([.][0-9]+)?[smhd]?$ ]]; then
    log ERR "kill_mon: invalid interval \"$interval\""
    return 1
  fi

  for (( round = 1; round <= 100000; round++ )); do
    log INF "excute cmd :ps -aux |grep \"ceph-mon\"|grep -v \"grep\" |awk \'\{print \$2\}\'"
    mapfile -t pids < <(ps -aux | grep "ceph-mon" | grep -v "grep" | awk '{print $2}')
    log INF "cmd result is: ${pids[*]}"
    # Test the PID list itself: "echo $pid | wc -l" reports 1 even when
    # nothing was found, which ran kill without any PID.
    if (( ${#pids[@]} > 0 )); then
      log INF "kill -9 ${pids[*]}"
      # File redirection first so that 2>&1 puts stderr in the log too.
      sudo kill -9 "${pids[@]}" >> "$log_file" 2>&1
    fi
    sleep "$interval"
    # Logged before the counter advances so the number matches the round.
    log DEB "$round times kill mon process has done!"
  done
}
################################################################
# Repeatedly kill the processes whose ps line contains "id <N>".
# Arguments: $1 - osd id (non-negative integer)
#            $2 - pause between rounds, handed to sleep
# Outputs:   log entries; kill output is appended to kill_osd.log
# Returns:   1 when the id or the pause is not valid
################################################################
kill_osd()
{
  local osd_id=${1-}
  local interval=${2-}
  local round
  local -a pids

  # An empty id would turn the pattern into "id " and match far more
  # processes than intended.
  if ! [[ $osd_id =~ ^[0-9]+$ ]]; then
    log ERR "kill_osd: invalid osd id \"$osd_id\""
    return 1
  fi
  if ! [[ $interval =~ ^[0-9]+([.][0-9]+)?[smhd]?$ ]]; then
    log ERR "kill_osd: invalid interval \"$interval\""
    return 1
  fi

  for (( round = 1; round <= 100000; round++ )); do
    # The id must be followed by whitespace or end of line; a bare
    # "id 1" pattern also hit ids 10, 11, ...
    mapfile -t pids < <(ps -aux \
      | grep -E -- "id ${osd_id}([[:space:]]|\$)" \
      | grep -v "grep" \
      | awk '{print $2}')
    if (( ${#pids[@]} > 0 )); then
      log INF "kill -9 ${pids[*]}"
      # File redirection first so that 2>&1 puts stderr in the file too.
      sudo kill -9 "${pids[@]}" >> kill_osd.log 2>&1
    fi
    sleep "$interval"
    # Logged before the counter advances so the number matches the round.
    log DEB "$round times kill osd process has done!"
  done
}
################################################################
# Memory usage testing, called from the main function.
# update: 2019-3-22 pansaky
#
# Starts "stress" memory workers in the background, one worker of 1G
# per requested G.
# Globals:   log_file (read)
# Arguments: $1 - number of G to occupy (non-negative integer)
#            $2 - run time, used for both --vm-hang and --timeout
# Outputs:   a warning on stdout when the request exceeds the "free"
#            column of "free -g"; stress stdout and stderr are appended
#            to $log_file
# Returns:   1 when the size is not an integer, otherwise 0
################################################################
mem_op()
{
  local size_g=${1-}
  local duration=${2-}
  local free_mem

  if ! [[ $size_g =~ ^[0-9]+$ ]]; then
    log ERR "mem_op: invalid size \"$size_g\""
    return 1
  fi

  # Fourth field of the "Mem" line of "free -g".
  free_mem=$(free -g | awk '/Mem/ {print $4}')
  # Compare only when free reported a number, so a failed lookup does
  # not produce a test error.
  if [[ $free_mem =~ ^[0-9]+$ ]] && (( 10#$size_g > 10#$free_mem )); then
    echo "Will $size_g G mem from node, but it has $free_mem G free now!"
    log WAR "Will $size_g G mem from node, but it has $free_mem G free now!"
  fi

  log INF "excute: \" stress --vm $size_g --vm-bytes 1G --vm-hang $duration --timeout $duration \""
  # File redirection first so that 2>&1 puts stderr in the log too.
  stress --vm "$size_g" --vm-bytes 1G --vm-hang "$duration" --timeout "$duration" >> "$log_file" 2>&1 &
}
################################################################
# the main function of this script
################################################################
################################################################
# Print the usage text.
# Arguments: the command-line arguments; when the first one is not
#            -h or --help they are reported as unknown
# Outputs:   usage text on stdout; an ERR log entry only for unknown
#            arguments
################################################################
err_help()
{
  local unknown=0

  if [[ ${1-} != "-h" && ${1-} != "--help" ]]; then
    unknown=1
    echo "UNKNOW args: $* "
    echo " -h|--help to get help info"
  fi

  printf '%s\n' \
    "Usage: " \
    " -m|--mem 30 1000 to use 30G mem in 1000s" \
    " -c|--cpu 30 1000 to use 30% cpu in 1000s" \
    " -d|--disk 30 1000 to use 30G disk in 1000s" \
    " -o|--osd 0 300 to kill osd.0 per 300s" \
    " -k|--mon 1000 to kill mon process per 1000s" \
    " -n|--net delay|loss|corrupt|reorder eth0 0.1 36000 to delay|loss|corrupt|reorder the eth0 0.1 datapack last 10h" \
    " -n|--net flash eth0 600 to flash eth0 per 600s"

  # A plain help request is not an error, so it is no longer logged as one.
  if (( unknown )); then
    log ERR "UNKNOW args: $* "
  fi
}
################################################################
# Entry point: run the operation selected by the first argument.
# Arguments: $1 - option (-m, -c, -d, -o, -k, -n, -h or long form)
#            $2.. - operands of that option (see err_help)
# Outputs:   a final success or failure line on stdout and in the log
# Returns:   status of the selected operation; exits with 1 on
#            unknown arguments
################################################################
main()
{
  local rc=0

  log INF "==============================================="
  log INF "Start the new times!"

  if [[ ${1-} == "-h" || ${1-} == "--help" ]]; then
    # Showing the usage text needs neither root nor stress. The flag is
    # passed on so err_help does not report it as unknown.
    err_help "$1"
  else
    que_user
    arg_proc
    get_stress
    case "${1-}" in
      -m|--mem)
        log DEB "DO mem operation!"
        mem_op "${2-}" "${3-}"
        rc=$?
        ;;
      -c|--cpu)
        log DEB "DO cpu operation!"
        cpu_op "${2-}" "${3-}"
        rc=$?
        ;;
      -d|--disk)
        echo "disk operate is not done!"
        log DEB "DO do do!"
        ;;
      -o|--osd)
        log DEB "DO osd operation!"
        kill_osd "${2-}" "${3-}"
        rc=$?
        ;;
      -k|--mon)
        log DEB "DO mon operation!"
        kill_mon "${2-}"
        rc=$?
        ;;
      -n|--net)
        case "${2-}" in
          "delay"|"loss"|"corrupt"|"reorder")
            net_com "$2" "${3-}" "${4-}" "${5-}"
            rc=$?
            ;;
          "flash")
            net_flash "${3-}" "${4-}"
            rc=$?
            ;;
          *)
            echo "wrong args for net operation! termination..."
            log ERR "wrong args for net operation! termination process..."
            err_help "$@"
            exit 1
            ;;
        esac
        ;;
      *)
        # Hand the offending arguments over so they show up in the report.
        err_help "$@"
        exit 1
        ;;
    esac
  fi

  # Report the status of the operation itself; "$?" read after an echo
  # is always the status of that echo.
  if (( rc == 0 )); then
    echo "Excute script Sucessful!"
    log INF "Excute script Sucessful! end with exit code $rc"
  else
    echo "Excute script failed with exit code $rc"
    log ERR "Excute script failed! end with exit code $rc"
  fi
  return "$rc"
}
# Pass every argument through unchanged; unquoted $1..$5 word-split the
# values and dropped anything after the fifth.
main "$@"