Ceph障害臨界値テスト

7352 ワード

#! /bin/bash
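#############################################################
#   Processes the input arguments (arg_proc runs them through "getopt").
#
#   The first argument selects the operation, for example:
#   -m | --mem <n> <timeout>   : memory operation, use n GB of memory for <timeout> seconds
#   -c | --cpu <n> <timeout>   : CPU operation, use n percent of the CPU for <timeout> seconds
#   -n | --net <mode> ...      : network operation (delay|loss|corrupt|reorder|flash)
#   Run the script with -h | --help for the complete usage text.
#############################################################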
# Path of the log file, relative to the current working directory.
log_file='./ltrep.log'
 
#######################################
# Append one timestamped record to the log file.
# Globals:   log_file (read) - path of the file the record is appended to
# Arguments: $1 - level tag (e.g. INF, DEB, WAR, ERR)
#            $2 - message text
# Outputs:   one line appended to "$log_file"; records whose level is
#            "ERR" or "error" are wrapped in ANSI red escape codes.
#######################################
log() {
  local level=$1
  local msg=$2
  local stamp
  stamp=$(date "+%Y-%m-%d %H:%M:%S")

  # [[ ]] with quoted operands: an empty or multi-word level must not turn
  # the comparison into a syntax error.
  if [[ "$level" == "ERR" || "$level" == "error" ]]; then
    # Quoted target: a log path containing spaces is a single file name.
    echo -e "[${stamp}] \033[31m = $level =  : $msg \033[0m " >> "$log_file"
  else
    echo -e "[${stamp}]  = $level =  : $msg" >> "$log_file"
  fi
}
 
#######################################
# Normalise the given arguments with getopt(1) and log the result.
# Globals:   ARGS (written) - getopt's normalised argument string
# Arguments: "$@" - the arguments to parse
# Outputs:   an error line on stdout when getopt rejects the arguments
# Returns:   exits the script with status 1 when getopt fails
#######################################
arg_proc() {
  # "--" separates getopt's own options from the parameters to parse.
  # Without it the first parameter was glued onto a stray "-"
  # (e.g. "-m" became "--m" and "--mem" became "---mem").
  if ! ARGS=$(getopt -o m:c:n:: --long cpu:,mem:,net:: -n "$0" -- "$@"); then
    echo "Other terminating at using \"getopt\"! "
    exit 1
  fi
  log INF "Current args is: ${ARGS}"
  # The normalised arguments are only logged; re-installing them as the
  # positional parameters is left disabled:
  #eval set -- "${ARGS}"
}
 
#######################################
# Verify that the "stress" command used by cpu_op/mem_op can be run.
# Arguments: none
# Outputs:   an installation hint on stdout and an ERR log record when
#            the command is missing
# Returns:   exits the script with status 1 when "stress" is not found
#######################################
get_stress() {
  # Check for the command itself instead of counting "rpm -qa | grep stress"
  # lines: that count had to be exactly 1, so a host that also had
  # "stress-ng" installed was rejected, while a host with only "stress-ng"
  # was accepted.
  if ! command -v stress >/dev/null 2>&1; then
    echo "The stress rpm is not setup, use \"yum install -y stress\" to setup stress rpm packet!"
    log ERR "The stress rpm is not setup, use \"yum install -y stress\" to setup stress rpm packet!"
    exit 1
  fi
  log INF "stress rpm is ok! continue next step."
}
 
#######################################
# Abort unless the script is executed by root.
# Globals:   user (written) - name reported by whoami
# Arguments: none
# Outputs:   an explanatory line on stdout when the user is not root
# Returns:   exits the script with status 1 when the user is not root
#######################################
que_user() {
  # "user" stays a global, as before, so code that reads it after this
  # call keeps working.
  user=$(whoami)

  # Quoted comparison: if whoami prints nothing the check now fails closed
  # instead of erroring out of the test and falling through as if root.
  if [[ "$user" != "root" ]]; then
    echo "Current user is $user you need to use \"root\" to excute this script!"
    exit 1
  fi
  log INF "Current user is $user "
}
#############################################################
#       this function for cpu usage testing
#       use it in main function
#############################################################
 
# Number of "physical id" lines in /proc/cpuinfo; cpu_op scales its worker
# count by this value.
# NOTE(review): presumably one such line per logical CPU on x86; the count
# is 0 where /proc/cpuinfo has no "physical id" field - confirm on targets.
cpu_num=$(grep "physical id" /proc/cpuinfo | wc -l)
 
#######################################
# Start CPU load in the background with "stress".
# Globals:   cpu_num (read)  - CPU count the percentage is applied to
#            log_file (read) - receives stdout and stderr of stress
# Arguments: $1 - percentage of cpu_num to load
#            $2 - duration in seconds (passed to "stress -t")
# Outputs:   stress output is appended to "$log_file"
#######################################
cpu_op() {
  local pct=$1
  local duration=$2
  local workers=$(( pct * cpu_num / 100 ))

  # Integer division rounds small requests down to 0 workers; use one
  # worker so a non-zero percentage still produces load.
  if (( pct > 0 && workers < 1 )); then
    workers=1
  fi

  # Log the command that is really executed (the worker count, not the
  # raw percentage).
  log INF "use \"stress -c $workers -t $duration \" to use $workers core!"

  # ">>file 2>&1" (in this order) sends both stdout and stderr to the log;
  # the previous "2>&1 >>file" left stderr on the terminal.
  stress -c "$workers" -t "$duration" >> "$log_file" 2>&1 &
}
 
#############################################################
#       this function for net testing
#       use it in main function
#############################################################
#######################################
# Apply a netem rule to a network device, keep it for a while, then
# remove the root qdisc again.
# Arguments: $1 - netem mode (main passes delay, loss, corrupt or reorder)
#            $2 - network device
#            $3 - value handed to the netem mode
#            $4 - seconds to keep the rule in place
#######################################
net_com() {
  local mode=$1
  local dev=$2
  local value=$3
  local hold=$4
  local -a reorder_args

  que_user
  log DEB "do net operate testing... "
  if [[ "$mode" == "reorder" ]]; then
    reorder_args=("$value")
    # The second reorder argument is "1 - value". expr cannot subtract
    # fractional numbers (e.g. 0.1), so awk does it; for a value that is
    # not a plain number the second argument is left out, which is what
    # the failing expr call produced before.
    if [[ "$value" =~ ^[0-9]*\.?[0-9]+$ ]]; then
      reorder_args+=("$(awk -v p="$value" 'BEGIN { print 1 - p }')")
    fi
    # "change" needs an existing netem qdisc, so create it with "add"
    # first and then switch it to the reorder settings.
    tc qdisc add dev "$dev" root netem delay 10ms
    tc qdisc change dev "$dev" root netem delay 10ms "$mode" "${reorder_args[@]}"
  else
    log DEB "tc qdisc add dev $dev root netem $mode $value"
    tc qdisc add dev "$dev" root netem "$mode" "$value"
  fi
  sleep "$hold"
  log INF "restore net, tc qdisc del dev $dev root "
  tc qdisc del dev "$dev" root
}
 
#######################################
# Take a network interface down and up again, four times in a row.
# Arguments: $1 - interface name
#            $2 - seconds to keep the interface down in each round
# Notes:     after a successful "ifup" the function waits 10 seconds.
#            Each ifdown/ifup is retried once when it fails.
#######################################
net_flash() {
  # Locals, so the caller's variables (in particular a loop counter named
  # "i") are not overwritten.
  local iface=$1
  local down_time=$2
  local round

  for ((round = 0; round < 4; round++)); do
    log INF "do down $iface operation!"
    if sudo ifdown "$iface"; then
      log INF "down $iface successful"
      sleep "$down_time"
    else
      log ERR "down $iface failed!  retry..."
      # Hold the link down for the requested time when the retry worked;
      # otherwise the interface would be brought straight back up.
      if sudo ifdown "$iface"; then
        sleep "$down_time"
      fi
    fi

    log INF "do up $iface operation!"
    if sudo ifup "$iface"; then
      log INF "up $iface successful"
      sleep 10
    else
      log ERR "up $iface failed!  retry..."
      if sudo ifup "$iface"; then
        sleep 10
      fi
    fi
  done
}
 
################################################################
#  kill osd or mon process script
#  use it in main function
################################################################
#######################################
# Repeatedly kill every process whose "ps -aux" line contains "ceph-mon".
# Globals:   log_file (read) - receives stdout and stderr of kill
# Arguments: $1 - seconds to sleep between two rounds
# Notes:     runs for 100000 rounds.
#######################################
kill_mon() {
  local interval=$1
  local round
  local -a pids

  for ((round = 1; round <= 100000; round++)); do
    log INF "excute cmd :ps -aux |grep \"ceph-mon\"|grep -v \"grep\" |awk \'\{print \$2\}\'"
    # One PID per array element; an empty result gives an empty array.
    mapfile -t pids < <(ps -aux | grep "ceph-mon" | grep -v "grep" | awk '{print $2}')
    log INF "cmd result is: ${pids[*]}"

    # Count the PIDs directly. "echo $pid | wc -l" was 1 even for an empty
    # result, so kill used to be invoked without any PID.
    if (( ${#pids[@]} > 0 )); then
      log INF "kill -9 ${pids[*]}"
      # ">>file 2>&1" so that kill's error output reaches the log as well.
      sudo kill -9 "${pids[@]}" >> "$log_file" 2>&1
    fi

    sleep "$interval"
    # Logged before the counter advances, so the first round reports 1.
    log DEB "$round times kill mon process has done!"
  done
}
 
#######################################
# Repeatedly kill every process whose "ps -aux" line contains "id <n>".
# Arguments: $1 - id to look for (main passes the osd number)
#            $2 - seconds to sleep between two rounds
# Outputs:   stdout and stderr of kill are appended to ./kill_osd.log
# Notes:     runs for 100000 rounds.
#######################################
kill_osd() {
  local osd_id=$1
  local interval=$2
  local round
  local -a pids

  for ((round = 1; round <= 100000; round++)); do
    # -w: match "id 1" only as a whole word, so id 1 no longer also
    # selects ids 10, 11, ... A single ps snapshot supplies both the PID
    # list and the count.
    mapfile -t pids < <(ps -aux | grep -w -- "id $osd_id" | grep -v "grep" | awk '{print $2}')

    if (( ${#pids[@]} > 0 )); then
      log INF "kill -9 ${pids[*]}"
      # ">>file 2>&1" so that kill's error output reaches the file as well.
      sudo kill -9 "${pids[@]}" >> kill_osd.log 2>&1
    fi

    sleep "$interval"
    # Logged before the counter advances, so the first round reports 1.
    log DEB "$round times kill osd process has done!"
  done
}
################################################################
#       mem used function
#       update: 2019-3-22  pansaky
################################################################
 
#######################################
# Start memory load in the background with "stress".
# Globals:   log_file (read) - receives stdout and stderr of stress
# Arguments: $1 - number of stress --vm workers, 1G each
#            $2 - seconds; used for both --vm-hang and --timeout
# Outputs:   a warning on stdout and in the log when $1 is larger than
#            the free memory (in GB) reported by "free -g"; the load is
#            started anyway.
#######################################
mem_op() {
  local size_gb=$1
  local duration=$2
  local free_mem
  # LC_ALL=C keeps the "Mem" row label untranslated; column 4 of that row
  # is the free amount.
  free_mem=$(LC_ALL=C free -g | awk '/^Mem/ {print $4}')

  # Compare only when a number was obtained, so a parsing failure cannot
  # turn into a test syntax error.
  if [[ "$free_mem" =~ ^[0-9]+$ ]] && (( size_gb > free_mem )); then
    echo "Will $size_gb G mem from node, but it has $free_mem G free now!"
    log WAR "Will $size_gb G mem from node, but it has $free_mem G free now!"
  fi

  log INF "excute: \" stress --vm $size_gb --vm-bytes 1G --vm-hang $duration --timeout $duration \""

  # ">>file 2>&1" (in this order) sends both stdout and stderr to the log;
  # the previous "2>&1 >>file" left stderr on the terminal.
  stress --vm "$size_gb" --vm-bytes 1G --vm-hang "$duration" --timeout "$duration" >> "$log_file" 2>&1 &
}
 
################################################################
# the main  function of this script
################################################################
#######################################
# Print the usage text; when the first argument is not -h/--help, also
# report the given arguments as unknown.
# Arguments: "$@" - the arguments the script was called with
# Outputs:   usage text on stdout; an ERR log record for unknown arguments
#######################################
err_help() {
  if [[ "$1" != "-h" && "$1" != "--help" ]]; then
    echo "UNKNOW  args: $* "
    echo "  -h|--help           to get help info"
    # Only a real argument error is logged; an explicit help request is
    # not an error and no longer produces an ERR record.
    log ERR "UNKNOW args: $* "
  fi
  echo "Usage: "
  echo "  -m|--mem 30 1000    to use 30G mem in 1000s"
  echo "  -c|--cpu 30 1000    to use 30% cpu in 1000s"
  echo "  -d|--disk 30 1000   to use 30G net in 1000s"
  echo "  -o|--osd 0 300      to kill osd.0 per 300s"
  echo "  -k|--mon  1000      to kill mon process per 1000s"
  echo "  -n|--net delay|loss|corrupt|reorder eth0 0.1 36000   to delay|loss|corrupt|reorder the eth0 0.1 datapack last 10h"
  echo "  -n|--net flash eth0 600   to flash eth0 per 600s"
}
 
 
#######################################
# Entry point: run the precondition checks, then dispatch on the first
# argument to the matching operation.
# Arguments: $1      - operation switch (-m, -c, -d, -o, -k, -n, -h and
#                      their long forms)
#            $2..$5  - operation-specific values, forwarded unchanged
# Outputs:   a success line on stdout when the operation returns
# Returns:   exits with status 1 for unknown operations or unknown
#            network sub-commands
#######################################
main() {
  log INF "==============================================="
  log INF "Start the new times!"
  que_user
  # Called without arguments, as before: its getopt specification only
  # knows -m/-c/-n, so handing it "$@" would reject -o, -k, -d and -h.
  arg_proc
  get_stress

  # Every value is forwarded quoted so that empty arguments or arguments
  # containing blanks keep their position.
  case "$1" in
    -m|--mem)
      log DEB "DO mem operation!"
      mem_op "$2" "$3"
      ;;

    -c|--cpu)
      log DEB "DO cpu operation!"
      cpu_op "$2" "$3"
      ;;

    -d|--disk)
      echo "disk operate is not done!"
      log DEB "DO  do   do!"
      ;;

    -o|--osd)
      log DEB "DO osd operation!"
      kill_osd "$2" "$3"
      ;;

    -k|--mon)
      log DEB "DO mon operation!"
      kill_mon "$2"
      ;;

    -h|--help)
      # Hand the arguments over so err_help can tell a help request from
      # an unknown option.
      err_help "$@"
      ;;

    -n|--net)
      case "$2" in
        "delay"|"loss"|"corrupt"|"reorder")
          net_com "$2" "$3" "$4" "$5"
          ;;
        "flash")
          net_flash "$3" "$4"
          ;;
        *)
          echo "wrong args for net operation! termination..."
          log ERR "wrong args for net operation! termination  process..."
          err_help "$@"
          exit 1
          ;;
      esac
      ;;

    *)
      # Show the caller which arguments were not understood.
      err_help "$@"
      exit 1
      ;;
  esac

  echo "Excute  script Sucessful!"
  log INF "Excute  script Sucessful! end with exit code $?"
}
 
# Forward every command-line argument unchanged; "$@" keeps empty
# arguments and arguments containing blanks in their original position.
main "$@"