[Linux ☓ Nvidia] ファンをCurve Controlする


環境

Linux Ubuntu bionic xfce

流れ

  1. スクリプトを用意する
  2. /usr/share/applications に desktop ファイルを作成
  3. 2で作成した desktop ファイルを ~/.config/autostart へコピーする
  4. reboot or logout

1. スクリプトを用意する

ファン制御ファイル

/opt/fan-curve-control/exec.sh
#!/bin/sh

# prf ARGS...
# Print all arguments as one line: they are joined with the first character
# of IFS (a space by default) and handed as data to a fixed "%s\n" format,
# so "%" and backslashes in the text are printed literally.
prf() {
    printf '%s\n' "$*"
}

# ---- Global state (one variable per line) -----------------------------------
# Script location and command-line settings
z=$0                       # path that is resolved to find the script directory
fname=""                   # file name component found during that resolution
CDPATH=""                  # emptied before the script calls "cd"
display=""                 # extra arguments appended to every $gpu_cmd call
sleep_override=""          # value given with -s, if any
debug="0"                  # 1 enables the per-poll status line
gpu_cmd="nvidia-settings"  # command used to query and set GPU attributes

# Detected hardware
num_gpus="0"
num_fans="0"
num_gpus_loop="0"          # highest GPU index used by loops
num_fans_loop="0"          # highest fan index used by loops

# Values derived from the configured curves
max_t="0"
max_t2="0"
fcurve_len="0"
fcurve_len2="0"
check_diff1="0"
check_diff2="0"

# Working values for the fan currently being processed
mnt="0"                    # minimum temperature of the active curve
mxt="0"                    # maximum temperature of the active curve
ot="0"                     # temperature recorded at the last update
cur_t="0"                  # latest temperature reading
tdiff="0"                  # absolute difference between cur_t and ot
new_spd="0"
cur_spd="0"
otl="-1"                   # curve index used at the last lookup, -1 = none

# Space-separated per-fan lists; further entries are appended after detection
old_t="200"
old_s="0"

# Help text shown by -h. The command substitution drops the here-document's
# final newline, so $usage ends right after the last option line.
usage=$(cat <<EOF
Usage: $(basename "$0") [OPTION]...

where:
-c  [ARG] configuration file (default: $PWD/config)
-d  [ARG] display device string (e.g. ":0", "CRT-0"), defaults to auto
-D  run in daemon mode (background process), using sh
-h  show this help text
-l  enable logging to stdout
-s  [ARG] set the sleep time (in seconds)
-v  show the current version of this script
EOF
)

# Work out the directory that really contains this script and use the file
# "config" in that directory as the default configuration file.

# Remove any alias or shell function called "command" (errors are discarded).
# The leading backslashes stop unalias/unset themselves from being
# alias-expanded. Presumably this is so the "command ..." calls below run the
# real utilities.
{ \unalias command; \unset -f command; } >/dev/null 2>&1
# Under zsh, switch on its POSIX_BUILTINS option.
[ -n "$ZSH_VERSION" ] && options[POSIX_BUILTINS]=on
# Follow $z (initially $0) through any chain of symbolic links.
while true; do
    # Give up when $z is neither a symlink nor an existing path.
    [ -L "$z" ] || [ -e "$z" ] || { prf "'$z' is invalid" >&2; exit 1; }
    # Move into the directory part of $z; the exit status of cd is not checked.
    command cd "$(command dirname -- "$z")"
    # Keep only the last path component; a result of "/" is treated as empty.
    fname=$(command basename -- "$z"); [ "$fname" = '/' ] && fname=''
    if [ -L "$fname" ]; then
        # Still a symlink: take everything after the first " -> " in the
        # "ls -l" output as the link target and resolve that next.
        z=$(command ls -l "$fname"); z=${z#* -> }; continue
    fi; break
# Physical (symlink-free) path of the directory we ended up in.
done; conf_file=$(command pwd -P)
# Turn that directory plus $fname back into one path.
if [ "$fname" = '.' ]; then
    conf_file=${conf_file%/}
elif [ "$fname" = '..' ]; then
    conf_file=$(command dirname -- "${conf_file}")
else
    conf_file=${conf_file%/}/$fname
fi
# Swap the last component for "config": the default configuration file.
conf_file=$(dirname -- "$conf_file")"/config"

# parse_options ARGS...
# Parse the command-line options and update the matching globals.
#   -c FILE  conf_file        -d DISP  display ("-c DISP")
#   -l       debug=1          -s SECS  sleep_override
#   -x       gpu_cmd is switched to the ../nssim/nssim wrapper
#   -h / -v  print the help text / version and exit 0
#   -D       start a background copy of this script with nohup and exit 0.
#            The copy is started without arguments, so other options on the
#            same command line are not passed on.
# A missing option argument or an unknown option is reported on stderr and
# ends the script with status 1.
parse_options() {
    OPTIND=1    # start from the first argument on every call
    while getopts ":c:d:Dhls:vx" opt; do
        case "$opt" in
            c) conf_file="$OPTARG" ;;
            d) display="-c $OPTARG" ;;
            D)
                # The working directory is the script's own directory by now
                # and $fname holds the script's file name, so the script can
                # be installed under any name; temp.sh is only the fallback.
                nohup sh "${fname:-temp.sh}" >/dev/null 2>&1 &
                exit 0
                ;;
            h) prf "$usage"; exit 0 ;;
            l) debug="1" ;;
            s) sleep_override="$OPTARG" ;;
            v) prf "Version 18"; exit 0 ;;
            x) gpu_cmd="../nssim/nssim nvidia-settings" ;;
            :) prf "Option -$OPTARG requires an argument" >&2; exit 1 ;;
            *) prf "Invalid option: -$OPTARG" >&2; exit 1 ;;
        esac
    done
}
parse_options "$@"

# Start-up banner: a blank line, a three-line boxed title, another blank line.
banner="
################################################################################
#          nan0s7's script for automatically managing GPU fan speed            #
################################################################################
"
prf "$banner"
# FUNCTIONS THAT REQUIRE CERTAIN DEPENDENCIES TO BE MET
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# DEPENDS: PROCPS
# kill_already_running
# If pgrep reports more than one process matching this script's name, send
# the default kill signal to the oldest match and print its PID.
# The name is taken from $0 rather than a fixed "temp.sh", so the check
# follows the script when it is installed under another name.
# Presumably the count must exceed 1 because the running instance is itself
# one of the matches.
kill_already_running() {
    script_name="${0##*/}"
    tmp="$(pgrep -c -- "$script_name")"
    # No usable count (pgrep missing or failed): nothing to do.
    case "$tmp" in
        ''|*[!0-9]*) return 0 ;;
    esac
    if [ "$tmp" -gt "1" ]; then
        process_pid="$(pgrep -o -- "$script_name")"
        [ -n "$process_pid" ] || return 0
        kill "$process_pid"; prf "Killed $process_pid"
    fi
}
# DEPENDS: NVIDIA-SETTINGS
# get_temp
# Query GPUCoreTemp of GPU number $gpu and store the command's output in cur_t.
# $gpu_cmd and $display are left unquoted on purpose: each may hold several
# words (a wrapper plus the command, "-c DISPLAY"). The attribute word is
# quoted so its "[gpu:N]" brackets are never treated as a filename pattern.
get_temp() {
    cur_t="$($gpu_cmd "-q=[gpu:$gpu]/GPUCoreTemp" -t $display)"
}
# get_query TARGET
# Run "$gpu_cmd -q TARGET" (plus any $display arguments) and print what it
# returned through prf.
get_query() {
    query_result="$($gpu_cmd -q "$1" $display)"
    prf "$query_result"
}
# set_fan_control LAST_GPU STATE
# Assign STATE to GPUFanControlState on every GPU from index 0 up to and
# including LAST_GPU. The script passes 1 before it starts driving the fans
# and 0 when handing control back ("auto mode").
# Uses the global i as its counter. The attribute word is quoted so that its
# "[gpu:N]" brackets are never treated as a filename pattern.
set_fan_control() {
    i=0
    while [ "$i" -le "$1" ]; do
        $gpu_cmd -a "[gpu:$i]/GPUFanControlState=$2" $display
        i=$((i+1))
    done
}
# set_speed
# Assign $cur_spd to GPUTargetFanSpeed of fan number $fan.
# The attribute word is quoted so that its "[fan:N]" brackets are never
# treated as a filename pattern; $gpu_cmd and $display stay unquoted because
# each may hold several words.
set_speed() {
    $gpu_cmd -a "[fan:$fan]/GPUTargetFanSpeed=$cur_spd" $display
}
# finish
# Interrupt handler: write fan-control state 0 to every GPU up to index
# $num_gpus_loop, report it, and end the script with status 0.
finish() {
    set_fan_control "$num_gpus_loop" "0"
    prf "Fan control set back to auto mode"
    exit 0
}

# Run finish when the script receives SIGINT (e.g. Ctrl-C).
trap " finish" INT
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# echo_info
# Print one status line with the current temperature, speed and curve values.
# The line is built in the global e and starts with a single space.
echo_info() {
    e=" t=$cur_t ot=$ot td=$tdiff s=$sleep_time"
    e="$e gpu=$gpu fan=$fan cd=$chd"
    e="$e nsp=$new_spd osp=$cur_spd"
    e="$e maxt=$mxt mint=$mnt otl=$otl"
    prf "$e"
}
# arr_size
# Store in arr_len the number of whitespace-separated items in $arr.
arr_size() {
    # Split $arr into this function's own positional parameters and count
    # them; the caller's parameters are not affected.
    set -- $arr
    arr_len=$#
}
# re_elem
# Store in elem the item of the space-separated list $arr at zero-based
# index $n. An index past the end yields the last item; an empty list
# yields 0. The global i ends up as the index reached.
re_elem() {
    set -- $arr
    elem=0
    i=0
    while [ "$#" -gt 0 ]; do
        elem=$1
        # Stop as soon as the wanted index is reached.
        [ "$i" -ne "$n" ] || break
        i=$((i+1))
        shift
    done
}
# loop_cmds
# Poll the temperature for the current fan and, when it has moved at least
# $chd degrees away from the last recorded value $ot, pick and apply the
# matching fan speed.
#   Reads:  fan, ot, cur_spd, chd, mnt, mxt, tc, fc, debug
#   Writes: cur_t, tdiff, new_spd, cur_spd, otl, old_t, old_s
#           (scratch: tl, arr, n, i, tmp, elem)
# Speed selection:
#   cur_t <  mnt         -> 0
#   mnt <= cur_t <  mxt  -> the entry of fc at the index of the first
#                           temperature in tc that is >= cur_t
#   cur_t >= mxt         -> 100
loop_cmds() {
    get_temp
    if [ "$cur_t" -ne "$ot" ]; then
        # Calculate difference and make sure it's positive
        if [ "$cur_t" -le "$ot" ]; then
            tdiff="$((ot-cur_t))"
        else
            tdiff="$((cur_t-ot))"
        fi
        if [ "$tdiff" -ge "$chd" ]; then
            if [ "$cur_t" -lt "$mnt" ]; then
                new_spd="0"; otl="-1"
            elif [ "$cur_t" -lt "$mxt" ]; then
                # Find the index of the first curve temperature >= cur_t
                tl=0
                for arr_t in $tc; do
                    if [ "$cur_t" -le "$arr_t" ]; then
                        break
                    else
                        tl=$((tl+1))
                    fi
                done
                # Always take the speed from the active curve. otl and
                # new_spd are single globals shared by every fan, so skipping
                # the lookup when tl equals otl could reuse a speed picked for
                # another fan's curve, or keep 100 after the temperature fell
                # back below mxt into the same band.
                arr="$fc"; n="$tl"; re_elem
                new_spd="$elem"; otl="$tl"
            else
                new_spd="100"
            fi
            if [ "$new_spd" -ne "$cur_spd" ]; then
                cur_spd="$new_spd"
                set_speed
                # Rebuild old_s with this fan's entry replaced by cur_spd
                i=0
                tmp="$old_s"; old_s=""
                for elem in $tmp; do
                    if [ "$i" -ne "$fan" ]; then
                        old_s="$old_s $elem"
                    else
                        old_s="$old_s $cur_spd"
                    fi
                    i=$((i+1))
                done
            fi
            # Rebuild old_t with this fan's entry replaced by cur_t
            i=0
            tmp="$old_t"; old_t=""
            for elem in $tmp; do
                if [ "$i" -ne "$fan" ]; then
                    old_t="$old_t $elem"
                else
                    old_t="$old_t $cur_t"
                fi
                i=$((i+1))
            done
            tdiff="0"
        fi
    fi
    if [ "$debug" -eq "1" ]; then
        echo_info
    fi
}
# set_stuff
# Load the working values for fan number $fan:
#   gpu  <- entry $fan of fan2gpu
#   chd, mnt, mxt, tc, fc <- first-curve values when entry $fan of
#   which_curve equals 1, second-curve values otherwise.
set_stuff() {
    arr="$fan2gpu"
    n="$fan"
    re_elem
    gpu="$elem"

    arr="$which_curve"
    n="$fan"
    re_elem
    tmp="$elem"

    if ! [ "$tmp" -eq "1" ]; then
        # second curve
        chd="$check_diff2"
        mnt="$min_t2"
        mxt="$max_t2"
        tc="$tcurve2"
        fc="$fcurve2"
    else
        # first curve
        chd="$check_diff1"
        mnt="$min_t"
        mxt="$max_t"
        tc="$tcurve"
        fc="$fcurve"
    fi
}

# Stop an older running copy of this script, if there is one
kill_already_running

# Load the config file
if ! [ -f "$conf_file" ]; then
    prf "Config file not found." >&2; exit 1
fi
# The "." command looks a name without a slash up in PATH instead of the
# current directory, so a bare file name given with -c is sourced as ./NAME.
case "$conf_file" in
    */*) . "$conf_file" ;;
    *)   . "./$conf_file" ;;
esac
prf "Configuration file: $conf_file"

# A sleep time given with -s replaces the one from the config file
if [ -n "$sleep_override" ]; then sleep_time="$sleep_override"; fi

# Check for any user errors in config file

# Each fan-speed list must have as many entries as its temperature list
arr="$fcurve"
arr_size
size1="$arr_len"
arr="$tcurve"
arr_size
size2="$arr_len"
[ "$size1" -eq "$size2" ] || {
    prf "fcurve and tcurve don't match up!"
    exit 1
}

arr="$fcurve2"
arr_size
size1="$arr_len"
arr="$tcurve2"
arr_size
size2="$arr_len"
[ "$size1" -eq "$size2" ] || {
    prf "fcurve2 and tcurve2 don't match up!"
    exit 1
}

# The minimum temperature must be below the first temperature of its curve
arr="$tcurve"
n="0"
re_elem
[ "$min_t" -ge "$elem" ] && {
    prf "min_t is greater than the first value in the tcurve!"
    exit 1
}

arr="$tcurve2"
n="0"
re_elem
[ "$min_t2" -ge "$elem" ] && {
    prf "min_t2 is greater than the first value in the tcurve2!"
    exit 1
}

# Calculate some more values

# max_t / max_t2: asking re_elem for index "list length" is one past the end,
# which makes it hand back the last entry of the temperature list
arr="$tcurve"
arr_size
arr="$tcurve"
n="$arr_len"
re_elem
max_t="$elem"

arr="$tcurve2"
arr_size
arr="$tcurve2"
n="$arr_len"
re_elem
max_t2="$elem"

# fcurve_len / fcurve_len2: index of the last entry of each fan-speed list
arr="$fcurve"
arr_size
fcurve_len="$((arr_len-1))"

arr="$fcurve2"
arr_size
fcurve_len2="$((arr_len-1))"

# Get the system's GPU configuration
# NOTE(review): the parsing below depends on the wording of the
# "nvidia-settings -q fans" / "-q gpus" output ("N Fan on ...",
# "N Fans on ...") - confirm against the installed nvidia-settings version.

# Cut the query output at its last " Fan on"; what precedes it is kept.
# If the output has no " Fan on", it is left unchanged.
num_fans=$(get_query "fans"); num_fans="${num_fans%* Fan on*}"
if [ -z "$num_fans" ]; then
    prf "No Fans detected"; exit 1
elif [ "${#num_fans}" -gt "2" ]; then
    # More than two characters left: retry the cut with the plural " Fans on"
    # and set the highest fan index. Otherwise num_fans_loop keeps the value
    # it already had.
    num_fans="${num_fans%* Fans on*}"
    num_fans_loop="$((num_fans-1))"
fi
prf "Number of Fans detected: $num_fans"
# Same procedure for the GPU count.
num_gpus=$(get_query "gpus"); num_gpus="${num_gpus%* GPU on*}"
if [ -z "$num_gpus" ]; then
    prf "No GPUs detected"; exit 1
elif [ "${#num_gpus}" -gt "2" ]; then
    num_gpus="${num_gpus%* GPUs on*}"
    num_gpus_loop="$((num_gpus-1))"
fi
prf "Number of GPUs detected: $num_gpus"

# Append one " 0" entry to the per-fan lists old_t and old_s for every index
# below num_fans_loop.
i=0
while [ "$i" -lt "$num_fans_loop" ]; do
    old_t="$old_t 0"
    old_s="$old_s 0"
    i=$((i+1))
done

# auto_check_diff TCURVE SLEEP_TIME
# Print the automatically derived check threshold for one temperature curve:
#   (sum of the steps between neighbouring temperatures / (entries - 2))
#   - SLEEP_TIME
# The divisor is never allowed to drop below 1, so a curve with only two
# entries no longer triggers a division by zero.
auto_check_diff() {
    acd_sleep=$2
    # Split the curve into this function's positional parameters
    set -- $1
    acd_div=$(($# - 2))
    [ "$acd_div" -ge 1 ] || acd_div=1
    acd_sum=0
    while [ "$#" -gt 1 ]; do
        acd_sum=$((acd_sum + $2 - $1))
        shift
    done
    printf '%s\n' "$(( (acd_sum / acd_div) - acd_sleep ))"
}

# force_check=0 means: derive one threshold per curve from its temperatures;
# any other value is used directly for both curves.
if [ "$force_check" -eq "0" ]; then
    check_diff1="$(auto_check_diff "$tcurve" "$sleep_time")"
    check_diff2="$(auto_check_diff "$tcurve2" "$sleep_time")"
else
    check_diff1="$force_check"; check_diff2="$force_check"
fi

# Write fan-control state 1 to every detected GPU before driving the fans
set_fan_control "$num_gpus_loop" "1"

# service_fan
# Fetch the last recorded temperature and speed of fan $fan from the per-fan
# lists, then run one polling step for it.
service_fan() {
    arr="$old_t"; n="$fan"; re_elem
    ot="$elem"
    arr="$old_s"; n="$fan"; re_elem
    cur_spd="$elem"
    loop_cmds
}

if [ "$num_gpus" -eq "1" ] && [ "$num_fans" -eq "1" ]; then
    # Single GPU with a single fan: the fan settings are loaded once
    prf "Started process for 1 GPU and 1 Fan"
    fan="$default_fan"
    set_stuff
    while :; do
        service_fan
        sleep "$sleep_time"
    done
else
    # Otherwise: visit every fan index in turn, then sleep
    prf "Started process for n-GPUs and n-Fans"
    while :; do
        fan=0
        while [ "$fan" -le "$num_fans_loop" ]; do
            set_stuff
            service_fan
            fan=$((fan+1))
        done
        sleep "$sleep_time"
    done
fi

設定ファイル

ファンの回転数と温度の相関関係はお好みで設定する。

/opt/fan-curve-control/config
# This file is sourced by the fan control script; every setting is a plain
# shell variable assignment.

# min_t is the temperature threshold of the first curve: any temperature
#  below it sets the fan speed to 0%, and temperatures from min_t up to the
#  first value in tcurve use the first speed in fcurve (default of 25%)
# min_t2 is the same threshold for the second fan speed and temperature
#  arrays, so there is no need to change it unless you're using the second
#  curve
min_t="25"
min_t2="25"

# How many seconds the script should wait until checking for a change in temps
#  (the -s command-line option overrides this value)
sleep_time="7"

# By default it's set up so that when the temp is less than or equal to 35
#  degrees, the fan speed will be set to 25%. Next, if the temp is between 36
#  and 45, the fan speed should be set to 40%, etc.
# The last temperature value is the limit: at or above it the fan speed is
#  set to 100%
# You can make the arrays as big or as small as you require, as long as they
#  both end up being the same size
fcurve="25 40 55 70 85" # fan speeds
tcurve="35 45 55 65 75" # temperatures

# This value is the temperature difference needed before the script picks a
#  new speed to apply. The default of zero means the script calculates the
#  value automatically from the temperature curves and sleep_time
force_check="0"

# These two arrays are for GPUs that have a secondary fan that you may wish
#  to control separately, especially if it is water-cooled.
fcurve2="15 30 45 60 75"
tcurve2="35 45 55 65 75"

# First number in array is fan 0, second number is fan 1, etc. If the number
#  is 1, the script uses the first curve for that fan; any other number
#  (normally 2) selects the second curve.
which_curve="1 2 1 2"

# Only used when exactly one GPU and one fan are detected: the index of the
#  fan to control. Otherwise there is no need to change this setting.
default_fan="0"

# Similar to which_curve, but instead lets the script know which of the GPUs
#  has which fan. i.e. element 0 in the array being set to 0 means that fan 0
#  is assigned to GPU 0, element 1 is 0 too, meaning fan 1 is on GPU 0 as well
fan2gpu="0 0 1 1"
実行権限を付与する
sudo chmod 755 /opt/fan-curve-control/exec.sh
sudo chmod 755 /opt/fan-curve-control/config

2. /usr/share/applications に fan-curve-control.desktop を作成

desktopファイルを作成
sudo vi /usr/share/applications/fan-curve-control.desktop
/usr/share/applications/fan-curve-control.desktop
[Desktop Entry]
Name=nfancurve
Type=Application
Path=/opt/fan-curve-control/
Exec=/opt/fan-curve-control/exec.sh

3. 2で作成した fan-curve-control.desktop を ~/.config/autostart へコピーする

コピー
cp /usr/share/applications/fan-curve-control.desktop ~/.config/autostart
確認
ls ~/.config/autostart 

4. reboot or logout

ファン制御できているか確認

まとめ

解決までに crontab、rc.local(systemd)、autostart と3つのやり方を経験しました。

crontabは癖があるので、bootの仕組みをもう少し勉強する必要があると思う。

rc.local は systemd の動作原理を理解しないと触っちゃいけないと思った。

この記事で行ったautostartはbootしてシステムが完全に立ち上がる最後の段階(99%)のところで実行される感じがする(調べてない)ので、serviceの依存関係等を気にしなくていいので自動起動に挑戦する初級者の人はautostartを使うのが良いと思う。