VM起動監視サービス

仮想化ホスト側の監視スクリプトを整理した。
pingによる生存確認や、systemd-notifyによる起動完了通知を入れた結果、タイミング調整のためにあちこちに入れていたsleepはほとんど不要になった。
systemd timerを使えば無限ループじゃなくてone shotの塊として書くこともできそうだけど、とりあえず完成。


監視部本体

#!/bin/bash

# Refuse to run unless the effective user is root.
if [[ "$(/usr/bin/id -u)" != 0 ]]; then
    echo "$0: You must be root to run this script" >&2
    exit 1
fi

# Exactly two arguments are required: the domain name and the address.
# Say so on stderr instead of exiting without any hint.
if [[ ${#} -ne 2 ]]; then
    echo "Usage: $0 <domain> <address>" >&2
    exit 1
fi

# Seconds to sleep between two passes of the main loop.
POLLING_SLEEPTIME=120

# Load the shared helper functions used below (wait_boot_vm, wait_halt_vm,
# force_halt_vm).  Nothing works without them, so stop right away instead of
# entering the main loop with undefined commands.
if ! source /usr/local/sbin/vm-boot-halt; then
    echo "$0: cannot load /usr/local/sbin/vm-boot-halt" >&2
    exit 1
fi

domain=${1}   # first argument: domain name handed to the helpers
address=${2}  # second argument: address handed to the helpers

#######################################
# Stop the guest: first the graceful wait, then the forced halt.
# Globals:   domain, address (read)
# Arguments: none
#######################################
exit_service ()
{
    # The graceful phase runs in a subshell.  The helper reports failure
    # (for example a timeout) by calling "exit"; in the current shell that
    # would end the script before the forced halt below is ever reached.
    # The subshell's status is deliberately ignored: the forced halt runs
    # in every case.
    ( wait_halt_vm "${domain}" "${address}" )

    force_halt_vm "${domain}" "${address}"
}

# Run exit_service whenever this script exits.
trap exit_service EXIT

# Terminate an already running instance of the guest before starting.
exit_service

# Supervision loop: wait until the guest is booted, tell systemd that the
# service is ready, then sleep until the next pass.
while true
do
    #            domain      address
    wait_boot_vm "${domain}" "${address}"

    systemd-notify --ready

    sleep "${POLLING_SLEEPTIME}"
done

マジックナンバーを外から渡すようにして、共通化

共通部分(関数定義)

#!/bin/bash

# Maximum number of polling attempts made by wait_boot_vm / wait_halt_vm
# before they give up.  The original note says "same as sec"; the real
# duration depends on how long each ping (plus any sleep in the loop) takes.
BOOT_TIMEOUT_COUNT=300
HALT_TIMEOUT_COUNT=300

# Domain state names matched by the case statements in boot_vm, halt_vm and
# force_halt_vm:
#   "no state"
#   "running"
#   "idle"
#   "paused"
#   "in shutdown"
#   "shut off"
#   "crashed"
#   "pmsuspended"

# The state names above are matched in English, so force the C locale for
# every command started from here.  LANG alone is overridden by an inherited
# LC_ALL or LC_MESSAGES, hence LC_ALL is set as well.
export LANG=C
export LC_ALL=C

#######################################
# Move a domain towards the running state, depending on its current state.
# Arguments: $1 - domain name passed to virsh
# Outputs:   whatever virsh prints; "no state" / "illegal state" on stdout
#            for states that cannot be handled
# Returns:   0 when nothing had to be done or the virsh action succeeded.
#            On any failure the calling shell is terminated with "exit".
#######################################
boot_vm ()
{
    local domain=${1}
    local state rc

    state=$(virsh domstate "${domain}")
    rc=${?}
    if [ "${rc}" -ne 0 ]; then
        exit "${rc}"
    fi

    case "${state}" in
        "running" | "idle" | "in shutdown")
            # Nothing to do for these states.
            rc=0 ;;
        "paused")
            virsh resume "${domain}"
            rc=${?} ;;
        "shut off")
            virsh start "${domain}"
            rc=${?} ;;
        "crashed")
            virsh reset "${domain}"
            rc=${?} ;;
        "pmsuspended")
            virsh dompmwakeup "${domain}"
            rc=${?} ;;
        "no state")
            echo "no state"
            rc=1 ;;
        *)
            echo "illegal state"
            rc=1 ;;
    esac

    if [ "${rc}" -ne 0 ]; then
        exit "${rc}"
    fi
    # Explicit success status: a trailing "[ ... ] && exit" would leave the
    # function returning 1 whenever everything went fine.
    return 0
}

#######################################
# Boot a domain and wait until its address answers a ping.
# Globals:   BOOT_TIMEOUT_COUNT (read) - maximum number of attempts
# Arguments: $1 - domain name, $2 - address to ping
# Returns:   0 as soon as one ping succeeds.  When every attempt fails the
#            calling shell is terminated with "exit 1".
#######################################
wait_boot_vm()
{
    local domain=${1}
    local address=${2}
    local i

    # Exactly BOOT_TIMEOUT_COUNT attempts.
    for (( i = 0; i < BOOT_TIMEOUT_COUNT; i++ ))
    do
        # Re-evaluated on every pass so that each state change of the
        # domain gets the matching action.
        boot_vm "${domain}"

        if ping -c 1 "${address}" > /dev/null; then
            #echo "booted"
            return 0
        fi

        # Pause like wait_halt_vm does: a ping that fails instantly must not
        # use up the whole attempt budget within moments.
        sleep 1
    done

    exit 1
}

#######################################
# Request a graceful shutdown of a domain, depending on its current state.
# Arguments: $1 - domain name passed to virsh
# Outputs:   whatever virsh prints; "no state" / "illegal state" on stdout
#            for states that cannot be handled
# Returns:   0 when nothing had to be done or every virsh action succeeded.
#            On any failure the calling shell is terminated with "exit".
#######################################
halt_vm ()
{
    local domain=${1}
    local state rc

    state=$(virsh domstate "${domain}")
    rc=${?}
    if [ "${rc}" -ne 0 ]; then
        exit "${rc}"
    fi

    case "${state}" in
        "running" | "idle")
            virsh shutdown "${domain}"
            rc=${?} ;;
        "paused")
            # Both steps are always attempted; rc ends up non-zero when
            # either of them failed.  (Joining the two statuses as the
            # string "a+b" made the numeric check below error out, so
            # failures went unnoticed.)
            virsh resume "${domain}"
            rc=${?}
            virsh shutdown "${domain}" || rc=${?} ;;
        "in shutdown" | "shut off")
            # Already stopping or stopped.
            rc=0 ;;
        "crashed")
            virsh destroy "${domain}"
            rc=${?} ;;
        "pmsuspended")
            virsh dompmwakeup "${domain}"
            rc=${?}
            virsh shutdown "${domain}" || rc=${?} ;;
        "no state")
            echo "no state"
            rc=1 ;;
        *)
            echo "illegal state"
            rc=1 ;;
    esac

    if [ "${rc}" -ne 0 ]; then
        exit "${rc}"
    fi
    # Explicit success status: a trailing "[ ... ] && exit" would leave the
    # function returning 1 whenever everything went fine.
    return 0
}

#######################################
# Forcibly stop a domain with "virsh destroy", depending on its current state.
# Arguments: $1 - domain name passed to virsh
# Outputs:   whatever virsh prints; "no destroy" when the domain is already
#            shut off; "no state" / "illegal state" for states that cannot
#            be handled (all on stdout)
# Returns:   0 when nothing had to be done or every virsh action succeeded.
#            On any failure the calling shell is terminated with "exit".
#######################################
force_halt_vm ()
{
    local domain=${1}
    local state rc

    state=$(virsh domstate "${domain}")
    rc=${?}
    if [ "${rc}" -ne 0 ]; then
        exit "${rc}"
    fi

    case "${state}" in
        "running" | "idle" | "in shutdown" | "crashed")
            virsh destroy "${domain}"
            rc=${?} ;;
        "paused")
            # Both steps are always attempted; rc ends up non-zero when
            # either of them failed.  (Joining the two statuses as the
            # string "a+b" made the numeric check below error out, so
            # failures went unnoticed.)
            virsh resume "${domain}"
            rc=${?}
            virsh destroy "${domain}" || rc=${?} ;;
        "shut off")
            echo "no destroy"
            rc=0 ;;
        "pmsuspended")
            virsh dompmwakeup "${domain}"
            rc=${?}
            virsh destroy "${domain}" || rc=${?} ;;
        "no state")
            echo "no state"
            rc=1 ;;
        *)
            echo "illegal state"
            rc=1 ;;
    esac

    if [ "${rc}" -ne 0 ]; then
        exit "${rc}"
    fi
    # Explicit success status: a trailing "[ ... ] && exit" would leave the
    # function returning 1 whenever everything went fine.
    return 0
}

#######################################
# Ask a domain to shut down and wait until its address stops answering ping.
# Globals:   HALT_TIMEOUT_COUNT (read) - maximum number of attempts
# Arguments: $1 - domain name, $2 - address to ping
# Outputs:   "safe halted" on stdout once the address no longer answers
# Returns:   0 as soon as one ping fails.  When the address still answers
#            after every attempt the calling shell is terminated with
#            "exit 1".
#######################################
wait_halt_vm()
{
    local domain=${1}
    local address=${2}
    local i

    # Exactly HALT_TIMEOUT_COUNT attempts, about one second apart.
    for (( i = 0; i < HALT_TIMEOUT_COUNT; i++ ))
    do
        halt_vm "${domain}"

        # Any non-zero ping status is taken to mean the guest is down.
        if ! ping -c 1 "${address}" > /dev/null; then
            echo "safe halted"
            return 0
        fi
        sleep 1
    done

    exit 1
}

本体のバリエーションが1つのみになったので、分ける必要はなかった気もする。

最後にsystemd unit

[Unit]
Description=KVM SUSE Tumbleweed ssh server startup
# Ordered after the network, libvirt and the router VM service.
# vm-router.service is listed for ordering only; it is not in Requires=.
After=network.target libvirt-guests.service libvirtd.service vm-router.service
Requires=network.target libvirt-guests.service libvirtd.service

[Service]
# Arguments: domain name, then the address the script pings.
ExecStart=/usr/local/sbin/vm-heartbeat-service tw-ssh-server 192.168.1.200
# NOTE(review): the service script shown with this unit traps only EXIT, so
# SIGHUP is not handled as a reload there - confirm this is intended.
ExecReload=/bin/kill -HUP $MAINPID
Restart=on-failure
# The script reports readiness by running "systemd-notify --ready".
Type=notify
# The notification is sent by the systemd-notify helper process, not by the
# main PID itself, so accept notifications from every process of the service.
NotifyAccess=all

[Install]
WantedBy=multi-user.target

Type=notifyにして、スクリプトからのsystemd-notify --readyで起動完了を通知し、依存するユニットとの待ち合わせをsystemdに任せるようにした。


仮想化サーバーで自分で作った部分はここだけなので、新規構築用の備忘録。