Files
q-tools/anduril/srun-random
2014-07-08 15:01:13 +03:00

47 lines
1.2 KiB
Bash
Executable File

#!/bin/bash
# srun-random: wrapper around `srun` that runs the given command on one node
# chosen from a list of candidate nodes.
#
# Usage: srun-random [-h] [srun arguments...]
#
# Environment:
#   ANDURIL_NODELIST  whitespace-separated node names to choose from.
#                     When unset, empty or blank, DEFAULT_NODELIST is used.

# Candidate nodes used when ANDURIL_NODELIST does not name any node.
DEFAULT_NODELIST=( vm3 vm4 vm5 vm6 vm7 vm8 vm9 )

#######################################
# Populate the global NODELIST array.
# Globals:   ANDURIL_NODELIST (read), DEFAULT_NODELIST (read), NODELIST (written)
# Arguments: none
# Returns:   0
#######################################
load_nodelist() {
  NODELIST=()
  if [[ -n "${ANDURIL_NODELIST:-}" ]]; then
    # -r keeps backslashes literal; splitting happens on IFS whitespace.
    read -r -a NODELIST <<< "$ANDURIL_NODELIST"
  fi
  # A value made only of whitespace yields zero entries; an empty list would
  # make the later "RANDOM % count" computation divide by zero, so fall back.
  if (( ${#NODELIST[@]} == 0 )); then
    NODELIST=( "${DEFAULT_NODELIST[@]}" )
  fi
  return 0
}

#######################################
# Print the help text, including the node list currently in effect.
# Globals:   NODELIST (read)
# Arguments: none
# Outputs:   help text on stdout
#######################################
usage() {
  # printf with one '%s\n' per line avoids echo -e escape processing and the
  # unquoted array expansion; "${NODELIST[*]}" joins the names with spaces.
  printf '%s\n' \
    '' \
    'This tools selects the lowest cpu allocated node for slurm run' \
    'export ANDURIL_NODELIST="vm3 vm4 vm5"' \
    '^ to have your own preferred list of nodes' \
    "Current nodelist: \"${NODELIST[*]}\"( ${#NODELIST[@]} nodes)"
}

load_nodelist

if [[ "${1:-}" == "-h" ]]; then
  usage
  exit 0
fi
# Node selection: probe randomly chosen entries of NODELIST until one is not
# reported as DOWN and has at least one unallocated CPU (CPUTot - CPUAlloc > 0
# according to `scontrol show node`), then run the caller's command line on
# that node through srun. All progress messages go to stderr so that stdout
# carries only the output of the wrapped command.

# Without candidates "RANDOM % 0" below would be a division by zero.
if (( ${#NODELIST[@]} == 0 )); then
  echo "srun: node list is empty, nothing to choose from" >&2
  exit 1
fi

# Extracts the allocated and total CPU counts from one-line scontrol output.
cpu_re='CPUAlloc=([0-9]+).*CPUTot=([0-9]+)'
try=0

while true; do
  index=$(( RANDOM % ${#NODELIST[@]} ))
  node=${NODELIST[index]}

  # A single scontrol call provides both the state and the CPU counters.
  node_info=$(scontrol -o show node "$node")

  if [[ "$node_info" == *State=DOWN* ]]; then
    free_cpus=-1
  elif [[ "$node_info" =~ $cpu_re ]]; then
    # 10# forces base 10 so a value with a leading zero is not read as octal.
    free_cpus=$(( 10#${BASH_REMATCH[2]} - 10#${BASH_REMATCH[1]} ))
  else
    # Missing or unparseable output: treat the node as unavailable rather
    # than reusing a value left over from an earlier probe.
    free_cpus=-1
  fi
  echo "srun: Node $node, Free sockets: $free_cpus" >&2

  if (( free_cpus > 0 )); then
    break
  fi

  try=$(( try + 1 ))
  echo "try again $try" >&2

  # After four unsuccessful probes in a row, back off for 0-9 seconds.
  if (( try > 3 )); then
    try=0
    # Not named SECONDS: that is a special Bash variable that keeps counting
    # after assignment.
    delay=$(( RANDOM % 10 ))
    echo "waiting for free sockets for $delay s." >&2
    sleep "$delay"
  fi
done

srun --nodelist="$node" "$@"