#!/bin/bash
#
# slurm-random (anduril/slurm-random)
#
# Prefix launcher that runs a command on a Slurm node via srun.
#
# A node is drawn at random from the node list until one reports free CPUs
# (CPUTot - CPUAlloc > 0 and not State=DOWN in `scontrol -o show node`).
# The command line is then written into a job file under
# $HOME/.srun/<jobname>/, stdin is captured into a file next to it, and the
# job file is executed on the chosen node with `srun --nodelist=...`.
#
# Usage:
#   slurm-random COMMAND [ARG...]    # stdin is read to EOF and fed to COMMAND
#   slurm-random -h                  # print help and the current node list
#
# Environment:
#   ANDURIL_NODELIST  whitespace-separated node names; when unset or empty
#                     the built-in default list (vm3 .. vm9) is used.
#
# Files written per job (in $HOME/.srun/<jobname>/):
#   job         generated bash script executed by srun
#   statistics  node name, working directory, start/stop time, time(1) output
#   stream      copy of this script's stdin
#
# Exit status: 1 when no command is given or the job directory cannot be
# created; otherwise the exit status of srun.

if [[ -z "$1" ]]; then
  echo provide the script to run
  exit 1
fi

NODELIST=()
if [[ -n "${ANDURIL_NODELIST}" ]]; then
  read -r -a NODELIST <<< "$ANDURIL_NODELIST"
fi
# A whitespace-only ANDURIL_NODELIST yields an empty array; fall back to the
# defaults so the random index below never divides by zero.
if (( ${#NODELIST[@]} == 0 )); then
  NODELIST=( vm3 vm4 vm5 vm6 vm7 vm8 vm9 )
fi
JOBROOT="$HOME/.srun"

if [[ "$1" == "-h" ]]; then
  # The help text is emitted exactly as before (leading blank line included).
  cat <<EOF

This tools selects the lowest cpu allocated node for slurm run.
Use it with anduril: --exec-mode prefix --prefix $(basename -- "$0")
To change the list of nodes (preferred order):
export ANDURIL_NODELIST="vm3 vm4 vm5"

Current nodelist: "${NODELIST[*]}"( ${#NODELIST[@]} nodes)
EOF
  exit
fi

#######################################
# Report how many CPUs are unallocated on a node.
# Arguments: $1 - node name
# Outputs:   CPUTot - CPUAlloc on stdout, or -1 when scontrol fails, the
#            node is in a State=DOWN* state, or the counts cannot be parsed
# Returns:   0 always
#######################################
free_cpus() {
  local node=$1 info alloc='' total=''
  # Query once; the same report is used for both the state and the counts.
  if ! info=$(scontrol -o show node "$node"); then
    echo -1
    return 0
  fi
  if [[ "$info" == *State=DOWN* ]]; then
    echo -1
    return 0
  fi
  [[ "$info" =~ CPUAlloc=([0-9]+) ]] && alloc=${BASH_REMATCH[1]}
  [[ "$info" =~ CPUTot=([0-9]+) ]] && total=${BASH_REMATCH[1]}
  if [[ -z "$alloc" || -z "$total" ]]; then
    echo -1
    return 0
  fi
  # 10# forces base 10 so a value with leading zeros is not read as octal.
  echo $(( 10#$total - 10#$alloc ))
}

#######################################
# Build a job name from the current time and a hash of the command line.
# Arguments: the command and its arguments
# Outputs:   job_<yymmdd_HHMMSS>_<md5> on stdout
#######################################
make_jobname() {
  local stamp digest
  stamp=$(date +"%y%m%d_%H%M%S")
  digest=$(printf '%s\n' "$*" | md5sum | cut -f1 -d" ")
  printf 'job_%s_%s' "$stamp" "$digest"
}

# Draw random nodes until one has free CPUs. After every 4 consecutive
# misses, pause for a random 0-9 seconds before trying again.
# NOTE: the retry messages go to stdout, as they always have.
TRY=0
while true; do
  INDEX=$(( RANDOM % ${#NODELIST[@]} ))
  NODE=${NODELIST[INDEX]}
  FREE=$(free_cpus "$NODE")
  echo "srun: Node $NODE, Free sockets: $FREE" >&2
  if (( FREE > 0 )); then
    break
  fi
  TRY=$(( TRY + 1 ))
  echo "try again $TRY"
  if (( TRY > 3 )); then
    TRY=0
    # Not named SECONDS: that is bash's own elapsed-time counter.
    WAIT_SECONDS=$(( RANDOM % 10 ))
    echo "waiting for free sockets for $WAIT_SECONDS s."
    sleep "$WAIT_SECONDS"
  fi
done

echo "srun: Node $NODE, Free sockets: $FREE" >&2

mkdir -p "$JOBROOT" || exit 1
JOBNAME=$(make_jobname "$@")
JOBPATH="$JOBROOT/$JOBNAME"
# Plain mkdir is atomic: it fails if the directory already exists, so two
# concurrent invocations with the same command line cannot share a job dir.
until mkdir -- "$JOBPATH" 2>/dev/null; do
  if [[ ! -d "$JOBPATH" ]]; then
    echo "cannot create job directory $JOBPATH" >&2
    exit 1
  fi
  echo Jobpath "$JOBPATH" exists
  # The name only changes once the timestamp does; wait instead of spinning.
  sleep 1
  JOBNAME=$(make_jobname "$@")
  JOBPATH="$JOBROOT/$JOBNAME"
done
JOBFILE="$JOBPATH/job"
STATFILE="$JOBPATH/statistics"
STRMFILE="$JOBPATH/stream"

# Catch the input stream (for R launcher)
cat - >> "$STRMFILE"

# Create the job file. Paths and arguments are written with %q so that
# quotes, dollars, backticks and backslashes reach the command verbatim
# instead of being re-interpreted when the job file runs.
{
  printf '%s\n' '#!/bin/bash'
  printf 'echo Node: $HOSTNAME >> %q\n' "$STATFILE"
  printf 'pwd >> %q\n' "$STATFILE"
  printf "date +'Start: %%s' >> %q\n" "$STATFILE"
  printf '/usr/bin/time -o %q --append' "$STATFILE"
  printf ' %q' "$@"
  printf ' < %q\n' "$STRMFILE"
  # Remember the command's status; otherwise the job would always report
  # the status of the trailing date call.
  printf '%s\n' 'status=$?'
  printf "date +'Stop: %%s' >> %q\n" "$STATFILE"
  printf '%s\n' 'exit "$status"'
} > "$JOBFILE"
chmod 755 "$JOBFILE"

echo "The job file is in $JOBFILE"
# send the job
srun --nodelist="$NODE" "$JOBFILE"