Files
q-tools/anduril/slurm-random
2015-01-19 20:45:59 +02:00

127 lines
4.2 KiB
Bash
Executable File

#!/bin/bash
# Guard: a non-empty first argument (the script to run) is mandatory.
[ -n "$1" ] || {
echo provide the script to run
exit 1
}
# Candidate nodes: the whitespace-separated list in ANDURIL_NODELIST when it
# is set, otherwise every node name reported by "scontrol show node".
if [[ -n "${ANDURIL_NODELIST}" ]]; then
  read -a NODELIST <<< "$ANDURIL_NODELIST"
else
  # Keep only the value of the first key=value pair on each NodeName line.
  NODELIST=( $(
    scontrol show node \
      | grep NodeName \
      | sed -e 's,[^ ]\+=\([^ ]\+\) .*,\1,'
  ) )
fi
# Directory under which one sub-directory per submitted job is created.
JOBROOT="$HOME/.srun"
# "-h" as first argument: print usage plus the current node list, then quit.
case "$1" in
-h)
  echo -ne '\nThis tool selects a random node for slurm run.\n'
  echo -ne 'Use it with anduril: --exec-mode prefix --prefix '$( basename $0 )'\n'
  echo -ne 'To change the list of nodes for randomization:\n'
  echo -ne 'export ANDURIL_NODELIST="vm3 vm4 vm5"\n'
  echo -ne 'Current nodelist: "'${NODELIST[@]}'"'"( ${#NODELIST[@]} nodes)\n"
  exit
  ;;
esac
# Pick a random node from NODELIST that is not DOWN and still has
# unallocated CPUs. On leaving the loop INDEX is the position of the chosen
# node in NODELIST (it is used later when the job is submitted).
#
# Without candidates "RANDOM % 0" would fail on every pass and the loop
# would never end, so bail out early with a diagnostic instead.
if (( ${#NODELIST[@]} == 0 ))
then echo "srun: no nodes to choose from (node list is empty)" >&2
  exit 1
fi
TRY=0
while :
do
  INDEX=$(( RANDOM % ${#NODELIST[@]} ))
  # Query the node once per attempt; -o prints the record on a single line.
  NODEINFO=$( scontrol -o show node "${NODELIST[$INDEX]}" )
  if [[ "$NODEINFO" == *State=DOWN* ]]
  then
    # A node that is down can never take the job.
    NODERUNS=-1
  elif [[ "$NODEINFO" =~ CPUAlloc=([0-9]+).*CPUTot=([0-9]+) ]]
  then
    # Free CPUs = total - allocated (10# guards against leading zeros).
    NODERUNS=$(( 10#${BASH_REMATCH[2]} - 10#${BASH_REMATCH[1]} ))
  else
    # Output could not be parsed: treat the node as full and try another.
    NODERUNS=0
  fi
  echo "srun: Node ${NODELIST[$INDEX]}, Free sockets: $NODERUNS" >&2
  if (( NODERUNS > 0 ))
  then break
  fi
  TRY=$(( TRY + 1 ))
  echo "try again $TRY"
  if (( TRY > 3 ))
  then
    # After four unsuccessful picks back off for a random 0-9 seconds.
    # A private variable is used here because SECONDS is a Bash special
    # variable that keeps counting after it has been assigned.
    TRY=0
    WAITTIME=$(( RANDOM % 10 ))
    echo "waiting for free sockets for $WAITTIME s."
    sleep "$WAITTIME"
  fi
done
mkdir -p "$JOBROOT"

# Print the value of every "metadata.<key>=" line of a _command file.
# Arguments: $1 - metadata key (e.g. cpu)
#            $2 - path of the _command file
metadata_value() {
  sed -n "s,^metadata\.$1=,,p" "$2"
}

# Filter stdin, replacing every character outside A-Z a-z 0-9 . _ - with "_"
# so that the result is safe to use in a directory / job name.
sanitize_name() {
  sed -e 's/[^A-Za-z0-9._-]/_/g'
}

# Read job metadata from every argument that is a path ending in /_command.
# Arguments: the command line of this script ("$@")
# Globals written: COMPONENTNAME, INSTANCENAME, JOBNAME, CPU, MEMORY and
#                  USERDEFINED (exported). They stay untouched when no
#                  argument ends in /_command.
parse_command_metadata() {
  local arg
  for arg in "$@"
  do
    [[ "$arg" == */_command ]] || continue
    COMPONENTNAME=$( metadata_value componentName "$arg" | sanitize_name )
    INSTANCENAME=$( metadata_value instanceName "$arg" | sanitize_name )
    # job_<sourceLocation>_<instance>_<component>_<timestamp>
    JOBNAME="job_$( metadata_value sourceLocation "$arg" | sanitize_name )"
    JOBNAME="${JOBNAME}_${INSTANCENAME}_${COMPONENTNAME}"
    JOBNAME="${JOBNAME}_$( date +"%y%m%d_%H%M%S" )"
    CPU=$( metadata_value cpu "$arg" )
    # The memory value must have its own "metadata.memory=" prefix removed;
    # stripping the cpu prefix here would leave the key name in the value.
    MEMORY=$( metadata_value memory "$arg" )
    USERDEFINED=$( metadata_value userDefined "$arg" )
    export USERDEFINED
  done
}
parse_command_metadata "$@"
# Choose a job directory that does not exist yet. Whenever the candidate is
# already present, fall back to a name made of the current time and the md5
# of the arguments, and check again.
JOBPATH="$JOBROOT/$JOBNAME"
until [ ! -d "$JOBPATH" ]
do
  echo "Jobpath $JOBPATH exists"
  JOBNAME="job_$( date +"%y%m%d_%H%M%S" )_$( echo $@ | md5sum | cut -f1 -d" " )"
  JOBPATH="$JOBROOT/$JOBNAME"
done
mkdir -p "$JOBPATH"
# Files kept inside the job directory.
JOBFILE="$JOBPATH/job"
STATFILE="$JOBPATH/statistics"
STRMFILE="$JOBPATH/stream"
# Directory the wrapper was started from.
EXECPATH=$( pwd )
# Turn the bare metadata values into srun options; empty values stay empty.
if [[ -n "$CPU" ]]; then
  export CPU="-c $CPU"
fi
if [[ -n "$MEMORY" ]]; then
  export MEMORY="--mem $MEMORY"
fi
# create the jobfile
echo '#!/bin/bash' > "$JOBFILE"
chmod 755 "$JOBFILE"

# Find _command file
# For every argument ending in /_command, append to $JOBFILE a loop that
# waits until that file is visible where the job runs. The loop polls once
# per second and gives up after 16 attempts, after which the job continues.
# Arguments: the command line of this script ("$@")
# Globals read: JOBFILE
write_command_wait() {
  local arg quoted
  for arg in "$@"
  do
    [[ "$arg" == */_command ]] || continue
    # %q shell-escapes the path so that blanks, quotes, "$" and backticks in
    # it reach the generated script literally instead of being interpreted.
    printf -v quoted '%q' "$arg"
    cat >> "$JOBFILE" <<EOF
retrys=0
while [ ! -f $quoted ]
do echo Waiting for _command file $quoted
retrys=\$(( retrys + 1 ))
sleep 1
[ "\$retrys" -gt 15 ] && break
done
EOF
  done
}
write_command_wait "$@"
# Append to $JOBFILE the bookkeeping lines (node name, working directory,
# start time, all written to $STATFILE when the job runs) followed by the
# command line itself, wrapped in /usr/bin/time which appends to $STATFILE.
# The command line is deliberately left without a trailing newline because
# the stdin redirection is appended to the same line afterwards.
# Arguments: the command and its arguments ("$@")
# Globals read: JOBFILE, STATFILE
write_job_command() {
  {
    echo 'echo Node: $HOSTNAME'" >> \"$STATFILE\" "
    echo "pwd >> \"$STATFILE\" "
    echo "date +'Start: %s' >> \"$STATFILE\" "
    echo -n "/usr/bin/time -o \"$STATFILE\" --append "
    # %q shell-escapes every argument, so quotes, "$", backticks and blanks
    # inside an argument survive the trip through the generated script.
    # With no arguments nothing is printed (printf would emit '' otherwise).
    (( $# == 0 )) || printf '%q ' "$@"
  } >> "$JOBFILE"
}
write_job_command "$@"
# Catch the input stream (for R launcher): everything arriving on stdin is
# stored in $STRMFILE, and the job later reads it back as its own stdin.
cat - >> "$STRMFILE"
{
  # Complete the still-open command line with the stdin redirection.
  echo -n ' < "'$STRMFILE'"'
  printf '\n\n'
  # Keep the command's exit status across the final timestamp.
  printf '%s\n' 'EC=$?'
  printf '%s\n' "date +'Stop: %s' >> \"$STATFILE\" "
  printf '%s\n' 'exit $EC'
} >> "$JOBFILE"
printf '%s\n' "The job file is in $JOBFILE"
# send the job
# Assemble the srun argument list in the array SRUN_ARGS.
# Globals read: CPU, MEMORY, INSTANCENAME, NODELIST, INDEX, JOBFILE
# Globals written: SRUN_ARGS
build_srun_args() {
  # CPU and MEMORY are either empty or an "option value" pair, so they are
  # expanded unquoted on purpose to split into separate words.
  SRUN_ARGS=( $CPU $MEMORY )
  # Pass -J only when there is a name: with an empty INSTANCENAME a bare -J
  # would consume the following --nodelist option as the job name and the
  # selected node would be ignored.
  if [[ -n "$INSTANCENAME" ]]
  then SRUN_ARGS+=( -J "$INSTANCENAME" )
  fi
  SRUN_ARGS+=( "--nodelist=${NODELIST[INDEX]}" slurm-cake "$JOBFILE" "$JOBFILE" )
}
build_srun_args
# Record the submitted command as a comment at the end of the job file.
echo "#srun ${SRUN_ARGS[*]}" >> "$JOBFILE"
srun "${SRUN_ARGS[@]}"
EC=$?
# clear out old job definitions (semirandomly)
# $JOBROOT/.lastdel is a time stamp file: the cleanup below runs only when it
# was last touched more than 30 minutes ago, so most invocations skip it.
LASTDEL="$JOBROOT/.lastdel"
[[ -e "$LASTDEL" ]] || touch "$LASTDEL"
# The path is quoted so that a job root containing blanks still works.
if [[ -n "$( find "$LASTDEL" -mmin +30 )" ]]
then
  touch "$LASTDEL"
  # Remove files inside job directories that are older than 20 days ...
  find "$JOBROOT" -mindepth 2 -maxdepth 2 -type f -mtime +20 -delete 2>/dev/null
  # ... and afterwards every directory that has become empty.
  find "$JOBROOT" -depth -type d -empty -delete 2>/dev/null
fi
# Report the exit status of srun as the status of this wrapper.
exit $EC