#!/bin/bash
#
# Slurm launch prefix: picks the node with the most free CPUs and runs the
# given command there through srun.
#
# Usage:
#   <this script> COMMAND [ARG...]    run COMMAND on the selected node
#   <this script> -h                  print help and the current node list
#
# Environment:
#   ANDURIL_NODELIST  optional whitespace-separated list of candidate nodes in
#                     preferred order; when empty, every node reported by
#                     `scontrol show node` is a candidate.
#
# Files (below $HOME/.srun/<job name>/):
#   job         generated bash script that runs COMMAND
#   statistics  node name, working directory, start/stop time, time(1) output
#   stream      copy of this script's stdin, fed to COMMAND as its stdin
#
# Exit status: that of srun; 1 on usage or setup errors.

#######################################
# Print the number of free CPUs of a node.
# Arguments: $1 - node name
# Outputs:   CPUTot - CPUAlloc on stdout, or -1 when the node reports
#            State=DOWN or the scontrol output cannot be parsed.
#######################################
free_cpus() {
  local info alloc total
  info=$( scontrol -o show node "$1" )
  if [[ "$info" == *State=DOWN* ]]; then
    echo -1
    return
  fi
  if [[ "$info" =~ CPUAlloc=([0-9]+) ]]; then
    alloc=${BASH_REMATCH[1]}
  else
    echo -1
    return
  fi
  if [[ "$info" =~ CPUTot=([0-9]+) ]]; then
    total=${BASH_REMATCH[1]}
  else
    echo -1
    return
  fi
  # Force base 10 so a value with leading zeros is not read as octal.
  echo $(( 10#$total - 10#$alloc ))
}

#######################################
# Write the job script for the given command line to stdout.
# Globals:   STATFILE, STRMFILE (read)
# Arguments: the command and its arguments
#######################################
write_jobfile() {
  local arg q_arg q_stat q_strm
  # %q yields a shell-safe spelling, so paths and arguments containing
  # quotes, '$', backticks or whitespace survive being re-read by bash.
  printf -v q_stat '%q' "$STATFILE"
  printf -v q_strm '%q' "$STRMFILE"

  printf '#!/bin/bash\n'

  # If an argument is a "_command" file, make the job wait (at most 16 tries,
  # one second apart) for it to appear on the node before going on.
  for arg in "$@"; do
    if [[ "$arg" == */_command ]]; then
      printf -v q_arg '%q' "$arg"
      cat <<EOF
retrys=0
while [ ! -f ${q_arg} ]; do
  echo Waiting for _command file ${q_arg}
  retrys=\$(( retrys + 1 ))
  sleep 1
  [ "\$retrys" -gt 15 ] && break
done
EOF
    fi
  done

  printf 'echo Node: $HOSTNAME >> %s\n' "$q_stat"
  printf 'pwd >> %s\n' "$q_stat"
  printf "date +'Start: %%s' >> %s\n" "$q_stat"
  printf '/usr/bin/time -o %s --append' "$q_stat"
  printf ' %q' "$@"
  printf ' < %s\n' "$q_strm"
  printf 'EC=$?\n'
  printf "date +'Stop: %%s' >> %s\n" "$q_stat"
  printf 'exit $EC\n'
}

if [ -z "$1" ]; then
  echo provide the script to run >&2
  exit 1
fi

# Candidate nodes: the user's list if given, otherwise all nodes known to Slurm.
if [ -z "${ANDURIL_NODELIST}" ]; then
  # Word splitting is intended here: one node name per NodeName= line.
  # shellcheck disable=SC2207
  NODELIST=( $( scontrol show node | grep NodeName \
    | sed -e 's,[^ ]\+=\([^ ]\+\) .*,\1,' ) )
else
  read -r -a NODELIST <<< "$ANDURIL_NODELIST"
fi
JOBROOT="$HOME/.srun"

if [ "$1" == "-h" ]; then
  cat <<EOF

This prefix selects the highest free cpu node for slurm run.
Use it with anduril: --exec-mode prefix --prefix ${0##*/}
To change the list of nodes (preferred order):
export ANDURIL_NODELIST="vm3 vm4 vm5"
Current nodelist: "${NODELIST[*]}"( ${#NODELIST[@]} nodes)
EOF
  exit 0
fi

# Without candidates the selection loop below could never finish.
if (( ${#NODELIST[@]} == 0 )); then
  echo "no nodes to choose from" >&2
  exit 1
fi

# Find the node with the most free CPUs; ties go to the earliest node in the
# list. When nothing is free, back off for 10-42 seconds and look again.
NODERUNS=( )
while true; do
  MAX=0
  INDEX=0
  for (( i = 0; i < ${#NODELIST[@]}; i++ )); do
    NODERUNS[i]=$( free_cpus "${NODELIST[i]}" )
    echo -n "${NODELIST[i]}:${NODERUNS[i]} "
    if (( NODERUNS[i] > MAX )); then
      MAX=${NODERUNS[i]}
      INDEX=$i
    fi
  done
  if (( MAX > 0 )); then
    break
  fi
  # Not named SECONDS: that is bash's own self-incrementing timer variable.
  wait_s=$(( 10 + RANDOM / 1000 ))
  echo "waiting for free sockets for $wait_s s."
  sleep "$wait_s"
done
echo "srun: Node ${NODELIST[$INDEX]}, Free sockets: ${NODERUNS[$INDEX]}" >&2

mkdir -p "$JOBROOT" || exit 1

# The job name is a timestamp plus a hash of the command line. A plain mkdir
# (no -p) fails when the directory already exists, so creating it doubles as
# an atomic claim of the name; on a clash wait for the timestamp to move on.
ARGHASH=$( printf '%s\n' "$*" | md5sum | cut -f1 -d" " )
while true; do
  JOBNAME=job_$( date +"%y%m%d_%H%M%S" )_${ARGHASH}
  JOBPATH="$JOBROOT/$JOBNAME"
  mkdir "$JOBPATH" 2>/dev/null && break
  if [[ ! -d "$JOBPATH" ]]; then
    echo "cannot create job directory $JOBPATH" >&2
    exit 1
  fi
  echo Jobpath "$JOBPATH" exists
  sleep 1
done

JOBFILE="$JOBPATH/job"
STATFILE="$JOBPATH/statistics"
STRMFILE="$JOBPATH/stream"

# create the jobfile
write_jobfile "$@" > "$JOBFILE"
chmod 755 "$JOBFILE"

# Catch the input stream (for R launcher); the job reads it back as stdin.
cat > "$STRMFILE"

# Clear out old job definitions in the background, at most once every 30
# minutes (tracked by the mtime of .lastdel): drop job files older than 20
# days, then remove the directories that became empty.
(
  [[ -e "$JOBROOT/.lastdel" ]] || touch "$JOBROOT/.lastdel"
  if [[ -n "$( find "$JOBROOT/.lastdel" -mmin +30 )" ]]; then
    touch "$JOBROOT/.lastdel"
    find "$JOBROOT" -maxdepth 2 -mindepth 2 -type f -mtime +20 -delete 2>/dev/null
    find "$JOBROOT" -mindepth 1 -type d -empty -delete 2>/dev/null
  fi
) &

echo "The job file is in $JOBFILE"
# send the job
srun --nodelist="${NODELIST[$INDEX]}" slurm-cake "$JOBFILE" "$JOBFILE"
EC=$?
# Let the background cleanup finish before leaving.
wait
exit "$EC"