Add another node-selection strategy: pick the node with the most free CPUs
This commit is contained in:
76
anduril/slurm-maxfree
Executable file
76
anduril/slurm-maxfree
Executable file
@@ -0,0 +1,76 @@
|
|||||||
|
#!/bin/bash
#
# Anduril execution prefix: runs the given command through slurm (srun) on
# the node of the node list that currently reports the most free CPUs.
#
# Usage:
#   <this script> command [args...]    run the command (stdin is forwarded)
#   <this script> -h                   print help and the active node list
#
# Environment:
#   ANDURIL_NODELIST  whitespace-separated candidate nodes in preferred
#                     order (default: "kristel sasha")

if [[ -z "$1" ]]; then
  # Diagnostics go to stderr so they never mix with regular output.
  echo provide the script to run >&2
  exit 1
fi

if [[ -z "${ANDURIL_NODELIST}" ]]; then
  NODELIST=( kristel sasha )
else
  # -r keeps backslashes literal instead of treating them as escapes.
  read -r -a NODELIST <<< "$ANDURIL_NODELIST"
fi

# Directory under which every job gets its own sub-directory.
JOBROOT="/mnt/storage3/analysis/.srun"

if [[ "$1" == "-h" ]]; then
  # Join the node names with single spaces for display.
  nodes="${NODELIST[*]}"
  cat <<EOF

This prefix selects the highest free cpu node for slurm run.
Use it with anduril: --exec-mode prefix --prefix $( basename "$0" )
To change the list of nodes (preferred order):
export ANDURIL_NODELIST="vm3 vm4 vm5"

Current nodelist: "$nodes"( ${#NODELIST[@]} nodes)
EOF
  exit 0
fi

# find node with max free CPUs

# Print the number of free CPUs (CPUTot - CPUAlloc) found in the
# `scontrol show node` output passed as $1.
# Prints -1 when the two fields cannot be found, so the caller always gets
# exactly one number per node and its arrays stay index-aligned.
node_free_cpus() {
  local info=$1
  if [[ "$info" =~ CPUAlloc=([0-9]+).*CPUTot=([0-9]+) ]]; then
    # 10# forces base 10 so a leading zero is never read as octal.
    echo $(( 10#${BASH_REMATCH[2]} - 10#${BASH_REMATCH[1]} ))
  else
    echo -1
  fi
}

# Print the index of the largest integer among the arguments.
# The first one wins on ties; prints 0 when called without arguments.
max_index() {
  local best=0 pos=0 max=$1 value
  for value in "$@"; do
    if (( value > max )); then
      max=$value
      best=$pos
    fi
    pos=$(( pos + 1 ))
  done
  echo "$best"
}

# NODERUNS[i] holds the free CPU count of NODELIST[i].
NODERUNS=( )
for e in "${NODELIST[@]}"; do
  free=$( node_free_cpus "$( scontrol show node "$e" )" )
  if (( free < 0 )); then
    echo "warning: could not read CPU usage of node $e" >&2
  fi
  NODERUNS+=( "$free" )
done

INDEX=$( max_index "${NODERUNS[@]}" )
MAX=${NODERUNS[$INDEX]}

# Report every candidate as node:free, then the chosen node.
for (( i=0; i<${#NODERUNS[@]}; i++ )); do
  echo -n "${NODELIST[$i]}:${NODERUNS[$i]} "
done
echo "sending to ${NODELIST[$INDEX]}"

# Create a unique working directory for this job below JOBROOT.
if ! mkdir -p "$JOBROOT"; then
  echo "Cannot create job root $JOBROOT" >&2
  exit 1
fi

# The directory name combines a timestamp with a hash of the command line.
# The hash never changes within one invocation, so it is computed once.
JOBHASH=$( printf '%s\n' "$*" | md5sum | cut -f1 -d" " )
JOBNAME=job_$( date +"%y%m%d_%H%M%S" )_$JOBHASH
JOBPATH="$JOBROOT/$JOBNAME"

# mkdir without -p fails when the directory already exists, so creating it
# doubles as an atomic "claim": two concurrent invocations can never end up
# sharing one job directory.
until mkdir "$JOBPATH" 2>/dev/null; do
  if [[ ! -d "$JOBPATH" ]]; then
    # Failure for a reason other than "already exists"; retrying is useless.
    echo "Cannot create job path $JOBPATH" >&2
    exit 1
  fi
  echo Jobpath "$JOBPATH" exists
  # The timestamp has one-second resolution; wait for it to change.
  sleep 1
  JOBNAME=job_$( date +"%y%m%d_%H%M%S" )_$JOBHASH
  JOBPATH="$JOBROOT/$JOBNAME"
done

JOBFILE="$JOBPATH/job"          # generated script handed to srun
STATFILE="$JOBPATH/statistics"  # node, directory, timing information
STRMFILE="$JOBPATH/stream"      # captured standard input of this script
EXECPATH=$( pwd )

# create the jobfile

# Print the arguments quoted for safe re-use as shell input, each followed
# by one space. Prints nothing when called without arguments.
quote_args() {
  [[ "$#" -gt 0 ]] || return 0
  printf '%q ' "$@"
}

# Shell-quoted forms of the paths embedded in the generated script.
STATQ=$( printf '%q' "$STATFILE" )
STRMQ=$( printf '%q' "$STRMFILE" )

# The job script records node, working directory and start/stop times in
# STATFILE and runs the requested command under /usr/bin/time, fed with the
# captured input stream. %q quoting keeps arguments containing quotes,
# dollar signs, backticks or spaces intact.
{
  printf '%s\n' '#!/bin/bash'
  printf 'echo Node: $HOSTNAME >> %s\n' "$STATQ"
  printf 'pwd >> %s\n' "$STATQ"
  printf "date +'Start: %%s' >> %s\n" "$STATQ"
  printf '/usr/bin/time -o %s --append %s< %s\n\n' \
    "$STATQ" "$( quote_args "$@" )" "$STRMQ"
  printf "date +'Stop: %%s' >> %s\n" "$STATQ"
} > "$JOBFILE"
chmod 755 "$JOBFILE"

#Catch the input stream (for R launcher)
cat - >> "$STRMFILE"

echo "The job file is in $JOBFILE"
# send the job
# Kept as the last command so the script exits with srun's status.
srun --nodelist="${NODELIST[$INDEX]}" "$JOBFILE"
Reference in New Issue
Block a user