MPI-Script
#!/bin/bash
#SBATCH -J <Job_Name>
#SBATCH --mail-type=END
# Please check paths (directories have to exist beforehand):
#SBATCH -e /scratch/<TUID>/<yourWorkingDirectory>/%x.err.%j
#SBATCH -o /scratch/<TUID>/<yourWorkingDirectory>/%x.out.%j
#
#SBATCH -n 192 # number of processes (= total cores to use, here: 2 nodes à 96 cores)
#SBATCH --mem-per-cpu=1750 # required main memory in MByte per MPI task/process
#SBATCH -t 01:30:00 # runtime in hours:minutes:seconds, or '#SBATCH -t 10' - just minutes
# -------------------------------
# your job's "payload" in the form of commands to execute, e.g.:
module purge
module load gcc openmpi
cd /scratch/<TUID>/<yourWorkingDirectory>
srun <MPI program> <parameters>
EXITCODE=$?
# any cleanup and copy commands:
...
# end this job script with precisely the exit status of your scientific program above:
exit $EXITCODE
OpenMP-Script
#!/bin/bash
#SBATCH -J <job_name>
#SBATCH --mail-type=END
# Please check paths (directories have to exist beforehand):
#SBATCH -e /home/<TUID>/<project_name>/<job_name>.err.%j
#SBATCH -o /home/<TUID>/<project_name>/<job_name>.out.%j
#
#SBATCH -n 1 # 1 process only
#SBATCH -c 24 # number of CPU cores per process
# can be referenced as $SLURM_CPUS_PER_TASK in your "payload" down below
#SBATCH --mem-per-cpu=1750 # main memory in MByte per CPU core
#SBATCH -t 01:30:00 # runtime in hours:minutes:seconds, or '#SBATCH -t 10' - just minutes
# -------------------------------
# your job's "payload" in the form of commands to execute, e.g.:
module purge
module load gcc
cd /scratch/<TUID>/<project_name>
# Whether OMP_NUM_THREADS needs to be set depends on your program
export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK
/home/<TUID>/<path/to/program> <parameters>
EXITCODE=$?
# any cleanup and copy commands:
...
# end this job script with precisely the exit status of your scientific program above:
exit $EXITCODE
MPI + OpenMP-Script
#!/bin/bash
#SBATCH -J <Job_Name>
#SBATCH --mail-type=END
# Please check paths (directories have to exist beforehand):
#SBATCH -e /home/<TUID>/<project_name>/<job_name>.err.%j
#SBATCH -o /home/<TUID>/<project_name>/<job_name>.out.%j
#
#SBATCH -n 4 # number of MPI processes (here: one per node, i.e. 4 nodes à 96 cores in total)
#SBATCH -c 96 # number of OpenMP threads or CPU cores per process
# can be referenced as $SLURM_CPUS_PER_TASK in your "payload" down below
#SBATCH --mem-per-cpu=1750 # main memory in MByte per CPU core
#SBATCH -t 01:30:00 # runtime in hours:minutes:seconds, or '#SBATCH -t 10' - just minutes
# -------------------------------
# your job's "payload" in the form of commands to execute, e.g.:
module purge
module load gcc openmpi
cd /scratch/<TUID>/<yourWorkingDirectory>
# Whether OMP_NUM_THREADS needs to be set depends on your program
export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK
srun <MPI program> <parameters>
EXITCODE=$?
# any cleanup and copy commands:
...
# end this job script with precisely the exit status of your scientific program above:
exit $EXITCODE
GPU/GRes
#!/bin/bash
#SBATCH -J <Job_Name>
#SBATCH --mail-type=END
# Please check paths (directories have to exist beforehand):
#SBATCH -e /home/<TUID>/<project_name>/<job_name>.err.%j
#SBATCH -o /home/<TUID>/<project_name>/<job_name>.out.%j
#
# CPU specification
#SBATCH -n 1 # 1 process
#SBATCH -c 24 # 24 CPU cores per process
# can be referenced as $SLURM_CPUS_PER_TASK in the "payload" part
#SBATCH --mem-per-cpu=1750 # main memory in MByte per CPU core
#SBATCH -t 01:30:00 # runtime in hours:minutes:seconds, or '#SBATCH -t 10' - just minutes
# GPU specification
#SBATCH --gres=gpu:v100:2 # 2 GPUs of type NVIDIA V100 ("Volta")
# -------------------------------
# your job's "payload" in the form of commands to execute, e.g.:
module purge
module load gcc cuda
cd /scratch/<TUID>/<yourWorkingDirectory>
# Whether OMP_NUM_THREADS needs to be set depends on your program
export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK
# for checking whether and which GPUs have been allocated
# (output appears in the "#SBATCH -e" file specified above):
nvidia-smi 1>&2
./<program> <parameters>
EXITCODE=$?
# any cleanup and copy commands:
...
# end this job script with precisely the exit status of your scientific program above:
exit $EXITCODE
The request “--gres=class:type:amount” always refers to a single accelerator node, and to GPU cards as a whole. There is no way of requesting a certain fraction of a GPU's cores (e.g. 48 tensor units); you can only ask for between one and four whole GPU cards.
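As a minimal sketch of that pattern (the type names v100 and a100 are only taken from the examples on this page; check which GPU types your cluster actually offers):
#SBATCH --gres=gpu:v100:1 # class "gpu", type "v100", amount 1 (one whole card)
#SBATCH --gres=gpu:a100:4 # four whole cards of type "a100" - the per-node maximum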
Unless you explicitly specify a certain number of “CPU cores per task” (using “-c #” or “--cpus-per-task=#”), your job will automatically be assigned a quarter of the available CPU cores (96/4 = 24) per requested GPU.
Requesting just “--gres=gpu:a100:2” (without specifying “-c #”), your job will thus find 48 CPU cores available on the node, in addition to the two GPU cards.