Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
197925f
Merge branch 'main' of github.com:EESSI/software-layer-scripts
bedroge Feb 20, 2026
a64ee14
Merge branch 'main' of github.com:EESSI/software-layer-scripts
bedroge Feb 27, 2026
9b65b14
Merge branch 'main' of github.com:EESSI/software-layer-scripts
bedroge Mar 6, 2026
68af45f
Merge branch 'main' of github.com:EESSI/software-layer-scripts
bedroge Mar 12, 2026
16327c6
Merge branch 'main' of github.com:EESSI/software-layer-scripts
bedroge Apr 16, 2026
6b58771
Merge branch 'main' of github.com:EESSI/software-layer-scripts
bedroge Apr 16, 2026
6c5dc52
Merge branch 'main' of github.com:EESSI/software-layer-scripts
bedroge Apr 20, 2026
944a208
Merge branch 'main' of github.com:EESSI/software-layer-scripts
bedroge May 8, 2026
0dd6810
Merge branch 'main' of github.com:EESSI/software-layer-scripts
bedroge May 29, 2026
2b9b14f
Merge branch 'main' of github.com:EESSI/software-layer-scripts
bedroge Jun 3, 2026
5416905
Merge branch 'main' of github.com:EESSI/software-layer-scripts
bedroge Jun 3, 2026
bb2b7c6
Merge branch 'main' of github.com:EESSI/software-layer-scripts
bedroge Jun 9, 2026
97b0bff
Merge branch 'main' of github.com:EESSI/software-layer-scripts
bedroge Jun 16, 2026
7f99317
add function for checking compute capability of NVIDIA GPU
bedroge Jun 19, 2026
1bef0e8
for every accelerator, check if a host GPU has to be made available i…
bedroge Jun 19, 2026
dce057f
Merge branch 'main' into accel_cc_check
bedroge Jun 19, 2026
1618478
Merge branch 'main' of github.com:EESSI/software-layer-scripts into a…
bedroge Jun 24, 2026
59d7962
use echo_yellow if the CC does not match, use ERROR isntead of Error …
bedroge Jun 24, 2026
616c628
print which nvidia flag is used
bedroge Jun 24, 2026
5d39a4f
Merge branch 'accel_cc_check' of github.com:bedroge/software-layer-sc…
bedroge Jun 24, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 25 additions & 12 deletions bot/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -261,13 +261,6 @@ declare -a BUILD_STEP_ARGS=()
BUILD_STEP_ARGS+=("--save" "${TARBALL_TMP_BUILD_STEP_DIR}")
BUILD_STEP_ARGS+=("--storage" "${STORAGE}")

# add options required to handle NVIDIA support
if nvidia_gpu_available; then
BUILD_STEP_ARGS+=("--nvidia" "all")
else
BUILD_STEP_ARGS+=("--nvidia" "install")
fi

# Retain location for host injections so we don't reinstall CUDA
# (Always need to run the driver installation as available driver may change)
if [[ ! -z ${SHARED_FS_PATH} ]]; then
Expand All @@ -294,19 +287,39 @@ else
# prepend accel/ to all array elements
EESSI_ACCELERATOR_TARGET_OVERRIDES=("${ACCEL_OVERRIDES_ARRAY[@]/#/accel/}")
fi
RESUME_DIR=""

for ACCEL_OVERRIDE in "${EESSI_ACCELERATOR_TARGET_OVERRIDES[@]}"; do
# copy the common build step arguments to a per-accelerator array
BUILD_STEP_ARGS_ACCEL=("${BUILD_STEP_ARGS[@]}")
if [[ "${ACCEL_OVERRIDE}" == "accel/nvidia/"* ]]; then
nvidia_cc=${ACCEL_OVERRIDE##*/cc}
# add options required to handle NVIDIA support
# only make the GPU available in the container if the host has a GPU and it has the correct compute capability
if nvidia_gpu_available && nvidia_gpu_has_compute_capability "${nvidia_cc}" ; then
echo "bot/build.sh: GPU with the requested compute capability is available, using '--nvidia all'"
BUILD_STEP_ARGS_ACCEL+=("--nvidia" "all")
else
echo "bot/build.sh: no GPU with the requested compute capability is available, using '--nvidia install'"
BUILD_STEP_ARGS_ACCEL+=("--nvidia" "install")
fi
fi
# resume from the previous accelerator's build directory
# as we want to combine all accelerator builds into a single tarball in the end
if [[ ! -z "${RESUME_DIR}" ]]; then
BUILD_STEP_ARGS_ACCEL+=("--resume" "${RESUME_DIR}")
fi

export EESSI_ACCELERATOR_TARGET_OVERRIDE="${ACCEL_OVERRIDE}"
echo "bot/build.sh: EESSI_ACCELERATOR_TARGET_OVERRIDE='${ACCEL_OVERRIDE}'"
echo "Executing command to build software:"
echo "$software_layer_dir/eessi_container.sh ${COMMON_ARGS[@]} ${BUILD_STEP_ARGS[@]}"
echo "$software_layer_dir/eessi_container.sh ${COMMON_ARGS[@]} ${BUILD_STEP_ARGS_ACCEL[@]}"
echo " -- $software_layer_dir/install_software_layer.sh \"${INSTALL_SCRIPT_ARGS[@]}\" \"$@\" 2>&1 | tee -a ${build_outerr}"
$software_layer_dir/eessi_container.sh "${COMMON_ARGS[@]}" "${BUILD_STEP_ARGS[@]}" \
$software_layer_dir/eessi_container.sh "${COMMON_ARGS[@]}" "${BUILD_STEP_ARGS_ACCEL[@]}" \
-- $software_layer_dir/install_software_layer.sh "${INSTALL_SCRIPT_ARGS[@]}" "$@" 2>&1 | tee -a ${build_outerr}

# determine temporary directory to resume from for the next accelerator,
# as we want to combine all accelerator builds into a single tarball in the end
BUILD_TMPDIR=$(grep ' as tmp directory ' ${build_outerr} | cut -d ' ' -f 2)
BUILD_STEP_ARGS+=("--resume" "${BUILD_TMPDIR}")
RESUME_DIR=$(grep ' as tmp directory ' ${build_outerr} | cut -d ' ' -f 2)
done

# prepare directory to store tarball of tmp for tarball step
Expand Down
31 changes: 31 additions & 0 deletions scripts/utils.sh
Original file line number Diff line number Diff line change
Expand Up @@ -166,3 +166,34 @@ function nvidia_gpu_available {
return 2
fi
}

# Check whether the NVIDIA GPU of this host has the requested compute capability.
#
# Arguments:
#   $1 - requested compute capability, with or without a period (e.g. 8.0 or 80)
# Outputs:
#   status messages via echo_green / echo_yellow / echo_red
# Returns:
#   0 if the compute capability of the first GPU reported by nvidia-smi matches
#   1 if the nvidia-smi query did not return anything
#   2 if the compute capability of the first GPU does not match
#   $ANY_ERROR_EXITCODE (1 if that variable is unset or empty) on wrong usage
function nvidia_gpu_has_compute_capability {
    # Ensure we are given a single compute capability argument
    if [ $# -ne 1 ]; then
        echo_red "Function requires a single compute capability argument" >&2
        # Fall back to 1 explicitly: with an unset/empty variable a bare 'return'
        # would propagate the (successful) exit status of echo_red.
        return "${ANY_ERROR_EXITCODE:-1}"
    fi

    # Keep the working variables out of the caller's scope
    local requested_cc
    local -a gpu_ccs=()

    # Remove period (if present) from the given compute capability, i.e. 8.0 -> 80
    requested_cc=${1//./}

    # We are careful here in case we are running in a container and LD_LIBRARY_PATH has been wiped
    # (the ':-' default keeps this working when the variable is unset and 'set -u' is active).
    # nvidia-smi documents 'csv' as a mandatory format option, 'noheader' only drops the header line.
    mapfile -t gpu_ccs < <(LD_LIBRARY_PATH="/.singularity.d/libs:${LD_LIBRARY_PATH:-}" nvidia-smi --query-gpu=compute_cap --format=csv,noheader)

    if [ ${#gpu_ccs[@]} -eq 0 ]; then
        echo_red "ERROR: querying for the GPU's compute capability did not return anything."
        return 1
    fi

    # Remove the periods from all compute capabilities
    # (done after the emptiness check, so an empty array is never expanded)
    gpu_ccs=("${gpu_ccs[@]//./}")

    # On a multi-GPU system we may get the compute capabilities of all GPUs, one per line.
    # In that case we print a warning and check the first GPU.
    if [ ${#gpu_ccs[@]} -gt 1 ]; then
        echo_yellow "Warning: multiple GPUs detected, checking the compute capability of the first GPU."
    fi

    if [[ "${requested_cc}" == "${gpu_ccs[0]}" ]]; then
        echo_green "Requested compute capability matches the one from the GPU."
        return 0
    fi

    echo_yellow "Warning: the compute capability of the GPU (${gpu_ccs[0]}) does not match the requested compute capability ($requested_cc)."
    return 2
}
Loading