CI: Use -O2 for AVX512-FP16 jobs to work around GCC codegen bug

libeigen/eigen!2424

Co-authored-by: Rasmus Munk Larsen <rmlarsen@gmail.com>
diff --git a/ci/build.linux.gitlab-ci.yml b/ci/build.linux.gitlab-ci.yml
index ef68f7b..13c33e8 100644
--- a/ci/build.linux.gitlab-ci.yml
+++ b/ci/build.linux.gitlab-ci.yml
@@ -61,13 +61,28 @@
   variables:
     EIGEN_CI_ADDITIONAL_ARGS: "-DEIGEN_TEST_AVX512DQ=on"
 
-build:linux:cross:x86-64:gcc-13:avx512fp16:
+# AVX512-FP16 builds are split into official/unsupported jobs because building
+# all tests in one job exceeds the 3-hour GitLab SaaS shared-runner timeout.
+.build:linux:cross:x86-64:gcc-13:avx512fp16:
   extends: build:linux:cross:x86-64:gcc-13:default
+  # Disable the cache: uploading the build directory takes ~35 minutes, and
+  # the savings from cache hits do not compensate for that upload cost.
+  cache: []
   variables:
-    # Use -O2: GCC bug generates invalid masked vmovsh with {z} on memory
-    # stores at -O3 for _Float16 operations (zero-masking on memory
-    # destinations is architecturally illegal in AVX-512).
-    EIGEN_CI_ADDITIONAL_ARGS: "-DEIGEN_TEST_AVX512FP16=on -DEIGEN_TEST_AVX512DQ=on -DEIGEN_TEST_F16C=on -DCMAKE_CXX_FLAGS_RELEASE='-O2 -DNDEBUG'"
+    # Use RelWithDebInfo (-O2 -g with GCC) rather than an -O3 build: a GCC bug
+    # emits an invalid masked vmovsh with {z} on memory stores at -O3 for
+    # _Float16 code (zero-masking a memory destination is illegal in AVX-512).
+    EIGEN_CI_ADDITIONAL_ARGS: "-DEIGEN_TEST_AVX512FP16=on -DEIGEN_TEST_AVX512DQ=on -DEIGEN_TEST_F16C=on -DCMAKE_BUILD_TYPE=RelWithDebInfo"
+
+build:linux:cross:x86-64:gcc-13:avx512fp16:official:
+  extends: .build:linux:cross:x86-64:gcc-13:avx512fp16
+  variables:
+    EIGEN_CI_BUILD_TARGET: BuildOfficial
+
+build:linux:cross:x86-64:gcc-13:avx512fp16:unsupported:
+  extends: .build:linux:cross:x86-64:gcc-13:avx512fp16
+  variables:
+    EIGEN_CI_BUILD_TARGET: BuildUnsupported
 
 # Clang-14 (stable recent version)
 build:linux:cross:x86-64:clang-14:default:
diff --git a/ci/scripts/build.linux.script.sh b/ci/scripts/build.linux.script.sh
index 082c1fc..82d85a5 100755
--- a/ci/scripts/build.linux.script.sh
+++ b/ci/scripts/build.linux.script.sh
@@ -23,7 +23,8 @@
 # out of resources. In that case, keep trying to build the remaining
 # targets (k0), then retry with reduced parallelism to minimize resource use.
 # EIGEN_CI_BUILD_JOBS can be set to limit parallelism for memory-hungry
-# compilers (e.g. NVHPC).
+# compilers (e.g. NVHPC). When unset, ninja uses all available CPUs.
+njobs=${EIGEN_CI_BUILD_JOBS:-${NPROC:-$(nproc)}}
 jobs=""
 if [[ -n "${EIGEN_CI_BUILD_JOBS}" ]]; then
   jobs="-j${EIGEN_CI_BUILD_JOBS}"
@@ -37,9 +38,13 @@
 # nvc++) are spread out instead of all running at once. Ninja ignores the
 # command-line target order and schedules by its dependency graph, so we
 # must feed it small batches to actually influence scheduling.
-# Falls back to the normal build if the target is not a phony or if
-# ninja/shuf are not available.
-batch_size=${EIGEN_CI_BUILD_BATCH_SIZE:-48}
+#
+# Batch size defaults to max(2 * njobs, 48): two targets per job keep all
+# cores busy and 48 was the previous fixed default. njobs falls back to NPROC,
+# then $(nproc). Override with EIGEN_CI_BUILD_BATCH_SIZE for finer control.
+default_batch=$((njobs * 2))
+default_batch=$((default_batch > 48 ? default_batch : 48))
+batch_size=${EIGEN_CI_BUILD_BATCH_SIZE:-${default_batch}}
 shuffled=false
 if [[ -n "${EIGEN_CI_BUILD_TARGET}" ]] && command -v ninja >/dev/null 2>&1; then
   # Suppress xtrace while extracting and shuffling the target list
@@ -70,7 +75,7 @@
         printf "%010d %s\n",h,$0 }' | sort | sed 's/^[^ ]* //')
   if [[ -n "$shuffled_deps" ]]; then
     ndeps=$(echo "$shuffled_deps" | wc -l)
-    echo "Building ${ndeps} targets in batches of ${batch_size}"
+    echo "Building ${ndeps} targets in batches of ${batch_size} (njobs=${njobs})"
     shuffled=true
     # Build in batches: ninja parallelises within each batch, but batches
     # run sequentially so memory-hungry targets from different families
diff --git a/ci/test.linux.gitlab-ci.yml b/ci/test.linux.gitlab-ci.yml
index 234cdff..0f48488 100644
--- a/ci/test.linux.gitlab-ci.yml
+++ b/ci/test.linux.gitlab-ci.yml
@@ -100,7 +100,6 @@
 
 .test:linux:x86-64:gcc-13:avx512fp16:
   extends: .test:linux:x86-64:gcc-13:default
-  needs: [ build:linux:cross:x86-64:gcc-13:avx512fp16 ]
   tags:
     - eigen-runner
     - linux
@@ -109,11 +108,13 @@
 
 test:linux:x86-64:gcc-13:avx512fp16:official:
   extends: .test:linux:x86-64:gcc-13:avx512fp16
+  needs: [ build:linux:cross:x86-64:gcc-13:avx512fp16:official ]
   variables:
     EIGEN_CI_TEST_LABEL: Official
 
 test:linux:x86-64:gcc-13:avx512fp16:unsupported:
   extends: .test:linux:x86-64:gcc-13:avx512fp16
+  needs: [ build:linux:cross:x86-64:gcc-13:avx512fp16:unsupported ]
   variables:
     EIGEN_CI_TEST_LABEL: Unsupported
 