CI: Use -O2 for AVX512-FP16 jobs to work around GCC codegen bug

libeigen/eigen!2424

Co-authored-by: Rasmus Munk Larsen <rmlarsen@gmail.com>
diff --git a/ci/build.linux.gitlab-ci.yml b/ci/build.linux.gitlab-ci.yml
index ef68f7b..13c33e8 100644
--- a/ci/build.linux.gitlab-ci.yml
+++ b/ci/build.linux.gitlab-ci.yml
@@ -61,13 +61,28 @@
   variables:
     EIGEN_CI_ADDITIONAL_ARGS: "-DEIGEN_TEST_AVX512DQ=on"
 
-build:linux:cross:x86-64:gcc-13:avx512fp16:
+# AVX512-FP16 builds are split into official/unsupported because building all
+# tests exceeds the 3-hour GitLab SaaS shared-runner timeout.
+.build:linux:cross:x86-64:gcc-13:avx512fp16:
   extends: build:linux:cross:x86-64:gcc-13:default
+  # Disable cache: uploading the build directory takes ~35 minutes and the
+  # savings from cache hits do not compensate for the upload cost.
+  cache: []
   variables:
-    # Use -O2: GCC bug generates invalid masked vmovsh with {z} on memory
-    # stores at -O3 for _Float16 operations (zero-masking on memory
-    # destinations is architecturally illegal in AVX-512).
-    EIGEN_CI_ADDITIONAL_ARGS: "-DEIGEN_TEST_AVX512FP16=on -DEIGEN_TEST_AVX512DQ=on -DEIGEN_TEST_F16C=on -DCMAKE_CXX_FLAGS_RELEASE='-O2 -DNDEBUG'"
+    # Build with RelWithDebInfo (-O2) instead of -O3: at -O3 a GCC bug emits
+    # masked vmovsh stores with {z} for _Float16 operations, but zero-masking
+    # on a memory destination is architecturally illegal in AVX-512.
+    EIGEN_CI_ADDITIONAL_ARGS: "-DEIGEN_TEST_AVX512FP16=on -DEIGEN_TEST_AVX512DQ=on -DEIGEN_TEST_F16C=on -DCMAKE_BUILD_TYPE=RelWithDebInfo"
+
+build:linux:cross:x86-64:gcc-13:avx512fp16:official:
+  extends: .build:linux:cross:x86-64:gcc-13:avx512fp16
+  variables:
+    EIGEN_CI_BUILD_TARGET: BuildOfficial
+
+build:linux:cross:x86-64:gcc-13:avx512fp16:unsupported:
+  extends: .build:linux:cross:x86-64:gcc-13:avx512fp16
+  variables:
+    EIGEN_CI_BUILD_TARGET: BuildUnsupported
 
 # Clang-14 (stable recent version)
 build:linux:cross:x86-64:clang-14:default:
diff --git a/ci/scripts/build.linux.script.sh b/ci/scripts/build.linux.script.sh
index 082c1fc..82d85a5 100755
--- a/ci/scripts/build.linux.script.sh
+++ b/ci/scripts/build.linux.script.sh
@@ -23,7 +23,8 @@
 # out of resources.  In that case, keep trying to build the remaining
 # targets (k0), then retry with reduced parallelism to minimize resource use.
 # EIGEN_CI_BUILD_JOBS can be set to limit parallelism for memory-hungry
-# compilers (e.g. NVHPC).
+# compilers (e.g. NVHPC).  If unset, ninja picks its default; njobs = NPROC.
+njobs=${EIGEN_CI_BUILD_JOBS:-${NPROC}}
 jobs=""
 if [[ -n "${EIGEN_CI_BUILD_JOBS}" ]]; then
   jobs="-j${EIGEN_CI_BUILD_JOBS}"
@@ -37,9 +38,13 @@
 # nvc++) are spread out instead of all running at once.  Ninja ignores the
 # command-line target order and schedules by its dependency graph, so we
 # must feed it small batches to actually influence scheduling.
-# Falls back to the normal build if the target is not a phony or if
-# ninja/shuf are not available.
-batch_size=${EIGEN_CI_BUILD_BATCH_SIZE:-48}
+#
+# Batch size defaults to max(2 * njobs, 48): enough work to keep all cores
+# busy without scheduling too many memory-hungry targets simultaneously.
+# Override with EIGEN_CI_BUILD_BATCH_SIZE for fine-grained control.
+default_batch=$((njobs * 2))
+default_batch=$((default_batch > 48 ? default_batch : 48))
+batch_size=${EIGEN_CI_BUILD_BATCH_SIZE:-${default_batch}}
 shuffled=false
 if [[ -n "${EIGEN_CI_BUILD_TARGET}" ]] && command -v ninja >/dev/null 2>&1; then
   # Suppress xtrace while extracting and shuffling the target list
@@ -70,7 +75,7 @@
       printf "%010d %s\n",h,$0 }' | sort | sed 's/^[^ ]* //')
   if [[ -n "$shuffled_deps" ]]; then
     ndeps=$(echo "$shuffled_deps" | wc -l)
-    echo "Building ${ndeps} targets in batches of ${batch_size}"
+    echo "Building ${ndeps} targets in batches of ${batch_size} (njobs=${njobs})"
     shuffled=true
     # Build in batches: ninja parallelises within each batch, but batches
     # run sequentially so memory-hungry targets from different families
diff --git a/ci/test.linux.gitlab-ci.yml b/ci/test.linux.gitlab-ci.yml
index 234cdff..0f48488 100644
--- a/ci/test.linux.gitlab-ci.yml
+++ b/ci/test.linux.gitlab-ci.yml
@@ -100,7 +100,6 @@
 
 .test:linux:x86-64:gcc-13:avx512fp16:
   extends: .test:linux:x86-64:gcc-13:default
-  needs: [ build:linux:cross:x86-64:gcc-13:avx512fp16 ]
   tags:
     - eigen-runner
     - linux
@@ -109,11 +108,13 @@
 
 test:linux:x86-64:gcc-13:avx512fp16:official:
   extends: .test:linux:x86-64:gcc-13:avx512fp16
+  needs: [ build:linux:cross:x86-64:gcc-13:avx512fp16:official ]
   variables:
     EIGEN_CI_TEST_LABEL: Official
 
 test:linux:x86-64:gcc-13:avx512fp16:unsupported:
   extends: .test:linux:x86-64:gcc-13:avx512fp16
+  needs: [ build:linux:cross:x86-64:gcc-13:avx512fp16:unsupported ]
   variables:
     EIGEN_CI_TEST_LABEL: Unsupported