From b090ad574d3749ab71bb2be18b06ca6f259c8d78 Mon Sep 17 00:00:00 2001 From: Andres Gomez Date: Mon, 31 May 2021 14:41:21 +0300 Subject: [PATCH] ci: build the hang-detection tool into x86_test-vk hang-detection is a Vulkan-based lightweight wrapper from parallel-deqp-runner that periodically submits empty command buffers and waits for their completion. If the completion never happens, the GPU is considered hung, the wrapped script is killed, and the job should get aborted. This should have no negative impact on the runtime of dEQP/traces/..., but will allow saving time when the GPU hangs, since we can abort the job immediately rather than waiting for the timeout. In the case of B2C, we are using this tool's error message as a way to trigger the reboot of the test machine and start again. v2: - Use hang-detection already with some jobs (Martin). Signed-off-by: Andres Gomez Reviewed-by: Martin Peres Part-of: --- .gitlab-ci.yml | 2 +- .gitlab-ci/container/build-hang-detection.sh | 16 ++++++++++++++++ .gitlab-ci/container/x86_test-vk.sh | 4 ++++ .gitlab-ci/deqp-runner.sh | 5 ++++- .gitlab-ci/piglit/run.sh | 6 +++++- .gitlab-ci/vkd3d-proton/run.sh | 4 +++- 6 files changed, 33 insertions(+), 4 deletions(-) create mode 100644 .gitlab-ci/container/build-hang-detection.sh diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 6fe88eda035..88f337629cb 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -421,7 +421,7 @@ x86_test-gl: x86_test-vk: extends: .use-x86_test-base variables: - MESA_IMAGE_TAG: &x86_test-vk "2021-05-31-glslangValidator" + MESA_IMAGE_TAG: &x86_test-vk "2021-06-02-hang-detection" # Debian 11 based ARM build image arm_build: diff --git a/.gitlab-ci/container/build-hang-detection.sh b/.gitlab-ci/container/build-hang-detection.sh new file mode 100644 index 00000000000..6ce5464c590 --- /dev/null +++ b/.gitlab-ci/container/build-hang-detection.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +set -ex + +PARALLEL_DEQP_RUNNER_VERSION=6596b71cf37a7efb4d54acd48c770ed2d4ad6b7e + 
+git clone https://gitlab.freedesktop.org/mesa/parallel-deqp-runner --single-branch -b master --no-checkout /parallel-deqp-runner +pushd /parallel-deqp-runner +git checkout "$PARALLEL_DEQP_RUNNER_VERSION" +meson . _build +ninja -C _build hang-detection +mkdir -p build/bin +install _build/hang-detection build/bin +strip build/bin/* +find . -not -path './build' -not -path './build/*' -delete +popd diff --git a/.gitlab-ci/container/x86_test-vk.sh b/.gitlab-ci/container/x86_test-vk.sh index 307d7e37429..447e2338e98 100644 --- a/.gitlab-ci/container/x86_test-vk.sh +++ b/.gitlab-ci/container/x86_test-vk.sh @@ -121,6 +121,10 @@ wine \ . .gitlab-ci/container/container_pre_build.sh +############### Build parallel-deqp-runner's hang-detection tool + +. .gitlab-ci/container/build-hang-detection.sh + ############### Build piglit PIGLIT_BUILD_TARGETS="piglit_replayer" . .gitlab-ci/container/build-piglit.sh diff --git a/.gitlab-ci/deqp-runner.sh b/.gitlab-ci/deqp-runner.sh index ea1abc92c89..98c6e1d5f12 100755 --- a/.gitlab-ci/deqp-runner.sh +++ b/.gitlab-ci/deqp-runner.sh @@ -43,10 +43,13 @@ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib RESULTS=`pwd`/${DEQP_RESULTS_DIR:-results} mkdir -p $RESULTS +HANG_DETECTION_CMD="" + # Generate test case list file. 
if [ "$DEQP_VER" = "vk" ]; then cp /deqp/mustpass/vk-$DEQP_VARIANT.txt /tmp/case-list.txt DEQP=/deqp/external/vulkancts/modules/vulkan/deqp-vk + HANG_DETECTION_CMD="/parallel-deqp-runner/build/bin/hang-detection" elif [ "$DEQP_VER" = "gles2" -o "$DEQP_VER" = "gles3" -o "$DEQP_VER" = "gles31" -o "$DEQP_VER" = "egl" ]; then cp /deqp/mustpass/$DEQP_VER-$DEQP_VARIANT.txt /tmp/case-list.txt DEQP=/deqp/modules/$DEQP_VER/deqp-$DEQP_VER @@ -230,7 +233,7 @@ find $RESULTS -name \*.xml \ -exec cp /deqp/testlog.css /deqp/testlog.xsl "$RESULTS/" ";" \ -quit -deqp-runner junit \ +$HANG_DETECTION_CMD deqp-runner junit \ --testsuite $DEQP_VER \ --results $RESULTS/failures.csv \ --output $RESULTS/junit.xml \ diff --git a/.gitlab-ci/piglit/run.sh b/.gitlab-ci/piglit/run.sh index 6ac67d97146..bd4f7d344e3 100755 --- a/.gitlab-ci/piglit/run.sh +++ b/.gitlab-ci/piglit/run.sh @@ -55,6 +55,8 @@ if [ "$VK_DRIVER" ]; then SANITY_MESA_VERSION_CMD="vulkaninfo" + HANG_DETECTION_CMD="/parallel-deqp-runner/build/bin/hang-detection" + # Set up the Window System Interface (WSI) @@ -85,6 +87,8 @@ else SANITY_MESA_VERSION_CMD="wflinfo" + HANG_DETECTION_CMD="" + # Set up the platform windowing system. 
@@ -199,7 +203,7 @@ PIGLIT_TESTS=$(printf "%s" "$PIGLIT_TESTS") PIGLIT_CMD="./piglit run --timeout 300 -j${FDO_CI_CONCURRENT:-4} $PIGLIT_OPTIONS $PIGLIT_TESTS $PIGLIT_PROFILES "$(/usr/bin/printf "%q" "$RESULTS") -RUN_CMD="export LD_LIBRARY_PATH=$__LD_LIBRARY_PATH; $SANITY_MESA_VERSION_CMD && $PIGLIT_CMD" +RUN_CMD="export LD_LIBRARY_PATH=$__LD_LIBRARY_PATH; $SANITY_MESA_VERSION_CMD && $HANG_DETECTION_CMD $PIGLIT_CMD" if [ "$RUN_CMD_WRAPPER" ]; then RUN_CMD="set +e; $RUN_CMD_WRAPPER "$(/usr/bin/printf "%q" "$RUN_CMD")"; set -e" diff --git a/.gitlab-ci/vkd3d-proton/run.sh b/.gitlab-ci/vkd3d-proton/run.sh index 0eeb6c2dd79..755bb901b69 100755 --- a/.gitlab-ci/vkd3d-proton/run.sh +++ b/.gitlab-ci/vkd3d-proton/run.sh @@ -46,6 +46,8 @@ quiet() { SANITY_MESA_VERSION_CMD="vulkaninfo | tee /tmp/version.txt | grep \"Mesa $MESA_VERSION\(\s\|$\)\"" +HANG_DETECTION_CMD="/parallel-deqp-runner/build/bin/hang-detection" + RUN_CMD="export LD_LIBRARY_PATH=$__LD_LIBRARY_PATH; $SANITY_MESA_VERSION_CMD" set +e @@ -65,7 +67,7 @@ fi VKD3D_PROTON_TESTSUITE_CMD="wine /vkd3d-proton-tests/x64/bin/d3d12.exe >$RESULTS/vkd3d-proton.log 2>&1" quiet printf "%s\n" "Running vkd3d-proton testsuite..." -RUN_CMD="export LD_LIBRARY_PATH=$__LD_LIBRARY_PATH; $VKD3D_PROTON_TESTSUITE_CMD" +RUN_CMD="export LD_LIBRARY_PATH=$__LD_LIBRARY_PATH; $HANG_DETECTION_CMD $VKD3D_PROTON_TESTSUITE_CMD" set +e eval $RUN_CMD