From 9ef8af357ddabef47e25414a1ab96e46273a01d1 Mon Sep 17 00:00:00 2001 From: Cristian Ciocaltea Date: Tue, 8 Feb 2022 22:48:39 +0200 Subject: [PATCH] virgl/ci: Setup virtio-vsock based IPC The mechanism currently used to pass data from the dEQP child process executed in a crosvm guest environment towards the deqp-runner wrapper script that starts the crosvm instance is based on creating, writing and reading regular files. In addition to the main drawback of relying on storage, this approach is potentially unreliable because the data cannot be transferred in real time and there is no control over when the transmission ends. It also requires a forced sleep to sync the content, even though the minimum amount of time necessary to wait cannot be determined easily or safely. Replace this with an IPC based on the virtio transport for virtual sockets (virtio-vsock). Signed-off-by: Cristian Ciocaltea Reviewed-by: Daniel Stone Part-of: --- .gitlab-ci/crosvm-init.sh | 34 +++++----- .gitlab-ci/crosvm-runner.sh | 127 ++++++++++++++++++++++++++---------- 2 files changed, 110 insertions(+), 51 deletions(-) diff --git a/.gitlab-ci/crosvm-init.sh b/.gitlab-ci/crosvm-init.sh index c6d9095b2f5..15e68f835f7 100755 --- a/.gitlab-ci/crosvm-init.sh +++ b/.gitlab-ci/crosvm-init.sh @@ -2,7 +2,9 @@ set -e -export DEQP_TEMP_DIR="$1" +VSOCK_STDOUT=$1 +VSOCK_STDERR=$2 +VSOCK_TEMP_DIR=$3 mount -t proc none /proc mount -t sysfs none /sys @@ -10,29 +12,31 @@ mkdir -p /dev/pts mount -t devpts devpts /dev/pts mount -t tmpfs tmpfs /tmp -. $DEQP_TEMP_DIR/crosvm-env.sh +. 
${VSOCK_TEMP_DIR}/crosvm-env.sh # .gitlab-ci.yml script variable is using relative paths to install directory, # so change to that dir before running `crosvm-script` cd "${CI_PROJECT_DIR}" -# The exception is the dEQP binary, since it needs to run from the directory -# it's in -if [ -d "${DEQP_BIN_DIR}" ] -then - cd "${DEQP_BIN_DIR}" -fi +# The exception is the dEQP binary, as it needs to run from its own directory +[ -z "${DEQP_BIN_DIR}" ] || cd "${DEQP_BIN_DIR}" -dmesg --level crit,err,warn -w >> $DEQP_TEMP_DIR/stderr & +# Use a FIFO to collect relevant error messages +STDERR_FIFO=/tmp/crosvm-stderr.fifo +mkfifo -m 600 ${STDERR_FIFO} -set +e -stdbuf -oL sh $DEQP_TEMP_DIR/crosvm-script.sh 2>> $DEQP_TEMP_DIR/stderr >> $DEQP_TEMP_DIR/stdout -echo $? > $DEQP_TEMP_DIR/exit_code -set -e +dmesg --level crit,err,warn -w > ${STDERR_FIFO} & +DMESG_PID=$! + +# Transfer the errors and crosvm-script output via a pair of virtio-vsocks +socat -d -u pipe:${STDERR_FIFO} vsock-listen:${VSOCK_STDERR} & +socat -d -U vsock-listen:${VSOCK_STDOUT} \ + system:"stdbuf -eL sh ${VSOCK_TEMP_DIR}/crosvm-script.sh 2> ${STDERR_FIFO}; echo \$? > ${VSOCK_TEMP_DIR}/exit_code",nofork + +kill ${DMESG_PID} +wait sync -sleep 1 - poweroff -d -n -f || true sleep 1 # Just in case init would exit before the kernel shuts down the VM diff --git a/.gitlab-ci/crosvm-runner.sh b/.gitlab-ci/crosvm-runner.sh index 4e0f64ae6b0..c3b56eb6543 100755 --- a/.gitlab-ci/crosvm-runner.sh +++ b/.gitlab-ci/crosvm-runner.sh @@ -1,58 +1,113 @@ #!/bin/sh -set -ex +set -e -# This script can be called concurrently, pass arguments and env in a -# per-instance tmp dir -DEQP_TEMP_DIR=$(mktemp -d /tmp.XXXXXXXXXX) -export DEQP_TEMP_DIR +# +# Helper to generate CIDs for virtio-vsock based communication with processes +# running inside crosvm guests. +# +# A CID is a 32-bit Context Identifier to be assigned to a crosvm instance +# and must be unique across the host system. 
For this purpose, let's take +# the least significant 26 bits from CI_JOB_ID as a base and generate a 6-bit +# prefix number to handle up to 64 concurrent crosvm instances per job runner. +# +# As a result, the following variables are set: +# - VSOCK_CID: the crosvm unique CID to be passed as a run argument +# +# - VSOCK_STDOUT, VSOCK_STDERR: the port numbers the guest should accept +# vsock connections on in order to transfer output messages +# +# - VSOCK_TEMP_DIR: the temporary directory path used to pass additional +# context data towards the guest +# +set_vsock_context() { + [ -n "${CI_JOB_ID}" ] || { + echo "Missing or unset CI_JOB_ID env variable" >&2 + exit 1 + } + + local dir_prefix="/tmp-vsock." + local cid_prefix=0 + unset VSOCK_TEMP_DIR + + while [ ${cid_prefix} -lt 64 ]; do + VSOCK_TEMP_DIR=${dir_prefix}${cid_prefix} + mkdir "${VSOCK_TEMP_DIR}" >/dev/null 2>&1 && break || unset VSOCK_TEMP_DIR + cid_prefix=$((cid_prefix + 1)) + done + + [ -n "${VSOCK_TEMP_DIR}" ] || return 1 + + VSOCK_CID=$(((CI_JOB_ID & 0x3ffffff) | ((cid_prefix & 0x3f) << 26))) + VSOCK_STDOUT=5001 + VSOCK_STDERR=5002 + return 0 +} # The dEQP binary needs to run from the directory it's in if [ -n "${1##*.sh}" ] && [ -z "${1##*"deqp"*}" ]; then - DEQP_BIN_DIR=$(dirname "$1") - export DEQP_BIN_DIR + DEQP_BIN_DIR=$(dirname "$1") + export DEQP_BIN_DIR fi +set_vsock_context || { echo "Could not generate crosvm vsock CID" >&2; exit 1; } + +# Ensure cleanup on script exit +trap 'exit ${exit_code}' INT TERM +trap 'exit_code=$?; [ -z "${SOCAT_PIDS}" ] || kill ${SOCAT_PIDS} >/dev/null 2>&1 || true; rm -rf ${VSOCK_TEMP_DIR}' EXIT + # Securely pass the current variables to the crosvm environment -CI_COMMON="$CI_PROJECT_DIR"/install/common +CI_COMMON="${CI_PROJECT_DIR}"/install/common echo "Variables passed through:" -"${CI_COMMON}"/generate-env.sh | tee ${DEQP_TEMP_DIR}/crosvm-env.sh +"${CI_COMMON}"/generate-env.sh | tee ${VSOCK_TEMP_DIR}/crosvm-env.sh -CROSVM_KERNEL_ARGS="quiet console=null 
root=my_root rw rootfstype=virtiofs init=$CI_PROJECT_DIR/install/crosvm-init.sh ip=192.168.30.2::192.168.30.1:255.255.255.0:crosvm:eth0 -- $DEQP_TEMP_DIR" +# Set the crosvm-script as the arguments of the current script +echo "$@" > ${VSOCK_TEMP_DIR}/crosvm-script.sh -# Set the crosvm-script as the arguments of the current script. -echo "$@" > $DEQP_TEMP_DIR/crosvm-script.sh - -unset DISPLAY -unset XDG_RUNTIME_DIR +# Start background processes to receive output from guest +socat -u vsock-connect:${VSOCK_CID}:${VSOCK_STDERR},retry=200,interval=0.1 stderr & +SOCAT_PIDS=$! +socat -u vsock-connect:${VSOCK_CID}:${VSOCK_STDOUT},retry=200,interval=0.1 stdout & +SOCAT_PIDS="${SOCAT_PIDS} $!" +# Setup networking /usr/sbin/iptables-legacy -w -t nat -A POSTROUTING -o eth0 -j MASQUERADE echo 1 > /proc/sys/net/ipv4/ip_forward -# Send output from guest to host -touch $DEQP_TEMP_DIR/stderr $DEQP_TEMP_DIR/stdout -tail -f $DEQP_TEMP_DIR/stderr >> /dev/stderr & -ERR_TAIL_PID=$! -tail -f $DEQP_TEMP_DIR/stdout >> /dev/stdout & -OUT_TAIL_PID=$! 
+# Prepare to start crosvm +unset DISPLAY +unset XDG_RUNTIME_DIR -trap "exit \$exit_code" INT TERM -trap "exit_code=\$?; kill $ERR_TAIL_PID $OUT_TAIL_PID; rm -rf $DEQP_TEMP_DIR" EXIT +CROSVM_KERN_ARGS="quiet console=null root=my_root rw rootfstype=virtiofs ip=192.168.30.2::192.168.30.1:255.255.255.0:crosvm:eth0" +CROSVM_KERN_ARGS="${CROSVM_KERN_ARGS} init=${CI_PROJECT_DIR}/install/crosvm-init.sh -- ${VSOCK_STDOUT} ${VSOCK_STDERR} ${VSOCK_TEMP_DIR}" + +set +e -x # We aren't testing LLVMPipe here, so we don't need to validate NIR on the host -NIR_DEBUG="novalidate" LIBGL_ALWAYS_SOFTWARE="true" GALLIUM_DRIVER="$CROSVM_GALLIUM_DRIVER" crosvm run \ - --gpu "$CROSVM_GPU_ARGS" \ - -m 4096 \ - -c 2 \ - --disable-sandbox \ - --shared-dir /:my_root:type=fs:writeback=true:timeout=60:cache=always \ - --host_ip=192.168.30.1 --netmask=255.255.255.0 --mac "AA:BB:CC:00:00:12" \ - -p "$CROSVM_KERNEL_ARGS" \ - /lava-files/bzImage > $DEQP_TEMP_DIR/crosvm 2>&1 +NIR_DEBUG="novalidate" LIBGL_ALWAYS_SOFTWARE="true" GALLIUM_DRIVER=${CROSVM_GALLIUM_DRIVER} \ +crosvm run \ + --gpu "${CROSVM_GPU_ARGS}" -m 4096 -c 2 --disable-sandbox \ + --shared-dir /:my_root:type=fs:writeback=true:timeout=60:cache=always \ + --host_ip "192.168.30.1" --netmask "255.255.255.0" --mac "AA:BB:CC:00:00:12" \ + --cid ${VSOCK_CID} -p "${CROSVM_KERN_ARGS}" \ + /lava-files/bzImage > ${VSOCK_TEMP_DIR}/crosvm 2>&1 -RET=$(cat $DEQP_TEMP_DIR/exit_code || true) +CROSVM_RET=$? 
+[ ${CROSVM_RET} -eq 0 ] && { + # socat bg processes should terminate as soon as the remote peers exit + wait + # The actual return code is the crosvm guest script's exit code + CROSVM_RET=$(cat ${VSOCK_TEMP_DIR}/exit_code 2>/dev/null) + # Force error when the guest script's exit code is not available + CROSVM_RET=${CROSVM_RET:-1} +} -# Got no exit code from the script, show crosvm output to help with debugging -[ -n "$RET" ] || cat $DEQP_TEMP_DIR/crosvm || true +# Show crosvm output on error to help with debugging +[ ${CROSVM_RET} -eq 0 ] || { + set +x + echo "Dumping crosvm output.." >&2 + cat ${VSOCK_TEMP_DIR}/crosvm >&2 + set -x +} -exit ${RET:-1} +exit ${CROSVM_RET}