ci/lava: Make SSH definition wrap the UART one

Simplify both the UART and SSH job-definition modules so that they share
common building blocks.

- generate_lava_yaml_payload is now a LAVAJobDefinition method, so
  dropped the Strategy pattern between both modules
- if SSH is supported and UART is not enforced, default to SSH
- when SSH is enabled, wrap the last deploy action to run the SSH server
  and rewrite the test actions, which should not change due to the boot
  method
- create a constants module to load environment variables

Signed-off-by: Guilherme Gallo <guilherme.gallo@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25912>
This commit is contained in:
Guilherme Gallo 2023-10-25 23:12:16 -03:00 committed by Marge Bot
parent 76922f8404
commit 654f7f783f
4 changed files with 146 additions and 143 deletions

View file

@ -0,0 +1,14 @@
from os import getenv

# How many attempts should be made when a timeout happens during LAVA device boot.
NUMBER_OF_ATTEMPTS_LAVA_BOOT = int(getenv("LAVA_NUMBER_OF_ATTEMPTS_LAVA_BOOT", 3))

# Supports any integer in [0, 100].
# The scheduler considers the job priority when ordering the queue
# to decide which job should run next.
JOB_PRIORITY = int(getenv("JOB_PRIORITY", 75))

# Use UART over the default SSH mechanism to follow logs.
# Caution: this can lead to device silence in some devices in Mesa CI.
# NOTE: os.getenv returns a *string*, so bool(getenv(...)) would treat ANY
# non-empty value — including "0" and "false" — as True. Parse the common
# falsy spellings explicitly; unset/empty still means False.
FORCE_UART = getenv("LAVA_FORCE_UART", "").lower() not in ("", "0", "false", "no")

View file

@ -1,36 +1,27 @@
import re
from io import StringIO
from os import getenv
from typing import TYPE_CHECKING, Any
from ruamel.yaml import YAML
from ruamel.yaml.scalarstring import LiteralScalarString
from lava.utils.lava_farm import LavaFarm, get_lava_farm
from lava.utils.ssh_job_definition import (
generate_docker_test,
generate_dut_test,
wrap_boot_action,
wrap_final_deploy_action,
)
from lava.utils.uart_job_definition import (
fastboot_boot_action,
fastboot_deploy_actions,
tftp_boot_action,
tftp_deploy_actions,
uart_test_actions,
)
if TYPE_CHECKING:
from lava.lava_job_submitter import LAVAJobSubmitter
# How many attempts should be made when a timeout happen during LAVA device boot.
NUMBER_OF_ATTEMPTS_LAVA_BOOT = int(getenv("LAVA_NUMBER_OF_ATTEMPTS_LAVA_BOOT", 3))
# Supports any integers in [0, 100].
# The scheduler considers the job priority when ordering the queue
# to consider which job should run next.
JOB_PRIORITY = int(getenv("JOB_PRIORITY", 75))
def to_yaml_block(steps_array: list[str], escape_vars=()) -> LiteralScalarString:
    """Join shell steps into a single YAML literal block scalar.

    Blank lines and full-line comments (lines starting with "#") are dropped.
    Any `$NAME` reference whose NAME is listed in *escape_vars* gets a leading
    backslash so the variable is not expanded when the job YAML is rendered.
    """

    def escape_envvar(match):
        return "\\" + match.group(0)

    # Keep only meaningful shell lines.
    filtered_array = [s for s in steps_array if s.strip() and not s.startswith("#")]
    final_str = "\n".join(filtered_array)

    for escape_var in escape_vars:
        # Find env vars and add '\\' before them.
        # The previous pattern rf"\${escape_var}*" had a stray `*`, which made
        # the variable name's last character optional/repeatable (it also
        # matched e.g. "$FO" for "FOO"); match the exact name instead.
        final_str = re.sub(rf"\${escape_var}", escape_envvar, final_str)

    return LiteralScalarString(final_str)
from .constants import FORCE_UART, JOB_PRIORITY, NUMBER_OF_ATTEMPTS_LAVA_BOOT
class LAVAJobDefinition:
@ -43,9 +34,7 @@ class LAVAJobDefinition:
self.job_submitter: "LAVAJobSubmitter" = job_submitter
def has_ssh_support(self) -> bool:
force_uart = bool(getenv("LAVA_FORCE_UART", False))
if force_uart:
if FORCE_UART:
return False
# Only Collabora's farm supports to run docker container as a LAVA actions,
@ -59,22 +48,65 @@ class LAVAJobDefinition:
def generate_lava_yaml_payload(self) -> dict[str, Any]:
    """
    Generates a YAML payload for submitting a LAVA job, based on the provided arguments.

    The strategy here is to use LAVA with a containerized SSH session to follow
    the job output, escaping from dumping data to the UART, which proves to be
    error prone in some devices.

    Args:
        None

    Returns:
        a dictionary containing the values generated by the `generate_metadata` function and the
        actions for the LAVA job submission.
    """
    # NOTE(review): the merged text contained leftovers of the old Strategy
    # dispatch (module-local imports and early returns into the ssh/uart
    # generate_lava_yaml_payload helpers); those are gone — this method now
    # builds the UART actions and, when SSH is supported, wraps them in place.
    args = self.job_submitter
    values = self.generate_metadata()
    nfsrootfs = {
        "url": f"{args.rootfs_url_prefix}/lava-rootfs.tar.zst",
        "compression": "zstd",
    }

    init_stage1_steps = self.init_stage1_steps()
    artifact_download_steps = self.artifact_download_steps()

    deploy_actions = []
    boot_action = []
    # Default to the UART-style test actions; replaced below for SSH jobs.
    test_actions = uart_test_actions(args, init_stage1_steps, artifact_download_steps)

    if args.boot_method == "fastboot":
        deploy_actions = fastboot_deploy_actions(self, nfsrootfs)
        boot_action = fastboot_boot_action(args)
    else:  # tftp
        deploy_actions = tftp_deploy_actions(self, nfsrootfs)
        boot_action = tftp_boot_action(args)

    if self.has_ssh_support():
        # Make the last deploy action start the SSH server on the DUT.
        wrap_final_deploy_action(deploy_actions[-1])

        # SSH jobs use namespaces to differentiate between the DUT and the
        # docker container. Every LAVA action needs an explicit namespace,
        # when we are not using the default one.
        for deploy_action in deploy_actions:
            deploy_action["namespace"] = "dut"

        wrap_boot_action(boot_action)

        test_actions = (
            generate_dut_test(args, init_stage1_steps),
            generate_docker_test(args, artifact_download_steps),
        )

    values["actions"] = [
        *[{"deploy": d} for d in deploy_actions],
        {"boot": boot_action},
        *[{"test": t} for t in test_actions],
    ]

    return values
def generate_lava_job_definition(self) -> str:
"""
Generates a LAVA job definition in YAML format and returns it as a string.
Returns:
a string representation of the job definition generated by analysing job submitter
arguments and environment variables
"""
job_stream = StringIO()
yaml = YAML()
yaml.width = 4096
@ -157,3 +189,24 @@ class LAVAJobDefinition:
]
return download_steps
def init_stage1_steps(self) -> list[str]:
    """Build the first-stage shell steps executed right after boot.

    Reads the job submitter's first-stage init script, dropping comment and
    blank lines, and appends any device-specific firmware download steps.
    """
    # job execution script:
    #   - inline .gitlab-ci/common/init-stage1.sh
    #   - fetch and unpack per-pipeline build artifacts from build job
    #   - fetch and unpack per-job environment from lava-submit.sh
    #   - exec .gitlab-ci/common/init-stage2.sh
    with open(self.job_submitter.first_stage_init, "r") as init_sh:
        steps: list[str] = [
            line.rstrip()
            for line in init_sh
            if not line.startswith("#") and line.rstrip()
        ]

    # We cannot distribute the Adreno 660 shader firmware inside rootfs,
    # since the license isn't bundled inside the repository
    if self.job_submitter.device_type == "sm8350-hdk":
        steps.append(
            "curl -L --retry 4 -f --retry-all-errors --retry-delay 60 "
            "https://github.com/allahjasif1990/hdk888-firmware/raw/main/a660_zap.mbn "
            '-o "/lib/firmware/qcom/sm8350/a660_zap.mbn"'
        )

    return steps

View file

@ -28,14 +28,15 @@ script after sourcing "dut-env-vars.sh" again for the second SSH test case.
"""
from pathlib import Path
from typing import TYPE_CHECKING, Any
import re
from typing import TYPE_CHECKING, Any, Iterable
from .lava_job_definition import NUMBER_OF_ATTEMPTS_LAVA_BOOT, to_yaml_block
from ruamel.yaml.scalarstring import LiteralScalarString
from .constants import NUMBER_OF_ATTEMPTS_LAVA_BOOT
if TYPE_CHECKING:
from ..lava_job_submitter import LAVAJobSubmitter
from .lava_job_definition import LAVAJobDefinition
# Very early SSH server setup. Uses /dut_ready file to flag it is done.
SSH_SERVER_COMMANDS = {
@ -78,12 +79,23 @@ lava_ssh_test_case() {
]
def generate_dut_test(args: "LAVAJobSubmitter") -> dict[str, Any]:
def to_yaml_block(steps_array: Iterable[str], escape_vars=()) -> LiteralScalarString:
    """Join shell steps into a single YAML literal block scalar.

    Blank lines and full-line comments (lines starting with "#") are dropped.
    Any `$NAME` reference whose NAME is listed in *escape_vars* gets a leading
    backslash so the variable is not expanded when the job YAML is rendered.
    """

    def escape_envvar(match):
        return "\\" + match.group(0)

    # Keep only meaningful shell lines.
    filtered_array = [s for s in steps_array if s.strip() and not s.startswith("#")]
    final_str = "\n".join(filtered_array)

    for escape_var in escape_vars:
        # Find env vars and add '\\' before them.
        # The previous pattern rf"\${escape_var}*" had a stray `*`, which made
        # the variable name's last character optional/repeatable (it also
        # matched e.g. "$FO" for "FOO"); match the exact name instead.
        final_str = re.sub(rf"\${escape_var}", escape_envvar, final_str)

    return LiteralScalarString(final_str)
def generate_dut_test(args: "LAVAJobSubmitter", first_stage_steps: list[str]) -> dict[str, Any]:
# Commands executed on DUT.
# Trying to execute the minimal number of commands, because the console data is
# retrieved via UART, which is hang-prone in some devices.
first_stage_steps: list[str] = Path(args.first_stage_init).read_text().splitlines()
return {
"namespace": "dut",
"definitions": [
@ -108,8 +120,9 @@ def generate_dut_test(args: "LAVAJobSubmitter") -> dict[str, Any]:
}
def generate_docker_test(job_definition: "LAVAJobDefinition") -> dict[str, Any]:
args = job_definition.job_submitter
def generate_docker_test(
args: "LAVAJobSubmitter", artifact_download_steps: list[str]
) -> dict[str, Any]:
# This is a growing list of commands that will be executed by the docker
# guest, which will be the SSH client.
docker_commands = []
@ -148,7 +161,7 @@ def generate_docker_test(job_definition: "LAVAJobDefinition") -> dict[str, Any]:
(
"lava_ssh_test_case 'artifact_download' 'bash --' << EOF",
"source /dut-env-vars.sh",
*job_definition.artifact_download_steps(),
*artifact_download_steps,
"EOF",
)
),
@ -163,44 +176,22 @@ def generate_docker_test(job_definition: "LAVAJobDefinition") -> dict[str, Any]:
return init_stages_test
def generate_lava_yaml_payload(job_definition: "LAVAJobDefinition") -> dict[str, Any]:
values = job_definition.generate_metadata()
job_submitter = job_definition.job_submitter
# URLs to our kernel rootfs to boot from, both generated by the base
# container build
deploy = {
def wrap_final_deploy_action(final_deploy_action: dict):
    """Adapt the last UART deploy action so it can host the SSH flow.

    Mutates *final_deploy_action* in place: pins it to the "dut" namespace and
    adds boot-retry and download timeouts. Keys already present in the action
    (kernel, rootfs, ...) are preserved unless overridden by *wrap*.

    NOTE(review): the merged text interleaved the removed old tftp deploy
    dict (kernel/nfsrootfs/"to": "tftp"/...) inside this body; only the new
    wrapper keys belong here.
    """
    wrap = {
        "namespace": "dut",
        "failure_retry": NUMBER_OF_ATTEMPTS_LAVA_BOOT,
        "timeout": {"minutes": 10},
        "timeouts": {"http-download": {"minutes": 2}},
    }

    final_deploy_action.update(wrap)
def wrap_boot_action(boot_action: dict):
    """Adapt a UART boot action for the SSH flow.

    Mutates *boot_action* in place: pins it to the "dut" namespace and merges
    in the early SSH-server setup commands so job logs can be followed over
    SSH instead of the hang-prone UART.

    NOTE(review): the merged text interleaved the removed old boot dict
    (failure_retry/method/commands/prompts) inside this body; only the new
    wrapper keys belong here — the boot method specifics stay with the
    UART-built action. TODO confirm against upstream whether failure_retry
    should also be re-applied here.
    """
    wrap = {
        "namespace": "dut",
        **SSH_SERVER_COMMANDS,
    }

    boot_action.update(wrap)

View file

@ -4,7 +4,7 @@ if TYPE_CHECKING:
from ..lava_job_submitter import LAVAJobSubmitter
from .lava_job_definition import LAVAJobDefinition
from .lava_job_definition import NUMBER_OF_ATTEMPTS_LAVA_BOOT
from .constants import NUMBER_OF_ATTEMPTS_LAVA_BOOT
# Use the same image that is being used for the hardware enablement and health-checks.
# They are pretty small (<100MB) and have all the tools we need to run LAVA, so it is a safe choice.
@ -14,7 +14,9 @@ from .lava_job_definition import NUMBER_OF_ATTEMPTS_LAVA_BOOT
DOCKER_IMAGE = "registry.gitlab.collabora.com/lava/health-check-docker"
def fastboot_deploy_actions(job_definition: "LAVAJobDefinition", nfsrootfs) -> list[dict[str, Any]]:
def fastboot_deploy_actions(
job_definition: "LAVAJobDefinition", nfsrootfs
) -> tuple[dict[str, Any], ...]:
args = job_definition.job_submitter
fastboot_deploy_nfs = {
"timeout": {"minutes": 10},
@ -59,10 +61,10 @@ def fastboot_deploy_actions(job_definition: "LAVAJobDefinition", nfsrootfs) -> l
# container build
job_definition.attach_kernel_and_dtb(fastboot_deploy_prepare["images"])
return [{"deploy": d} for d in (fastboot_deploy_nfs, fastboot_deploy_prepare, fastboot_deploy)]
return (fastboot_deploy_nfs, fastboot_deploy_prepare, fastboot_deploy)
def tftp_deploy_actions(job_definition: "LAVAJobDefinition", nfsrootfs) -> list[dict[str, Any]]:
def tftp_deploy_actions(job_definition: "LAVAJobDefinition", nfsrootfs) -> tuple[dict[str, Any]]:
args = job_definition.job_submitter
tftp_deploy = {
"timeout": {"minutes": 5},
@ -75,35 +77,14 @@ def tftp_deploy_actions(job_definition: "LAVAJobDefinition", nfsrootfs) -> list[
}
job_definition.attach_kernel_and_dtb(tftp_deploy)
return [{"deploy": d} for d in [tftp_deploy]]
return (tftp_deploy,)
def init_stage1_steps(args: "LAVAJobSubmitter") -> list[str]:
    """Build the first-stage shell steps executed right after boot.

    Reads the submitter's first-stage init script, dropping comment and blank
    lines, and appends any device-specific firmware download steps.
    """
    # job execution script:
    #   - inline .gitlab-ci/common/init-stage1.sh
    #   - fetch and unpack per-pipeline build artifacts from build job
    #   - fetch and unpack per-job environment from lava-submit.sh
    #   - exec .gitlab-ci/common/init-stage2.sh
    with open(args.first_stage_init, "r") as init_sh:
        run_steps = [
            line.rstrip()
            for line in init_sh
            if not line.startswith("#") and line.rstrip()
        ]

    # We cannot distribute the Adreno 660 shader firmware inside rootfs,
    # since the license isn't bundled inside the repository
    if args.device_type == "sm8350-hdk":
        run_steps.append(
            "curl -L --retry 4 -f --retry-all-errors --retry-delay 60 "
            "https://github.com/allahjasif1990/hdk888-firmware/raw/main/a660_zap.mbn "
            '-o "/lib/firmware/qcom/sm8350/a660_zap.mbn"'
        )

    return run_steps
def test_actions(job_definition: "LAVAJobDefinition") -> list[dict[str, Any]]:
def uart_test_actions(
args: "LAVAJobSubmitter", init_stage1_steps: list[str], artifact_download_steps: list[str]
) -> tuple[dict[str, Any]]:
# skeleton test definition: only declaring each job as a single 'test'
# since LAVA's test parsing is not useful to us
args = job_definition.job_submitter
run_steps = []
test = {
"timeout": {"minutes": args.job_timeout_min},
@ -128,8 +109,8 @@ def test_actions(job_definition: "LAVAJobDefinition") -> list[dict[str, Any]]:
],
}
run_steps += init_stage1_steps(args)
run_steps += job_definition.artifact_download_steps()
run_steps += init_stage1_steps
run_steps += artifact_download_steps
run_steps += [
f"mkdir -p {args.ci_project_dir}",
@ -143,7 +124,7 @@ def test_actions(job_definition: "LAVAJobDefinition") -> list[dict[str, Any]]:
f"lava-test-case '{args.project_name}_{args.mesa_job_name}' --shell /init-stage2.sh",
]
return [{"test": t} for t in [test]]
return (test,)
def tftp_boot_action(args: "LAVAJobSubmitter") -> dict[str, Any]:
@ -168,39 +149,3 @@ def fastboot_boot_action(args: "LAVAJobSubmitter") -> dict[str, Any]:
}
return fastboot_boot
def generate_lava_yaml_payload(job_definition: "LAVAJobDefinition") -> dict[str, Any]:
    """
    Generates a YAML payload for submitting a LAVA job, based on the provided arguments.

    Args:
        args ("LAVAJobSubmitter"): The `args` parameter is an instance of the `LAVAJobSubmitter`
        class. It contains various properties and methods that are used to configure and submit a
        LAVA job.

    Returns:
        a dictionary containing the values generated by the `generate_metadata` function and the
        actions for the LAVA job submission.
    """
    submitter = job_definition.job_submitter
    values = job_definition.generate_metadata()

    nfsrootfs = {
        "url": f"{submitter.rootfs_url_prefix}/lava-rootfs.tar.zst",
        "compression": "zstd",
    }

    # Pick the deploy/boot pair matching the submitter's boot method.
    if submitter.boot_method == "fastboot":
        deploys = fastboot_deploy_actions(job_definition, nfsrootfs)
        boot = fastboot_boot_action(submitter)
    else:  # tftp
        deploys = tftp_deploy_actions(job_definition, nfsrootfs)
        boot = tftp_boot_action(submitter)

    values["actions"] = [*deploys, {"boot": boot}]
    values["actions"].extend(test_actions(job_definition))

    return values