ci/lava: Make SSH definition wrap the UART one

Simplify both the UART and SSH job-definition modules so that they share
common building blocks.

- generate_lava_yaml_payload is now a LAVAJobDefinition method, so
  dropped the Strategy pattern between both modules
- if SSH is supported and UART is not enforced, default to SSH
- when SSH is enabled, wrap the last deploy action to run the SSH server
  and rewrite the test actions, which should not change due to the boot
  method
- create a constants module to load environment variables

Signed-off-by: Guilherme Gallo <guilherme.gallo@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25912>
This commit is contained in:
Guilherme Gallo 2023-10-25 23:12:16 -03:00 committed by Marge Bot
parent 76922f8404
commit 654f7f783f
4 changed files with 146 additions and 143 deletions

View file

@ -0,0 +1,14 @@
from os import getenv

# How many attempts should be made when a timeout happens during LAVA device boot.
NUMBER_OF_ATTEMPTS_LAVA_BOOT = int(getenv("LAVA_NUMBER_OF_ATTEMPTS_LAVA_BOOT", 3))

# Supports any integer in [0, 100].
# The scheduler considers the job priority when ordering the queue
# to decide which job should run next.
JOB_PRIORITY = int(getenv("JOB_PRIORITY", 75))

# Use UART over the default SSH mechanism to follow logs.
# Caution: this can lead to device silence in some devices in Mesa CI.
# NOTE: os.getenv returns a *string*, so bool(getenv(...)) would treat ANY
# non-empty value — including "0" and "false" — as True. Parse the common
# falsy spellings explicitly; unset/empty still means False.
FORCE_UART = getenv("LAVA_FORCE_UART", "").lower() not in ("", "0", "false", "no")

View file

@ -1,36 +1,27 @@
import re
from io import StringIO
from os import getenv
from typing import TYPE_CHECKING, Any
from ruamel.yaml import YAML
from ruamel.yaml.scalarstring import LiteralScalarString
from lava.utils.lava_farm import LavaFarm, get_lava_farm
from lava.utils.ssh_job_definition import (
generate_docker_test,
generate_dut_test,
wrap_boot_action,
wrap_final_deploy_action,
)
from lava.utils.uart_job_definition import (
fastboot_boot_action,
fastboot_deploy_actions,
tftp_boot_action,
tftp_deploy_actions,
uart_test_actions,
)
if TYPE_CHECKING:
from lava.lava_job_submitter import LAVAJobSubmitter
# How many attempts should be made when a timeout happen during LAVA device boot.
NUMBER_OF_ATTEMPTS_LAVA_BOOT = int(getenv("LAVA_NUMBER_OF_ATTEMPTS_LAVA_BOOT", 3))
# Supports any integers in [0, 100].
# The scheduler considers the job priority when ordering the queue
# to consider which job should run next.
JOB_PRIORITY = int(getenv("JOB_PRIORITY", 75))
def to_yaml_block(steps_array: list[str], escape_vars=()) -> LiteralScalarString:
    """Join shell steps into a single YAML literal block scalar.

    Blank lines and full-line comments (lines starting with "#") are dropped.
    Any `$NAME` reference whose NAME is listed in *escape_vars* gets a leading
    backslash so the variable is not expanded when the job YAML is rendered.
    """

    def escape_envvar(match):
        return "\\" + match.group(0)

    # Keep only meaningful shell lines.
    filtered_array = [s for s in steps_array if s.strip() and not s.startswith("#")]
    final_str = "\n".join(filtered_array)

    for escape_var in escape_vars:
        # Find env vars and add '\\' before them.
        # The previous pattern rf"\${escape_var}*" had a stray `*`, which made
        # the variable name's last character optional/repeatable (it also
        # matched e.g. "$FO" for "FOO"); match the exact name instead.
        final_str = re.sub(rf"\${escape_var}", escape_envvar, final_str)

    return LiteralScalarString(final_str)
from .constants import FORCE_UART, JOB_PRIORITY, NUMBER_OF_ATTEMPTS_LAVA_BOOT
class LAVAJobDefinition:
@ -43,9 +34,7 @@ class LAVAJobDefinition:
self.job_submitter: "LAVAJobSubmitter" = job_submitter
def has_ssh_support(self) -> bool:
force_uart = bool(getenv("LAVA_FORCE_UART", False))
if force_uart:
if FORCE_UART:
return False
# Only Collabora's farm supports to run docker container as a LAVA actions,
@ -59,22 +48,65 @@ class LAVAJobDefinition:
def generate_lava_yaml_payload(self) -> dict[str, Any]:
    """
    Generates a YAML payload for submitting a LAVA job, based on the provided arguments.

    The strategy here is to use LAVA with a containerized SSH session to follow
    the job output, escaping from dumping data to the UART, which proves to be
    error prone in some devices.

    Args:
        None

    Returns:
        a dictionary containing the values generated by the `generate_metadata` function and the
        actions for the LAVA job submission.
    """
    # NOTE(review): the merged text contained leftovers of the old Strategy
    # dispatch (module-local imports and early returns into the ssh/uart
    # generate_lava_yaml_payload helpers); those are gone — this method now
    # builds the UART actions and, when SSH is supported, wraps them in place.
    args = self.job_submitter
    values = self.generate_metadata()
    nfsrootfs = {
        "url": f"{args.rootfs_url_prefix}/lava-rootfs.tar.zst",
        "compression": "zstd",
    }

    init_stage1_steps = self.init_stage1_steps()
    artifact_download_steps = self.artifact_download_steps()

    deploy_actions = []
    boot_action = []
    # Default to the UART-style test actions; replaced below for SSH jobs.
    test_actions = uart_test_actions(args, init_stage1_steps, artifact_download_steps)

    if args.boot_method == "fastboot":
        deploy_actions = fastboot_deploy_actions(self, nfsrootfs)
        boot_action = fastboot_boot_action(args)
    else:  # tftp
        deploy_actions = tftp_deploy_actions(self, nfsrootfs)
        boot_action = tftp_boot_action(args)

    if self.has_ssh_support():
        # Make the last deploy action start the SSH server on the DUT.
        wrap_final_deploy_action(deploy_actions[-1])

        # SSH jobs use namespaces to differentiate between the DUT and the
        # docker container. Every LAVA action needs an explicit namespace,
        # when we are not using the default one.
        for deploy_action in deploy_actions:
            deploy_action["namespace"] = "dut"

        wrap_boot_action(boot_action)

        test_actions = (
            generate_dut_test(args, init_stage1_steps),
            generate_docker_test(args, artifact_download_steps),
        )

    values["actions"] = [
        *[{"deploy": d} for d in deploy_actions],
        {"boot": boot_action},
        *[{"test": t} for t in test_actions],
    ]

    return values
def generate_lava_job_definition(self) -> str:
"""
Generates a LAVA job definition in YAML format and returns it as a string.
Returns:
a string representation of the job definition generated by analysing job submitter
arguments and environment variables
"""
job_stream = StringIO()
yaml = YAML()
yaml.width = 4096
@ -157,3 +189,24 @@ class LAVAJobDefinition:
]
return download_steps
def init_stage1_steps(self) -> list[str]:
    """Build the first-stage shell steps executed right after boot.

    Reads the job submitter's first-stage init script, dropping comment and
    blank lines, and appends any device-specific firmware download steps.
    """
    # job execution script:
    #   - inline .gitlab-ci/common/init-stage1.sh
    #   - fetch and unpack per-pipeline build artifacts from build job
    #   - fetch and unpack per-job environment from lava-submit.sh
    #   - exec .gitlab-ci/common/init-stage2.sh
    with open(self.job_submitter.first_stage_init, "r") as init_sh:
        steps: list[str] = [
            line.rstrip()
            for line in init_sh
            if not line.startswith("#") and line.rstrip()
        ]

    # We cannot distribute the Adreno 660 shader firmware inside rootfs,
    # since the license isn't bundled inside the repository
    if self.job_submitter.device_type == "sm8350-hdk":
        steps.append(
            "curl -L --retry 4 -f --retry-all-errors --retry-delay 60 "
            "https://github.com/allahjasif1990/hdk888-firmware/raw/main/a660_zap.mbn "
            '-o "/lib/firmware/qcom/sm8350/a660_zap.mbn"'
        )

    return steps

View file

@ -28,14 +28,15 @@ script after sourcing "dut-env-vars.sh" again for the second SSH test case.
"""
from pathlib import Path
from typing import TYPE_CHECKING, Any
import re
from typing import TYPE_CHECKING, Any, Iterable
from .lava_job_definition import NUMBER_OF_ATTEMPTS_LAVA_BOOT, to_yaml_block
from ruamel.yaml.scalarstring import LiteralScalarString
from .constants import NUMBER_OF_ATTEMPTS_LAVA_BOOT
if TYPE_CHECKING:
from ..lava_job_submitter import LAVAJobSubmitter
from .lava_job_definition import LAVAJobDefinition
# Very early SSH server setup. Uses /dut_ready file to flag it is done.
SSH_SERVER_COMMANDS = {
@ -78,12 +79,23 @@ lava_ssh_test_case() {
]
def generate_dut_test(args: "LAVAJobSubmitter") -> dict[str, Any]:
def to_yaml_block(steps_array: Iterable[str], escape_vars=()) -> LiteralScalarString:
    """Join shell steps into a single YAML literal block scalar.

    Blank lines and full-line comments (lines starting with "#") are dropped.
    Any `$NAME` reference whose NAME is listed in *escape_vars* gets a leading
    backslash so the variable is not expanded when the job YAML is rendered.
    """

    def escape_envvar(match):
        return "\\" + match.group(0)

    # Keep only meaningful shell lines.
    filtered_array = [s for s in steps_array if s.strip() and not s.startswith("#")]
    final_str = "\n".join(filtered_array)

    for escape_var in escape_vars:
        # Find env vars and add '\\' before them.
        # The previous pattern rf"\${escape_var}*" had a stray `*`, which made
        # the variable name's last character optional/repeatable (it also
        # matched e.g. "$FO" for "FOO"); match the exact name instead.
        final_str = re.sub(rf"\${escape_var}", escape_envvar, final_str)

    return LiteralScalarString(final_str)
def generate_dut_test(args: "LAVAJobSubmitter", first_stage_steps: list[str]) -> dict[str, Any]:
# Commands executed on DUT.
# Trying to execute the minimal number of commands, because the console data is
# retrieved via UART, which is hang-prone in some devices.
first_stage_steps: list[str] = Path(args.first_stage_init).read_text().splitlines()
return {
"namespace": "dut",
"definitions": [
@ -108,8 +120,9 @@ def generate_dut_test(args: "LAVAJobSubmitter") -> dict[str, Any]:
}
def generate_docker_test(job_definition: "LAVAJobDefinition") -> dict[str, Any]:
args = job_definition.job_submitter
def generate_docker_test(
args: "LAVAJobSubmitter", artifact_download_steps: list[str]
) -> dict[str, Any]:
# This is a growing list of commands that will be executed by the docker
# guest, which will be the SSH client.
docker_commands = []
@ -148,7 +161,7 @@ def generate_docker_test(job_definition: "LAVAJobDefinition") -> dict[str, Any]:
(
"lava_ssh_test_case 'artifact_download' 'bash --' << EOF",
"source /dut-env-vars.sh",
*job_definition.artifact_download_steps(),
*artifact_download_steps,
"EOF",
)
),
@ -163,44 +176,22 @@ def generate_docker_test(job_definition: "LAVAJobDefinition") -> dict[str, Any]:
return init_stages_test
def generate_lava_yaml_payload(job_definition: "LAVAJobDefinition") -> dict[str, Any]:
values = job_definition.generate_metadata()
job_submitter = job_definition.job_submitter
# URLs to our kernel rootfs to boot from, both generated by the base
# container build
deploy = {
def wrap_final_deploy_action(final_deploy_action: dict):
    """Adapt the last UART deploy action so it can host the SSH flow.

    Mutates *final_deploy_action* in place: pins it to the "dut" namespace and
    adds boot-retry and download timeouts. Keys already present in the action
    (kernel, rootfs, ...) are preserved unless overridden by *wrap*.

    NOTE(review): the merged text interleaved the removed old tftp deploy
    dict (kernel/nfsrootfs/"to": "tftp"/...) inside this body; only the new
    wrapper keys belong here.
    """
    wrap = {
        "namespace": "dut",
        "failure_retry": NUMBER_OF_ATTEMPTS_LAVA_BOOT,
        "timeout": {"minutes": 10},
        "timeouts": {"http-download": {"minutes": 2}},
    }

    final_deploy_action.update(wrap)
def wrap_boot_action(boot_action: dict):
    """Adapt a UART boot action for the SSH flow.

    Mutates *boot_action* in place: pins it to the "dut" namespace and merges
    in the early SSH-server setup commands so job logs can be followed over
    SSH instead of the hang-prone UART.

    NOTE(review): the merged text interleaved the removed old boot dict
    (failure_retry/method/commands/prompts) inside this body; only the new
    wrapper keys belong here — the boot method specifics stay with the
    UART-built action. TODO confirm against upstream whether failure_retry
    should also be re-applied here.
    """
    wrap = {
        "namespace": "dut",
        **SSH_SERVER_COMMANDS,
    }

    boot_action.update(wrap)

View file

@ -4,7 +4,7 @@ if TYPE_CHECKING:
from ..lava_job_submitter import LAVAJobSubmitter
from .lava_job_definition import LAVAJobDefinition
from .lava_job_definition import NUMBER_OF_ATTEMPTS_LAVA_BOOT
from .constants import NUMBER_OF_ATTEMPTS_LAVA_BOOT
# Use the same image that is being used for the hardware enablement and health-checks.
# They are pretty small (<100MB) and have all the tools we need to run LAVA, so it is a safe choice.
@ -14,7 +14,9 @@ from .lava_job_definition import NUMBER_OF_ATTEMPTS_LAVA_BOOT
DOCKER_IMAGE = "registry.gitlab.collabora.com/lava/health-check-docker"
def fastboot_deploy_actions(job_definition: "LAVAJobDefinition", nfsrootfs) -> list[dict[str, Any]]:
def fastboot_deploy_actions(
job_definition: "LAVAJobDefinition", nfsrootfs
) -> tuple[dict[str, Any], ...]:
args = job_definition.job_submitter
fastboot_deploy_nfs = {
"timeout": {"minutes": 10},
@ -59,10 +61,10 @@ def fastboot_deploy_actions(job_definition: "LAVAJobDefinition", nfsrootfs) -> l
# container build
job_definition.attach_kernel_and_dtb(fastboot_deploy_prepare["images"])
return [{"deploy": d} for d in (fastboot_deploy_nfs, fastboot_deploy_prepare, fastboot_deploy)]
return (fastboot_deploy_nfs, fastboot_deploy_prepare, fastboot_deploy)
def tftp_deploy_actions(job_definition: "LAVAJobDefinition", nfsrootfs) -> list[dict[str, Any]]:
def tftp_deploy_actions(job_definition: "LAVAJobDefinition", nfsrootfs) -> tuple[dict[str, Any]]:
args = job_definition.job_submitter
tftp_deploy = {
"timeout": {"minutes": 5},
@ -75,35 +77,14 @@ def tftp_deploy_actions(job_definition: "LAVAJobDefinition", nfsrootfs) -> list[
}
job_definition.attach_kernel_and_dtb(tftp_deploy)
return [{"deploy": d} for d in [tftp_deploy]]
return (tftp_deploy,)
def init_stage1_steps(args: "LAVAJobSubmitter") -> list[str]:
    """Build the first-stage shell steps executed right after boot.

    Reads the submitter's first-stage init script, dropping comment and blank
    lines, and appends any device-specific firmware download steps.
    """
    # job execution script:
    #   - inline .gitlab-ci/common/init-stage1.sh
    #   - fetch and unpack per-pipeline build artifacts from build job
    #   - fetch and unpack per-job environment from lava-submit.sh
    #   - exec .gitlab-ci/common/init-stage2.sh
    with open(args.first_stage_init, "r") as init_sh:
        run_steps = [
            line.rstrip()
            for line in init_sh
            if not line.startswith("#") and line.rstrip()
        ]

    # We cannot distribute the Adreno 660 shader firmware inside rootfs,
    # since the license isn't bundled inside the repository
    if args.device_type == "sm8350-hdk":
        run_steps.append(
            "curl -L --retry 4 -f --retry-all-errors --retry-delay 60 "
            "https://github.com/allahjasif1990/hdk888-firmware/raw/main/a660_zap.mbn "
            '-o "/lib/firmware/qcom/sm8350/a660_zap.mbn"'
        )

    return run_steps
def test_actions(job_definition: "LAVAJobDefinition") -> list[dict[str, Any]]:
def uart_test_actions(
args: "LAVAJobSubmitter", init_stage1_steps: list[str], artifact_download_steps: list[str]
) -> tuple[dict[str, Any]]:
# skeleton test definition: only declaring each job as a single 'test'
# since LAVA's test parsing is not useful to us
args = job_definition.job_submitter
run_steps = []
test = {
"timeout": {"minutes": args.job_timeout_min},
@ -128,8 +109,8 @@ def test_actions(job_definition: "LAVAJobDefinition") -> list[dict[str, Any]]:
],
}
run_steps += init_stage1_steps(args)
run_steps += job_definition.artifact_download_steps()
run_steps += init_stage1_steps
run_steps += artifact_download_steps
run_steps += [
f"mkdir -p {args.ci_project_dir}",
@ -143,7 +124,7 @@ def test_actions(job_definition: "LAVAJobDefinition") -> list[dict[str, Any]]:
f"lava-test-case '{args.project_name}_{args.mesa_job_name}' --shell /init-stage2.sh",
]
return [{"test": t} for t in [test]]
return (test,)
def tftp_boot_action(args: "LAVAJobSubmitter") -> dict[str, Any]:
@ -168,39 +149,3 @@ def fastboot_boot_action(args: "LAVAJobSubmitter") -> dict[str, Any]:
}
return fastboot_boot
def generate_lava_yaml_payload(job_definition: "LAVAJobDefinition") -> dict[str, Any]:
    """
    Generates a YAML payload for submitting a LAVA job, based on the provided arguments.

    Args:
        args ("LAVAJobSubmitter"): The `args` parameter is an instance of the `LAVAJobSubmitter`
        class. It contains various properties and methods that are used to configure and submit a
        LAVA job.

    Returns:
        a dictionary containing the values generated by the `generate_metadata` function and the
        actions for the LAVA job submission.
    """
    submitter = job_definition.job_submitter
    values = job_definition.generate_metadata()

    nfsrootfs = {
        "url": f"{submitter.rootfs_url_prefix}/lava-rootfs.tar.zst",
        "compression": "zstd",
    }

    # Pick the deploy/boot pair matching the submitter's boot method.
    if submitter.boot_method == "fastboot":
        deploys = fastboot_deploy_actions(job_definition, nfsrootfs)
        boot = fastboot_boot_action(submitter)
    else:  # tftp
        deploys = tftp_deploy_actions(job_definition, nfsrootfs)
        boot = tftp_boot_action(submitter)

    values["actions"] = [*deploys, {"boot": boot}]
    values["actions"].extend(test_actions(job_definition))

    return values