diff --git a/.gitlab-ci/lava/lava_job_submitter.py b/.gitlab-ci/lava/lava_job_submitter.py index fe9988f29fc..cda92d44f72 100755 --- a/.gitlab-ci/lava/lava_job_submitter.py +++ b/.gitlab-ci/lava/lava_job_submitter.py @@ -225,12 +225,12 @@ def wait_for_job_get_started(job, attempt_no): def bootstrap_log_follower(main_test_case, timestamp_relative_to) -> LogFollower: + deploy_timeout = GL_SECTION_TIMEOUTS[LogSectionType.LAVA_DEPLOY] start_section = GitlabSection( - id="dut_boot", - header="Booting hardware device", - type=LogSectionType.LAVA_BOOT, + id="dut_deploy", + header=f"Running LAVA deploy action - Timeout: {deploy_timeout}", + type=LogSectionType.LAVA_DEPLOY, start_collapsed=True, - suppress_end=True, # init-stage2 prints the end for us timestamp_relative_to=timestamp_relative_to, ) print(start_section.start()) diff --git a/.gitlab-ci/lava/utils/lava_job_definition.py b/.gitlab-ci/lava/utils/lava_job_definition.py index dfe74b1bf88..c8d61d22423 100644 --- a/.gitlab-ci/lava/utils/lava_job_definition.py +++ b/.gitlab-ci/lava/utils/lava_job_definition.py @@ -6,6 +6,7 @@ from ruamel.yaml import YAML from os import getenv from lava.utils.lava_farm import get_lava_farm +from lava.utils.log_section import LAVA_DEPLOY_TIMEOUT from lava.utils.ssh_job_definition import ( generate_docker_test, generate_dut_test, @@ -25,7 +26,7 @@ from lava.utils.uart_job_definition import ( if TYPE_CHECKING: from lava.lava_job_submitter import LAVAJobSubmitter -from .constants import FORCE_UART, JOB_PRIORITY, NUMBER_OF_ATTEMPTS_LAVA_BOOT +from .constants import FORCE_UART, JOB_PRIORITY class LAVAJobDefinition: @@ -160,8 +161,7 @@ class LAVAJobDefinition: "depthcharge-action": { # This timeout englobes the entire depthcharge timing, # including retries - "minutes": 5 - * NUMBER_OF_ATTEMPTS_LAVA_BOOT, + "minutes": LAVA_DEPLOY_TIMEOUT }, "uboot-action": { # For rockchip DUTs, U-Boot auto-login action downloads the kernel and diff --git a/.gitlab-ci/lava/utils/log_section.py 
b/.gitlab-ci/lava/utils/log_section.py index 6ce607ee478..06157d4eb9a 100644 --- a/.gitlab-ci/lava/utils/log_section.py +++ b/.gitlab-ci/lava/utils/log_section.py @@ -12,6 +12,7 @@ class LogSectionType(Enum): UNKNOWN = auto() LAVA_SUBMIT = auto() LAVA_QUEUE = auto() + LAVA_DEPLOY = auto() LAVA_BOOT = auto() TEST_DUT_SUITE = auto() TEST_SUITE = auto() @@ -27,13 +28,17 @@ LAVA_SUBMIT_TIMEOUT = int(getenv("LAVA_SUBMIT_TIMEOUT", 5)) # aggressively for pre-merge. LAVA_QUEUE_TIMEOUT = int(getenv("LAVA_QUEUE_TIMEOUT", 60)) -# Empirically, successful device boot in LAVA time takes less than 3 -# minutes. -# LAVA itself is configured to attempt thrice to boot the device, -# summing up to 9 minutes. +# How long should we wait for a device to be deployed? +# The deploy involves downloading and decompressing the kernel, modules, dtb and the overlays. +# We should retry, to overcome network issues. +LAVA_DEPLOY_TIMEOUT = int(getenv("LAVA_DEPLOY_TIMEOUT", 5)) + +# Empirically, successful device deploy+boot in LAVA time takes less than 3 minutes. +# LAVA itself is configured to attempt `failure_retry` times (NUMBER_OF_ATTEMPTS_LAVA_BOOT) to boot +# the device. # It is better to retry the boot than cancel the job and re-submit to avoid # the enqueue delay. 
-LAVA_BOOT_TIMEOUT = int(getenv("LAVA_BOOT_TIMEOUT", 9)) +LAVA_BOOT_TIMEOUT = int(getenv("LAVA_BOOT_TIMEOUT", 5)) # Estimated overhead in minutes for a job from GitLab to reach the test phase, # including LAVA scheduling and boot duration @@ -59,6 +64,7 @@ FALLBACK_GITLAB_SECTION_TIMEOUT = timedelta(minutes=10) DEFAULT_GITLAB_SECTION_TIMEOUTS = { LogSectionType.LAVA_SUBMIT: timedelta(minutes=LAVA_SUBMIT_TIMEOUT), LogSectionType.LAVA_QUEUE: timedelta(minutes=LAVA_QUEUE_TIMEOUT), + LogSectionType.LAVA_DEPLOY: timedelta(minutes=LAVA_DEPLOY_TIMEOUT), LogSectionType.LAVA_BOOT: timedelta(minutes=LAVA_BOOT_TIMEOUT), LogSectionType.TEST_DUT_SUITE: timedelta(minutes=LAVA_TEST_DUT_SUITE_TIMEOUT), LogSectionType.TEST_SUITE: timedelta(minutes=LAVA_TEST_SUITE_TIMEOUT), @@ -102,6 +108,14 @@ class LogSection: LOG_SECTIONS = ( + LogSection( + regex=re.compile(r"start: 2 (\S+) \(timeout ([^)]+)\).*"), + levels=("info",), + section_id="{}", + section_header="Booting via {}", + section_type=LogSectionType.LAVA_BOOT, + collapsed=True, + ), LogSection( regex=re.compile(r"? ([^>]*)"), levels=("target", "debug"),