diff --git a/.gitlab-ci/lava/lava_job_submitter.py b/.gitlab-ci/lava/lava_job_submitter.py index fe2d69a2998..6c3b29b22ec 100755 --- a/.gitlab-ci/lava/lava_job_submitter.py +++ b/.gitlab-ci/lava/lava_job_submitter.py @@ -32,8 +32,9 @@ from lava.exceptions import ( MesaCIRetryError, MesaCITimeoutError, ) +from lava.utils import CONSOLE_LOG +from lava.utils import DEFAULT_GITLAB_SECTION_TIMEOUTS as GL_SECTION_TIMEOUTS from lava.utils import ( - CONSOLE_LOG, GitlabSection, LogFollower, LogSectionType, @@ -497,6 +498,13 @@ def treat_mesa_job_name(args): def main(args): proxy = setup_lava_proxy() + # Overwrite the timeout for the testcases with the value offered by the + # user. The testcase running time should be at least 4 times greater than + # the other sections (boot and setup), so we can safely ignore them. + # If LAVA fails to stop the job at this stage, it will fall back to the + # script section timeout with a reasonable delay. + GL_SECTION_TIMEOUTS[LogSectionType.TEST_CASE] = timedelta(minutes=args.job_timeout) + job_definition = generate_lava_yaml(args) if args.dump_yaml: diff --git a/.gitlab-ci/lava/utils/__init__.py b/.gitlab-ci/lava/utils/__init__.py index 4dc824eb4f0..0be88266d35 100644 --- a/.gitlab-ci/lava/utils/__init__.py +++ b/.gitlab-ci/lava/utils/__init__.py @@ -8,4 +8,9 @@ from .log_follower import ( hide_sensitive_data, print_log, ) -from .log_section import LogSection, LogSectionType +from .log_section import ( + DEFAULT_GITLAB_SECTION_TIMEOUTS, + FALLBACK_GITLAB_SECTION_TIMEOUT, + LogSection, + LogSectionType, +) diff --git a/.gitlab-ci/lava/utils/log_section.py b/.gitlab-ci/lava/utils/log_section.py index a722570830a..7df9be9a3f1 100644 --- a/.gitlab-ci/lava/utils/log_section.py +++ b/.gitlab-ci/lava/utils/log_section.py @@ -2,6 +2,7 @@ import re from dataclasses import dataclass from datetime import timedelta from enum import Enum, auto +from os import getenv from typing import Optional, Pattern, Union from lava.utils.gitlab_section import GitlabSection @@ -15,24 +16,34 @@ class LogSectionType(Enum): LAVA_POST_PROCESSING = auto() +# Empirically, successful device boot in LAVA time takes less than 3 +# minutes. +# LAVA itself is configured to attempt thrice to boot the device, +# summing up to 9 minutes. +# It is better to retry the boot than cancel the job and re-submit to avoid +# the enqueue delay. +LAVA_BOOT_TIMEOUT = int(getenv("LAVA_BOOT_TIMEOUT", 9)) + +# Test suite phase is where the initialization happens. +LAVA_TEST_SUITE_TIMEOUT = int(getenv("LAVA_TEST_SUITE_TIMEOUT", 5)) + +# Test cases may take a long time, this script has no right to interrupt +# them. But if the test case takes almost 1h, it will never succeed due to +# Gitlab job timeout. +LAVA_TEST_CASE_TIMEOUT = int(getenv("JOB_TIMEOUT", 60)) + +# LAVA post processing may refer to a test suite teardown, or the +# adjustments to start the next test_case +LAVA_POST_PROCESSING_TIMEOUT = int(getenv("LAVA_POST_PROCESSING_TIMEOUT", 5)) + FALLBACK_GITLAB_SECTION_TIMEOUT = timedelta(minutes=10) DEFAULT_GITLAB_SECTION_TIMEOUTS = { - # Empirically, successful device boot in LAVA time takes less than 3 - # minutes. - # LAVA itself is configured to attempt thrice to boot the device, - # summing up to 9 minutes. - # It is better to retry the boot than cancel the job and re-submit to avoid - # the enqueue delay. - LogSectionType.LAVA_BOOT: timedelta(minutes=9), - # Test suite phase is where the initialization happens. - LogSectionType.TEST_SUITE: timedelta(minutes=5), - # Test cases may take a long time, this script has no right to interrupt - # them. But if the test case takes almost 1h, it will never succeed due to - # Gitlab job timeout. - LogSectionType.TEST_CASE: timedelta(minutes=60), - # LAVA post processing may refer to a test suite teardown, or the - # adjustments to start the next test_case - LogSectionType.LAVA_POST_PROCESSING: timedelta(minutes=5), + LogSectionType.LAVA_BOOT: timedelta(minutes=LAVA_BOOT_TIMEOUT), + LogSectionType.TEST_SUITE: timedelta(minutes=LAVA_TEST_SUITE_TIMEOUT), + LogSectionType.TEST_CASE: timedelta(minutes=LAVA_TEST_CASE_TIMEOUT), + LogSectionType.LAVA_POST_PROCESSING: timedelta( + minutes=LAVA_POST_PROCESSING_TIMEOUT + ), }