mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-03 05:00:09 +01:00
ci/lava: Split boot action into deploy and boot
The boot action was wrapping the deploy action, which could cause timeout misalignment. For example, the boot `GitlabSection` timeout was shorter than the deploy timeout in LAVA, leading to cases where LAVA jobs were canceled during their own retry mechanism. By splitting these actions, we can align the timeouts properly, preventing interference and unnecessary job cancellations. Signed-off-by: Guilherme Gallo <guilherme.gallo@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33906>
This commit is contained in:
parent
d85af615f9
commit
a33c0e1867
3 changed files with 26 additions and 12 deletions
|
|
@ -225,12 +225,12 @@ def wait_for_job_get_started(job, attempt_no):
|
|||
|
||||
|
||||
def bootstrap_log_follower(main_test_case, timestamp_relative_to) -> LogFollower:
|
||||
deploy_timeout = GL_SECTION_TIMEOUTS[LogSectionType.LAVA_DEPLOY]
|
||||
start_section = GitlabSection(
|
||||
id="dut_boot",
|
||||
header="Booting hardware device",
|
||||
type=LogSectionType.LAVA_BOOT,
|
||||
id="dut_deploy",
|
||||
header=f"Running LAVA deploy action - Timeout: {deploy_timeout}",
|
||||
type=LogSectionType.LAVA_DEPLOY,
|
||||
start_collapsed=True,
|
||||
suppress_end=True, # init-stage2 prints the end for us
|
||||
timestamp_relative_to=timestamp_relative_to,
|
||||
)
|
||||
print(start_section.start())
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ from ruamel.yaml import YAML
|
|||
from os import getenv
|
||||
|
||||
from lava.utils.lava_farm import get_lava_farm
|
||||
from lava.utils.log_section import LAVA_DEPLOY_TIMEOUT
|
||||
from lava.utils.ssh_job_definition import (
|
||||
generate_docker_test,
|
||||
generate_dut_test,
|
||||
|
|
@ -25,7 +26,7 @@ from lava.utils.uart_job_definition import (
|
|||
if TYPE_CHECKING:
|
||||
from lava.lava_job_submitter import LAVAJobSubmitter
|
||||
|
||||
from .constants import FORCE_UART, JOB_PRIORITY, NUMBER_OF_ATTEMPTS_LAVA_BOOT
|
||||
from .constants import FORCE_UART, JOB_PRIORITY
|
||||
|
||||
|
||||
class LAVAJobDefinition:
|
||||
|
|
@ -160,8 +161,7 @@ class LAVAJobDefinition:
|
|||
"depthcharge-action": {
|
||||
# This timeout encompasses the entire depthcharge timing,
|
||||
# including retries
|
||||
"minutes": 5
|
||||
* NUMBER_OF_ATTEMPTS_LAVA_BOOT,
|
||||
"minutes": LAVA_DEPLOY_TIMEOUT
|
||||
},
|
||||
"uboot-action": {
|
||||
# For rockchip DUTs, U-Boot auto-login action downloads the kernel and
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ class LogSectionType(Enum):
|
|||
UNKNOWN = auto()
|
||||
LAVA_SUBMIT = auto()
|
||||
LAVA_QUEUE = auto()
|
||||
LAVA_DEPLOY = auto()
|
||||
LAVA_BOOT = auto()
|
||||
TEST_DUT_SUITE = auto()
|
||||
TEST_SUITE = auto()
|
||||
|
|
@ -27,13 +28,17 @@ LAVA_SUBMIT_TIMEOUT = int(getenv("LAVA_SUBMIT_TIMEOUT", 5))
|
|||
# aggressively for pre-merge.
|
||||
LAVA_QUEUE_TIMEOUT = int(getenv("LAVA_QUEUE_TIMEOUT", 60))
|
||||
|
||||
# Empirically, successful device boot in LAVA time takes less than 3
|
||||
# minutes.
|
||||
# LAVA itself is configured to attempt thrice to boot the device,
|
||||
# summing up to 9 minutes.
|
||||
# How long should we wait for a device to be deployed?
|
||||
# The deploy involves downloading and decompressing the kernel, modules, dtb and the overlays.
|
||||
# We should retry, to overcome network issues.
|
||||
LAVA_DEPLOY_TIMEOUT = int(getenv("LAVA_DEPLOY_TIMEOUT", 5))
|
||||
|
||||
# Empirically, a successful device deploy+boot in LAVA takes less than 3 minutes.
|
||||
# LAVA itself is configured to attempt `failure_retry` times (NUMBER_OF_ATTEMPTS_LAVA_BOOT) to boot
|
||||
# the device.
|
||||
# It is better to retry the boot than cancel the job and re-submit to avoid
|
||||
# the enqueue delay.
|
||||
LAVA_BOOT_TIMEOUT = int(getenv("LAVA_BOOT_TIMEOUT", 9))
|
||||
LAVA_BOOT_TIMEOUT = int(getenv("LAVA_BOOT_TIMEOUT", 5))
|
||||
|
||||
# Estimated overhead in minutes for a job from GitLab to reach the test phase,
|
||||
# including LAVA scheduling and boot duration
|
||||
|
|
@ -59,6 +64,7 @@ FALLBACK_GITLAB_SECTION_TIMEOUT = timedelta(minutes=10)
|
|||
DEFAULT_GITLAB_SECTION_TIMEOUTS = {
|
||||
LogSectionType.LAVA_SUBMIT: timedelta(minutes=LAVA_SUBMIT_TIMEOUT),
|
||||
LogSectionType.LAVA_QUEUE: timedelta(minutes=LAVA_QUEUE_TIMEOUT),
|
||||
LogSectionType.LAVA_DEPLOY: timedelta(minutes=LAVA_DEPLOY_TIMEOUT),
|
||||
LogSectionType.LAVA_BOOT: timedelta(minutes=LAVA_BOOT_TIMEOUT),
|
||||
LogSectionType.TEST_DUT_SUITE: timedelta(minutes=LAVA_TEST_DUT_SUITE_TIMEOUT),
|
||||
LogSectionType.TEST_SUITE: timedelta(minutes=LAVA_TEST_SUITE_TIMEOUT),
|
||||
|
|
@ -102,6 +108,14 @@ class LogSection:
|
|||
|
||||
|
||||
LOG_SECTIONS = (
|
||||
LogSection(
|
||||
regex=re.compile(r"start: 2 (\S+) \(timeout ([^)]+)\).*"),
|
||||
levels=("info"),
|
||||
section_id="{}",
|
||||
section_header="Booting via {}",
|
||||
section_type=LogSectionType.LAVA_BOOT,
|
||||
collapsed=True,
|
||||
),
|
||||
LogSection(
|
||||
regex=re.compile(r"<?STARTTC>? ([^>]*)"),
|
||||
levels=("target", "debug"),
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue