ci/lava: Detect r8152 issue during boot phase

This week we found that the r8152 issue can also happen during the boot
phase. Make the necessary adjustments to detect it there as well.

https://gitlab.freedesktop.org/vigneshraman/linux/-/jobs/53651940

Notes:
- The kernel messages during the boot phase are being redirected to the
feedback messages due to the namespaces from the SSH job.
- Update the unit tests:
  - Add boot phase detection
  - Correctly set the boot phase when mocking LogFollower

Reported-by: Vignesh Raman <vignesh.raman@collabora.com>
Signed-off-by: Guilherme Gallo <guilherme.gallo@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27081>
This commit is contained in:
Guilherme Gallo 2024-01-12 15:24:04 -03:00 committed by Marge Bot
parent bfd50f72eb
commit de2c847c24
3 changed files with 57 additions and 28 deletions

View file

@ -15,3 +15,7 @@ FORCE_UART = bool(getenv("LAVA_FORCE_UART", False))
# How many times the r8152 error may happen to consider it a known issue.
KNOWN_ISSUE_R8152_MAX_CONSECUTIVE_COUNTER: int = 10
# This is considered noise, since LAVA produces this log after receiving a package of feedback
# messages.
LOG_DEBUG_FEEDBACK_NOISE = "Listened to connection for namespace 'dut' done"

View file

@ -9,7 +9,7 @@ if TYPE_CHECKING:
from lava.exceptions import MesaCIKnownIssueException
from lava.utils.console_format import CONSOLE_LOG
from lava.utils.constants import KNOWN_ISSUE_R8152_MAX_CONSECUTIVE_COUNTER
from lava.utils.constants import KNOWN_ISSUE_R8152_MAX_CONSECUTIVE_COUNTER, LOG_DEBUG_FEEDBACK_NOISE
from lava.utils.log_section import LogSectionType
@ -20,12 +20,15 @@ class LAVALogHints:
def detect_failure(self, new_lines: list[dict[str, Any]]):
for line in new_lines:
if line["msg"] == LOG_DEBUG_FEEDBACK_NOISE:
continue
self.detect_r8152_issue(line)
def detect_r8152_issue(self, line):
if (
self.log_follower.phase == LogSectionType.TEST_CASE and line["lvl"] == "target"
):
if self.log_follower.phase in (
LogSectionType.LAVA_BOOT,
LogSectionType.TEST_CASE,
) and line["lvl"] in ("feedback", "target"):
if re.search(r"r8152 \S+ eth0: Tx status -71", line["msg"]):
self.r8152_issue_consecutive_counter += 1
return

View file

@ -18,7 +18,13 @@ from lava.utils import (
)
from lava.utils.constants import KNOWN_ISSUE_R8152_MAX_CONSECUTIVE_COUNTER
from ..lava.helpers import create_lava_yaml_msg, does_not_raise, lava_yaml, yaml_dump
from ..lava.helpers import (
create_lava_yaml_msg,
does_not_raise,
lava_yaml,
mock_lava_signal,
yaml_dump,
)
GITLAB_SECTION_SCENARIOS = {
"start collapsed": (
@ -312,47 +318,56 @@ def test_gitlab_section_id(case_name, expected_id):
assert gl.id == expected_id
A618_NETWORK_ISSUE_LOGS = [
*(KNOWN_ISSUE_R8152_MAX_CONSECUTIVE_COUNTER*[create_lava_yaml_msg(
msg="[ 1733.599402] r8152 2-1.3:1.0 eth0: Tx status -71", lvl="target"
)]),
create_lava_yaml_msg(
msg="[ 1733.604506] nfs: server 192.168.201.1 not responding, still trying",
lvl="target",
),
]
TEST_PHASE_LAVA_SIGNAL = create_lava_yaml_msg(
msg="Received signal: <STARTTC> mesa-ci_a618_vk", lvl="debug"
)
def a618_network_issue_logs(level: str = "target") -> list:
net_error = create_lava_yaml_msg(
msg="[ 1733.599402] r8152 2-1.3:1.0 eth0: Tx status -71", lvl=level)
nfs_error = create_lava_yaml_msg(
msg="[ 1733.604506] nfs: server 192.168.201.1 not responding, still trying",
lvl=level,
)
return [
*(KNOWN_ISSUE_R8152_MAX_CONSECUTIVE_COUNTER*[net_error]),
nfs_error
]
TEST_PHASE_LAVA_SIGNAL = mock_lava_signal(LogSectionType.TEST_CASE)
A618_NET_ISSUE_BOOT = a618_network_issue_logs(level="feedback")
A618_NET_ISSUE_TEST = [TEST_PHASE_LAVA_SIGNAL, *a618_network_issue_logs(level="target")]
A618_NETWORK_ISSUE_SCENARIOS = {
"Pass - R8152 kmsg during boot": (A618_NETWORK_ISSUE_LOGS, does_not_raise()),
"Fail - R8152 kmsg during boot phase": (
A618_NET_ISSUE_BOOT,
pytest.raises(MesaCIKnownIssueException),
),
"Fail - R8152 kmsg during test phase": (
[TEST_PHASE_LAVA_SIGNAL, *A618_NETWORK_ISSUE_LOGS],
A618_NET_ISSUE_TEST,
pytest.raises(MesaCIKnownIssueException),
),
"Pass - Partial (1) R8152 kmsg during test phase": (
[TEST_PHASE_LAVA_SIGNAL, A618_NETWORK_ISSUE_LOGS[0]],
A618_NET_ISSUE_TEST[:1],
does_not_raise(),
),
"Pass - Partial (2) R8152 kmsg during test phase": (
[TEST_PHASE_LAVA_SIGNAL, A618_NETWORK_ISSUE_LOGS[1]],
A618_NET_ISSUE_TEST[:2],
does_not_raise(),
),
"Pass - Partial subsequent (3) R8152 kmsg during test phase": (
"Pass - Partial (3) subsequent R8152 kmsg during test phase": (
[
TEST_PHASE_LAVA_SIGNAL,
A618_NETWORK_ISSUE_LOGS[0],
A618_NETWORK_ISSUE_LOGS[0],
A618_NET_ISSUE_TEST[1],
A618_NET_ISSUE_TEST[1],
],
does_not_raise(),
),
"Pass - Partial subsequent (4) R8152 kmsg during test phase": (
"Pass - Partial (4) subsequent nfs kmsg during test phase": (
[
TEST_PHASE_LAVA_SIGNAL,
A618_NETWORK_ISSUE_LOGS[1],
A618_NETWORK_ISSUE_LOGS[1],
A618_NET_ISSUE_TEST[-1],
A618_NET_ISSUE_TEST[-1],
],
does_not_raise(),
),
@ -365,6 +380,13 @@ A618_NETWORK_ISSUE_SCENARIOS = {
ids=A618_NETWORK_ISSUE_SCENARIOS.keys(),
)
def test_detect_failure(messages, expectation):
lf = LogFollower()
boot_section = GitlabSection(
id="lava_boot",
header="LAVA boot",
type=LogSectionType.LAVA_BOOT,
start_collapsed=True,
)
boot_section.start()
lf = LogFollower(starting_section=boot_section)
with expectation:
lf.feed(messages)