mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 19:40:10 +01:00
panfrost: Abort on faults in SYNC mode
This allows failing fast (optionally still tracing, if set with PAN_MESA_DEBUG=trace) when a GPU fault is introduced. This is better behaviour for both use cases: 1. When debugging a known fault, setting this mode together with trace will stop the driver as soon as a buggy command stream is submitted, and the offending stream will be the last trace file. 2. When running test suites (particularly in CI), setting this mode will detect faults and crash, causing the pipeline to fail fast as opposed to incorrectly marking the run green if the test happens to pass despite the faults and slowdowns. Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10938>
This commit is contained in:
parent
7bc3730b3f
commit
2f4b5a4ebe
4 changed files with 33 additions and 4 deletions
|
|
@ -892,9 +892,11 @@ panfrost_batch_submit_ioctl(struct panfrost_batch *batch,
|
|||
drmSyncobjWait(dev->fd, &out_sync, 1,
|
||||
INT64_MAX, 0, NULL);
|
||||
|
||||
/* Trace gets priority over sync */
|
||||
bool minimal = !(dev->debug & PAN_DBG_TRACE);
|
||||
pandecode_jc(submit.jc, pan_is_bifrost(dev), dev->gpu_id, minimal);
|
||||
if (dev->debug & PAN_DBG_TRACE)
|
||||
pandecode_jc(submit.jc, pan_is_bifrost(dev), dev->gpu_id, false);
|
||||
|
||||
if (dev->debug & PAN_DBG_SYNC)
|
||||
pandecode_abort_on_fault(submit.jc);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
|
|
|||
|
|
@ -62,7 +62,7 @@ static const struct debug_named_value panfrost_debug_options[] = {
|
|||
{"trace", PAN_DBG_TRACE, "Trace the command stream"},
|
||||
{"deqp", PAN_DBG_DEQP, "Hacks for dEQP"},
|
||||
{"dirty", PAN_DBG_DIRTY, "Always re-emit all state"},
|
||||
{"sync", PAN_DBG_SYNC, "Wait for each job's completion and check for any GPU fault"},
|
||||
{"sync", PAN_DBG_SYNC, "Wait for each job's completion and abort on GPU faults"},
|
||||
{"precompile", PAN_DBG_PRECOMPILE, "Precompile shaders for shader-db"},
|
||||
{"nofp16", PAN_DBG_NOFP16, "Disable 16-bit support"},
|
||||
{"gl3", PAN_DBG_GL3, "Enable experimental GL 3.x implementation, up to 3.3"},
|
||||
|
|
|
|||
|
|
@ -29,6 +29,7 @@
|
|||
#include <memory.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdarg.h>
|
||||
#include <errno.h>
|
||||
#include <ctype.h>
|
||||
#include "decode.h"
|
||||
|
||||
|
|
@ -1138,3 +1139,26 @@ pandecode_jc(mali_ptr jc_gpu_va, bool bifrost, unsigned gpu_id, bool minimal)
|
|||
|
||||
pandecode_map_read_write();
|
||||
}
|
||||
|
||||
void
|
||||
pandecode_abort_on_fault(mali_ptr jc_gpu_va)
|
||||
{
|
||||
mali_ptr next_job = 0;
|
||||
|
||||
do {
|
||||
struct pandecode_mapped_memory *mem =
|
||||
pandecode_find_mapped_gpu_mem_containing(jc_gpu_va);
|
||||
|
||||
pan_unpack(PANDECODE_PTR(mem, jc_gpu_va, struct mali_job_header_packed),
|
||||
JOB_HEADER, h);
|
||||
next_job = h.next;
|
||||
|
||||
/* Ensure the job is marked COMPLETE */
|
||||
if (h.exception_status != 0x1) {
|
||||
fprintf(stderr, "Incomplete job or timeout");
|
||||
exit(EIO);
|
||||
}
|
||||
} while ((jc_gpu_va = next_job));
|
||||
|
||||
pandecode_map_read_write();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -55,4 +55,7 @@ void pandecode_inject_free(uint64_t gpu_va, unsigned sz);
|
|||
|
||||
void pandecode_jc(uint64_t jc_gpu_va, bool bifrost, unsigned gpu_id, bool minimal);
|
||||
|
||||
void
|
||||
pandecode_abort_on_fault(uint64_t jc_gpu_va);
|
||||
|
||||
#endif /* __MMAP_TRACE_H__ */
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue