iris: track reset signalling instead of replacing the context

Instead of creating a new context when a reset is encountered, we now
track whether the reset was reported back to the application. According
to the spec, the application should poll the reset status and only
recreate the context once NO_ERROR is encountered after a *RESET*.

From the EXT_robustness spec:

5. How should the application react to a reset context event?

RESOLVED: For this extension, the application is expected to query
the reset status until NO_ERROR is returned. If a reset is encountered,
at least one *RESET* status will be returned. Once NO_ERROR is again
encountered, the application can safely destroy the old context and
create a new one.
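
In practice the resolution above maps to a polling loop on the client
side. A minimal sketch, assuming a GLES context with EXT_robustness
enabled (and the entry point already loaded via eglGetProcAddress);
recreate_gl_context() is a hypothetical stand-in for the application's
own context teardown/re-creation code:

#include <GLES2/gl2.h>
#include <GLES2/gl2ext.h>

void check_for_reset(void)
{
   /* A reset is first reported as one of the *RESET* statuses. */
   if (glGetGraphicsResetStatusEXT() == GL_NO_ERROR)
      return; /* no reset pending */

   /* Keep polling: only once NO_ERROR is returned again is it safe
    * to destroy the old context.
    */
   while (glGetGraphicsResetStatusEXT() != GL_NO_ERROR)
      ; /* optionally yield/sleep between polls */

   recreate_gl_context(); /* hypothetical helper: destroy + recreate */
}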

Signed-off-by: Rohan Garg <rohan.garg@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24224>

src/gallium/drivers/iris/iris_batch.c

@@ -697,18 +697,19 @@ iris_batch_check_for_reset(struct iris_batch *batch)
 {
    struct iris_screen *screen = batch->screen;
    struct iris_bufmgr *bufmgr = screen->bufmgr;
+   struct iris_context *ice = batch->ice;
    const struct iris_kmd_backend *backend;
-   enum pipe_reset_status status;
+   enum pipe_reset_status status = PIPE_NO_RESET;
+
+   /* Banned context was already signalled to application */
+   if (ice->context_reset_signaled)
+      return status;
 
    backend = iris_bufmgr_get_kernel_driver_backend(bufmgr);
    status = backend->batch_check_for_reset(batch);
-   if (status != PIPE_NO_RESET) {
-      /* Our context is likely banned, or at least in an unknown state.
-       * Throw it away and start with a fresh context.  Ideally this may
-       * catch the problem before our next execbuf fails with -EIO.
-       */
-      replace_kernel_ctx(batch);
-   }
+   if (status != PIPE_NO_RESET)
+      ice->context_reset_signaled = true;
 
    return status;
 }
@@ -956,6 +957,10 @@ _iris_batch_flush(struct iris_batch *batch, const char *file, int line)
     */
    if (ret && context_or_engine_was_banned(bufmgr, ret)) {
       enum pipe_reset_status status = iris_batch_check_for_reset(batch);
+
+      if (status != PIPE_NO_RESET || ice->context_reset_signaled)
+         replace_kernel_ctx(batch);
+
       if (batch->reset->reset) {
          /* Tell gallium frontends the device is lost and it was our fault. */
          batch->reset->reset(batch->reset->data, status);

src/gallium/drivers/iris/iris_pipe.c

@@ -99,9 +99,6 @@ iris_get_device_reset_status(struct pipe_context *ctx)
     * worst status (if one was guilty, proclaim guilt).
     */
    iris_foreach_batch(ice, batch) {
-      /* This will also recreate the hardware contexts as necessary, so any
-       * future queries will show no resets.  We only want to report once.
-       */
       enum pipe_reset_status batch_reset =
          iris_batch_check_for_reset(batch);

src/gallium/drivers/iris/iris_context.h

@@ -637,6 +637,9 @@ struct iris_context {
    /** Whether the context protected (through EGL_EXT_protected_content) */
    bool protected;
 
+   /** Whether a banned context was already signalled */
+   bool context_reset_signaled;
+
    /** A device reset status callback for notifying that the GPU is hosed. */
    struct pipe_device_reset_callback reset;
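
For context, the reset callback the driver invokes in _iris_batch_flush
above is the standard gallium one. A minimal sketch of how a frontend
might install it, assuming the gallium headers; notify_app() is a
hypothetical placeholder:

#include "pipe/p_context.h"
#include "pipe/p_state.h"

static void notify_app(void *data, enum pipe_reset_status status)
{
   /* Hypothetical: surface the loss to the API layer, e.g. so a later
    * reset-status query returns a *RESET* value to the application.
    */
}

static void install_reset_callback(struct pipe_context *pctx, void *frontend)
{
   const struct pipe_device_reset_callback cb = {
      .reset = notify_app,
      .data = frontend,
   };
   pctx->set_device_reset_callback(pctx, &cb);
}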