From bc71f689f1f5224d90f01cb8276b68e56107cb43 Mon Sep 17 00:00:00 2001 From: Pierre-Eric Pelloux-Prayer Date: Fri, 9 Apr 2021 18:04:56 +0200 Subject: [PATCH] amdgpu,radeon: add needs_reset param to ctx_query_reset_status MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The kernel can do different types of recovery (soft recovery, GPU reset). Since they both increase gpu_reset_counter, this will cause all contexts to report AMDGPU_CTX_QUERY2_FLAGS_RESET, which is a bit misleading: if a single context was soft-recovered, the others are fine and we don't need special processing. This commit uses the AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST flag to distinguish between the two kinds of reset; later commits will use this information. Reviewed-by: Marek Olšák Part-of: --- src/gallium/drivers/r600/r600_pipe_common.c | 2 +- src/gallium/drivers/radeon/radeon_winsys.h | 3 ++- src/gallium/drivers/radeonsi/si_pipe.c | 3 ++- src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 13 ++++++++++++- src/gallium/winsys/radeon/drm/radeon_drm_cs.c | 10 ++++++++-- 5 files changed, 25 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/r600/r600_pipe_common.c b/src/gallium/drivers/r600/r600_pipe_common.c index ebb885fa602..1b4f93c4b86 100644 --- a/src/gallium/drivers/r600/r600_pipe_common.c +++ b/src/gallium/drivers/r600/r600_pipe_common.c @@ -487,7 +487,7 @@ static enum pipe_reset_status r600_get_reset_status(struct pipe_context *ctx) { struct r600_common_context *rctx = (struct r600_common_context *)ctx; - return rctx->ws->ctx_query_reset_status(rctx->ctx); + return rctx->ws->ctx_query_reset_status(rctx->ctx, NULL); } static void r600_set_debug_callback(struct pipe_context *ctx, diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h index f4f26e03328..770f30b47e1 100644 --- a/src/gallium/drivers/radeon/radeon_winsys.h +++ b/src/gallium/drivers/radeon/radeon_winsys.h @@ -491,7 +491,8 @@ struct radeon_winsys { /** 
* Query a GPU reset status. */ - enum pipe_reset_status (*ctx_query_reset_status)(struct radeon_winsys_ctx *ctx); + enum pipe_reset_status (*ctx_query_reset_status)(struct radeon_winsys_ctx *ctx, + bool *needs_reset); /** * Create a command stream. diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 35bbbb8125d..5a98b6316dd 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -352,7 +352,8 @@ static enum pipe_reset_status si_get_reset_status(struct pipe_context *ctx) { struct si_context *sctx = (struct si_context *)ctx; struct si_screen *sscreen = sctx->screen; - enum pipe_reset_status status = sctx->ws->ctx_query_reset_status(sctx->ctx); + bool needs_reset; + enum pipe_reset_status status = sctx->ws->ctx_query_reset_status(sctx->ctx, &needs_reset); if (status != PIPE_NO_RESET) { /* Call the gallium frontend to set a no-op API dispatch. */ diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c index 008894043a3..6b9614f3f41 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c @@ -334,11 +334,14 @@ static void amdgpu_ctx_destroy(struct radeon_winsys_ctx *rwctx) } static enum pipe_reset_status -amdgpu_ctx_query_reset_status(struct radeon_winsys_ctx *rwctx) +amdgpu_ctx_query_reset_status(struct radeon_winsys_ctx *rwctx, bool *needs_reset) { struct amdgpu_ctx *ctx = (struct amdgpu_ctx*)rwctx; int r; + if (needs_reset) + *needs_reset = false; + /* Return a failure due to a GPU hang. 
*/ if (ctx->ws->info.drm_minor >= 24) { uint64_t flags; @@ -350,6 +353,8 @@ amdgpu_ctx_query_reset_status(struct radeon_winsys_ctx *rwctx) } if (flags & AMDGPU_CTX_QUERY2_FLAGS_RESET) { + if (needs_reset) + *needs_reset = flags & AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST; if (flags & AMDGPU_CTX_QUERY2_FLAGS_GUILTY) return PIPE_GUILTY_CONTEXT_RESET; else @@ -364,6 +369,8 @@ amdgpu_ctx_query_reset_status(struct radeon_winsys_ctx *rwctx) return PIPE_NO_RESET; } + if (needs_reset) + *needs_reset = true; switch (result) { case AMDGPU_CTX_GUILTY_RESET: return PIPE_GUILTY_CONTEXT_RESET; @@ -376,9 +383,13 @@ amdgpu_ctx_query_reset_status(struct radeon_winsys_ctx *rwctx) /* Return a failure due to a rejected command submission. */ if (ctx->ws->num_total_rejected_cs > ctx->initial_num_total_rejected_cs) { + if (needs_reset) + *needs_reset = true; return ctx->num_rejected_cs ? PIPE_GUILTY_CONTEXT_RESET : PIPE_INNOCENT_CONTEXT_RESET; } + if (needs_reset) + *needs_reset = false; return PIPE_NO_RESET; } diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c index 37b9af2712c..3a3f28dda8a 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c @@ -87,14 +87,20 @@ static void radeon_drm_ctx_destroy(struct radeon_winsys_ctx *ctx) } static enum pipe_reset_status -radeon_drm_ctx_query_reset_status(struct radeon_winsys_ctx *rctx) +radeon_drm_ctx_query_reset_status(struct radeon_winsys_ctx *rctx, bool *needs_reset) { struct radeon_ctx *ctx = (struct radeon_ctx*)rctx; unsigned latest = radeon_drm_get_gpu_reset_counter(ctx->ws); - if (ctx->gpu_reset_counter == latest) + if (ctx->gpu_reset_counter == latest) { + if (needs_reset) + *needs_reset = false; return PIPE_NO_RESET; + } + + if (needs_reset) + *needs_reset = true; ctx->gpu_reset_counter = latest; return PIPE_UNKNOWN_CONTEXT_RESET;