From 7d5b5da2116cb94d1d8837338303e6cf99d10b40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Wed, 31 Jul 2024 08:12:02 -0400 Subject: [PATCH] radeonsi/gfx12: fix register programming to fix GPU hangs Fixes: f703dfd1bb8 - radeonsi: add gfx12 Reviewed-by: Pierre-Eric Pelloux-Prayer Part-of: (cherry picked from commit 07a0b5e2f22f152b02454c9e17b418072ef05516) --- .pick_status.json | 2 +- src/gallium/drivers/radeonsi/si_compute.c | 2 ++ src/gallium/drivers/radeonsi/si_state.c | 5 +++-- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/.pick_status.json b/.pick_status.json index 5e90dcc4a51..87799e88875 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -634,7 +634,7 @@ "description": "radeonsi/gfx12: fix register programming to fix GPU hangs", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": "f703dfd1bb8c22b6791dd95c7de270e176452b4b", "notes": null diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index 98bfa4ecaa7..82449934403 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -1036,6 +1036,8 @@ static void si_emit_dispatch_packets(struct si_context *sctx, const struct pipe_ * - Only supported by the gfx queue. * - Max 16 workgroups per SE can be launched, max 4 in each dimension. * - PARTIAL_TG_EN, USE_THREAD_DIMENSIONS, and ORDERED_APPEND_ENBL must be 0. + * - COMPUTE_START_X/Y are in units of 2D subgrids, not workgroups + * (program COMPUTE_START_X to start_x >> log_x, COMPUTE_START_Y to start_y >> log_y). */ if (sctx->has_graphics && !partial_block_en && (info->indirect || info->grid[1] >= 4) && MIN2(info->block[0], info->block[1]) >= 4 && diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 4d702b6466b..6bb1037a79b 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -5777,7 +5777,7 @@ static void gfx12_init_gfx_preamble_state(struct si_context *sctx) /* Context registers */ ac_pm4_set_reg(&pm4->base, R_028000_DB_RENDER_CONTROL, 0); - ac_pm4_set_reg(&pm4->base, R_02800C_DB_RENDER_OVERRIDE, S_02800C_FORCE_STENCIL_VALID(1)); + ac_pm4_set_reg(&pm4->base, R_02800C_DB_RENDER_OVERRIDE, S_02800C_FORCE_STENCIL_READ(1)); ac_pm4_set_reg(&pm4->base, R_028040_DB_GL1_INTERFACE_CONTROL, 0); ac_pm4_set_reg(&pm4->base, R_028048_DB_MEM_TEMPORAL, S_028048_Z_TEMPORAL_READ(zs_read_temporal_hint) | @@ -5886,7 +5886,8 @@ static void gfx12_init_gfx_preamble_state(struct si_context *sctx) ac_pm4_set_reg(&pm4->base, R_028C50_PA_SC_NGG_MODE_CNTL, S_028C50_MAX_DEALLOCS_IN_WAVE(64)); ac_pm4_set_reg(&pm4->base, R_028C54_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL, S_028C54_NULL_SQUAD_AA_MASK_ENABLE(1)); - ac_pm4_set_reg(&pm4->base, R_028C58_PA_SC_SHADER_CONTROL, 0); + ac_pm4_set_reg(&pm4->base, R_028C58_PA_SC_SHADER_CONTROL, + S_028C58_REALIGN_DQUADS_AFTER_N_WAVES(1)); for (unsigned i = 0; i < 8; i++) { ac_pm4_set_reg(&pm4->base, R_028F00_CB_MEM0_INFO + i * 4,