radeonsi/gfx12: fix compute register settings for global_atomic_ordered_add

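Dispatches whose compute shader uses the ordered-add atomic now leave
ORDER_MODE and PING_PONG_EN cleared in the dispatch initiator.

This is mainly for future documentation/reference. It's likely radeonsi won't
use this atomic in compute shaders.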

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30063>
This commit is contained in:
Marek Olšák 2024-07-03 16:09:08 -04:00 committed by Marge Bot
parent acb3d5f132
commit 641ec0ae6e
3 changed files with 13 additions and 2 deletions

View file

@@ -936,7 +936,8 @@ static void si_emit_dispatch_packets(struct si_context *sctx, const struct pipe_
* allow launching waves out-of-order. (same as Vulkan)
* Not available in gfx940.
*/
- S_00B800_ORDER_MODE(sctx->gfx_level >= GFX7 &&
+ S_00B800_ORDER_MODE(!sctx->cs_shader_state.program->sel.info.uses_atomic_ordered_add &&
+ sctx->gfx_level >= GFX7 &&
(sctx->family < CHIP_GFX940 || sctx->screen->info.has_graphics)) |
S_00B800_CS_W32_EN(sctx->cs_shader_state.program->shader.wave_size == 32);
@@ -972,7 +973,8 @@ static void si_emit_dispatch_packets(struct si_context *sctx, const struct pipe_
/* Set PING_PONG_EN for every other dispatch.
* Only allowed on a gfx queue, and PARTIAL_TG_EN and USE_THREAD_DIMENSIONS must be 0.
*/
- if (sctx->has_graphics && !partial_block_en) {
+ if (sctx->has_graphics && !partial_block_en &&
+ !sctx->cs_shader_state.program->sel.info.uses_atomic_ordered_add) {
dispatch_initiator |= S_00B800_PING_PONG_EN(sctx->compute_ping_pong_launch);
sctx->compute_ping_pong_launch ^= 1;
}

View file

@@ -497,6 +497,7 @@ struct si_shader_info {
bool uses_variable_block_size;
bool uses_grid_size;
bool uses_tg_size;
bool uses_atomic_ordered_add;
bool writes_position;
bool writes_psize;
bool writes_clipvertex;

View file

@@ -501,6 +501,11 @@ static void scan_instruction(const struct nir_shader *nir, struct si_shader_info
!nir_src_is_const(intr->src[0]))
info->uses_indirect_descriptor = true;
if (nir_intrinsic_has_atomic_op(intr)) {
if (nir_intrinsic_atomic_op(intr) == nir_atomic_op_ordered_add_gfx12_amd)
info->uses_atomic_ordered_add = true;
}
switch (intr->intrinsic) {
case nir_intrinsic_store_ssbo:
if (!nir_src_is_const(intr->src[1]))
@@ -609,6 +614,9 @@ static void scan_instruction(const struct nir_shader *nir, struct si_shader_info
case nir_intrinsic_interp_deref_at_offset:
unreachable("these opcodes should have been lowered");
break;
case nir_intrinsic_ordered_add_loop_gfx12_amd:
info->uses_atomic_ordered_add = true;
break;
default:
break;
}