aco: Fix 1D->2D dispatch conversion on <gfx9

out_args->scratch_offset and in_wg_id_x will alias on <gfx9.
To avoid the conversion code reading a garbage WG ID, move the
scratch/ring offset writing to the very end.

Fixes: 1e354172 ("radv,aco: Convert 1D ray launches to 2D")
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30707>
This commit is contained in:
Friedrich Vock 2024-08-17 10:26:08 +02:00 committed by Marge Bot
parent 563ec4754a
commit bd525f4282

View file

@ -12598,6 +12598,11 @@ select_rt_prolog(Program* program, ac_shader_config* config,
assert(in_stack_base == out_launch_size_z);
assert(in_local_ids[0] == out_launch_ids[0]);
/* <gfx9 reads in_scratch_offset at the end of the prolog to write out the scratch_offset
* arg. Make sure no other outputs have overwritten it by then.
*/
assert(in_scratch_offset.reg() >= out_args->num_sgprs_used);
/* load raygen sbt */
bld.smem(aco_opcode::s_load_dwordx2, Definition(tmp_raygen_sbt, s2), Operand(in_sbt_desc, s2),
Operand::c32(0u));
@ -12649,15 +12654,6 @@ select_rt_prolog(Program* program, ac_shader_config* config,
bld.vop3(aco_opcode::v_mad_u32_u24, Definition(out_launch_ids[0], v1), Operand(in_wg_id_x, s1),
Operand::c32(8), Operand(in_local_ids[0], v1));
if (options->gfx_level < GFX9) {
/* write scratch/ring offsets to outputs, if needed */
bld.sop1(aco_opcode::s_mov_b32,
Definition(get_arg_reg(out_args, out_args->scratch_offset), s1),
Operand(in_scratch_offset, s1));
bld.sop1(aco_opcode::s_mov_b64, Definition(get_arg_reg(out_args, out_args->ring_offsets), s2),
Operand(tmp_ring_offsets, s2));
}
/* calculate shader record ptr: SBT + RADV_RT_HANDLE_SIZE */
if (options->gfx_level < GFX9) {
bld.vop2_e64(aco_opcode::v_add_co_u32, Definition(out_record_ptr, v1), Definition(vcc, s2),
@ -12699,6 +12695,15 @@ select_rt_prolog(Program* program, ac_shader_config* config,
bld.vop2(aco_opcode::v_cndmask_b32, Definition(out_launch_ids[1], v1), Operand::zero(),
Operand(out_launch_ids[1], v1), Operand(vcc, bld.lm));
if (options->gfx_level < GFX9) {
/* write scratch/ring offsets to outputs, if needed */
bld.sop1(aco_opcode::s_mov_b32,
Definition(get_arg_reg(out_args, out_args->scratch_offset), s1),
Operand(in_scratch_offset, s1));
bld.sop1(aco_opcode::s_mov_b64, Definition(get_arg_reg(out_args, out_args->ring_offsets), s2),
Operand(tmp_ring_offsets, s2));
}
/* jump to raygen */
bld.sop1(aco_opcode::s_setpc_b64, Operand(out_uniform_shader_addr, s2));